function [net, currSample_video]=process_epoch_STGConvNet(para, net, currSample_video)
% PROCESS_EPOCH_STGCONVNET  Run one learning epoch and update every layer of net.
%
%   [net, currSample_video] = process_epoch_STGConvNet(para, net, currSample_video)
%
%   Steps performed:
%     1. compute_stat_STGConvNet is evaluated on para.imageSet (observed data).
%     2. currSample_video is moved to the GPU and passed through
%        sampling_sequence_by_Langevin with para.L and para.stepsize.
%     3. compute_stat_STGConvNet is evaluated on the resulting sample, which
%        is then gathered back from the GPU.
%     4. For each layer l, the difference between observed and (rescaled)
%        synthesized statistics is normalized, clipped to para.max_gradient,
%        and applied to the layer's filters and bias through a momentum
%        term with weight decay.
%
%   Fields of para read here: imageSet, L, stepsize, sz, max_gradient,
%   momentum(1:2) and decay(1:2) (index 1 for filters, index 2 for bias).
%
%   Fields of net.layers{l} read and written here: filters, bias,
%   momentum_filter, momentum_bias; lambdaLearningRate is read only.
%
%   The statistics returned by compute_stat_STGConvNet are indexed with l+1
%   for layer l and must provide stat_weights, stat_bias, size_row,
%   size_col and size_time.
%
%   Returns the updated net and the new sample (gathered from the GPU).

    %% statistics of the observed sequence
    tic
    obsStats = compute_stat_STGConvNet(net, para.imageSet);
    disp(['compute the observed statistics: ' num2str(toc)]);

    %% Langevin sampling, started from the current sample placed on the GPU
    currSample_video = sampling_sequence_by_Langevin(net, para.L, para.stepsize, gpuArray(currSample_video));

    %% statistics of the synthesized sequence
    tic
    synStats = compute_stat_STGConvNet(net, currSample_video);
    disp(['compute the synthesized statistics: ' num2str(toc)]);
    currSample_video = gather(currSample_video);

    nLayers = numel(net.layers);
    for l = 1:nLayers

        obs = obsStats(l+1);
        syn = synStats(l+1);

        %% raw gradients: observed minus rescaled synthesized statistics
        % NOTE(review): presumably para.sz / size(.,3) matches the sample
        % count of the synthesized set to the observed one - confirm what
        % para.sz and dimension 3 of the sample represent.
        synScale = para.sz / size(currSample_video,3);
        gradW = obs.stat_weights - syn.stat_weights * synScale;
        gradB = obs.stat_bias - syn.stat_bias * synScale;

        % The label says SSD_weight, but the value printed is the mean
        % absolute value of the un-normalized weight gradient.
        disp(['Layer ' num2str(l) ': SSD_weight: ' num2str(mean(abs(gradW(:))))]);

        %% normalize by the size_row*size_col*size_time product, then clip
        nActivations = obs.size_row * obs.size_col * obs.size_time;

        gradW = gradW / nActivations;
        peakW = max(abs(gradW(:)));
        if peakW > para.max_gradient
            % rescale so the largest magnitude equals para.max_gradient
            gradW = gradW / peakW * para.max_gradient;
        end

        gradB = gradB / nActivations;
        peakB = max(abs(gradB(:)));
        if peakB > para.max_gradient
            gradB = gradB / peakB * para.max_gradient;
        end

        %% momentum update with weight decay
        % Earlier variants kept out of the code path: a plain step without
        % momentum, and a per-filter learning rate divided by the observed
        % bias statistic.
        layer = net.layers{l};

        layer.momentum_filter = para.momentum(1) * layer.momentum_filter + gradW - para.decay(1) * layer.filters;
        layer.momentum_bias = para.momentum(2) * layer.momentum_bias + gradB - para.decay(2) * layer.bias;

        % the bias moves with twice the learning rate used for the filters
        layer.filters = layer.filters + layer.lambdaLearningRate * layer.momentum_filter;
        layer.bias = layer.bias + ( layer.lambdaLearningRate * 2 ) * layer.momentum_bias;

        net.layers{l} = layer;

    end
    