 function [net, currSample_video, video_masked, error_recover]=process_epoch_STGConvNet_inpainting(para, net, currSample_video, video_masked)
    % PROCESS_EPOCH_STGCONVNET_INPAINTING  One learning epoch for the inpainting task.
    %
    %   Steps, in order:
    %     1. compute the statistics of the observed (masked) sequence,
    %     2. refill the masked entries of video_masked from a sampled sequence,
    %     3. update the synthesized sequence currSample_video by sampling,
    %     4. report the mean absolute recovery error over the masked entries,
    %     5. update the filters and biases of every layer from the difference
    %        between observed and synthesized statistics (momentum + decay).
    %
    %   Inputs
    %     para  - struct; fields read here: L, stepsize, masks, imageSet, sz,
    %             max_gradient, momentum(1:2), decay(1:2).
    %     net   - struct whose cell array net.layers holds, per layer, the
    %             fields filters, bias, momentum_filter, momentum_bias and
    %             lambdaLearningRate.
    %     currSample_video - current synthesized sequence.
    %     video_masked     - observed sequence; entries where para.masks==1
    %                        are overwritten in step 2.
    %
    %   Outputs
    %     net              - network with updated parameters and momenta.
    %     currSample_video - updated synthesized sequence, gathered from the GPU.
    %     video_masked     - sequence with masked entries refilled. It is
    %                        converted with gpuArray below and never gathered,
    %                        so it is returned as a gpuArray.
    %     error_recover    - mean absolute difference between para.imageSet
    %                        and video_masked over the masked entries.
    %
    %   NOTE(review): para.masks is assumed to have the same size as
    %   video_masked and para.imageSet - confirm against the caller.

    % Logical index of the entries to recover; reused for filling and scoring.
    is_masked = (para.masks == 1);

    %% compute statistics of the observed sequence
    % The statistics are taken BEFORE the masked entries are refilled below.
    % A timer handle is used so that a bare tic inside the callee cannot
    % reset the stopwatch measured here.
    timer_obs = tic;
    res_obs = compute_stat_STGConvNet(net, video_masked);
    disp(['compute the observed statistics: ' num2str(toc(timer_obs))]);

    %% filling
    % Only the masked entries are replaced by the sampled values.
    video_masked = gpuArray(video_masked);
    image_temp = sampling_sequence_by_Langevin(net, para.L, para.stepsize, video_masked);
    video_masked(is_masked) = image_temp(is_masked);

    %% sampling by Langevin dynamics
    currSample_video = gpuArray(currSample_video);
    currSample_video = sampling_sequence_by_Langevin(net, para.L, para.stepsize, currSample_video);

    %% evaluate the error
    error_recover = gather(mean(abs(para.imageSet(is_masked) - video_masked(is_masked))));
    disp(['The average error of recovery is '  num2str(error_recover)]);

    %% compute statistics of the synthesized sequence
    timer_syn = tic;
    res_syn = compute_stat_STGConvNet(net, currSample_video);
    disp(['compute the synthesized statistics: ' num2str(toc(timer_syn))]);
    currSample_video = gather(currSample_video);

    % Scale applied to the synthesized statistics; it does not depend on the
    % layer, so it is computed once outside the loop.
    syn_scale = para.sz / size(currSample_video, 3);

    for l = 1:numel(net.layers)

        %% compute the gradient for weights and bias
        % Layer l is paired with entry l+1 of the statistics arrays.
        gradient_weight = res_obs(l+1).stat_weights - res_syn(l+1).stat_weights * syn_scale;
        gradient_bias = res_obs(l+1).stat_bias - res_syn(l+1).stat_bias * syn_scale;

        % Reported before normalization and clipping.
        disp(['Layer ' num2str(l) ': SSD_weight: ' num2str(mean(abs(gradient_weight(:))))]);

        % Normalize by the product of the row/col/time sizes recorded for
        % this layer's observed statistics.
        maximum_num_activations = res_obs(l+1).size_row * res_obs(l+1).size_col * res_obs(l+1).size_time;

        % Rescale so that the largest magnitude does not exceed para.max_gradient.
        gradient_weight = gradient_weight / maximum_num_activations;
        peak_weight = max(abs(gradient_weight(:)));
        if peak_weight > para.max_gradient
            gradient_weight = gradient_weight / peak_weight * para.max_gradient;
        end

        gradient_bias = gradient_bias / maximum_num_activations;
        peak_bias = max(abs(gradient_bias(:)));
        if peak_bias > para.max_gradient
            gradient_bias = gradient_bias / peak_bias * para.max_gradient;
        end

        % Momentum update with weight decay; element 1 of para.momentum and
        % para.decay applies to the filters, element 2 to the biases.
        net.layers{l}.momentum_filter = para.momentum(1) * net.layers{l}.momentum_filter + gradient_weight - para.decay(1) * net.layers{l}.filters;
        net.layers{l}.momentum_bias = para.momentum(2) * net.layers{l}.momentum_bias + gradient_bias - para.decay(2) * net.layers{l}.bias;

        % The step adds the momentum; biases use twice the layer learning rate.
        net.layers{l}.filters = net.layers{l}.filters + net.layers{l}.lambdaLearningRate * net.layers{l}.momentum_filter;
        net.layers{l}.bias = net.layers{l}.bias + ( net.layers{l}.lambdaLearningRate * 2 ) * net.layers{l}.momentum_bias;

    end








