function [para, net, imageSet_masked] = config_STGConvNet_recovery(category, type, mask_type, loading_mask)
%CONFIG_STGCONVNET_RECOVERY  Build parameters, data and network for video recovery.
%
%   [para, net, imageSet_masked] = config_STGConvNet_recovery(category, type, mask_type, loading_mask)
%
%   Inputs
%     category      name of the folder under trainingVideo/data_recovery/ that
%                   holds the training videos (*.mp4; *.avi is used only when
%                   no *.mp4 file is found).
%     type          network layout: 'FC_ST_2' (two layers, 150x150 frames) or
%                   'FC_S_3' (three layers, 100x100 frames).
%     mask_type     occlusion type: 1 = pepper salt, 2 = missing frame,
%                   3 = single region mask.
%     loading_mask  true  -> load 'masks' from <inPath>/mask_type<k>.mat;
%                   false -> generate the masks and save them to that file.
%
%   Outputs
%     para             struct with all settings, the mean-subtracted training
%                      data (para.imageSet, a gpuArray indexed as
%                      [x, y, frame, channel, video]), the masks and the
%                      global mean that was subtracted (para.mean_img).
%     net              struct with the flag net.FC and the cell array
%                      net.layers (filters, bias, padding, stride, output map
%                      sizes, momentum buffers and learning rate per layer).
%     imageSet_masked  copy of para.imageSet whose masked entries are zero.
%
%   Side effects
%     Creates the output folders below output_recovery/, writes every video
%     frame and every masked frame as JPEG, and (when loading_mask is false)
%     writes the mask file into the input folder.
%
%   Errors
%     'No such an option'            unknown type
%     'No such an option for masks'  unknown mask_type
%     'No training images'           no video found, or a video without frames
%     'No masks saved'               loading_mask is true but no usable mask file

%% experiment identification and occlusion parameters
para.categoryName = category;

para.mask_ratio = 0.8;        % passed to the type 1 and type 2 mask generators
para.mask_size = 25;          % passed to the type 1 mask generator
para.mask_duration = 3;
para.region_mask_size = 60;   % passed to the type 3 mask generator
para.type = type;

para.mask_type = mask_type;   % 1: pepper salt; 2: missing frame; 3: single region mask

%% input / output locations
para.numChain = 1;            % number of synthesized images
para.inPath = ['trainingVideo' filesep 'data_recovery' filesep para.categoryName filesep];
para.outPath = ['output_recovery' filesep para.categoryName '_masktype_' num2str(para.mask_type) filesep];
para.outPath_images = [para.outPath 'observed_sequence'];
para.outPath_synthesis = [para.outPath 'synthesis_sequence'];
para.outPath_variable = [para.outPath 'variable'];
para.outPath_final_result = [para.outPath 'final_result'];

para.outPath_images_masked = [para.outPath 'masked_images'];
para.outPath_recovered = [para.outPath 'imageFrame_recovered'];

stg_make_dir(para.outPath_recovered);
stg_make_dir(para.outPath_images_masked);
stg_make_dir(para.outPath_synthesis);

%% sizes of the training images
switch para.type
    case 'FC_ST_2'
        para.sx = 150;
        para.sy = 150;
    case 'FC_S_3'
        para.sx = 100;
        para.sy = 100;
    otherwise
        error('No such an option');
end

%% MCMC parameters
para.stepsize = 0.3;   % Langevin step size
para.L = 20;           % number of iterations in MCMC

%% others
para.isColor = true;   % true / false

%% the maximum number of frames used from each training video
para.maximumNumFrames = 70;

%% optimization parameters
para.momentum = [0, 0];   % (1) for weights, (2) for bias
para.decay = [0, 0];
para.max_gradient = 60;

% color or grey image
if para.isColor
    para.numChannel = 3;
else
    para.numChannel = 1;
end

%% read every video and save its frames as images
videoFile = dir([para.inPath '*.mp4']);
if isempty(videoFile)
    videoFile = dir([para.inPath '*.avi']);
end
para.numVideos = length(videoFile);
if para.numVideos == 0
    error('No training images');
end

numFramesInTrainingVideos = zeros(1, para.numVideos);
for iVideo = 1:para.numVideos

    shuttleVideo = VideoReader([para.inPath videoFile(iVideo).name]);
    outputFolder = [para.outPath_images, filesep 'sequence_' num2str(iVideo)];
    stg_make_dir(outputFolder);

    ii = 0;
    while hasFrame(shuttleVideo)
        ii = ii + 1;
        img = readFrame(shuttleVideo);
        img = imresize(img, [para.sx, para.sy], 'bilinear');
        filename = [sprintf('%03d', ii) '.jpg'];
        imwrite(img, fullfile(outputFolder, filename));
    end

    if ii == 0
        error('No training images');
    end
    numFramesInTrainingVideos(iVideo) = ii;
end

% every video is truncated to the length of the shortest one (capped)
para.numFrames = min(min(numFramesInTrainingVideos), para.maximumNumFrames);
para.sz = para.numFrames;
para.imageSet = gpuArray.zeros([para.sx, para.sy, para.numFrames, para.numChannel, para.numVideos], 'single');

%% load or generate the occlusion masks
% Local copies keep the function handles below from capturing the whole
% para struct (which already holds the image data).
maskDims = [para.sx, para.sy, para.numChannel, para.numFrames];
maskRatio = para.mask_ratio;
switch para.mask_type
    case 1
        maskMessage = 'generating salt pepper masks';
        saltPepperSize = para.mask_size;
        generateMasks = @() generate_salt_pepper(maskDims, maskRatio, saltPepperSize);
    case 2
        maskMessage = 'generating missing frames masks';
        generateMasks = @() generate_missing_patch(maskDims, maskRatio);
    case 3
        maskMessage = 'generating region mask';
        regionSize = para.region_mask_size;
        generateMasks = @() generate_masks(maskDims, regionSize);
    otherwise
        error('No such an option for masks');
end

mask_path = [para.inPath 'mask_type' num2str(para.mask_type) '.mat'];
if loading_mask
    if ~exist(mask_path, 'file')
        error('No masks saved');
    end
    % Struct-output load: a file without the variable is detected here
    % instead of failing later with an undefined-variable error.
    stored = load(mask_path, 'masks');
    if ~isfield(stored, 'masks')
        error('No masks saved');
    end
    masks = stored.masks;
else
    disp(maskMessage);
    masks = generateMasks();
    masks = permute(masks, [1 2 4 3]);   % swap dims 3 and 4; dim 3 is indexed by frame below
    save(mask_path, 'masks');
end

% masks(:,:,iFrame,:) is indexed for every frame below, so fail early with
% a readable message when the mask does not cover all frames.
if size(masks, 3) < para.numFrames
    error('The masks cover %d frames but %d frames are required.', size(masks, 3), para.numFrames);
end

%% build the training set and save the masked (observed) frames
for iVideo = 1:para.numVideos

    outputFolder = [para.outPath_images, filesep 'sequence_' num2str(iVideo)];
    outputFolder_masked = [para.outPath_images_masked, filesep 'sequence_' num2str(iVideo)];
    stg_make_dir(outputFolder_masked);

    for iFrame = 1:para.numFrames

        % Rebuild the exact name used when the frame was written, so the
        % result does not depend on directory listing order or stale files.
        filename = [sprintf('%03d', iFrame) '.jpg'];
        img = imread(fullfile(outputFolder, filename));

        if size(img, 3) ~= para.numChannel
            if para.numChannel == 3
                disp('This is not a color image.')
                % Replicate the grey plane so that the mask indexing and the
                % assignment into para.imageSet see three channels.
                if size(img, 3) == 1
                    img = repmat(img, [1, 1, 3]);
                end
            else
                img = rgb2gray(img);
                disp('This is not a grey image. We will change it into grey scale.')
            end
        end

        img = imresize(img, [para.sx, para.sy], 'bilinear');

        % mask the observed image and save it
        img_temp = img;
        img_temp(masks(:, :, iFrame, :)) = 0;
        imwrite(img_temp, fullfile(outputFolder_masked, filename));

        para.imageSet(:, :, iFrame, :, iVideo) = single(img);
    end
end

% not necessary to save the following two
para.masks = masks;
para.num_missing_pixels = sum(masks(:));

para.mean_img = mean(para.imageSet(:));          % global mean
para.imageSet = para.imageSet - para.mean_img;   % subtract the mean from all images
imageSet_masked = para.imageSet;

%% set the masked region of the (mean-subtracted) data to zero
for iVideo = 1:para.numVideos
    for iFrame = 1:para.numFrames
        img = imageSet_masked(:, :, iFrame, :, iVideo);
        img(masks(:, :, iFrame, :)) = 0;
        imageSet_masked(:, :, iFrame, :, iVideo) = img;
    end
end

%% network structure
inputSize = [para.sx, para.sy, para.sz];
switch para.type

    case 'FC_ST_2'   % two layers; the top filter spans the whole first-layer map

        para.learningScheme = 'end_to_end';
        para.numIteration = 1000;
        net.FC = true;

        % first layer: 200 filters of 7x7x7, stride 3, half-filter padding
        net.layers{1} = stg_build_layer(200, [7, 7, 7], [3, 3, 3], [1, 1, 1], inputSize, para.numChannel);
        mapSize1 = stg_map_size(net.layers{1});

        % second layer: one filter as large as the first-layer map, no padding
        net.layers{2} = stg_build_layer(1, mapSize1, [1, 1, 1], [0, 0, 0], mapSize1, net.layers{1}.numFilter);

        % learning rate
        net.layers{2}.lambdaLearningRate = 0.001;
        net.layers{1}.lambdaLearningRate = 0.01;

    case 'FC_S_3'    % three layers; the second-layer filter spans the first-layer map spatially

        para.learningScheme = 'end_to_end';
        para.numIteration = 500;
        net.FC = false;

        % first layer: 120 filters of 7x7x7, stride 2, half-filter padding
        net.layers{1} = stg_build_layer(120, [7, 7, 7], [2, 2, 2], [1, 1, 1], inputSize, para.numChannel);
        mapSize1 = stg_map_size(net.layers{1});

        % second layer: 30 filters, full spatial extent, duration 5,
        % padded along time only
        net.layers{2} = stg_build_layer(30, [mapSize1(1), mapSize1(2), 5], [2, 2, 2], [0, 0, 1], mapSize1, net.layers{1}.numFilter);
        mapSize2 = stg_map_size(net.layers{2});

        % third layer: 5 filters of 1x1x2, padded along time only
        net.layers{3} = stg_build_layer(5, [1, 1, 2], [2, 2, 1], [0, 0, 1], mapSize2, net.layers{2}.numFilter);

        % learning rate
        net.layers{3}.lambdaLearningRate = 0.0001;
        net.layers{2}.lambdaLearningRate = 0.001;
        net.layers{1}.lambdaLearningRate = 0.01;

    otherwise

        error('No such an option');

end


function stg_make_dir(folder)
% Create FOLDER unless a folder with that name already exists.
if ~exist(folder, 'dir')
    mkdir(folder);
end


function mapSize = stg_map_size(layer)
% Return the output map size of LAYER as [sumMap_sx, sumMap_sy, sumMap_sz].
mapSize = [layer.sumMap_sx, layer.sumMap_sy, layer.sumMap_sz];


function layer = stg_build_layer(numFilter, filterSize, stride, padFlags, inputSize, numInputChannel)
% Assemble one layer struct.
%   numFilter        number of filters in this layer
%   filterSize       [x, y, duration] extent of each filter
%   stride           [x, y, z] sub-sampling widths
%   padFlags         [x, y, z], 1 -> pad by floor(filter extent / 2), 0 -> no padding
%   inputSize        [x, y, z] size of the input this layer is applied to
%   numInputChannel  number of channels (or filters) of that input
layer.numFilter = numFilter;
layer.sizeFilter_x = filterSize(1);
layer.sizeFilter_y = filterSize(2);
layer.durationFilter = filterSize(3);

layer.subsampling_width_x = stride(1);
layer.subsampling_width_y = stride(2);
layer.subsampling_width_z = stride(3);

layer.halfSizeFilter_x = floor(layer.sizeFilter_x / 2);
layer.halfSizeFilter_y = floor(layer.sizeFilter_y / 2);
layer.halfSizeDurationFilter = floor(layer.durationFilter / 2);

layer.pad_x = padFlags(1) * layer.halfSizeFilter_x;
layer.pad_y = padFlags(2) * layer.halfSizeFilter_y;
layer.pad_z = padFlags(3) * layer.halfSizeDurationFilter;

layer.pad = [layer.pad_x, layer.pad_x, layer.pad_y, layer.pad_y, layer.pad_z, layer.pad_z];
layer.stride = [layer.subsampling_width_x, layer.subsampling_width_y, layer.subsampling_width_z];

% small random filters, zero bias
layer.filters = gpuArray.randn([layer.sizeFilter_x, layer.sizeFilter_y, layer.durationFilter, numInputChannel, layer.numFilter], 'single') * 0.001;
layer.bias = gpuArray.zeros(1, layer.numFilter, 'single');

% output map size: floor((input + 2*pad - filter) / stride) + 1 per dimension
layer.sumMap_sx = floor((inputSize(1) + 2 * layer.pad_x - layer.sizeFilter_x) / layer.subsampling_width_x) + 1;
layer.sumMap_sy = floor((inputSize(2) + 2 * layer.pad_y - layer.sizeFilter_y) / layer.subsampling_width_y) + 1;
layer.sumMap_sz = floor((inputSize(3) + 2 * layer.pad_z - layer.durationFilter) / layer.subsampling_width_z) + 1;

layer.momentum_filter = zeros(size(layer.filters), 'single');
layer.momentum_bias = zeros(size(layer.bias), 'single');

