%% remove existing files
% Make sure every per-cluster output folder exists and holds no stale files.
% A missing folder is created here because the save/imwrite calls later in this
% script write into working/cluster<cc> and do not create folders themselves.
for cc = 1:numOfCluster
    clusterDir = ['working/cluster' num2str(cc)];
    if exist(clusterDir, 'dir')
        % same wildcard as before: removes every file whose name contains a dot
        delete([clusterDir '/*.*']);
    else
        % mkdir also creates the parent 'working' folder when it is missing
        mkdir(clusterDir);
    end
end

%% collect templates
% Stack the S2T and S3T cell arrays stored for each cluster into two combined
% cell arrays. Cluster cc occupies rows (cc-1)*numPartRotate+1 .. cc*numPartRotate
% of allS2T and rows (cc-1)*numRotate+1 .. cc*numRotate of allS3T.
allS2T = cell(numOfCluster*numPartRotate, numCandPart);
allS3T = cell(numOfCluster*numRotate, 1);
for cc = 1:numOfCluster
    % only S2T and S3T are read from the per-iteration, per-cluster file
    templateFile = sprintf('./working/smallTemplate_Iter%d_Cluster%d.mat',iter,cc);
    load(templateFile, 'S2T', 'S3T');

    % row offsets of this cluster inside the stacked arrays
    startIndT2 = (cc-1)*numPartRotate;
    startIndT3 = (cc-1)*numRotate;

    allS2T(startIndT2 + (1:numPartRotate), :) = S2T;
    allS3T(startIndT3 + (1:numRotate), :) = S3T;
end

%% start encoding
% For every image: compute SUM2 / MAX2 / SUM3 maps through the mexc_* routines,
% select activations with mexc_ComputeMAX2MP, then for each activation crop the
% SUM1 features and image pixels part by part and write them to the folder of
% the cluster the activation belongs to.

% Per-cluster counter holding the index of the next ImageAndFeature_<k> file;
% every entry starts at 1 and is incremented after a patch has been written.
numOfT = ones(1, numOfCluster);
% Number of part-template rows over all clusters (numPartRotate rows per cluster).
numOfPartTemplate = numPartRotate*numOfCluster;

% Activations of all images, one column per activation; the image index is
% prepended as the first row when the columns are appended inside the loop.
activations = [];

for img = 1:numOfI

    % NOTE(review): the timer started by this tic is restarted by the tic in
    % front of the SUM2 step before any toc is called.
    disp(['    start detecting in image ' num2str(img)]); tic
    % NOTE(review): selectedPart does not depend on img and is not referenced
    % again in this section - confirm whether code elsewhere relies on it.
    selectedPart = find(PartOnOff);
    
    %% load data
    % Only the listed variables are read from the per-image file.
    load( sprintf('./working/SUM1map_image_%d.mat',img), 'SUM1mapFind','MAX1map','M1Trace','M1RowShift','M1ColShift','M1OriShifted','J');
    
    disp('SUM2...')
   tic
    %% compute SUM2 maps for non-overlapping parts
    % One mexc_ComputeSUM2 call per image resolution; the result is reshaped to
    % numOfPartTemplate x numCandPart so that it lines up with allS2T.
    SUM2map = cell(numOfPartTemplate, numCandPart, numImgResolution);
    for iRes = 1:numImgResolution
        tmpS2 = mexc_ComputeSUM2( numOrient,...
            MAX1map(iRes,:), allS2T(:), subsampleS2 );
        SUM2map(:,:,iRes) = reshape(tmpS2, [numOfPartTemplate numCandPart]);
    end
    toc
    
    disp('MAX2...')
    tic
    %% compute MAX2 maps for overlapping parts (local maximization w.r.t. translation and rotation)
    % The two trace arrays are kept after MAX2map is cleared; they are read in
    % the cropping step below.
    % NOTE(review): M2RowColShift is overwritten at every resolution, so the
    % cropping step uses the value returned for the last resolution -
    % presumably it is the same for all resolutions; confirm.
    MAX2map = cell(size(SUM2map));
    MAX2LocTrace = cell(size(SUM2map));
    MAX2TransformTrace = cell(size(SUM2map));
    for iRes = 1:numImgResolution
        [tmpMAX2 tmpMAX2LocTrace tmpMAX2TransformTrace...
            M2RowColShift] = mexc_ComputeMAX2( allTemplateAffinityMatrix(:), ...
            SUM2map(:,:,iRes), ...
            locationPerturbFraction, ...
            int32(partSize*ones(numOfPartTemplate*numCandPart,1)/subsampleS2), subsampleM2 );
        MAX2map(:,:,iRes) = reshape(tmpMAX2,[numOfPartTemplate numCandPart]);
        MAX2LocTrace(:,:,iRes) = reshape(tmpMAX2LocTrace,[numOfPartTemplate numCandPart]);
        MAX2TransformTrace(:,:,iRes) = reshape(tmpMAX2TransformTrace,[numOfPartTemplate numCandPart]);
    end
    toc
    disp('SUM3...')
    %% compute SUM3map
    % Row r+(cc-1)*length(rotationRange) holds the map of cluster cc at template
    % rotation r; each call receives that cluster's MAX2 maps and one S3 template.
    % NOTE(review): the row count uses nTransform, the row index uses
    % length(rotationRange) and the template index uses numRotate - these are
    % presumably equal; confirm.
    SUM3map = cell(numOfCluster*nTransform, numImgResolution);
    for cc = 1 : numOfCluster
        for r = 1:length(rotationRange) % this is the rotation of the S3 template
            for iRes = 1:numImgResolution
                startInd = (cc-1)*numPartRotate;
                tmpM2 = MAX2map(startInd+1:startInd+numPartRotate,:,iRes);
                startIndT3 = (cc-1)*numRotate;
                tmpS3 = mexc_ComputeSUM3( tmpM2(:), allS3T(startIndT3+r), 1, numPartRotate);
                SUM3map(r+(cc-1)*length(rotationRange), iRes) = tmpS3;
            end
        end
    end
    
    %% matching pursuit
    % The threshold is 0 up to iteration 7 and S3Thre afterwards.
    % NOTE(review): the reason for the cut-off at 7 is not visible here.
    if iter <=7
        tmp_threshold = 0;
    else
        tmp_threshold = S3Thre;
    end
    tmpActivations = mexc_ComputeMAX2MP( SUM3map, int32(locationPerturbationFraction2*templateSize(1)/subsampleS2/subsampleM2), tmp_threshold );
    % Rows of tmpActivations as they are used below: 1 = row position,
    % 2 = column position, 3 = resolution index, 4 = SUM3map row index
    % (cluster and transform combined), 5 = activation score.
    % Rows 1-4 are shifted by one - presumably from the 0-based indices of the
    % mex routine to MATLAB's 1-based indices; confirm.
    tmpActivations(1:4,:) = tmpActivations(1:4,:)+1;
    % Append this image's activations with the image index as an extra first row.
    activations = [activations, [single(img*ones(1,size(tmpActivations,2))); tmpActivations]];
    numOfPatch = size(tmpActivations, 2);
    
    %% releasing memory
    % MAX2LocTrace and MAX2TransformTrace are not cleared: the loop below reads them.
    clear MAX1map;
    clear SUM2map;
    clear MAX2map;
    clear SUM3map;
    
    %% cropping morphed SUM1map
    % For each activation, build a template-sized feature canvas (one map per
    % orientation) and a template-sized image canvas, filled part by part.
    for ii = 1:numOfPatch
        tmp_unwarp_img = zeros(templateSize,'single');
        tmpSUM1mapFind=cell(1,numOrient);
        for iOrient = 1:numOrient
            tmpSUM1mapFind{1,iOrient}=zeros(templateSize,'single');
        end
        therex = tmpActivations(1,ii);
        therey = tmpActivations(2,ii);
        % Split the combined SUM3map row index into the cluster number and the
        % transform index inside that cluster (1..nTransform).
        clusterID = ceil( ( tmpActivations(4,ii)) / nTransform );
        bestRotInd = tmpActivations(4,ii)-(clusterID-1)*nTransform;
        bestPartRes = tmpActivations(3,ii);
        % NOTE(review): bestRes is not referenced again in this section.
        bestRes = bestPartRes;
        % Row offset of this cluster inside the MAX2 trace arrays.
        startIndex = (clusterID-1)*numPartRotate;
        partSizeX = partSize;
        partSizeY = partSize;
        
        for iPart = 1:numCandPart
            % NOTE(review): find may return an empty or multi-element r when the
            % orientation is absent from / repeated in partRotationRange.
            r = find( allS3SelectedOri(bestRotInd,iPart) == partRotationRange ); % the index of part rotation
            % Activation position plus the part's offset for this template
            % rotation, divided by both sub-sampling factors and rounded.
            Fx = therex + floor(.5+allS3SelectedRow(bestRotInd,iPart)/subsampleM2/subsampleS2);
            Fy = therey + floor(.5+allS3SelectedCol(bestRotInd,iPart)/subsampleM2/subsampleS2); % sub-sampled position
            imagesize = size(MAX2LocTrace{startIndex+r,iPart,bestPartRes}); % subsampled image size
            if Fx >= 1 && Fx <= imagesize(1) && Fy >= 1 && Fy <= imagesize(2)
                pos = startIndex+r;
                % Look up the recorded translation and transform at this position;
                % the +1 presumably converts 0-based trace values to 1-based - confirm.
                translationInd = MAX2LocTrace{pos,iPart,bestPartRes}(Fx,Fy) + 1;
                transformInd = MAX2TransformTrace{pos,iPart,bestPartRes}(Fx,Fy) + 1;
                % Wrap transformInd into the range 1..numPartRotate.
                actualPartRotationInd = transformInd - numPartRotate*(ceil(double(transformInd)/numPartRotate)-1);
                % now go to the part position on SUM2 map
                Fx = floor( Fx*subsampleM2 + M2RowColShift(translationInd,1) * sqrt(partSizeX*partSizeY)/subsampleS2 );
                Fy = floor( Fy*subsampleM2 + M2RowColShift(translationInd,2) * sqrt(partSizeX*partSizeY)/subsampleS2 );
            else
                % part center is already out of image boundary, then assume
                % part rotation the same as template rotation.
                actualPartRotationInd = r;
                Fx = Fx*subsampleM2;
                Fy = Fy*subsampleM2;
            end

            % find the part location at the higher resolution
        	Fx = (Fx-1 + .5) * subsampleS2 ;
        	Fy = (Fy-1 + .5) * subsampleS2 ;
        	
            % crop the feature patch that is registered to the part template;
            % the result is indexed per orientation below
            tmpSUM1mapLearn = mexc_CropInstance(SUM1mapFind(bestPartRes,:),Fx,Fy,...
                partRotationRange(actualPartRotationInd),tScale,1,...
                partOutRow{actualPartRotationInd},partOutCol{actualPartRotationInd},...
                numOrient,1,partSizeX,partSizeY);

            % paste the cropped maps into the canvas at this part's location
            for o = 1:numOrient
               tmpSUM1mapFind{1,o}(PartLocX(iPart)-1+(1:partSizeX),PartLocY(iPart)-1+(1:partSizeY)) = tmpSUM1mapLearn{o};
            end
            
            % same crop applied to the image J at the selected resolution, with 0
            % in place of the part rotation and 1 in place of numOrient
            tmp_patch = mexc_CropInstance(J(bestPartRes),Fx,Fy,...
                0,tScale,1,...
                partOutRow{actualPartRotationInd},partOutCol{actualPartRotationInd},...
                1,1,partSizeX,partSizeY);
            tmp_unwarp_img(PartLocX(iPart)-1+(1:partSizeX),PartLocY(iPart)-1+(1:partSizeY))=tmp_patch{1};
        end
        partSUM1map = tmpSUM1mapFind;
        activationScore = tmpActivations(5,ii);
        % Write the features and score (.mat) and the image patch (.png) under the
        % cluster's folder, numbered with that cluster's running counter.
        % The division by 255 assumes pixel values on a 0..255 scale - TODO confirm.
        save(['./working' '/' 'cluster' num2str(clusterID) '/' sprintf('ImageAndFeature_%d.mat',numOfT(clusterID))],'partSUM1map','activationScore');
        imwrite(tmp_unwarp_img/255,['./working' '/' 'cluster' num2str(clusterID) '/' sprintf('ImageAndFeature_%d.png',numOfT(clusterID))]);

        numOfT(clusterID)= numOfT(clusterID)+1; 
    end
    
end