%% derived sizes and part selection
% Side lengths of one part template: the object template is split into
% numTemplateSplit pieces along each axis.
partSizeX = sizeTemplatex/numTemplateSplit;
partSizeY = sizeTemplatey/numTemplateSplit;

% linear indices of the non-zero entries of PartOnOff
selectedPart = find(PartOnOff);


%% specify the output information

% hierarchical deformation of HAB
objLocation = zeros(1,2);
objRotation = 0;
objResolution = 0;
partAbsoluteLocation = zeros(numCandPart,2);
partAbsoluteRotation = zeros(numCandPart,1);
partAbsoluteResolution = zeros(numCandPart,1);
gaborAbsoluteLocation = zeros(numElement,2);
gaborAbsoluteRotation = zeros(numElement,1);
gaborAbsoluteResolution = zeros(numElement,1);

% hierarchical score of HAB
objScore = 0; % records the MAX3 score
partScores = zeros(numCandPart,1); % records the MAX2 scores for each non-overlapping partial templates
GaborResponses = zeros(numElement,1); % records the responses of deformed Gabors
% The trace-back loop stores responses under the lower-case name. MATLAB is
% case-sensitive, so reset that variable too; otherwise values left over from
% an earlier run of this script could survive in the workspace.
gaborResponses = zeros(numElement,1);

if doMorphBackS1map
	croppedSUM1map = cell(1,numOrient); % kept for backward compatibility
	% preallocate the container that is actually filled below and in the
	% trace-back loop (one map per orientation)
	morphedSUM1map = cell(1,numOrient);
	for i = 1:numel(morphedSUM1map)
		morphedSUM1map{i} = zeros( sizeTemplatex, sizeTemplatey, 'single' );
	end
	morphedPatch = zeros( sizeTemplatex, sizeTemplatey, 'single' );
end
resolutionStart = .8;
resolutionStep = .2;
disp(['    start detecting in image ' imageName]); tic % tic starts the timer read by toc at the end of the script

%% compute SUM2 maps for non-overlapping parts
% One cell per (part rotation, candidate part, image resolution).
SUM2map = cell(numPartRotate,numCandPart,numImgResolution);
for iRes = 1:numImgResolution
	tmpS2 = mexc_ComputeSUM2( numOrient, MAX1map(iRes,:), S2T(:), subsampleS2 );
	SUM2map(:,:,iRes) = reshape( tmpS2, [numPartRotate numCandPart] );
end

% Equivalence relationship between rotated templates (similar to the
% correlation matrix for Gabor elements). For every rotation r1 of a part,
% collect the zero-based linear indices (rotation + part offset) of the
% rotations of the SAME part whose squared chord distance on the unit circle
% is at most minRotationDif.
templateAffinityMatrix = cell(numPartRotate,numCandPart);
partAngles = pi/numOrient * partRotationRange;
for iPart = 1:numCandPart
	jPart = iPart; % only rotations of the same part are compared
	for r1 = 1:length(partRotationRange)
		angle1 = partAngles(r1);
		chordSq = (sin(angle1) - sin(partAngles)).^2 + (cos(angle1) - cos(partAngles)).^2;
		closeIdx = find( chordSq <= minRotationDif );
		if isempty(closeIdx)
			templateAffinityMatrix{r1,iPart} = [];
		else
			% row vector, converted to zero-based int32 indices
			templateAffinityMatrix{r1,iPart} = int32( reshape(closeIdx,1,[]) + (jPart-1)*numPartRotate - 1 );
		end
	end
end

% containers with the same layout as SUM2map
tmpMAX2map = cell(size(SUM2map));
MAX2LocTrace = cell(size(SUM2map));
MAX2TransformTrace = cell(size(SUM2map));

% per-template side length in SUM2 (sub-sampled) pixels; identical for all resolutions
partSideInS2 = int32( sqrt(partSizeX*partSizeY)*ones(numPartRotate*numCandPart,1)/subsampleS2 );

for iRes = 1:numImgResolution
	subsampleM2 = 1;
	[tmpMAX2, tmpMAX2LocTrace, tmpMAX2TransformTrace, M2RowColShift] = ...
		mexc_ComputeMAX2( templateAffinityMatrix(:), ...
		SUM2map(:,:,iRes), ...
		partLocRange, ...
		partSideInS2, subsampleM2 );
	tmpMAX2map(:,:,iRes) = reshape( tmpMAX2, [numPartRotate numCandPart] );
	MAX2LocTrace(:,:,iRes) = reshape( tmpMAX2LocTrace, [numPartRotate numCandPart] );
	MAX2TransformTrace(:,:,iRes) = reshape( tmpMAX2TransformTrace, [numPartRotate numCandPart] );
end

% max over resolution
% For every (rotation, part, resolution) take the pixel-wise maximum of the
% MAX2 maps over the neighbouring resolutions (|jRes - iRes| <=
% resolutionShiftLimit), after resizing each neighbour to the current map
% size. MAX2ResolutionTrace records the zero-based index of the winning
% resolution, or -1 where no neighbour exceeded the -1e10 floor.
resolutionShiftLimit = 1;
MAX2ResolutionTrace = cell(size(tmpMAX2map)); % to initialize
MAX2map = tmpMAX2map;
for iRes = 1:numImgResolution
	current_size = size( tmpMAX2map{1,1,iRes} );
	% only resolutions within the shift limit can contribute
	neighbourRes = max(1,iRes-resolutionShiftLimit):min(numImgResolution,iRes+resolutionShiftLimit);
	for j = 1:size(MAX2map,1)
		for k = 1:size(tmpMAX2map,2)
			map = -1e10 * ones( current_size, 'single' ); % an auxiliary variable, to find the ARGMAX resolution
			MAX2ResolutionTrace{j,k,iRes} = int32( -1 * ones( current_size ) );
			for jRes = neighbourRes
				% resize once; the resized map serves both the comparison and the copy
				ref = imresize( tmpMAX2map{j,k,jRes}, current_size, 'nearest' );
				ind = ref > map;
				map(ind) = ref(ind);
				MAX2map{j,k,iRes}(ind) = ref(ind);
				MAX2ResolutionTrace{j,k,iRes}(ind) = jRes - 1; % start from 0
			end
		end
	end
end


% for debug:

%% locate the object
% Scan every object-template rotation and every image resolution, keeping
% the rotation / resolution / linear location with the largest SUM3 score.

MMAX3 = -1e10;   % best SUM3 score so far
bestS3Loc = -1;  % linear index of the best score inside its SUM3 map
bestRes = 0;     % resolution index of the best score
bestRot = 0;     % rotation value (not index) of the best score

%S3T = cell(length(rotationRange),1);
for r = 1:length(rotationRange) % this is the rotation of the S3 template
	rot = rotationRange(r);

	% MAX2score, Fx, Fy are only for temporary storage
	MAX2score = single(zeros(1, numImgResolution));
	Fx = zeros(1, numImgResolution);
	Fy = zeros(1, numImgResolution);

	% index of each selected part's orientation within partRotationRange
	selectedTransform = zeros(length(selectedPart),1,'single');
	for j = 1:length(selectedPart)
		selectedTransform(j) = find( allS3SelectedOri(r,j) == partRotationRange );
	end

	% compute SUM3 maps (one per image resolution) for this rotation
	SUM3map = cell(numImgResolution,1);
	for iRes = 1:numImgResolution
		tmpM2 = MAX2map(:,:,iRes);
		SUM3map(iRes) = mexc_ComputeSUM3( tmpM2(:), S3T(r), 1, numPartRotate );
		[MAX3, loc] = max( SUM3map{iRes}(:) );
		if MAX3 > MMAX3
			MMAX3 = MAX3;
			bestS3Loc = loc;
			bestRes = iRes; % best object resolution
			bestRot = rot;  % starting from a negative number
		end
	end

end

% argmax location of SUM3map
% NOTE: SUM3map now holds the maps of the LAST rotation; only its row count
% is used here to unpack the column-major linear index.
bestRotInd = find( bestRot == rotationRange ); % best object rotation index, starting from 1
numRowsS3 = size( SUM3map{bestRes}, 1 );
therey = ceil( bestS3Loc/numRowsS3 );          % best object location (column)
therex = bestS3Loc - (therey-1) * numRowsS3;   % best object location (row)

% convert the sub-sampled location to image pixels
objLocation(1) = floor( (therex+.5)*subsampleS2 );
objLocation(2) = floor( (therey+.5)*subsampleS2 );
objRotation = bestRot;
objResolution = bestRes;

%% copy the detected patch (at object level)
if doCropBackImage
	% Dense pixel grid of the object template, centred on the template.
	% The flattened lists enumerate rows fastest, then columns.
	denseX = (1:sizeTemplatex) - floor(sizeTemplatex/2);
	denseY = (1:sizeTemplatey) - floor(sizeTemplatey/2);
	[gridRow, gridCol] = ndgrid( denseX, denseY );
	inRow = single( gridRow(:) );
	inCol = single( gridCol(:) );
	count = numel(inRow);

	% rotate the grid by the detected object rotation (no scale change)
	tScale = 0;
	rScale = 1;
	cScale = 1;
	inO = zeros(numel(inRow),1,'single');
	inS = zeros(numel(inRow),1,'single');
	[outRow, outCol] = mexc_TemplateAffineTransform( tScale, rScale, cScale, ...
		bestRot, inRow, inCol, inO, inS, numOrient );

	% crop the image at the best resolution around the detected object centre
	patch = mexc_CropInstance( ImageMultiResolution(bestRes), ...
		floor((therex+.5)*subsampleS2), floor((therey+.5)*subsampleS2), ...
		bestRot, 0, 1, ...
		outRow, outCol, ...
		1, 1, sizeTemplatex, sizeTemplatey );
	patch = patch{1}; % unwrap the single cropped map
end

%% prepare the output variable for visualization of matched template
if showMatchedTemplate
	% canvases at the size of the image at the best object resolution:
	% matchedSym accumulates the rendered Gabor symbols, matchedBoundingBox
	% holds an RGB overlay (all-zero pixels mean "nothing drawn")
	imageSizeAtBestObjectResolution = size( ImageMultiResolution{bestRes} );
	matchedSym = zeros( imageSizeAtBestObjectResolution );
	matchedBoundingBox = zeros( [imageSizeAtBestObjectResolution,3] );

	if showObjectBoundingBox
		% Build the (row, col) coordinates of a frame that is `margin`
		% pixels thick along all four sides of the object template.
		margin = 2;

		% top and bottom bands: every row index paired with the first/last
		% `margin` column indices
		xx = repmat((1:sizeTemplatex),1,margin*2);
		yy = [];
		for y = [1:margin sizeTemplatey-margin+1:sizeTemplatey]
			yy = [yy,ones(1,sizeTemplatex)*y];
		end

		% left and right bands: every column index paired with the first/last
		% `margin` row indices. The upper bound must be sizeTemplatex (the
		% original used sizeTemplatey), otherwise xx and yy end up with
		% different lengths for non-square templates.
		yy = [yy,repmat((1:sizeTemplatey),1,margin*2)];
		for x = [1:margin sizeTemplatex-margin+1:sizeTemplatex]
			xx = [xx,ones(1,sizeTemplatey)*x];
		end

		% centre the frame on the template and rotate it with the object
		inRow = single(xx-floor(sizeTemplatex/2)); inCol = single(yy-floor(sizeTemplatey/2));
		tScale = 0; rScale = 1; cScale = 1; inO = zeros(numel(inRow),1,'single'); inS = zeros(numel(inRow),1,'single');
		[outRow, outCol] = ...
			mexc_TemplateAffineTransform(tScale,rScale,cScale,...
				bestRot,inRow,inCol,inO,inS,numOrient);

		% directly overwrite the corresponding pixels (those inside the image)
		for p = 1:length(outRow)
			x = floor(.5 + floor((therex+.5)*subsampleS2) + outRow(p)); y = floor(.5 + floor((therey+.5)*subsampleS2) + outCol(p));
			if x > 0 && x <= size(matchedBoundingBox,1) && y > 0 && y <= size(matchedBoundingBox,2)
				matchedBoundingBox(x,y,:) = [0.1 0.1 0.1];
			end
		end
	end
end

%% trace back the deformation tree and the associated scores
% For every candidate part: look up its rotation/offset in the best S3
% template, follow the MAX2 resolution / location / transform traces to get
% the part's actual placement, then follow the MAX1 traces of each Gabor
% element of the chosen part template. Results go into the part*/gabor*
% output arrays; optional morph-back and visualisation are done per part.
gaborCount = 0;
for iPart = 1:numCandPart
	r = find( allS3SelectedOri(bestRotInd,iPart) == partRotationRange ); % the index of part rotation
	Fx = therex + floor(.5+allS3SelectedRow(bestRotInd,iPart)/subsampleM2/subsampleS2);
	Fy = therey + floor(.5+allS3SelectedCol(bestRotInd,iPart)/subsampleM2/subsampleS2); % sub-sampled position
	imagesize = size(MAX2map{r,iPart,bestRes}); % subsampled image size

	% set default values of some output variables
	bestPartRes = bestRes;
	% Only this part's entry gets the default (the map minimum). The original
	% assigned to `partScores` without an index, which replaced the whole
	% vector on every iteration and lost the scores of earlier parts.
	partScores(iPart) = min(MAX2map{r,iPart,bestRes}(:));

	if Fx >= 1 && Fx <= imagesize(1) && Fy >= 1 && Fy <= imagesize(2)
		tmp = MAX2map{r,iPart,bestRes};
		partScores(iPart) = tmp(Fx,Fy);

		tmp = MAX2ResolutionTrace{r,iPart,bestRes};
		bestPartRes = tmp(Fx,Fy) + 1; % best part resolution
		current_size = size(tmp);

		% Re-express the position in the map of the winning resolution.
		% Scale each axis by its own size ratio; dividing the two 1x2 size
		% vectors with "/" (mrdivide) would return a single least-squares
		% ratio instead of a per-axis scale.
		tmp = MAX2LocTrace{r,iPart,bestPartRes};
		new_size = double(size(tmp));
		Fx = floor(.5+Fx*new_size(1)/current_size(1));
		Fy = floor(.5+Fy*new_size(2)/current_size(2));

		% zero-based trace entries -> one-based indices; fall back to a
		% middle entry when the rescaled position is outside the map
		if Fx >= 1 && Fx <= size(tmp,1) && Fy >= 1 && Fy <= size(tmp,2)
			translationInd = tmp(Fx,Fy) + 1;
		else
			translationInd = floor(size(M2RowColShift,1)/2);
		end

		tmp = MAX2TransformTrace{r,iPart,bestPartRes};
		if Fx >= 1 && Fx <= size(tmp,1) && Fy >= 1 && Fy <= size(tmp,2)
			transformInd = tmp(Fx,Fy) + 1;
		else
			transformInd = floor(numPartRotate/2) + 1;
		end

		% transformInd is a linear (rotation + part offset) index; reduce it
		% to the rotation index within 1..numPartRotate
		actualPartRotationInd = transformInd - numPartRotate*(ceil(double(transformInd)/numPartRotate)-1);
		Fx = floor( Fx + M2RowColShift(translationInd,1) * partSizeX/subsampleS2 );
		Fy = floor( Fy + M2RowColShift(translationInd,2) * partSizeY/subsampleS2 );
	else
		actualPartRotationInd = r;
	end
	actualPartRotation = partRotationRange(actualPartRotationInd);

	% find the part location at the higher resolution
	Fx = (Fx-1 + .5) * subsampleS2 * subsampleM2;
	Fy = (Fy-1 + .5) * subsampleS2 * subsampleM2;
	partAbsoluteLocation(iPart,1) = Fx;
	partAbsoluteLocation(iPart,2) = Fy;
	partAbsoluteResolution(iPart) = bestPartRes;
	partAbsoluteRotation(iPart) = actualPartRotation;

	if doMorphBackS1map

		% some precomputation: dense, centred pixel grid of one part
		% (rows vary fastest), rotated by the part's actual rotation
		denseX = -floor(partSizeX/2) + (1:partSizeX);
		denseY = -floor(partSizeY/2) + (1:partSizeY);
		count = 0;
		inRow = zeros(length(denseX)*length(denseY),1,'single');
		inCol = zeros(length(denseX)*length(denseY),1,'single');
		for y = denseY
			for x = denseX
				count = count+1;
				inRow(count) = x;
				inCol(count) = y;
			end
		end
		tScale = 0; rScale = 1; cScale = 1; inO = zeros(numel(inRow),1,'single'); inS = zeros(numel(inRow),1,'single');
		[outRow, outCol] = ...
			mexc_TemplateAffineTransform(tScale,rScale,cScale,...
				actualPartRotation,inRow,inCol,inO,inS,numOrient);

		% crop the feature patch that is registered to the part template
		tmpCropped = mexc_CropInstance(SUM1mapFind(bestPartRes,:),Fx,Fy,...
			actualPartRotation,tScale,1,...
			outRow,outCol,...
			numOrient,1,partSizeX,partSizeY);
		for o = 1:numOrient
			morphedSUM1map{o}(PartLocX(iPart)-1+(1:partSizeX),PartLocY(iPart)-1+(1:partSizeY)) = tmpCropped{o};
		end
		% also crop the corresponding image patch (for each part)
		tmpCropped = mexc_CropInstance(ImageMultiResolution(bestPartRes),Fx,Fy,...
			actualPartRotation,tScale,1,...
			outRow,outCol,...
			1,1,partSizeX,partSizeY);
		morphedPatch(PartLocX(iPart)-1+(1:partSizeX),PartLocY(iPart)-1+(1:partSizeY)) = tmpCropped{1};
	end

	% ==== continue to trace back Gabor elements based on the part localization ====

	% per-part lists (row, col, orientation, display magnitude) used for rendering
	gaborXX = [];
	gaborYY = [];
	gaborOO = [];
	gaborMM = [];

	% Gabor basis elements locations
	for j = 1:length( S2T{actualPartRotationInd,iPart}.selectedLambda )
		gaborX = floor(Fx +  S2T{actualPartRotationInd,iPart}.selectedRow(j));
		gaborY = floor(Fy +  S2T{actualPartRotationInd,iPart}.selectedCol(j));
		gaborO = S2T{actualPartRotationInd,iPart}.selectedOri(j);
		if gaborX > 0 && gaborX <= size(M1Trace{bestPartRes,1},1) && gaborY > 0 && gaborY <= size(M1Trace{bestPartRes,1},2)
			% apply the local shift recorded in the MAX1 trace (zero-based entry)
			trace = M1Trace{bestPartRes,gaborO+1}(gaborX,gaborY) + 1;
			dx = M1RowShift{gaborO+1}(trace);
			dy = M1ColShift{gaborO+1}(trace);
			shiftedo = M1OriShifted{gaborO+1}(trace);
			gaborX = floor(.5 + gaborX + single(dx));
			gaborY = floor(.5 + gaborY + single(dy));
			gaborO = single(shiftedo);
		end
		gaborXX = [gaborXX;gaborX];
		gaborYY = [gaborYY;gaborY];
		gaborOO = [gaborOO;gaborO];
		gaborCount = gaborCount + 1;
		gaborAbsoluteLocation(gaborCount,1) = gaborX;
		gaborAbsoluteLocation(gaborCount,2) = gaborY;
		gaborAbsoluteRotation(gaborCount) = gaborO; % start from 0
		gaborAbsoluteResolution(gaborCount) = bestPartRes;
		% re-check the bounds: the position may have moved out of the map
		if gaborX > 0 && gaborX <= size(M1Trace{bestPartRes,1},1) && gaborY > 0 && gaborY <= size(M1Trace{bestPartRes,1},2)
			val = SUM1mapFind{bestPartRes,gaborO+1}(gaborX,gaborY);
		else
			val = 0;
		end
		gaborMM = [gaborMM; max(0,sqrt(val)-.2)];
		gaborResponses(gaborCount) = val;
		% Also fill the preallocated output declared at the top of the script;
		% its name differs only in case, so it was previously never written.
		GaborResponses(gaborCount) = val;
	end

	if showMatchedTemplate
		% render the template for each part separately, then overlay the rendered images
		tmpMatchedSym = displayMatchedTemplate(size(ImageMultiResolution{bestPartRes}),gaborXX,...
			gaborYY,gaborOO,zeros(length(gaborXX),1,'single'),gaborMM,allSymbol,numOrient);
		tmpMatchedSym = double( imresize(tmpMatchedSym,imageSizeAtBestObjectResolution,'bilinear') );
		matchedSym = max(matchedSym,tmpMatchedSym);
		if showPartBoundingBox
			% frame coordinates of the part template, `margin` pixels thick
			margin = 3;
			xx = repmat((1:partSizeX),1,margin*2);
			yy = [];
			for y = [1:margin partSizeY-margin+1:partSizeY]
				yy = [yy,ones(1,partSizeX)*y];
			end
			yy = [yy,repmat((1:partSizeY),1,margin*2)];
			for x = [1:margin partSizeX-margin+1:partSizeX]
				xx = [xx,ones(1,partSizeY)*x];
			end
			inRow = single(xx-floor(partSizeX/2)); inCol = single(yy-floor(partSizeY/2));
			tScale = 0; rScale = 1; cScale = 1; inO = zeros(numel(inRow),1,'single'); inS = zeros(numel(inRow),1,'single');
			[outRow, outCol] = ...
				mexc_TemplateAffineTransform(tScale,rScale,cScale,...
					actualPartRotation,inRow,inCol,inO,inS,numOrient);

			% directly overwrite the corresponding pixels
			% NOTE(review): the drawing loop below is disabled, so only the
			% resize to the part resolution and back takes effect.
			matchedBoundingBox = imresize(matchedBoundingBox,size(ImageMultiResolution{bestPartRes}),'nearest');
			%for p = 1:length(outRow)
			%    x = floor(.5 + outRow(p) + Fx); y = floor(.5 + outCol(p) + Fy);
			%    if x > 0 && x <= size(matchedBoundingBox,1) && y > 0 && y <= size(matchedBoundingBox,2)
			%        matchedBoundingBox(x,y,:) = [1 .5 .3];
			%    end
			%end
			matchedBoundingBox = imresize(matchedBoundingBox,size(ImageMultiResolution{bestRes}),'nearest');
		end
	end
end

%% colour the matched sketch, overlay the bounding box, save and paste it
% Everything here consumes matchedSym / matchedBoundingBox, which are only
% created when showMatchedTemplate is set, so the whole section is guarded
% by that flag (previously the first half ran unconditionally).
if showMatchedTemplate
	% white RGB canvas; pixels whose symbol value exceeds 100 take the
	% colour assigned to this member's cluster
	tmpSizeX = size(matchedSym, 1);
	tmpSizeY = size(matchedSym, 2);
	matchedI = ones(tmpSizeX, tmpSizeY, 3);
	[tmpIndX, tmpIndY] = find(matchedSym(:,:)>100);
	if ~isempty(tmpIndX)
		% the colour does not depend on the pixel, so look it up once
		memberColor = colors(activatedCluster(imgInd(iMember)),:);
		for ii = 1:length(tmpIndX)
			matchedI(tmpIndX(ii), tmpIndY(ii), :) = memberColor;
		end
	end

	matchedSym = matchedI;

	% NOTE(review): the overlay is gated by showPartBoundingBox although the
	% visible drawing code only fills matchedBoundingBox under
	% showObjectBoundingBox - confirm this is the intended flag.
	if showPartBoundingBox
		for y = 1:size(matchedSym,2)
			for x = 1:size(matchedSym,1)
				% any non-zero channel marks a bounding-box pixel
				if sum(abs(matchedBoundingBox(x,y,:))) > 0
					matchedSym(x,y,:) = matchedBoundingBox(x,y,:);
				end
			end
		end
	end

	% fullfile picks the platform's separator (the original hard-coded '\')
	imwrite( matchedSym, fullfile('sketch', sprintf('matched_%s_%d.png',imageName,iMember)) );

	% paste the sketch into newI through the index maps indX{1} / indY{1},
	% skipping targets that fall outside newI
	resizedMatch = imresize(matchedSym, [size(indX{1}, 1), size(indX{1}, 2)],'bilinear');
	for ii = 1:size(indX{1},1)
		for jj = 1:size(indX{1},2)
			tmp1 = indX{1}(ii, jj);
			tmp2 = indY{1}(ii, jj);
			if (tmp1>=1 && tmp1<=size(newI,1) && tmp2>=1 && tmp2<=size(newI,2))
				newI(tmp1,tmp2,:) = resizedMatch(ii,jj,:);
			end
		end
	end

	% merge into the accumulated sketch (pixel-wise minimum) and keep the
	% per-member image
	resizedNewI = imresize(newI, [sizeIx, sizeIy], 'bilinear');
	sketchI = min(sketchI, resizedNewI);
	partI{iMember} = newI;
end



%% write the cropped and morphed patches to disk
% Both patches are min-max stretched to 0..255 before being saved as PNG.
% fullfile picks the platform's separator (the original hard-coded '\').
if doCropBackImage
	patch = uint8( 255 * (patch-min(patch(:)))/(max(patch(:))-min(patch(:))) );
	imwrite( patch, fullfile('sketch', sprintf('cropped_%s_%d.png',imageName,iMember)) );
end

if doMorphBackS1map
	morphedPatch = uint8( 255 * (morphedPatch-min(morphedPatch(:)))/(max(morphedPatch(:))-min(morphedPatch(:))) );
	imwrite( morphedPatch, fullfile('sketch', sprintf('morphed_%s_%d.png',imageName,iMember)) );
end

% elapsed time since the tic issued at the start of detection
disp(['mex-C finding time: ' num2str(toc) ' seconds']);