function loss = compute_loss_l2(opts, imdb, getBatch, subset, net_cpu, syn_mats)
%COMPUTE_LOSS_L2  Average reconstruction error of a network over a subset.
%   LOSS = COMPUTE_LOSS_L2(OPTS, IMDB, GETBATCH, SUBSET, NET_CPU, SYN_MATS)
%   moves NET_CPU to the GPU, walks SUBSET in chunks of OPTS.batchSize
%   (each chunk split into OPTS.numSubBatches sub-batches interleaved
%   across parallel workers via LABINDEX/NUMLABS), runs VL_NFA on each
%   sub-batch and compares the last layer output with the input images.
%
%   Inputs:
%     opts      - struct; the fields read here are batchSize,
%                 numSubBatches and prefetch.
%     imdb      - image database, passed through untouched to GETBATCH.
%     getBatch  - function handle called as IM = GETBATCH(IMDB, BATCH).
%     subset    - vector of sample indices to evaluate.
%     net_cpu   - network struct accepted by VL_SIMPLENN_MOVE.
%     syn_mats  - cell array, indexed with
%                 (batchNumber-1)*numlabs + labindex.
%
%   Output:
%     loss      - mean over all evaluated sub-batches of
%                 mean(sqrt((output - im).^2)) taken over every element.
%                 NaN when no sub-batch was evaluated (e.g. empty SUBSET).
%
%   NOTE(review): sqrt of an elementwise square is the absolute value for
%   real data, so despite the "l2" in the name this is a mean absolute
%   error -- confirm this is the intended metric.
%   NOTE(review): sub-batch means are averaged with equal weight, so a
%   short trailing sub-batch counts as much as a full one.

net = vl_simplenn_move(net_cpu, 'gpu') ;
loss = 0;
res = [];        % reused between calls so vl_nfa can recycle its buffers
num_batch = 0;   % number of sub-batches that actually contributed to loss

for t = 1:opts.batchSize:numel(subset)
    for s = 1:opts.numSubBatches
        % Samples of this batch are dealt round-robin over labs and
        % sub-batches: this lab/sub-batch pair starts at its own offset
        % and strides by numSubBatches*numlabs.
        batchStart = t + (labindex-1) + (s-1) * numlabs;
        batchEnd = min(t+opts.batchSize-1, numel(subset)) ;
        batch = subset(batchStart : opts.numSubBatches*numlabs : batchEnd) ;

        % A short trailing batch (or a high labindex) can leave this
        % sub-batch with no samples. Evaluating it would feed an empty
        % array to the network and mean([]) is NaN, which would poison
        % the accumulated loss, so skip it entirely.
        if isempty(batch)
            continue ;
        end

        im = getBatch(imdb, batch) ;
        im = gpuArray(im);

        if opts.prefetch
            % Issue a getBatch call for the sub-batch that will be needed
            % next; its return value is deliberately ignored.
            if s == opts.numSubBatches
                batchStart = t + (labindex-1) + opts.batchSize;
                batchEnd = min(t + 2*opts.batchSize-1, numel(subset));
            else
                batchStart = batchStart + numlabs;
            end
            nextBatch = subset(batchStart : opts.numSubBatches*numlabs : batchEnd);
            getBatch(imdb, nextBatch);
        end

        % One syn_mats entry per (batch, lab) pair.
        % NOTE(review): the index does not depend on s, so every
        % sub-batch of a batch shares the same matrix -- confirm this is
        % intended when opts.numSubBatches > 1.
        cell_idx = (ceil(t / opts.batchSize)-1)*numlabs + labindex;
        syn_mat = gpuArray(syn_mats{cell_idx});

        res = vl_nfa(net, syn_mat, im, res, ...
            'conserveMemory', 1, ...
            'cudnn', 1) ;

        % Mean elementwise deviation between the last layer output and
        % the input images, brought back to host memory.
        loss = loss + gather( mean(reshape(sqrt((res(end).x - im).^2), [], 1)));
        num_batch = num_batch + 1;
    end
end

% 0/0 yields NaN when nothing was evaluated, signalling "no data".
loss = loss / num_batch;
end