#' Penalty weights
#' 
#' Computes penalty weights based on the minimax
#' concave penalty. Internal function for spc.
#' @param theta a numeric matrix of cluster center 
#' parameters.
#' @param dt a numeric matrix of Eucledian distances between
#' cluster centers in theta.
#' @param cutw a concativy parameter delta.
#' @param lam a regularization parameter lambda.
#' @return a matrix of penalty weights.
#' @export
penweights <- function(theta, dt, cutw, lam) {
  temp <- dt
  penw <- 1 - temp/( lam*cutw )
  penw[ which( penw < 0 ) ] <- 0
  diag( penw ) <- 0
  return(penw)
  }	


#' Solution path clustering with adaptive concave penalty
#' 
#' Performs unsupervised clustering of a given dataset
#' and provides a path of solutions, each solution 
#' consisting of an estimated cluster assignments
#' and the number of clusters. Recommended for small
#' datasets.
#' 
#' @param y a numeric matrix of data with rows 
#' representing data points and columns representing
#' dimensions.
#' @param omega an approximate proportion of 
#' nearest neighbors that are expected to merge 
#' in the initial solution. The value is in (0, 1).
#' @param phi a constant in (0, 1). Defaults to 0.5. 
#' Represents minimization step size parameter, which is
#' an approximation of proportion of a distance between 
#' nearest neighbors.
#' @param tau a value in (0, omega). Defaults to 0.9*omega. Is
#' used to compute the value of lambda, the regularization
#' paramter, for the initial solution.
#' @param alpha a constant in (0, 1). Defaults to 0.9. Represents 
#' a proportion of the value of the concavity parameter delta for
#' the next iteration if a bias-variance criterion is met. 
#' @param H a number of values of lambda to be generated. Defaults
#' to min(20, p), where p is the number of dimensions. 
#' @param xi a small constant used in calculating a stopping
#' criterion for each iteration. Defaults to 1e-4.  
#' @param k0 an integer for a cluster cut off size. Defaults to 3.
#' @param out logical. Should intermediate summary output be printed.
#' @return \item{assignV}{ a list of cluster assignments for each solution.}
#' \item{path}{ a matrix representing a summary solution path. Each 
#' column represents delta, lambda, total number of clusters, clusters after 
#' some cut off size, number of iterations, time in seconds, respectively.}  
#' \item{tuniV}{ a list of matrices with cluster centers for each solution.}
#' 
#' @references Marchetti, Y., Zhou, Q. Solution path clustering with adaptive concave penalty. 
#' Electron. J. Statist. 8 (2014), no. 1, 1569--1603.
#' 
#' @examples
#' 
#' # simulate a small dataset with 10 clusters and noise
#' sim <- simclust(K=10, p=20, ncl=400, noise=200, overnk=0, hcube=c(-5, 5), random=8432)
#' 
#' # cluster using solution path clustering
#' fitspc <- spc(y=sim$y, omega=0.5, out=FALSE)
#' fitspc$path
#' 
#' # select a solution from the solution path
#' ss <- solsel(y=sim$y, assignV=fitspc$assignV, a=0.05)
#' ARI(assignV=fitspc$assignV[[ss$bestsol]], k0=3, assignT1=sim$assignT1)
#' 
#' # try a different parameter omega
#' fitspc <- spc(y=sim$y, omega=0.1, out=FALSE)
#' fitspc$path
#' 
#' @export
spc <- function(y, omega, phi=0.5, tau="default", alpha=0.9, H="default", xi=1e-4, k0=3, out=TRUE) {
  library(fields)  
  
  n <- nrow( y )
  p <- ncol( y )
  theta <- matrix( y, n, p )
  if(tau == "default") {
    tau <- 0.9*omega
  } else { tau <- tau }
  if(tau >= omega) {
    print("tau should be less than omega")
    break
    }
  if(H == "default") {
    H <- max(p, 20)
  } else { H <- H }

  # the convergence criterion depends on the variance
  sc <- mean( sqrt( apply( y, 2, var ) ) )
  eps <- sc*sqrt( p )*xi

  tuniV <- vector( "list", 1 )
  assignV <- vector( "list", 1 )
  itvec <- c()
  cutwvec <- c()
  lamvec <- c()
  runvec <- c()
  kestvec <- c()

  tuni <- unique( theta ) # initialize cluster centers
  assign <- 1:nrow( y ) # initialize assignment
  distnn <- rdist( y ) 
  diag( distnn ) <- NA
  nn <- apply( distnn, 2, function(x) min( x, na.rm=TRUE ) )
  kest <- length(unique(assign))
  dymax <- max(distnn, na.rm=TRUE)
  Qomega <- quantile(nn, probs=omega)
  sortnn <- sort(nn)
  Qtau <- quantile(nn, probs=tau)

  lam_lb <- ( 2*phi*Qomega*Qtau ) / ( ( 1 - phi )*(Qomega - Qtau) )
  deltaseq <- Qomega/lam_lb
  xx <- 1
  cond_bias <- FALSE

  while( kest > 1 ) {
    if( cond_bias ) {
      hh <- ( lam^2*cutw )/2
	    xx <- xx + 1
	    deltaseq <- c( deltaseq, cutw*alpha )
	    cutw <- deltaseq[ xx ]
	    lam_lb <- sqrt( ( 2*hh )/cutw )
	    }
		
	  cutw <- deltaseq[ xx ]
	  tempdist <- rdist( tuni )
	  sortT1 <- sort( unique( as.vector( tempdist ) ) )
	  lam_ub <- dymax + dymax/deltaseq[ xx ]
		
	  if( length( sortT1 ) > 1 ) { 
	    begi <- which( sortT1/cutw >= lam_lb )[2] 
	    if( xx == 1 ) { 
        begi <- which( sortT1/cutw <= lam_lb )[ length( which( sortT1/cutw <= lam_lb ) ) ] } 
	    beg <- ( sortT1/cutw )[ begi ]
	    end <- lam_ub
	    if( is.na( begi ) ) {
		    beg <- lam_lb
		    end <- lam_ub
		  }
	    lamseq <- exp( seq( from=log( beg ), to=log( end ), length.out=H ) )
	    } else {
	      if( length(sortT1) == 1 ) {
		      lamseq <- seq( from=lam_lb, to=lam_ub, length.out=H )
		    } else { lamseq <- lamseq }
	    }

	  tuniV[[xx]] <- vector( "list", 1 )
	  assignV[[xx]] <- vector( "list", 1 )
		
	  for( yy in 1:length( lamseq ) ) {
	    ptm <- proc.time()[3]
	    lam <- lamseq[ yy ]
	    tempdist <- rdist( tuni )
	    diag( tempdist ) <- NA
	    sortT2 <- sort( unique( as.vector( tempdist ) ) )
	  
	    # skip to next lambda if no data pairs are below the threshold
	    len.dlprod2 <- length( which( sortT2 <= cutw*lam ) )
	    if( len.dlprod2 == 0 ) next
	   
	    kest_t <- kest
	    iter <- 1
	    tconv <- 10
	    tcvec <- c()
	    diag( tempdist ) <- 0
	  
	    # initial majorization step calculates the initial weights
	    penw <- penweights(theta=tuni, dt=tempdist, cutw=cutw, lam=lam )
		
	    while( tconv > eps )	{
	      theta_t <- theta
		    nkvec <- sapply( 1:nrow( tuni ), function(x) length( which( assign == x ) ) )
		
		    # minimization step updates each cluster center using the weight
	    	# e.g. cycles over a block of coordinates consisting of all the centers
		    for( k in 1:nrow( tuni ) ) {
		      tunik <- tuni[ k, ]
		      gind <- which( assign == k )
		      gdist <- rdist( tuni, t( as.matrix( tunik ) ) )
		      h <- which( gdist == 0 )
	
		      if( length( h ) > 0 ) { next }
		      Nk <- nkvec[ k ]
		      yg <- colSums( matrix( y[ gind , ], nrow=Nk ) )
		      nlvec <- nkvec[ -k ]
		      penwk <- penw[ k, ]
		      wgl <- ( penwk[-k]*lam*Nk*nlvec ) / ( 2*gdist[ -k ] )
		      wthetagl <- wgl*tuni[ -k, 1:p ]
		      if( is.null( nrow( wthetagl ) ) ) { 
		  	    sumwthetag <- wthetagl 
		        } else { sumwthetag <- colSums( wthetagl ) }
		      sumwg <- sum( wgl )
		      majg1 <- ( yg + sumwthetag )
		      majg2 <- ( Nk + sumwg )
		      majg <- majg1 / majg2
	
		      tuni[ k, ] <- majg
		      Ttheta <- t( theta )
		      Ttheta[ , gind ] <- majg
		      theta <- t( Ttheta )
		      }
		  
		    # set centers that are arbitrarily close to each other to
		    # their weighted mean
		    dt <- rdist( tuni )
		    diag( dt ) <- NA
		    h <- which( dt < eps )
		    diag( dt ) <- 0
		    if( length( h ) > 0 ) {
		      dt <- as.dist( dt )
		      hct <- hclust( dt, method="single" )
		      indeps <- cutree( hct, h=eps )
		      for( kk in unique( indeps ) ) {
		     	  kk1 <- which( indeps == kk )
		  	    if( length( kk1 ) > 1 ) {
		  	      tmean <- apply( tuni[ kk1 , ], 2, mean )
		  	      indy <- unlist( sapply( kk1, function(x) which( assign == x )) )
		  	      theta[ indy, ] <- rep( tmean, each=length( indy ) )
		  	      assign[ indy ] <- kk
		  	      } else { 
		  	  	    indy <- unlist( sapply( kk1, function(x) which( assign == x )) )
		  	  	    assign[ indy ] <- kk 
		  	  	    }
		  	  	}
		  	  	
		    tuni <- unique( theta )
		    dt <- rdist( tuni )	
		    }
		  
		  # majorization updates the weights for the next iteration/cycle
		  penw <- penweights( theta=tuni, dt=dt, cutw=cutw, lam=lam )
		
	  	# stopping criterion depends on the maximum distance between the centers from
		  # the previous and current iterations
		  tconv <- max( apply( theta - theta_t, 1, function(x) sqrt( sum( ( x )^2 ) ) ) )
		
		  ### stop after 50 iterations if not terminated by tconv
		  cond_decr <- FALSE
		  if( iter >= 50 ) { 
		    cond_decr <- TRUE 
		    if( cond_decr ) break
		    }
		  
		  iter <- iter + 1
		
      } # stop solution when reaches stopping criterion or 50 iterations
	
	  itvec <- c(itvec, iter)
	  tuniV[[ xx ]][[ yy ]] <- tuni
	  assignV[[ xx ]][[ yy ]] <- as.vector( assign )
	  kest <- length(unique(as.vector(assign)))
	  kestvec <- c(kestvec, kest)
	  cutwvec <- c(cutwvec, cutw)
	  lamvec <- c(lamvec, lam)
	  runvec <- c(runvec, proc.time()[3] - ptm)
	
	  if( out ) {
	    print( c( cutw, lam, length(unique(as.vector(assign))), proc.time()[3] - ptm))
	    }
	  
	  cutw_t <- cutw

	  # compute bias-variance ratio to determine whether any centers
	  # move out of the range of the data points in that cluster
	  ybias <- matrix( NA, ncol=3, nrow=nrow(tuni) )
	  yvar <- matrix( NA, ncol=1, nrow=nrow(tuni) )
	  cond_mean <- c()
	  for( g in 1:nrow(tuni) ) {
	    ymeani <- which( assign == g )
	    if( length(ymeani) > 1 ) {
	  	  ymean <- apply( y[ ymeani , ], 2, mean )
	  	  temp <- t( apply( y[ ymeani, ], 1, function(x) ( x - ymean )^2 ) )
		    yvar[ g, ] <- mean( sqrt( rowSums( temp ) ) )
		    ybias[ g , 1 ] <- sqrt( sum( ( ymean - tuni[ g, ] )^2 ) )
		    ybias[ g , 3 ] <- 1
		    } else {
		      ymean <- y[ ymeani, ] 
		      yvar[ g,  ] <- 0.5*( nn[ ymeani ] )
		      ybias[ g , 1 ] <- sqrt( sum( ( ymean - tuni[ g, ] )^2 ) )
		      ybias[ g , 3 ] <- 0
		      }
	    cond_temp <- (tuni[ g, ] - ymean) < 1e-6
	    cond_temp <- which(cond_temp == FALSE)
	    if(length(cond_temp) > 0) cond_mean <- c(cond_mean, TRUE)
	    }
	
	  # if at least one center is beyond the range, then 
    # set the condition to compute new delta for the next iteration
	  ybias[ , 2 ] <- ybias[ , 1 ] / yvar
	  ybiasi <- which( ybias[ , 2 ] > 1 )
 	  if( length( ybiasi ) > 0 ) {
 	    cond_bias <- TRUE
 	    } else { 
 	    	cond_bias <- FALSE
 	  	  }
	
	  if( cond_bias ) { break }
	  if( kest <= 1 ) { break }
	
    } # stop solutions for all lambdas
    
  } # when all data points in one cluster end

  # clean the list of cluster assignments and centers
  nullV <- sapply( tuniV, function(x) sapply( x, function(y) length(y) ), simplify=FALSE )
  nullVi <- sapply( nullV, function(x) which( x == 0 ), simplify=FALSE )
  for( x in 1:length( tuniV ) ) {
  	if( length( nullVi[[ x ]] ) > 0 ) {
  	  tuniV[[ x ]] <- tuniV[[ x ]][ -nullVi[[ x ]] ]
  	  assignV[[ x ]] <- assignV[[ x ]][ -nullVi[[ x ]] ]
  	  }
  }
  
  # number of clusters with a certain size
  # cut off k0
  Kclust <- c()
  assignV <- unlist(assignV, recursive=FALSE)
  for(j in 1:length(assignV)) {
    NK <- sapply(unique(assignV[[j]]), function(x) length(which(assignV[[j]] == x)))
    indK <- which(NK > k0)
    Kclust <- c(Kclust, length(indK))
    }
  
  # put together a summary solution path
  path <- cbind(cutwvec, lamvec, kestvec, Kclust, itvec, runvec)
  colnames(path) <- c("delta", "lambda", "K total", "K clust", "# iter", "time/sec")
  tuniV <- unlist(tuniV, recursive=FALSE)
  
  # the output consists of the assignment vector assignV, cluster centers tuniV
  # and a summary solution path
  return(list(assignV=assignV, path=path, tuniV=tuniV))	

} # end solpath function








