# function to calculate ARI cluster and ARI noise scores

#' ARI cluster and ARI noise 
#' 
#' Computes Adjusted Rand Index for clusters and
#' for noise for an estimated partition given
#' a true partition.
#' 
#' @param assignV a numberic vector of estimated
#' cluster assignments.
#' @param k0 an integer. Cluster size cut off. 
#' All clusters below or equal to the cut off are considered noise.
#' @param assignT1 a numeric vector of true cluster 
#' assignments. Noise should be assigned to small or singleton clusters.
#' @return a vector of ARI cluster and ARI noise scores (ARI cluster, ARI noise).
#' 
#' @references Marchetti, Y., Zhou, Q. Solution path clustering with adaptive concave penalty. 
#' Electron. J. Statist. 8 (2014), no. 1, 1569--1603.
#' 
#' @examples
#' # simulate a dataset
#' sim <- simclust(K=10, p=20, ncl=400, noise=200, overnk=0, hcube=c(-5, 5), random=TRUE)
#' 
#' # cluster using k-means 
#' fitkm <- kmeans(x=sim$y, centers=10)
#' # cluster using solution path clustering
#' fitspc <- spc(y=sim$y, omega=0.5, out=FALSE)
#' 
#' # compute ARI cluster and ARI noise scores for the k-means partition 
#' # and solution 4 of solution path clustering
#' ari_kmeans <- ARI(assignV=fitkm$cluster, k0=3, assignT1=sim$assignT1)
#' ari_spc <- ARI(assignV=fitspc$assignV[[4]], k0=3, assignT1=sim$assignT1)
#' 
#' @export 
ARI <- function(assignV, k0, assignT1) {
  library(mclust)
  
  assignT2 <- assignT1
  Nk <- sapply(unique(assignT1), function(x) length(which(assignT1 == x)))
  indK <- which(Nk > k0)
  ind <- which(assignT1 %in% indK)
  assignT2[ind] <- 1
  assignT2[-ind] <- 2 
  ncl <- length(which(assignT2 == 1))
  ifelse(ncl == length(assignT2), scatter <- FALSE, scatter <- TRUE)
	assignV2 <- assignV
  indC1 <- sapply(1:length(unique(assignV)), function(x) length(which(assignV == x)))
  indC <- which(indC1 > k0)
  ind1 <- as.vector(unlist(sapply(indC, function(x) which(assignV == x))))
  ind1cl <- ind1[which(ind1 <= ncl)]
  if(length(ind1) != 0) { 
	  assignV2[ ind1 ] <- 1
    assignV2[ -ind1 ] <- 2
    bigcl1 <- sort( indC1, decreasing=TRUE )
    bigcl <- subset( bigcl1, bigcl1 > k0 )
    indC2 <- which( indC1 %in% bigcl )
    ind2 <- which( assignV %in% indC2 )
    ind3n <- subset( ind2, ind2 > ncl )
    if( length( ind3n ) == 0 ) { 
			ind3 <- 1:length(assignT1)
      } else { ind3 <- (1:length(assignT1))[-ind3n] }
		ari_C <- adjustedRandIndex( x=assignV[ ind2 ], y=assignT1[ ind2 ])
		if(scatter) {
		 	ari_N <- adjustedRandIndex(x=assignV2[ ind3 ], y=assignT2[ ind3 ])
		 	if(is.na( ari_N )) { ari_N <- 0 }
			} else {
				ari_N <- 1-length(which(assignV2[ind3] == 2))/(ncl - length(ind3n)) }	
		} else {
      ari_C <- 0; ari_N <- 0
		  }

  return(c(ari_C, ari_N))
} # end function	

		
		