% Generated by roxygen2 (4.0.2): do not edit by hand
\name{isspc}
\alias{isspc}
\title{Iterative subsampling in solution path clustering}
\usage{
isspc(y, omega1, a, Beta, eta, trimprop, center = "mean", omega2 = 0.1,
  k0 = 3, phi = 0.5, tau = "default", alpha = 0.9, H = "default",
  xi = 1e-04, out = FALSE, rand = TRUE)
}
\arguments{
\item{y}{a numeric matrix of data.}

\item{omega1}{a value in (0, 1). An approximate proportion
of nearest neighbors expected to merge in the initial solution
for the first recursion of the iterative subsampling
solution path clustering.}

\item{a}{a scalar in nu=a*sqrt(n), where n is the size of the data
or nrow(y) and nu is subsample size. Recommended to be in [1, 10].}

\item{Beta}{a cut off for false discovery rate (FDR) that is
greater than 0. Recommended to be in (0.01, 0.1).}

\item{eta}{an integer in (1, p), where p is the number of
dimensions of the data. Represents the number of
dimensions with the false discovery rate smaller than Beta.}

\item{trimprop}{a proportion of a cluster size to be trimmed for
computing a trimmed mean and variance and subsequently likelihood.
In general, higher proportions result in a larger number of
tight clusters. If trimprop = 0, no trimming is performed.
Recommended not to exceed 0.25.}

\item{center}{"mean" or "median". Defaults to "mean".
If "median", no trimming is done.}

\item{omega2}{same as omega1, but used in all recursions greater
than 1. Defaults to 0.1.}

\item{k0}{an integer representing cluster size cut off. Defaults to 3.}

\item{phi}{a constant in (0, 1). Defaults to 0.5.
Represents minimization step size parameter, which is
an approximation of proportion of a distance between
nearest neighbors.}

\item{tau}{a value in (0, omega). Defaults to 0.9*omega. Is
used to compute the value of lambda, the regularization
parameter, for the initial solution.}

\item{alpha}{a constant in (0, 1). Defaults to 0.9. Represents
a proportion of the value of the concavity parameter delta for
the next iteration if a bias-variance criterion is met.}

\item{H}{a number of values of lambda to be generated. Defaults
to min(20, p), where p is the number of dimensions.}

\item{xi}{a small constant used in calculating a stopping
criterion for each iteration. Defaults to 1e-4.}

\item{out}{logical. Should intermediate summary output be printed? Defaults to FALSE.}

\item{rand}{TRUE or an integer seed number. If TRUE (the default), the seed
for subsampling is generated randomly. If a seed number is provided,
a vector of seeds is randomly generated in advance to be used for the iterations.}
}
\value{
\item{classall}{ a numeric vector of cluster assignments.}
\item{centers}{ a numeric matrix of cluster centers.}
\item{Kest}{ an estimated number of clusters.}
\item{clust}{ the number of clustered data points identified.}
\item{noise}{ the number of noisy data points identified.}
\item{iter}{ the number of recursions or iterations of the iterative
subsampling procedure.}
\item{randseed}{ a vector of integers for random seeds used for
generating subsamples. The length is equal to the number of
iterations.}
}
\description{
Performs clustering and estimates the number of
clusters automatically based on recursions of
clustering of a subsample of the full dataset
and sequentially assigning the remaining data
to the identified clusters. Recommended for large
datasets.
}
\examples{
# simulate a clustered dataset with 50\% noise
K <- 10
p <- 20
ncl <- 5000
noise <- 5000
sim <- simclust(K=K, p=p, ncl=ncl, noise=noise, overnk=0, hcube=c(-5, 5), random=TRUE)

# cluster with iterative subsampling solution path clustering
fit <- isspc(y=sim$y, omega1=0.5, a=2, Beta=0.01, eta=10, trimprop=0, rand=6834)
ARI(assignV=fit$classall, k0=3, assignT1=sim$assignT1)
}
\references{
Marchetti, Y., Zhou, Q. Iterative Subsampling in
Solution Path Clustering of Noisy Big Data. arXiv preprint arXiv:1412.1559.
}

