% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/clustMD_UserFunctions.R
\name{clustMD}
\alias{clustMD}
\title{Model Based Clustering for Mixed Data}
\usage{
clustMD(
  X,
  G,
  CnsIndx,
  OrdIndx,
  Nnorms,
  MaxIter,
  model,
  store.params = FALSE,
  scale = FALSE,
  startCL = "hc_mclust",
  autoStop = FALSE,
  ma.band = 50,
  stop.tol = NA
)
}
\arguments{
\item{X}{a data matrix where the variables are ordered so that the 
continuous variables come first, the binary (coded 1 and 2) and
ordinal variables (coded 1, 2, ...) come second and the nominal
variables (coded 1, 2, ...) are in last position.}

\item{G}{the number of mixture components to be fitted.}

\item{CnsIndx}{the number of continuous variables in the data set.}

\item{OrdIndx}{the sum of the number of continuous, binary and ordinal
variables in the data set.}

\item{Nnorms}{the number of Monte Carlo samples to be used for the 
intractable E-step in the presence of nominal data. Irrelevant if
there are no nominal variables.}

\item{MaxIter}{the maximum number of iterations for which the (MC)EM 
algorithm should run.}

\item{model}{a string indicating which clustMD model is to be fitted. This
may be one of: \code{EII, VII, EEI, VEI, EVI, VVI} or \code{BD}.}

\item{store.params}{a logical argument indicating if the parameter estimates
at each iteration should be saved and returned by the clustMD function.}

\item{scale}{a logical argument indicating if the continuous variables
should be standardised.}

\item{startCL}{a string indicating which clustering method should be used to
initialise the (MC)EM algorithm. This may be one of "kmeans" (K means
clustering), "hclust" (hierarchical clustering), "mclust" (finite 
mixture of Gaussian distributions), "hc_mclust" (model-based 
hierarchical clustering) or "random" (random cluster allocation).}

\item{autoStop}{a logical argument indicating whether the (MC)EM algorithm
    should use a stopping criterion to decide if convergence has been 
    reached. Otherwise the algorithm will run for \code{MaxIter} iterations. 

    If only continuous variables are present the algorithm will use Aitken's
    acceleration criterion with tolerance \code{stop.tol}.

    If categorical variables are present, the stopping criterion is based
    on a moving average of the approximated log likelihood values. Let 
    \code{t} denote the current iteration. The average of the 
    \code{ma.band} most recent approximated log likelihood values is 
    compared to the average of another \code{ma.band} iterations with a lag
    of 10 iterations. If this difference is less than the tolerance
    \code{stop.tol}, the algorithm will be said to have converged.}

\item{ma.band}{the number of iterations to be included in the moving average
calculation for the stopping criterion.}

\item{stop.tol}{the tolerance of the (MC)EM stopping criterion.}
}
\value{
An object of class clustMD is returned. The output components are as
    follows:
    \item{model }{The covariance model fitted to the data.}
    \item{G }{The number of clusters fitted to the data.}
    \item{Y }{The observed data matrix.}
    \item{cl }{The cluster to which each observation belongs.}
    \item{tau }{An \code{N x G} matrix of the probabilities of 
        each observation belonging to each cluster.}
    \item{means }{A \code{D x G} matrix of the cluster means, where \code{D}
        is the dimension of the combined observed and latent continuous space.}
    \item{A }{A \code{G x D} matrix containing the diagonal entries of the 
        \code{A} matrix corresponding to each cluster.}
    \item{Lambda }{A \code{G x D} matrix of volume parameters corresponding
        to each observed or latent dimension for each cluster.}
    \item{Sigma }{A \code{D x D x G} array of the covariance matrices for 
        each cluster.}
    \item{BIChat }{The estimated Bayesian information criterion for the 
        model fitted.}
    \item{ICLhat }{The estimated integrated classification likelihood criterion
        for the model fitted.}
    \item{paramlist }{If \code{store.params} is \code{TRUE} then \code{paramlist} is a 
        list of the stored parameter values in the order given above with 
        the saved estimated likelihood values in last position.}
    \item{Varnames }{A character vector of names corresponding to the 
        columns of \code{Y}.}
    \item{Varnames_sht }{A truncated version of \code{Varnames}. Used for
        plotting.}
    \item{likelihood.store }{A vector containing the estimated log 
        likelihood at each iteration.}
}
\description{
A function that fits the clustMD model to a data set consisting of any
combination of continuous, binary, ordinal and nominal variables.
}
\examples{
	data(Byar)
    # Worked example: prepare the Byar data set and fit a clustMD model to it.

    # Transform skewed variables: square root of the tumour size and
    # log of the serum prostatic acid phosphatase.
    Byar$Size.of.primary.tumour <- sqrt(Byar$Size.of.primary.tumour)
    Byar$Serum.prostatic.acid.phosphatase <- log(Byar$Serum.prostatic.acid.phosphatase)

    # Reorder the columns into the layout clustMD expects: continuous
    # variables first, then binary/ordinal variables, then nominal variables.
    # With CnsIndx = 8 and OrdIndx = 11 in the call below, columns 1-8 are
    # treated as continuous, 9-11 as binary/ordinal and 12 as nominal.
    Y <- as.matrix(Byar[, c(1, 2, 5, 6, 8, 9, 10, 11, 3, 4, 12, 7)])

    # Start categorical variables at 1 rather than 0, since clustMD expects
    # categorical levels to be coded 1, 2, ...
    Y[, 9:12] <- Y[, 9:12] + 1

    # Standardise continuous variables
    # NOTE(review): the clustMD call below also passes scale = TRUE, so
    # standardisation is requested twice; this looks redundant - confirm.
    Y[, 1:8] <- scale(Y[, 1:8])

    # Merge categories of EKG variable for efficiency:
    # level 1 stays 1, levels 2-4 become 2 and levels 5-7 become 3.
    Yekg <- rep(NA, nrow(Y))
    Yekg[Y[,12]==1] <- 1
    Yekg[(Y[,12]==2)|(Y[,12]==3)|(Y[,12]==4)] <- 2
    Yekg[(Y[,12]==5)|(Y[,12]==6)|(Y[,12]==7)] <- 3
    Y[, 12] <- Yekg

    \dontrun{
    # Fit a 3-component EVI model, initialised with k-means, using the
    # moving-average stopping rule (ma.band = 30, stop.tol = 0.0001) and at
    # most 500 iterations. Wrapped in dontrun so it is not executed when the
    # examples are run.
    res <- clustMD(X = Y, G = 3, CnsIndx = 8, OrdIndx = 11, Nnorms = 20000,
    MaxIter = 500, model = "EVI", store.params = FALSE, scale = TRUE, 
    startCL = "kmeans", autoStop= TRUE, ma.band=30, stop.tol=0.0001)
    }
}
\references{
McParland, D. and Gormley, I.C. (2016). Model based clustering
    for mixed data: clustMD. Advances in Data Analysis and Classification,
    10 (2):155-169.
}
