% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/PredictFastClass.R
\name{PredictFastClass}
\alias{PredictFastClass}
\title{Fast class prediction from peak lists using linear regressions}
\usage{
PredictFastClass(
  peaks,
  mod_peaks,
  Y_mod_peaks,
  moz = "ALL",
  tolerance = 6,
  normalizeFun = TRUE,
  noMatch = 0,
  chunk_size = 2000L,
  ncores = 1L,
  verbose = FALSE
)
}
\arguments{
\item{peaks}{List of MALDIquant::MassPeaks objects to classify (one per spectrum).
Each element must expose \verb{@mass} (numeric m/z) and \verb{@intensity} (numeric) of
the same length. Names/metaData are used to populate the \code{name} column.}

\item{mod_peaks}{Numeric training matrix of dimension n_train x p (rows =
spectra, columns = m/z features) used as regressors per class. Column names
must be m/z values (character) and must include all m/z requested in \code{moz}.}

\item{Y_mod_peaks}{Factor of length n_train giving the class labels for rows of
\code{mod_peaks}.}

\item{moz}{Either "ALL" or a numeric vector of target m/z. If "ALL" (default),
the column names of \code{mod_peaks} are used. Otherwise, the provided m/z are used
(they must all be present among the column names of \code{mod_peaks}).}

\item{tolerance}{Numeric (Da). A target m/z is matched to the nearest peak only
if the absolute difference is <= \code{tolerance}. Default 6.}

\item{normalizeFun}{Logical; if TRUE, per-spectrum max normalization is applied
after matching (i.e., each row of the new matrix is divided by its maximum).
Default TRUE.}

\item{noMatch}{Numeric; intensity value inserted when no peak is matched for a
given target m/z. Default 0.}

\item{chunk_size}{Integer; rows per block when building the new matrix from
\code{peaks} (passed to \code{\link[=build_X_from_peaks_fast]{build_X_from_peaks_fast()}}, if used). Default 2000.}

\item{ncores}{Integer; number of cores to use when building the new matrix from
\code{peaks} (R side). Default 1.}

\item{verbose}{Logical; print progress messages. Default FALSE.}
}
\value{
A data.frame with columns:
\itemize{
\item name: spectrum name (from MassPeaks metaData fullName/file if available).
\item p_not_in_DB: minimum F-test p-value across classes (smaller suggests the
spectrum matches the training database; larger suggests “not in DB”).
\item pred_cat: predicted class (label with smallest AIC).
}
}
\description{
Builds a sample-by-m/z matrix from a list of MALDIquant MassPeaks and predicts
the class of each spectrum by fitting, for each class, a linear regression of
the spectrum’s intensities on the training spectra of that class. The class
minimizing the AIC is selected as the predicted label. In addition, an F-test
p-value is computed for each class model; the minimum across classes is
returned as \code{p_not_in_DB}, where a large value suggests that the spectrum
is not represented in the training database. The peak-to-m/z matching is done in C++ via
\code{\link[=build_X_from_peaks_fast]{build_X_from_peaks_fast()}} for speed.
}
\details{
\itemize{
\item Matrix building: \code{\link[=build_X_from_peaks_fast]{build_X_from_peaks_fast()}} maps each spectrum in \code{peaks}
to the target m/z grid with nearest-within-tolerance matching (C++). If
\code{normalizeFun = TRUE}, each row is divided by its maximum (guarded to avoid
division by zero). Spectra with initially no matches are retried with a
slightly increased tolerance (internal bumping).
\item Alignment to training: columns of the new matrix must align to \code{mod_peaks}.
The function stops if any requested m/z is missing from \code{mod_peaks}.
\item Per-class regression: for each class k, the new spectrum’s intensities
are regressed on the training spectra of class k, i.e. the rows of
\code{mod_peaks} labelled k in \code{Y_mod_peaks}, each used as one regressor
(after removing m/z entries where the new spectrum is non-finite). If the
number of training spectra exceeds the number of non-missing points in the
spectrum, a random subset of these regressors
(size = length(non-missing) - 1) is used to avoid singular fits. Fitting is
done via \code{stats::lm.fit()} for speed.
\item Selection and scores: \code{pred_cat} is the class with smallest AIC across fitted
models. For each class, an F-test p-value is computed from the model summary;
\code{p_not_in_DB} is the minimum across classes (1 if a class model fails).
}
}
\examples{
\dontrun{
# Run the example only when MALDIquant is installed; it is needed below to
# construct the MassPeaks objects passed as 'peaks'.
if (requireNamespace("MALDIquant", quietly = TRUE)) {
  # Fix the RNG seed so the simulated data below are reproducible.
  set.seed(1)
  # Create a small training set (mod_peaks) with 2 classes.
  # p is the number of m/z features; moz holds p evenly spaced values between
  # 1000 and 1500, stored as character because they are used as column names.
  p <- 6
  moz <- as.character(round(seq(1000, 1500, length.out = p), 2))
  # Stack two 5 x p blocks of uniform(0, 1) intensities into a 10 x p matrix
  # (rows = training spectra, columns = m/z). Both blocks come from the same
  # distribution, so the example only illustrates the call and the shape of
  # the result, not a meaningful classification.
  mod_peaks <- rbind(
    matrix(runif(5 * p, 0, 1), nrow = 5, dimnames = list(NULL, moz)),
    matrix(runif(5 * p, 0, 1), nrow = 5, dimnames = list(NULL, moz))
  )
  # Class labels for the rows of mod_peaks: rows 1-5 are "A", rows 6-10 are "B".
  Y_mod <- factor(rep(c("A", "B"), each = 5))

  # Two spectra to classify: generate MassPeaks near moz.
  # mk_peaks() jitters every target m/z with Gaussian noise (mean = shift,
  # sd = 0.2) and draws one uniform intensity in [10, 100] per peak.
  mk_peaks <- function(shift = 0) {
    MALDIquant::createMassPeaks(
      mass = as.numeric(moz) + rnorm(length(moz), shift, 0.2),
      intensity = runif(length(moz), 10, 100)
    )
  }
  # One spectrum shifted slightly upwards and one slightly downwards.
  peaks <- list(mk_peaks(0.1), mk_peaks(-0.1))

  # Classify both spectra against the training set, using every column of
  # mod_peaks as a target m/z (moz = "ALL"), a matching tolerance of 1 and
  # per-spectrum max normalization.
  res <- PredictFastClass(
    peaks = peaks,
    mod_peaks = mod_peaks,
    Y_mod_peaks = Y_mod,
    moz = "ALL",
    tolerance = 1,
    normalizeFun = TRUE
  )
  # Print the result (see the Value section for its columns).
  res
}
}

}
\seealso{
\code{\link[=build_X_from_peaks_fast]{build_X_from_peaks_fast()}},
\code{MALDIquant::createMassPeaks()}, \code{stats::lm.fit()}
}
