% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/hcr_resampling.R
\name{hcr_resampling}
\alias{hcr_resampling}
\title{Heterogeneity-constrained random resampling (HCR)}
\usage{
hcr_resampling(
  data_wide,
  transform = c("none", "sqrt", "log1p", "binary"),
  score_dist = "bray",
  beta_dist = c("bray", "jaccard"),
  adaptive_n = TRUE,
  n_plots = NA,
  min_plots = 10,
  max_plots = 100,
  min_stratum_n = 10,
  trials = 1000,
  group_vec = NULL,
  group_limits = NULL,
  write_csv = NULL,
  progress = interactive(),
  seed = NULL
)
}
\arguments{
\item{data_wide}{A data-frame-like object with the following column contents:
\itemize{
  \item column 1: sample ids
  \item column 2: strata
  \item columns 3...n: species.
}}

\item{transform}{One of \code{c("none","sqrt","log1p","binary")}. If "binary", values become 0/1 and
\code{vegan::vegdist(binary = TRUE)} is used.}

\item{score_dist}{Dissimilarity method for trial scoring; any method accepted by
\code{vegan::vegdist} (e.g., "bray", "jaccard", "hellinger", "euclidean", "canberra",
"gower", "kulczynski", "morisita", "horn", "mountford", "raup", "binomial",
"chao", "cao", \dots).}

\item{beta_dist}{One of \code{c("bray","jaccard")} for per-stratum mean dissimilarity used to calculate the 
adaptive number of plots.
With \code{transform="binary"}, "bray" equals Sørensen.}

\item{adaptive_n}{Logical. If TRUE, adapt the number of plots per stratum from
\code{beta_mean * max_plots} bounded to \code{[min_plots, max_plots]}; if FALSE, use fixed \code{n_plots}.}

\item{n_plots}{Fixed number of plots per stratum when \code{adaptive_n=FALSE}. If \code{NA},
defaults to \code{max_plots} (capped at stratum size).}

\item{min_plots, max_plots}{Global default minimum and maximum number of plots per stratum.}

\item{min_stratum_n}{Minimum stratum size below which the whole stratum is selected (no resampling).}

\item{trials}{Number of random trials per stratum (default 1000).}

\item{group_vec}{Optional vector (length \code{nrow(data_wide)}) assigning each sample to a higher-level
group (e.g., country, region). Used only if \code{adaptive_n=TRUE}.}

\item{group_limits}{Optional \code{data.frame} with group-specific limits. The first column
must contain group names; the data frame must also contain numeric columns named
\code{"min_plots"} and \code{"max_plots"}. Other columns are ignored.}

\item{write_csv}{Optional file path to write a CSV with columns \code{sample_id, selected}. If \code{NULL}, no file is written.}

\item{progress}{Show a text progress bar (default: \code{interactive()}).}

\item{seed}{Optional integer seed for reproducibility of random subset trials.}
}
\value{
A \code{data.frame} with \code{sample_id} and \code{selected} (0/1).
  Attributes: \code{selected_rows} (logical) and \code{params}.
}
\description{
Performs heterogeneity-constrained random (HCR) resampling (Lengyel, Chytrý & Tichý, 2011) of community data.
Within each stratum (e.g., grid cell), many random subsets of plots are evaluated and the subset with
the highest mean dissimilarity and the lowest variance of dissimilarities is retained. Optionally, the
number of plots per stratum is adapted from the stratum’s mean pairwise dissimilarity (\eqn{\beta}-diversity).
}
\section{Details}{

The algorithm follows Lengyel, Chytrý & Tichý (2011) and is based on the JUICE implementation (Tichý, 2002).
For speed, it precomputes per-stratum distance matrices (once) and reuses them across trials, which
enables large numbers of trials (default \code{trials = 1000}). 

Within each stratum, candidate subsets are scored using \code{score_dist}, favouring high mean dissimilarity and low variance of dissimilarities.

If \code{adaptive_n = TRUE} (default), the target number of plots per stratum is computed as the product of the
stratum's mean pairwise dissimilarity (\eqn{\beta}-diversity, measured with \code{beta_dist}) and the maximum number
of plots, i.e. \code{beta_mean * max_plots} (Wiser & de Cáceres, 2013). The result is then
bounded to \code{[min_plots, max_plots]} and to the stratum size.

Additionally, group-specific limits for minimum and maximum numbers of plots per stratum can be supplied via
\code{group_vec} and \code{group_limits}. Each sample is assigned to a higher-level group 
(e.g., country or region), and the minimum and maximum number of plots are defined per group. 
This allows, for example, larger plot limits to be set for larger countries or regions.
}

\references{
Lengyel, A., Chytrý, M., & Tichý, L. (2011). Heterogeneity-constrained random resampling of phytosociological databases.
\emph{Journal of Vegetation Science}, \strong{22(1)}, 175–183. \doi{10.1111/j.1654-1103.2010.01225.x}

Tichý, L. (2002). JUICE, software for vegetation classification.  \emph{Journal of Vegetation Science}, \strong{13(3)}, 451. 
\doi{10.1658/1100-9233(2002)013[0451:JSFVC]2.0.CO;2}

Wiser, S. K., & de Cáceres, M. (2013). Updating vegetation classifications: an example with New Zealand's woody vegetation. 
\emph{Journal of Vegetation Science}, \strong{24(1)}, 80–93.  \doi{10.1111/j.1654-1103.2012.01450.x}
}
\author{
Friedemann von Lampe
}
