
get_fac_lvls <- \(lst, fac.names = NULL) {

    if (!is.null(attr(lst, "fac.lvls"))) {
        fac.lvls <- attr(lst, "fac.lvls")
    } else if (!is.list(lst)) {
        fac.lvls <- nrow(as.matrix(lst))
    } else {
        pos <- rrapply(lst, f = \(x, .xpos) .xpos, how = "flatten")
        depth <- sapply(pos, length) |> unique()
        stopifnot(length(depth) == 1)
        fac.lvls <- sapply(seq_len(depth), \(k) length(unique(sapply(pos, getElement, k))))
    }

    if (!is.null(attr(lst, "fac.names")) && is.null(fac.names)) {
        fac.names <- attr(lst, "fac.names")
    }
    if (is.null(fac.names)) {
        fac.names <- paste0("F", seq_along(fac.lvls))
    } else {
        stopifnot(length(fac.lvls) == length(fac.names))
    }
    names(fac.lvls) <- fac.names
    fac.lvls
}

get_fac_mat <- \(lst, add.names = FALSE) {
    if (!is.list(lst)) {
        res <- as.matrix(lst)
        if (add.names && is.null(rownames(res))) {
            rownames(res) <- as.character(seq_len(nrow(res)))
        }
    } else {
        res <- do.call(rbind, rrapply(lst, f = as.vector, how = "flatten", options = list(namesep = ".")))
        if (add.names && is.null(rownames(res))) {
            rownames(res) <- rrapply(lst, f = \(x, .xpos) paste0(.xpos, collapse = "."), how = "flatten", options = list(simplify = TRUE))
        }
    }
    res
}

#' @title Test linear relationships between probability vectors in factorial designs
#' @description Perform FDOTT, an optimal transport (OT) based test in factorial designs,
#' to test linear relationships between probability vectors, based on samples from them.
#' @param samples nested list of depth \eqn{D} (representing a \eqn{D}-way layout) containing count vectors.
#' A count vector is a vector of length \eqn{N} that contains the number of times a sample was observed at the respective points.
#' Can also be given as a matrix (row-wise), which is viewed as a one-way layout.
#' @param costm semi-metric cost matrix \eqn{c \in \mathbb{R}^{N \times N}}.
#' @param H0 null hypothesis, see details.
#' @param fac.names names of the \eqn{D} factors. Used for printing. Default `NULL` corresponds to `"F1"` for factor 1, and so on.
#' @param null.mu probability vectors \eqn{\mu} underlying the null distribution used only for `method = "plug-in"`.
#' Must be of the same structure as `samples`.
#' @param num.sim number of samples to draw from the limiting null distribution.
#' @param m.p exponent \eqn{p \in (0, 1)} used only for `method = "bootstrap-m"`.
#' @param is.metric value indicating whether \eqn{c} is a metric cost matrix, see [`is_metric_cost_mat`].
#' @param method the method to use to simulate from the null distribution, see details.
#' @param verbose logical value indicating whether additional information should be printed.
#' @returns A `FDOTT` object containing:
#' \tabular{ll}{
#'  `fac.lvls`     \tab vector of levels of the factors \cr
#'  `mu`           \tab matrix, empirical version \eqn{\hat{\mu}_n} of \eqn{\mu} that is based on `samples` \cr
#'  `n`            \tab vector of sample sizes \eqn{n} \cr
#'  `L`            \tab matrix \eqn{L} for the null hypothesis \eqn{H_0^L} \cr
#'  `p.value`      \tab the \eqn{p}-value \cr
#'  `statistic`    \tab the value of the test statistic \eqn{T^L(\hat{\mu}_n)} \cr
#'  `null.samples` \tab samples drawn from the null distribution \cr
#' }
#' @details Denote with \eqn{\mu} the matrix (row-wise) of the probability vectors (in lexicographical order of the factor combinations)
#' that underlie `samples`. FDOTT deals with null hypotheses of the form
#' \deqn{
#' H^L_0 : \; L\mu = 0\,,
#' }
#' where \eqn{L} is a suitable matrix with row sums all equal to \eqn{0}. The FDOTT statistic is defined as
#' \deqn{
#'  T^L(\hat{\mu}_n) := \frac{\sqrt{\rho_n}}{s} \sum_{m=1}^M \mathrm{OT}^{\pm}_c([L\hat{\mu}_n]_m, 0)\,,
#' }
#' where \eqn{\rho_n} and \eqn{s} are scaling factors, \eqn{[L\mu]_m} is the \eqn{m}-th row-vector of \eqn{L\mu}
#' and \eqn{\mathrm{OT}^{\pm}_c} the extended OT functional, see [`ot_cost_sgn`].
#' The test is based on the asymptotic distribution of \eqn{T^L(\hat{\mu}_n)} under under the null, for more details see Groppe et al. (2025).
#'
#' The form of \eqn{H_0^L} allows for testing hypotheses like interaction effects in classical ANOVA, obtained by formally substituting means by measures.
#' The following values are allowed for `H0`:
#' \itemize{
#' \item `H0 = "*"` (the default). Test all interaction (including main effects) of the factors. A specific interaction or main effect can be tested by
#' including the corresponding indices of the factors in a list, e.g., `H0 = list("*", c(1, 3))` corresponds to the interaction effect between factor 1 and 3.
#' Note that in a one-way layout, `H0 = "*"` reduces to `H0 = "="`.
#' \item `H0 = "|"`. Test all simple factor effects. A specific simple factor effect can be tested by by including the
#' corresponding indices of the factors in a list, e.g., `H0 = list("|", c(1, 3))` corresponds to the simple factor effect of factor 1 and 3 within the
#' other remaining factors.
#' \item `H0 = "="`. Test for treatment effect, i.e., whether all underlying probability vectors are the same. Note that each pairwise comparison can be tested
#' simultaneously via [`FDOTT_HSD`].
#' \item `H0 = L`. Test \eqn{H_0^L} for the directly supplied \eqn{L} matrix. The name of the tested effect (useful for printing)
#' and the scaling \eqn{s} (by default `nrow(L)`) can be supplied by setting the `"effect"` and `"scaling"` attribute of `L`, respectively.
#' \item `H0 = list(...)`. Test a combined null hypothesis. Each element of the list represents a null hypothesis and can be given by one of the options above.
#' This is useful in combination with [`FDOTT_HSD`], which allows to test all the given null hypotheses simultaneously.
#' }
#'
#' To simulate from the limiting null distribution, there are four different methods:
#' \itemize{
#' \item `"plug-in"`: uses the limiting distribution where \eqn{\mu} is substituted by its empirical version (or `null.mu`, when specified).
#' \item `"bootstrap-deriv"`: uses the so-called derivative bootstrap.
#' \item `"bootstrap-m"`: uses \eqn{m}-out-of-\eqn{n} bootstrap with \eqn{m = \lfloor n^p \rfloor}.
#' \item `"permutation"`: uses a permutation approach, only works for `H0 = "="`.
#' }
#' These simulations can be done in parallel via [`future::plan`] and the progress can be shown with [`progressr::with_progress`].
#' @references M. Groppe, L. Niemöller, S. Hundrieser, D. Ventzke, A. Blob, S. Köster and A. Munk (2025). Optimal Transport Based Testing in Factorial Design.
#' arXiv preprint. \doi{10.48550/arXiv.2509.13970}.
#' @example examples/fd.R
#' @seealso [`FDOTT_HSD`]
#' @export
FDOTT <- \(samples, costm, H0 = "*", fac.names = NULL, method = c("plug-in", "bootstrap-deriv", "bootstrap-m", "permutation"), num.sim = 1000,
           null.mu = NULL, m.p = 0.5, is.metric = is_metric_cost_mat(costm, tol.ti = Inf), verbose = FALSE) {

    print_info <- print_info_base("FDOTT", verbose)

    print_info("Checking input")

    method <- match.arg(method)
    samplemat <- get_fac_mat(samples, TRUE)
    check_samplemat(samplemat)
    fac.lvls <- get_fac_lvls(samples, fac.names)
    N <- ncol(samplemat)
    K <- nrow(samplemat)
    check_cost_mat(costm, N, is.metric)
    n <- get_n(samplemat)
    mu <- get_mu(samplemat, n)
    L <- get_Lmat(H0, fac.lvls)
    func.L <- \(x) L %*% x
    Lmu <- func.L(mu)

    print_info("Identified factorial design of size (%s)", paste0(fac.lvls, collapse = ", "))
    print_info("           with N = %d and %d hypotheses of size (%s)", N, length(attr(L, "size")), paste0(attr(L, "size"), collapse = ", "))

    if (method == "plug-in" && isTRUE(null.mu == "pool")) {
        print_info("Pooling samples")
        null.mu <- colSums(samplemat)
        null.mu <- matrix(null.mu / sum(null.mu), K, N, byrow = TRUE)
    }
    if (method != "plug-in" || is.null(null.mu)) {
        null.mu <- mu
    }

    s <- get_s(L)

    print_info("Calculating limit coefficients")
    tmp <- limit_coeffs(n)
    rho <- tmp$rho
    delta <- tmp$delta

    print_info("Calculating test statistic")
    rdiff <- sqrt(rho) * ot_cost_sgn_rowwise_posneg(Lmu, costm) / s
    tstat <- sum(rdiff)

    print_info("Sampling from the limiting distribution for %d times with method %s", num.sim, method)
    if (method == "permutation") {
        stopifnot("method = \"permutation\" is only supported for H0 = \"=\"" = attr(L, "effect") == "=")
        ls.diff <- simulate_limit_FDOTT_null_one_way_perm(samples, func.L, costm, n, rho, num.sim)
        # first permutation = id
        ls.diff <- cbind(rdiff, ls.diff, deparse.level = 0)
    } else {
        ls.diff <- simulate_limit_FDOTT_null_base(null.mu, func.L, costm, n, delta,
                                                  num.sim, m.p, get_gen_G, method = method)
    }
    ls.diff <- sweep(ls.diff, 1, s, "/")
    ls <- apply(ls.diff, 2, sum)

    print_info("Calculating p-value")
    p <- sum(ls >= tstat) / num.sim

    print_info("Returning results")
    list(
        fac.lvls           = fac.lvls,
        mu                 = mu,
        n                  = n,
        null.mu            = null.mu,
        L                  = L,
        costm              = costm,
        p.value            = p,
        statistic          = tstat,
        diff               = rdiff,
        null.samples       = ls,
        null.samples.diff  = ls.diff,
        method             = method
    ) |> structure(class = "FDOTT")
}

#' @export
print.FDOTT <- \(x, ...) {
    lvls <- x$fac.lvls
    cat("\n")
    cat("FDOTT: Optimal Transport Based Test in Factorial Design\n")
    cat("\n")
    catf("data: %d-way layout with factor%s %s of size %s and on ground space of size %d\n",
         length(lvls), if (length(lvls) == 1) "" else "s",
         paste0(names(lvls), collapse = ", "), paste0(lvls, collapse = ", "), nrow(x$costm))
    catf("null hypothesis: %s\n", paste(attr(x$L, "effect"), paste0("[", attr(x$L, "size"), " eqs]"), collapse = " & "))
    catf("p-value: %f\n", x$p.value)
    cat("\n")
    invisible(x)
}
