#' Add columns to a data frame
#'
#' Add new columns to a data frame by specifying their names and a value to
#' initialize them with.
#'
#' @keywords internal
#' @param df The data frame to extend.
#' @param cols The vector containing new column names.
#' @param value The values stored in the newly added columns. NA by default.
#' @return A data frame containing with the same columns as the `df` argument
#'  as well as the new columns specified in the `cols` argument.
add_columns <- function(df, cols, value = NA) {
    df[cols] <- value
    return(df)
}

#' Convert character-type columns of a data frame to factor-type
#'
#' @keywords internal
#' @param df A data frame.
#' @return The data frame with factor columns instead of char columns.
char_to_fac <- function(df) {
    # Select all the columns that are character type
    char_cols <- df |>
        dplyr::select_if(is.character) |>
        colnames()
    # Convert all those columns to factor type
    for (col in char_cols) {
        df[, col] <- as.factor(df[, col])
    }
    return(df)
}

#' Check validity of similarity matrices
#'
#' Check to see if similarity matrices in a list have the following properties:
#'  1. The maximum value in the entire matrix is 0.5
#'  2. Every value in the diagonal is 0.5
#'
#' @param similarity_matrices A list of similarity matrices
#' @return valid_matrices Boolean indicating if properties are met by all
#'  similarity matrices
#' @export
check_similarity_matrices <- function(similarity_matrices) {
    invalid_mats <- similarity_matrices |>
        lapply(
            function(x) {
                length(unique(diag(x))) != 1 | max(diag(x)) != 0.5
            }
        ) |>
        unlist() |>
        which()
    if (length(invalid_mats) > 0) {
        metasnf_warning(
            length(invalid_mats), "/", length(similarity_matrices),
            " SNF runs yielded irregularly structured similarity matrices: ",
            cli::col_red(invalid_mats)
        )
        return(FALSE)
    } else {
        return(TRUE)
    }
}

#' Return a colour ramp for a given vector
#'
#' Given a numeric vector and min and max colour values, return a colour ramp
#' that assigns a colour to each element in the vector. This function is a
#' wrapper for `circlize::colorRamp2`.'
#'
#' @param data Vector of numeric values.
#' @param min_colour Minimum colour value.
#' @param max_colour Maximum colour value.
#' @return A "function" class object that can build a circlize-style colour
#' ramp.
#' @export
colour_scale <- function(data, min_colour, max_colour) {
    colours <- circlize::colorRamp2(
        c(min(data), max(data)),
        c(min_colour, max_colour)
    )
    return(colours)
}

#' Helper function to remove columns from a data frame
#'
#' @keywords internal
#' @param x A data frame
#' @param cols Vector of column names to be removed
#' @return x without columns in cols
drop_cols <- function(x, cols) {
    x <- x[, !(colnames(x) %in% cols), drop = FALSE]
    return(x)
}

#' Pull complete-data UIDs from a list of data frames
#'
#' This function identifies all observations within a list of data frames that
#' have no missing data across all data frames. This function is useful when
#' constructing data lists of distinct feature sets from the same sample of
#' observations. As `data_list()` strips away observations with any missing
#' data, distinct sets of observations may be generated by building a data
#' list from the same group of observations over different sets of features.
#' Reducing the pool of observations to only those with complete UIDs first
#' will avoid downstream generation of data lists of differing sizes.
#'
#' @param list_of_dfs List of data frames.
#' @param uid Name of column across data frames containing UIDs
#' @return A character vector of the UIDs of observations that have complete
#'  data across the provided list of data frames.
#' @export
#' @examples
#' complete_uids <- get_complete_uids(
#'     list(income, pubertal, anxiety, depress),
#'     uid = "unique_id"
#' )
#' 
#' income <- income[income$"unique_id" %in% complete_uids, ]
#' pubertal <- pubertal[pubertal$"unique_id" %in% complete_uids, ]
#' anxiety <- anxiety[anxiety$"unique_id" %in% complete_uids, ]
#' depress <- depress[depress$"unique_id" %in% complete_uids, ]
#' 
#' input_dl <- data_list(
#'     list(income, "income", "demographics", "ordinal"),
#'     list(pubertal, "pubertal", "demographics", "continuous"),
#'     uid = "unique_id"
#' )
#' 
#' target_dl <- data_list(
#'     list(anxiety, "anxiety", "behaviour", "ordinal"),
#'     list(depress, "depressed", "behaviour", "ordinal"),
#'     uid = "unique_id"
#' )
get_complete_uids <- function(list_of_dfs, uid) {
    merged_df <- merge_df_list(
        list_of_dfs,
        join = "inner",
        uid = uid,
        no_na = TRUE
    ) 
    complete_uids <- merged_df[, uid] |>
        data.frame() |>
        unlist() |>
        as.character()
    return(complete_uids)
}

#' Helper function to drop columns from a data frame by grepl search
#'
#' @keywords internal
#' @param x Data frame to drop columns from.
#' @param pattern Pattern used to match columns to drop.
#' @return x without columns matching pattern.
gexclude <- function(x, pattern) {
    drop_cols <- unlist(
        lapply(pattern, function(p) colnames(x)[grepl(p, colnames(x))])
    )
    keep_cols <- setdiff(colnames(x), drop_cols)
    x <- x[ , keep_cols, drop = FALSE]
    return(x)
}

#' Helper function to pick columns from a data frame by `grepl` search
#'
#' @keywords internal
#' @param x Data frame to select columns from.
#' @param pattern Pattern used to match columns to select.
#' @return x with only columns matching pattern.
gselect <- function(x, pattern) {
    keep_cols <- unlist(
        lapply(pattern, function(p) colnames(x)[grepl(p, colnames(x))])
    )
    x <- x[ , keep_cols, drop = FALSE]
    return(x)
}

#' Merge list of data frames into a single data frame
#'
#' This helper function combines all data frames in a single-level list into a
#' single data frame.
#'
#' @param df_list list of data frames.
#' @param join String indicating if join should be "inner" or "full".
#' @param uid Column name to join on. Default is "uid".
#' @param no_na Whether to remove NA values from the merged data frame.
#' @return Inner join of all data frames in list.
#' @export
#' @examples
#' merge_df_list(list(income, pubertal), uid = "unique_id")
merge_df_list <- function(df_list,
                          join = "inner",
                          uid = "uid",
                          no_na = FALSE) {
    if (join == "inner") {
        merged_df <- df_list |> purrr::reduce(
            dplyr::inner_join,
            by = uid
        )
    } else if (join == "full") {
        merged_df <- df_list |> purrr::reduce(
            dplyr::full_join,
            by = uid
        )
    } else {
        metasnf_error(
            "Invalid join type specified. Options are 'inner' and 'full'."
        )
    }
    if (no_na) {
        merged_df <- stats::na.omit(merged_df)
    }
    return(merged_df)
}

#' Convert columns of a data frame to numeric type (if possible)
#'
#' Converts all columns in a data frame that can be converted to numeric type to
#' numeric type.
#'
#' @keywords internal
#' @param df A data frame.
#' @return The data frame coercible columns converted to type numeric.
numcol_to_numeric <- function(df) {
    df[] <- lapply(df,
        function(x) {
            tryCatch(
                {
                    return(as.numeric(x))
                }, warning = function(cond) {
                    if (cond$"message" == "NAs introduced by coercion") {
                        return(x)
                    }
                }
            )
        }
    )
    return(df)
}

#' Helper function to pick columns from a data frame
#'
#' @keywords internal
#' @param x A data frame
#' @param cols Vector of column names to be picked
#' @return x with only columns in cols
pick_cols <- function(x, cols) {
    x <- x[, colnames(x) %in% cols, drop = FALSE]
    return(x)
}

#' Helper function to pluralize a string
#'
#' @keywords internal
#' @param x A vector of length 1 or greater.
#' @return A string "s" if the length of x is greater than 1, otherwise an
#' empty string.
#' @export
pl <- function(x) {
    if (x == 1) "" else "s"
}

#' Helper resampling function found in ?sample
#'
#' Like sample, but when given a single value x, returns back that single
#'  value instead of a random value from 1 to x.
#'
#' @param x Vector or single value to sample from
#' @param ... Remaining arguments for base::sample function
#' @return Numeric vector result of running base::sample.
#' @export
resample <- function(x, ...) {
    return(x[sample.int(length(x), ...)])
}

#' Generate a complete path and filename to store an similarity matrix
#'
#' @keywords internal
#' @param similarity_matrix_dir Directory to store similarity matrices.
#' @param i Corresponding solution.
#' @return Complete path and filename to store an similarity matrix.
similarity_matrix_path <- function(similarity_matrix_dir, i) {
    path <- paste0(
        similarity_matrix_dir,
        "/",
        gsub("-", "_", Sys.Date()), # Today's date
        "_",
        "similarity_matrix_",
        i,
        ".csv"
    )
    path <- gsub("//", "/", path)
    return(path)
}

#' Adjust the diagonals of a matrix
#'
#' Adjust the diagonals of a matrix to reduce contrast with off-diagonals
#' during plotting.
#'
#' @keywords internal
#' @param matrix Matrix to rescale.
#' @param method Method of rescaling. Can be:
#' * "mean" (replace diagonals with average value of off-diagonals)
#' * "zero" (replace diagonals with 0)
#' * "min" (replace diagonals with min value of off-diagonals)
#' * "max" (replace diagonals with max value of off-diagonals)
#' @return A "matrix" class object with rescaled diagonals.
scale_diagonals <- function(matrix, method = "mean") {
    if (method == "mean") {
        off_diagonals <- matrix[col(matrix) != row(matrix)]
        diag(matrix) <- mean(off_diagonals)
    } else if (method == "zero") {
        diag(matrix) <- 0
    } else if (method == "min") {
        off_diagonals <- matrix[col(matrix) != row(matrix)]
        diag(matrix) <- min(off_diagonals)
    } else if (method == "max") {
        off_diagonals <- matrix[col(matrix) != row(matrix)]
        diag(matrix) <- max(off_diagonals)
    } else if (method != "none") {
        metasnf_error("Invalid scaling method specified.")
    }
    return(matrix)
}

#' Training and testing split
#'
#' Given a vector of uid_id and a threshold, returns a list of which members
#'  should be in the training set and which should be in the testing set. The
#'  function relies on whether or not the absolute value of the Jenkins's
#'  one_at_a_time hash function exceeds the maximum possible value
#'  (2147483647) multiplied by the threshold.
#'
#' @param train_frac The fraction (0 to 1) of observations for training
#' @param uids A character vector of UIDs to be distributed into training and
#'  test sets.
#' @param seed Seed used for Jenkins's one_at_a_time hash function.
#' @return A named list containing the training and testing uid_ids.
#' @export
train_test_assign <- function(train_frac, uids, seed = 42) {
    train_thresh <- 2147483647 * train_frac
    hash <- abs(digest::digest2int(uids, seed = seed))
    train <- uids[hash < train_thresh]
    test <- uids[hash >= train_thresh]
    assigned_obs <- list(train = train, test = test)
    if (length(train) == 0 || length(test) == 0) {
        metasnf_warning("Empty train or test set.")
    }
    return(assigned_obs)
}
