############################################### Simple functions written in R
## get levels for each factor
#' @noRd
get_level <- function(D, p) {
  lapply(1:p, function(i) sort(unique(D[[i]])))
}

## transfer all the columns of D into numeric vectors
#' @noRd
check_D <- function(D) {
  D.colname <- colnames(D)
  D <- apply(D, 2, as.numeric)
  colnames(D) <- D.colname
  D <- data.frame(unique(D))
  return(D)
}

## scale contrasts
#' @noRd
contr_scale <- function(x, level_num) {
  scale_factors <- 1 / sqrt(colSums(x^2) / level_num)
  x_scaled <- sweep(x, 2, scale_factors, FUN = "*")
  
  return(x_scaled)
}

## model matrix and corresponding information
#' @noRd
model_matrix <- function(D, quali_id = NULL, quanti_id = NULL,
                         quali_sum_idx = NULL,
                         user_def_coding = NULL, user_def_coding_idx = NULL,
                         model_type = 1) {
  # Preprocessing
  initial_colnames <- colnames(D)
  D <- check_D(D)
  np <- dim(D)
  n <- np[1]
  p <- np[2]
  uni_level <- get_level(D, p)
  mi <- lengths(uni_level)
  two_level_id <- which(mi == 2)
  
  # Error handling
  if(!is.null(quali_id)) {
    if ( any(quali_id <= 0 | quali_id > p) ) {stop("Incorrect range of quali_id")}
  }
  if(!is.null(quanti_id)) {
    if ( any(quanti_id <= 0 | quanti_id > p) ) {stop("Incorrect range of quanti_id")}
  }
  quali_helmert_idx <- setdiff(quali_id, unlist(c(quali_sum_idx, user_def_coding_idx)))
  
  # Main effects number
  me_num <- mi-1
  me_num[quanti_id] <- ifelse(me_num[quanti_id] > 2, 2, me_num[quanti_id])
  
  # Assign contrast to factors
  if(!is.null(quali_id)) {
    for(i in quali_id){
      D[,i] <- as.factor(D[,i])
      if(i %in% quali_helmert_idx) { # helmert coding
        contrasts(D[,i]) <- contr.helmert(levels(D[,i]))
      } else if(i %in% quali_sum_idx) { # sum coding
        contrasts(D[,i]) <- contr.sum(levels(D[,i]))
      } else {
        a <- which(sapply(user_def_coding_idx, function(x) {i %in% x} ))
        contrasts(D[,i]) <- user_def_coding[[a]]
      }
      contrasts(D[,i]) <- contr_scale(contrasts(D[,i]), mi[i])
      colnames(contrasts(D[,i])) <- paste0(".",1:(mi[i]-1))
    }
  }
  
  # Get contrasts for each factor
  contrast <- lapply(1:p, function(i) {
    if(i %in% two_level_id) {
      a <- matrix(c(-1,1), nrow = 2)
    } else if(i %in% quali_id) {
      a <- contrasts(D[,i])
    } else {
      a <- poly(uni_level[[i]], mi[i]-1)
      a <- contr_scale(a, mi[i])
    }
    return(a)
  })
  
  quanti_D_norm <- rep(list(NA), p)
  if(!is.null(quanti_id)) {
    for(i in quanti_id){
      D[,i] <- predict(contrast[[i]], D[,i])
      quanti_D_norm[[i]] <- diag(t(D[,i])%*%D[,i])
      D[,i] <- contr_scale(D[,i], n)
      D[,i] <- D[,i][,1:me_num[i]]
    }
  }
  
  U_j_list <- lapply(1:p, function(i) {
    a <- contrast[[i]]
    a <- cbind(1, a)
    return(a)
  })

  if(model_type == 2) { # full quadratic model
    D1 <- D
    if(!is.null(quanti_id)) {
      for(i in quanti_id){
        D1[,i] <- D[,i][,1] # linear effect
      }
    }
    U1 <- model.matrix(~.^2, D1)
    D2 <- data.frame(matrix(nrow = n, ncol = 0))
    if(!is.null(quanti_id)) {
      for(i in quanti_id){
        D2 <- cbind(D2, D[,i][,2])
        colnames(D2)[ncol(D2)] <- paste0(initial_colnames[i], ":", initial_colnames[i])
      }
    }
    U2 <- model.matrix(~., D2)
    U2 <- U2[,-1]
    colnames(U2) <- gsub("`", "", colnames(U2))
    U <- cbind(U1, U2)
    
  } else if(model_type == 3) { # main effects model
    if(!is.null(quanti_id)) {
      for(i in quanti_id){
        colnames(D[,i]) <- c(".1", paste0(".1", ":", paste0(initial_colnames[i], ".1")))
      }
    }
    U <- model.matrix(~., D)
  } else {
    if(!is.null(quanti_id)) {
      for(i in quanti_id){
        colnames(D[,i]) <- c(".1", ".2")
      }
    }
    U <- model.matrix(~.^2, D)
  }
  U <- U[,-1]
  
  result <- list(D = D, n = n, p = p, uni_level = uni_level,
                 mi = mi, two_level_id = two_level_id,
                 quali_id = quali_id, quanti_id = quanti_id,
                 me_num = me_num, 
                 initial_colnames = initial_colnames,
                 contrast = contrast,
                 U_j_list = U_j_list, U = U, quanti_D_norm = quanti_D_norm)
  return(result)
}


## calculate the distance between two points
#' @noRd
h_dist <- function(x, two_level, qualitative) {
  # x: vector
  # two_level: TRUE/FALSE
  # qualitative: TRUE/FALSE
  
  if(two_level) {
    h <- as.matrix(dist(x))
    h <- ifelse(h != 0, 1, 0)
    return(h)
  } else if(qualitative) {
    m <- model.matrix(~., data.frame(x))
    m <- m[,-1]
    h <- lapply(1:ncol(m), function(i) {
      a <- as.matrix(dist(m[,i]))
      a <- ifelse(a != 0, 1, 0)
      return(a)
    })
    return(h)
  } else {
    h <- as.matrix(dist(x))
    return(h)
  }
}


## weak heredity
#' @noRd
gweak <- function(U) {
  effects.name <- colnames(U)
  # effects id
  me.idx <- which(!stringr::str_detect(effects.name, ":"))
  hoe.idx <- which(stringr::str_detect(effects.name, ":"))
  # effects name
  me.names <- effects.name[me.idx]
  hoe.names <- stringr::str_split(colnames(U)[hoe.idx], ":")
  # effects num
  m.eff.num <- length(me.idx)
  h.eff.num <- length(hoe.idx)
  mat = mat.or.vec(m.eff.num, h.eff.num)
  if(h.eff.num != 0) {
    for(i in 1:h.eff.num){
      mat[,i] <- as.numeric(me.names %in% hoe.names[[i]])
    }
  }
  return(cbind(-1,diag(m.eff.num+h.eff.num),
               rbind(mat,-diag(h.eff.num))))
}

## strong heredity
#' @noRd
gstrong <- function(U) {
  effects.name <- colnames(U)
  # effects id
  me.idx <- which(!stringr::str_detect(effects.name, ":"))
  hoe.idx <- which(stringr::str_detect(effects.name, ":"))
  # effects name
  me.names <- effects.name[me.idx]
  hoe.names <- stringr::str_split(colnames(U)[hoe.idx], ":")
  hoe.names.unls <- unlist(hoe.names)
  # effects num
  m.eff.num <- length(me.idx)
  h.eff.num <- length(hoe.idx)
  mat <- mat.or.vec((m.eff.num+h.eff.num), length(hoe.names.unls))
  h.each.num <- lengths(hoe.names)
  h.cum.num <- c(0,cumsum(h.each.num))
  if(h.eff.num != 0) {
    for(i in seq_along(hoe.idx)){
      mat[hoe.idx[i], (h.cum.num[i]+1):(h.cum.num[i+1])] <- -1
    }
    for(i in seq_along(hoe.names.unls)){
      a <- which(me.names %in% hoe.names.unls[i])
      mat[a,i] <- 1
    }
  }
  return(cbind(-1,diag(m.eff.num+h.eff.num), mat))
}


