#' Alternative Sca-MCMC Implementation for Variable Selection
#'
#' An alternative implementation of Sca-MCMC for variable selection
#' with binary inclusion indicators. `N_chain` tempered chains are each
#' updated by a mutation proposal (selected by `method`) followed by a
#' tempered Metropolis accept/reject step. Every `swap_every` iterations a
#' state swap between one randomly chosen pair of adjacent chains is
#' attempted.
#'
#' @details
#' Relies on the helpers `geoTemp()`, `compute_mutation_rate()` and
#' `dglm_likelihood()`, which are defined outside this function. The binary
#' inclusion vector itself is handed to `dglm_likelihood()`, i.e. active
#' variables are treated as having coefficient 1.
#'
#' Proposals whose log-likelihood comparison is not a number (for example
#' `-Inf` against `-Inf`) are rejected instead of aborting the run.
#'
#' @param y Numeric response vector.
#' @param X Design matrix (n x p).
#' @param family Character, one of "poisson", "pareto", "gamma".
#' @param method Mutation strategy: "ScaI", "ScaII", or "ScaIII".
#' @param N_chain Number of parallel tempered chains (>1).
#' @param n_iter Total MCMC iterations (the initial state counts as
#'   iteration 1).
#' @param beta_star Target (true) inclusion vector of length p (for adaptive
#'   Q0; optional). If `NULL`, the initial state of chain 1 is used and a
#'   warning is issued.
#' @param alpha_gamma Shape parameter if family = "gamma" (default 2).
#' @param swap_every Attempt a swap between adjacent chains every
#'   `swap_every` iterations (default 5).
#'
#' @return List containing:
#' \item{beta_chain}{Integer array [n_iter x p x N_chain] of sampled
#'   inclusion vectors.}
#' \item{family}{Model family used.}
#' \item{method}{Mutation strategy used.}
#' \item{temperatures}{Temperature ladder returned by `geoTemp()`.}
#'
#' @importFrom stats rbinom runif
#' @export
sca_mcmc1 <- function(y, X, family = c("poisson", "pareto", "gamma"),
                      method = c("ScaI", "ScaII", "ScaIII"),
                      N_chain = 8, n_iter = 5000,
                      beta_star = NULL, alpha_gamma = 2,
                      swap_every = 5) {
  family <- match.arg(family)
  method <- match.arg(method)

  # --- Validate inputs ---
  n <- length(y)
  p <- ncol(X)  # length of parameter/inclusion vector
  if (is.null(p) || p < 1L) {
    stop("X must be a matrix with at least one column.", call. = FALSE)
  }
  if (nrow(X) != n) {
    stop("nrow(X) must equal length(y).", call. = FALSE)
  }
  stopifnot(
    length(N_chain) == 1L, N_chain >= 1,
    length(n_iter) == 1L, n_iter >= 1,
    length(swap_every) == 1L, swap_every >= 1
  )

  # --- Initialize temperature ladder ---
  # Values are used as temperatures: log-likelihood differences are divided
  # by temps[k] below, so chain 1 (T1 = 1) is the coldest chain.
  # NOTE(review): assumes geoTemp() returns one value per chain - confirm.
  temps <- geoTemp(N_chain, T1 = 1, TN = 20)

  # --- Initialize chains ---
  # Each chain holds a binary inclusion vector; row 1 is a random start.
  # A single rbinom() call fills chain 1 first, then chain 2, and so on.
  beta_chain <- array(0L, dim = c(n_iter, p, N_chain))
  beta_chain[1L, , ] <- stats::rbinom(p * N_chain, size = 1L, prob = 0.5)

  # If beta_star not provided, use first chain as pseudo-target for Q0
  if (is.null(beta_star)) {
    warning("beta_star not provided; using initial state of chain 1 as proxy for Q0 computation.")
    beta_star <- beta_chain[1L, , 1L]
  } else if (length(beta_star) != p) {
    stop("beta_star must have length ncol(X).", call. = FALSE)
  }

  # --- Precompute log-likelihood for initial states ---
  loglik_current <- numeric(N_chain)
  for (k in seq_len(N_chain)) {
    loglik_current[k] <- sum(dglm_likelihood(
      y, X, beta_chain[1L, , k],
      family = family, alpha_gamma = alpha_gamma
    ))
  }
  if (anyNA(loglik_current)) {
    stop("Initial log-likelihood is NA/NaN for at least one chain.",
         call. = FALSE)
  }

  # --- MCMC iterations ---
  # seq_len(n_iter)[-1L] is empty when n_iter == 1 (2:n_iter would not be).
  for (iter in seq_len(n_iter)[-1L]) {
    # Compute base mutation rate Q0 using current coldest chain (chain 1)
    beta0 <- beta_chain[iter - 1L, , 1L]
    mut_info <- compute_mutation_rate(method, beta_star, beta0, p, N_chain)
    Q_vec <- mut_info$Q  # mutation probabilities per sub-chain or pair

    # Number of leading entries of Q_vec that this strategy consumes.
    n_q <- switch(method,
      ScaI = 2L,
      ScaII = min(length(Q_vec), p),
      ScaIII = p
    )
    Q_use <- Q_vec[seq_len(n_q)]
    if (length(Q_use) < n_q || anyNA(Q_use)) {
      stop("compute_mutation_rate() returned too few or missing mutation ",
           "probabilities for method '", method, "'.", call. = FALSE)
    }

    # Update each chain independently (local moves)
    for (k in seq_len(N_chain)) {
      beta_old <- beta_chain[iter - 1L, , k]
      beta_new <- beta_old

      # Apply mutations based on strategy. Random draws are requested in the
      # same order as an element-by-element loop would request them.
      if (method == "ScaI") {
        # Two-component proposal: flip the whole random subset A with
        # probability Q[1] and its complement B with probability Q[2].
        n_A <- max(1, round(p * stats::runif(1)))
        A <- sample.int(p, size = n_A)
        B <- setdiff(seq_len(p), A)
        if (stats::runif(1) < Q_use[1L]) {
          beta_new[A] <- 1L - beta_new[A]
        }
        if (stats::runif(1) < Q_use[2L]) {
          beta_new[B] <- 1L - beta_new[B]
        }
      } else if (method == "ScaII") {
        # n_q randomly chosen components; the j-th one flips with prob Q[j].
        idx_mut <- sample.int(p, size = n_q)
        flip <- idx_mut[stats::runif(n_q) < Q_use]
        beta_new[flip] <- 1L - beta_new[flip]
      } else {
        # ScaIII: each component has its own mutation prob
        flip <- stats::runif(p) < Q_use
        beta_new[flip] <- 1L - beta_new[flip]
      }

      # Evaluate likelihood
      loglik_new <- sum(dglm_likelihood(
        y, X, beta_new,
        family = family, alpha_gamma = alpha_gamma
      ))

      # Metropolis acceptance (with tempering). isTRUE() turns an NA/NaN
      # comparison into a rejection instead of an error.
      log_alpha <- (loglik_new - loglik_current[k]) / temps[k]
      if (isTRUE(log(stats::runif(1)) < log_alpha)) {
        beta_chain[iter, , k] <- beta_new
        loglik_current[k] <- loglik_new
      } else {
        beta_chain[iter, , k] <- beta_old
      }
    }

    # Parallel Tempering swap between adjacent chains
    if (N_chain > 1 && iter %% swap_every == 0) {
      lo <- sample.int(N_chain - 1L, 1L)
      hi <- lo + 1L

      # Swap log-ratio (1/T_lo - 1/T_hi) * (ll_hi - ll_lo); algebraically
      # equal to (ll_lo/T_hi + ll_hi/T_lo) - (ll_lo/T_lo + ll_hi/T_hi) but
      # well defined when one log-likelihood is -Inf.
      log_swap_alpha <- (1 / temps[lo] - 1 / temps[hi]) *
        (loglik_current[hi] - loglik_current[lo])
      if (isTRUE(log(stats::runif(1)) < log_swap_alpha)) {
        # Accept swap: exchange states and their cached log-likelihoods
        state_lo <- beta_chain[iter, , lo]
        beta_chain[iter, , lo] <- beta_chain[iter, , hi]
        beta_chain[iter, , hi] <- state_lo
        loglik_current[c(lo, hi)] <- loglik_current[c(hi, lo)]
      }
    }
  }

  list(
    beta_chain = beta_chain,
    family = family,
    method = method,
    temperatures = temps
  )
}
