## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----eval=FALSE---------------------------------------------------------------
#  calibrate_to_estimate(
#    rep_design = rep_design,
#    estimate = vector_of_control_totals,
#    vcov_estimate = variance_covariance_matrix_for_controls,
#    cal_formula = ~ CALIBRATION_VARIABLE_1 + CALIBRATION_VARIABLE_2 + ...,
#  )

## ----eval=FALSE---------------------------------------------------------------
#  calibrate_to_sample(
#    primary_rep_design = primary_rep_design,
#    control_rep_design = control_rep_design
#    cal_formula = ~ CALIBRATION_VARIABLE_1 + CALIBRATION_VARIABLE_2 + ...,
#  )

## ----setup--------------------------------------------------------------------
# Load the data
library(svrep)
data("lou_vax_survey")

# Inspect the first few rows
head(lou_vax_survey) |> knitr::kable()

## -----------------------------------------------------------------------------
suppressPackageStartupMessages(
  library(survey)
)

lou_vax_survey_rep <- svydesign(
  data = lou_vax_survey,
  ids = ~ 1, weights = ~ SAMPLING_WEIGHT
) |> 
  as.svrepdesign(type = "JK1", mse = TRUE)

## ----echo=FALSE---------------------------------------------------------------
lou_vax_survey_rep

## -----------------------------------------------------------------------------
# Conduct nonresponse weighting adjustment

nr_adjusted_design <- lou_vax_survey_rep |>
  redistribute_weights(
    reduce_if = RESPONSE_STATUS == "Nonrespondent",
    increase_if = RESPONSE_STATUS == "Respondent"
  ) |>
  subset(RESPONSE_STATUS == "Respondent")

# Inspect the result of the adjustment
rbind(
  'Original' = summarize_rep_weights(lou_vax_survey_rep, type = 'overall'),
  'NR-adjusted' = summarize_rep_weights(nr_adjusted_design, type = 'overall')
)[,c("nrows", "rank", "avg_wgt_sum", "sd_wgt_sums")]

## ----results='hide'-----------------------------------------------------------
data("lou_pums_microdata")

## -----------------------------------------------------------------------------
# Inspect some of the rows/columns of data ----
tail(lou_pums_microdata, n = 5) |> 
  dplyr::select(AGE, SEX, RACE_ETHNICITY, EDUC_ATTAINMENT) |>
  knitr::kable()

## -----------------------------------------------------------------------------
# Convert to a survey design object ----
  pums_rep_design <- svrepdesign(
      data = lou_pums_microdata,
      weights = ~ PWGTP,
      repweights = "PWGTP\\d{1,2}",
      type = "successive-difference",
      variables = ~ AGE + SEX + RACE_ETHNICITY + EDUC_ATTAINMENT,
      mse = TRUE
    )

  pums_rep_design

## -----------------------------------------------------------------------------
# Subset to only include adults
pums_rep_design <- pums_rep_design |> subset(AGE >= 18)

## -----------------------------------------------------------------------------
suppressPackageStartupMessages(
  library(dplyr)
)

# Check that variables match across data sources ----
  pums_rep_design$variables |>
    dplyr::distinct(RACE_ETHNICITY)

  setdiff(lou_vax_survey_rep$variables$RACE_ETHNICITY,
          pums_rep_design$variables$RACE_ETHNICITY)
  setdiff(lou_vax_survey_rep$variables$SEX,
          pums_rep_design$variables$SEX)
  setdiff(lou_vax_survey_rep$variables$EDUC_ATTAINMENT,
          pums_rep_design$variables$EDUC_ATTAINMENT)

## -----------------------------------------------------------------------------
# Estimates from the control survey (ACS)
svymean(
  design = pums_rep_design,
  x = ~ RACE_ETHNICITY + SEX + EDUC_ATTAINMENT
)

# Estimates from the primary survey (Louisville vaccination survey)
svymean(
  design = nr_adjusted_design,
  x = ~ RACE_ETHNICITY + SEX + EDUC_ATTAINMENT
)

## -----------------------------------------------------------------------------
acs_control_totals <- svytotal(
  x = ~ RACE_ETHNICITY + SEX + EDUC_ATTAINMENT,
  design = pums_rep_design
)

control_totals_for_raking <- list(
  'estimates' = coef(acs_control_totals),
  'variance-covariance' = vcov(acs_control_totals)
)

# Inspect point estimates
control_totals_for_raking$estimates

# Inspect a few rows of the control totals' variance-covariance matrix
control_totals_for_raking$`variance-covariance`[5:8,5:8] |>
  `colnames<-`(NULL)

## -----------------------------------------------------------------------------
svytotal(x = ~ RACE_ETHNICITY + SEX + EDUC_ATTAINMENT,
         design = nr_adjusted_design)

## -----------------------------------------------------------------------------
raked_design <- calibrate_to_estimate(
  rep_design = nr_adjusted_design,
  estimate = control_totals_for_raking$estimates,
  vcov_estimate = control_totals_for_raking$`variance-covariance`,
  cal_formula = ~ RACE_ETHNICITY + SEX + EDUC_ATTAINMENT,
  calfun = survey::cal.raking, # Required for raking
  epsilon = 1e-9
)

## -----------------------------------------------------------------------------
# Estimated totals after calibration
svytotal(x = ~ RACE_ETHNICITY + SEX + EDUC_ATTAINMENT,
         design = raked_design)

# Matches the control totals!
cbind(
  'total' = control_totals_for_raking$estimates,
  'SE' = control_totals_for_raking$`variance-covariance` |>
    diag() |> sqrt()
)

## -----------------------------------------------------------------------------
estimates_by_design <- svyby_repwts(
  rep_designs = list(
    "NR-adjusted" = nr_adjusted_design,
    "Raked" = raked_design
  ),
  FUN = svytotal,
  formula = ~ RACE_ETHNICITY + SEX + EDUC_ATTAINMENT
)

t(estimates_by_design[,-1]) |>
  knitr::kable()

## -----------------------------------------------------------------------------
raked_design_opsomer_erciulescu <- calibrate_to_sample(
  primary_rep_design = nr_adjusted_design,
  control_rep_design = pums_rep_design,
  cal_formula = ~ RACE_ETHNICITY + SEX + EDUC_ATTAINMENT,
  calfun = survey::cal.raking,
  epsilon = 1e-9
)

## -----------------------------------------------------------------------------
estimates_by_design <- svyby_repwts(
  rep_designs = list(
    "calibrate_to_estimate()" = raked_design,
    "calibrate_to_sample()" = raked_design_opsomer_erciulescu
  ),
  FUN = svytotal,
  formula = ~ VAX_STATUS + RACE_ETHNICITY + SEX + EDUC_ATTAINMENT
)

t(estimates_by_design[,-1]) |>
  knitr::kable()

## -----------------------------------------------------------------------------
# Create matching post-stratification variable in both datasets
  nr_adjusted_design <- nr_adjusted_design |>
    transform(POSTSTRATUM = interaction(RACE_ETHNICITY, SEX, EDUC_ATTAINMENT,
                                        sep = "|"))

  pums_rep_design <- pums_rep_design |>
    transform(POSTSTRATUM = interaction(RACE_ETHNICITY, SEX, EDUC_ATTAINMENT,
                                        sep = "|"))
  
  levels(pums_rep_design$variables$POSTSTRATUM) <- levels(
    nr_adjusted_design$variables$POSTSTRATUM
  )

# Estimate control totals
  acs_control_totals <- svytotal(
    x = ~ POSTSTRATUM,
    design = pums_rep_design
  )
  
  poststratification_totals <- list(
    'estimate' = coef(acs_control_totals),
    'variance-covariance' = vcov(acs_control_totals)
  )

# Inspect the control totals
  poststratification_totals$estimate |>
    as.data.frame() |>
    `colnames<-`('estimate') |>
    knitr::kable()

## -----------------------------------------------------------------------------
# Post-stratify the design using the estimates
poststrat_design_fuller <- calibrate_to_estimate(
  rep_design = nr_adjusted_design,
  estimate = poststratification_totals$estimate,
  vcov_estimate = poststratification_totals$`variance-covariance`,
  cal_formula = ~ POSTSTRATUM, # Specify the post-stratification variable
  calfun = survey::cal.linear # This option is required for post-stratification
)

## -----------------------------------------------------------------------------
# Post-stratify the design using the two samples
poststrat_design_opsomer_erciulescu <- calibrate_to_sample(
  primary_rep_design = nr_adjusted_design,
  control_rep_design = pums_rep_design,
  cal_formula = ~ POSTSTRATUM, # Specify the post-stratification variable
  calfun = survey::cal.linear # This option is required for post-stratification
)

## -----------------------------------------------------------------------------
estimates_by_design <- svyby_repwts(
  rep_designs = list(
    "calibrate_to_estimate()" = poststrat_design_fuller,
    "calibrate_to_sample()" = poststrat_design_opsomer_erciulescu
  ),
  FUN = svymean,
  formula = ~ VAX_STATUS + RACE_ETHNICITY + SEX + EDUC_ATTAINMENT
)

t(estimates_by_design[,-1]) |>
  knitr::kable()

## -----------------------------------------------------------------------------
# Randomly select which columns will be assigned to each set of perturbed control totals
dimension_of_control_totals <- length(poststratification_totals$estimate)

columns_to_perturb <- sample(x = 1:ncol(nr_adjusted_design$repweights),
                             size = dimension_of_control_totals)

print(columns_to_perturb)

# Perform the calibration
poststratified_design <- calibrate_to_estimate(
  rep_design = nr_adjusted_design,
  estimate = poststratification_totals$estimate,
  vcov_estimate = poststratification_totals$`variance-covariance`,
  cal_formula = ~ POSTSTRATUM,
  calfun = survey::cal.linear,
  col_selection = columns_to_perturb # Specified for reproducibility
)

## -----------------------------------------------------------------------------
poststratified_design$perturbed_control_cols

## -----------------------------------------------------------------------------
# Randomly match the primary replicates to control replicates
set.seed(1999)

column_matching <- rep(NA, times = ncol(nr_adjusted_design$repweights))
column_matching[sample(x = 1:1000, size = 80)] <- 1:80

str(column_matching)

# Perform the calibration
poststratified_design <- calibrate_to_sample(
  primary_rep_design = nr_adjusted_design,
  control_rep_design = pums_rep_design,
  cal_formula = ~ POSTSTRATUM,
  calfun = survey::cal.linear,
  control_col_matches = column_matching
)

## -----------------------------------------------------------------------------
str(poststratified_design$control_column_matches)