ldamatch demos
Kyle Gorman & Géza Kiss
2024-04-14
Univariate case…
library(ldamatch)
# Fix the RNG seed so the simulated covariates are reproducible.
set.seed(257)
# Search method passed explicitly to match_groups() in the "default search
# method" examples.
default_method <- "heuristic4"
# Size of the treatment group; the control group is twice as large.
SIZE <- 15
# Group labels: 2 * SIZE "control" subjects followed by SIZE "treatment" ones.
condition <- factor(
  rep(c("control", "treatment"), times = c(2 * SIZE, SIZE))
)
# Controls are drawn from N(0, 1), treatment subjects from N(1, 2); the two
# rnorm() calls stay in this order so the random stream is unchanged.
covariate1 <- c(rnorm(2 * SIZE), rnorm(SIZE, mean = 1, sd = 2))
Univariate case (with random search)…
# Match the groups on covariate1 with t_halt, using the "random" search.
is.in <- match_groups(
  condition,
  covariate1,
  t_halt,
  method = "random"
)
## Search method: random
## Initial group sizes: control: 30 treatment: 15
## Starting random search.
## Found matching: control: 30; treatment: 13 (total: 43)
## Found matching: control: 30; treatment: 13 (total: 43)
## Found matching: control: 30; treatment: 13 (total: 43)
## Found matching: control: 30; treatment: 13 (total: 43)
## Finished random search in 8.48 seconds (wall click time passed:8.5).
## Eventual group sizes: control: 30 treatment: 13
## Removed subjects: control: 0 treatment: 2
## The p-value before matching: 0.06059131
## The p-values after matching: 0.220897103271512
# Cross-tabulate group membership against the kept (TRUE) / removed (FALSE)
# indicator returned by the search.
print(table(condition = condition, is.in = is.in))
## is.in
## condition FALSE TRUE
## control 0 30
## treatment 2 13
Univariate case (with default search method)…
# Univariate matching on covariate1 again, this time with the search method
# stored in default_method.
is.in <- match_groups(
  condition, covariate1, t_halt,
  method = default_method
)
## Search method: heuristic4
## Initial group sizes: control: 30 treatment: 15
## Starting heuristic4 search.
## Number of subjects: control:30, treatment:15; p/thresh ratio: 0.302957
## Warning: executing %dopar% sequentially: no parallel backend registered
##
Lookahead 1, number of best sets: 1
##
Lookahead 2, number of best sets: 4
##
## Found 4 solution(s) in 2 steps
##
## Finished heuristic4 search in 1.98 seconds (wall click time passed:2.04).
## Eventual group sizes: control: 30 treatment: 13
## Removed subjects: control: 0 treatment: 2
## The p-value before matching: 0.06059131
## The p-values after matching: 0.202160707808612, 0.213496988230277, 0.220897103271512, 0.22165738589329
# Per-group counts of removed (FALSE) and kept (TRUE) subjects.
print(table(condition = condition, is.in = is.in))
## is.in
## condition FALSE TRUE
## control 0 30
## treatment 2 13
Multivariate case…
# Second covariate with the same layout as covariate1: 2 * SIZE standard-normal
# draws followed by SIZE draws with mean 1 and sd 2.
covariate2 <- c(
  rnorm(2 * SIZE, mean = 0, sd = 1),
  rnorm(SIZE, mean = 1, sd = 2)
)
# Two-column matrix holding both covariates, one column per covariate.
covariates <- cbind(covariate1 = covariate1, covariate2 = covariate2)
Multivariate case (with default search method)…
# Match on both covariates with t_halt, using the method in default_method.
is.in <- match_groups(
  condition,
  covariates,
  t_halt,
  method = default_method
)
## Search method: heuristic4
## Initial group sizes: control: 30 treatment: 15
## Starting heuristic4 search.
## Number of subjects: control:30, treatment:15; p/thresh ratio: 0.302957
##
Lookahead 1, number of best sets: 1
##
Lookahead 2, number of best sets: 3
##
## Found 3 solution(s) in 2 steps
##
## Finished heuristic4 search in 2.61 seconds (wall click time passed:2.61).
## Eventual group sizes: control: 30 treatment: 13
## Removed subjects: control: 0 treatment: 2
## The p-value before matching: 0.06059131
## The p-values after matching: 0.202160707808612, 0.220897103271512, 0.22165738589329
# Show how many subjects of each group were kept or removed.
print(table(condition = condition, is.in = is.in))
## is.in
## condition FALSE TRUE
## control 0 30
## treatment 2 13
Multivariate case (with random search)…
# Match on both covariates with t_halt, using the "random" search.
is.in <- match_groups(
  condition, covariates, t_halt,
  method = "random"
)
## Search method: random
## Initial group sizes: control: 30 treatment: 15
## Starting random search.
## Found matching: control: 27; treatment: 12 (total: 39)
## Found matching: control: 29; treatment: 13 (total: 42)
## Found matching: control: 29; treatment: 13 (total: 42)
## Found matching: control: 30; treatment: 12 (total: 42)
## Found matching: control: 30; treatment: 13 (total: 43)
## Found matching: control: 30; treatment: 13 (total: 43)
## Found matching: control: 30; treatment: 13 (total: 43)
## Finished random search in 7.87 seconds (wall click time passed:7.88).
## Eventual group sizes: control: 30 treatment: 13
## Removed subjects: control: 0 treatment: 2
## The p-value before matching: 0.06059131
## The p-values after matching: 0.22165738589329
# Kept/removed counts per group after the random search.
print(table(condition = condition, is.in = is.in))
## is.in
## condition FALSE TRUE
## control 0 30
## treatment 2 13
Multivariate case (with special proportions and Wilcoxon test)…
# Desired group proportions of 4:3 (control:treatment), scaled to sum to 1.
my.props <- prop.table(
  c(control = 4, treatment = 3)
)
# Match with U_halt and the custom proportions; no method argument is given,
# so the package's own default search applies.
is.in <- match_groups(
  condition, covariates, U_halt,
  props = my.props
)
## Search method: heuristic4
## Initial group sizes: control: 30 treatment: 15
## Starting heuristic4 search.
## Number of subjects: control:30, treatment:15; p/thresh ratio: 0.803495
##
Lookahead 1, number of best sets: 1
##
## Found 1 solution(s) in 1 steps
##
## Finished heuristic4 search in 0.38 seconds (wall click time passed:0.37).
## Eventual group sizes: control: 29 treatment: 15
## Removed subjects: control: 1 treatment: 0
## The p-value before matching: 0.160699
## The p-values after matching: 0.200379547096912
# Kept/removed counts per group for the proportion-constrained match.
print(table(condition = condition, is.in = is.in))
## is.in
## condition FALSE TRUE
## control 1 29
## treatment 0 15
Multivariate case (with Wilks test)…
# Match on both covariates with wilks_halt; the search method is left at the
# package default.
is.in <- match_groups(
  condition,
  covariates,
  wilks_halt
)
## Search method: heuristic4
## Initial group sizes: control: 30 treatment: 15
## Starting heuristic4 search.
## Number of subjects: control:30, treatment:15; p/thresh ratio: 0.159753
##
Lookahead 1, number of best sets: 1
##
Lookahead 2, number of best sets: 1
##
Number of subjects: control:30, treatment:14; p/thresh ratio: 0.424942
##
Lookahead 2, number of best sets: 30
##
## Found 30 solution(s) in 3 steps
##
## Finished heuristic4 search in 4.42 seconds (wall click time passed:4.42).
## Eventual group sizes: control: 29 treatment: 13
## Removed subjects: control: 1 treatment: 2
## The p-value before matching: 0.03195062
## The p-values after matching: 0.200388136169973, 0.20070104804625, 0.200978434271031, 0.202090574876842, 0.20316718370627, 0.204122628649838, 0.204259312376656, 0.204438975596233, 0.204479730822287, 0.204650735112843, 0.207695854258158, 0.209850624409276, 0.212329866531795, 0.213601103416089, 0.214793042459186, 0.217507416358202, 0.218271311214737, 0.219931870593934, 0.220656240469989, 0.22145394327565, 0.221501756165972, 0.223937954614188, 0.225841957702222, 0.226093391195097, 0.229019777753524, 0.229817811663357, 0.234037144986885, 0.23566831774506, 0.239863520338925, 0.248655369465288
# Kept/removed counts per group for the wilks_halt match.
print(table(condition = condition, is.in = is.in))
## is.in
## condition FALSE TRUE
## control 1 29
## treatment 2 13
Multivariate case (with Wilks test and random search)…
# Match on both covariates with wilks_halt, using the "random" search.
is.in <- match_groups(
  condition, covariates, wilks_halt,
  method = "random"
)
## Search method: random
## Initial group sizes: control: 30 treatment: 15
## Starting random search.
## Found matching: control: 30; treatment: 12 (total: 42)
## Found matching: control: 29; treatment: 13 (total: 42)
## Found matching: control: 30; treatment: 12 (total: 42)
## Found matching: control: 30; treatment: 12 (total: 42)
## Finished random search in 14.51 seconds (wall click time passed:14.51).
## Eventual group sizes: control: 30 treatment: 12
## Removed subjects: control: 0 treatment: 3
## The p-value before matching: 0.03195062
## The p-values after matching: 0.323684521786494
# Kept/removed counts per group for the wilks_halt random search.
print(table(condition = condition, is.in = is.in))
## is.in
## condition FALSE TRUE
## control 0 30
## treatment 3 12
Multivariate case (with Anderson-Darling test and default search
method)…
# Match on both covariates with ad_halt and the method in default_method;
# prefer_test = TRUE is forwarded to match_groups() unchanged.
is.in <- match_groups(
  condition,
  covariates,
  ad_halt,
  method = default_method,
  prefer_test = TRUE
)
## Search method: heuristic4
## Initial group sizes: control: 30 treatment: 15
## Starting heuristic4 search.
## Number of subjects: control:30, treatment:15; p/thresh ratio: 0.288285
##
Lookahead 1, number of best sets: 4
##
Lookahead 2, number of best sets: 3
##
## Found 3 solution(s) in 2 steps
##
## Finished heuristic4 search in 62.01 seconds (wall click time passed:62.28).
## Eventual group sizes: control: 30 treatment: 13
## Removed subjects: control: 0 treatment: 2
## The p-value before matching: 0.057657
## The p-values after matching: 0.21597
# Kept/removed counts per group for the ad_halt match.
print(table(condition = condition, is.in = is.in))
## is.in
## condition FALSE TRUE
## control 0 30
## treatment 2 13
Multivariate case (with t-test and Anderson-Darling test
simultaneously)…
# Thresholds handed to match_groups() as its fourth argument
# (presumably one per combined test -- confirm against the package docs).
threshes <- c(0.2, 0.02)
# Build a single halting test out of t_halt and ad_halt.
t_ad_halt <- create_halting_test(c(t_halt, ad_halt))
# Match on both covariates with the combined test and the thresholds above.
is.in <- match_groups(
  condition,
  covariates,
  t_ad_halt,
  threshes
)
## Search method: heuristic4
## Initial group sizes: control: 30 treatment: 15
## Starting heuristic4 search.
## Number of subjects: control:30, treatment:15; p/thresh ratio: 0.302957
##
Lookahead 1, number of best sets: 1
##
Lookahead 2, number of best sets: 3
##
## Found 3 solution(s) in 2 steps
##
## Finished heuristic4 search in 66.83 seconds (wall click time passed:67.3).
## Eventual group sizes: control: 30 treatment: 13
## Removed subjects: control: 0 treatment: 2
## The p-value before matching: 0.057657
## The p-values after matching: 0.19925, 0.21597
# Kept/removed counts per group for the combined-test match.
print(table(condition = condition, is.in = is.in))
## is.in
## condition FALSE TRUE
## control 0 30
## treatment 2 13
Univariate case (with exhaustive search)…
# Estimate the size of an exhaustive search that preserves at least 42
# subjects, assuming 100 cases are evaluated per second.
estimate_exhaustive(
  min_preserved = 42,
  condition,
  cases_per_second = 100
)
## If 44 of 45 kept: at most 45 cases. If 100 cases per second evaluated: 0.4 seconds.
## If 43 of 45 kept: at most 1035 cases. If 100 cases per second evaluated: 10.3 seconds.
## If 42 of 45 kept: at most 15225 cases. If 100 cases per second evaluated: 2.5 minutes.
## [1] 15225
# Register the sequential foreach backend before running the search.
foreach::registerDoSEQ()
# Exhaustive search on covariate1; with all_results = TRUE the result is
# indexed as a list (is.ins[[1]]) further down.
is.ins <- match_groups(
  condition, covariate1, t_halt,
  method = "exhaustive",
  all_results = TRUE
)
## Search method: exhaustive
## Initial group sizes: control: 30 treatment: 15
## Starting exhaustive search.
## Created 2 group size configurations each with a total size of 44
## control: 29 treatment: 15 divergence: 0.000128651338541343
## Size of Cartesian product: 30
##
1 to 30
Number of cases processed per second: 6.550218 (cpu time) or 6.550218 (wall clock time).
## control: 30 treatment: 14 divergence: 0.000520578560755861
## Size of Cartesian product: 15
##
1 to 15
Number of cases processed per second: 12.09677 (cpu time) or 12.19512 (wall clock time).
## Created 4 group size configurations each with a total size of 43
## control: 29 treatment: 14 divergence: 0.000135741532290407
## Size of Cartesian product: 450
##
1 to 450
Number of cases processed per second: 288.4615 (cpu time) or 288.4615 (wall clock time).
## control: 29 treatment: 14 divergence: 0.000135741532290407
## Size of Cartesian product: 450
##
1 to 450
Number of cases processed per second: 300 (cpu time) or 300 (wall clock time).
## control: 28 treatment: 15 divergence: 0.000536783341068091
## Size of Cartesian product: 435
##
1 to 435
Number of cases processed per second: 78.66184 (cpu time) or 77.54011 (wall clock time).
## control: 30 treatment: 13 divergence: 0.0021993283249259
## Size of Cartesian product: 105
##
1 to 105
Number of cases processed per second: 85.36585 (cpu time) or 84 (wall clock time).
## Finished exhaustive search in 15.67 seconds (wall click time passed:15.77).
## Eventual group sizes: control: 30 treatment: 13
## Removed subjects: control: 0 treatment: 2
## The p-value before matching: 0.06059131
## The p-values after matching: 0.22165738589329
# Kept/removed counts per group for the first exhaustive-search result.
print(table(condition = condition, is.ins[[1]]))
##
## condition FALSE TRUE
## control 0 30
## treatment 2 13
## [1] 1
# Sanity check: rerun the default search on only the subjects kept by the
# exhaustive search; nothing further should need to be removed.
is.in <- match_groups(
  condition[is.ins[[1]]],
  covariate1[is.ins[[1]]],
  t_halt,
  method = default_method
)
## Search method: heuristic4
## Groups are already matched.
# Tabulate the retained subjects' groups against the new indicator.
print(table(condition[is.ins[[1]]], is.in = is.in))
## is.in
## TRUE
## control 30
## treatment 13
Multivariate case for more than two groups…
# Reset the seed so the three-group data set is reproducible on its own.
set.seed(257)
# Number of subjects in each of the three groups.
SIZE <- 15
# Labels: SIZE subjects each for group1, group2 and group3, in that order.
condition <- factor(rep(c("group1", "group2", "group3"), each = SIZE))
# Each covariate is drawn per group as N(0, 1), N(0, 2) and N(1, 2); the
# rnorm() calls stay in the original order so the random stream is the same.
covariate1 <- c(
  rnorm(SIZE, mean = 0, sd = 1),
  rnorm(SIZE, mean = 0, sd = 2),
  rnorm(SIZE, mean = 1, sd = 2)
)
covariate2 <- c(
  rnorm(SIZE, mean = 0, sd = 1),
  rnorm(SIZE, mean = 0, sd = 2),
  rnorm(SIZE, mean = 1, sd = 2)
)
# Two-column matrix with one column per covariate.
covariates <- cbind(covariate1 = covariate1, covariate2 = covariate2)
Multivariate case for more than two groups (with default search
method)…
# Match the three groups on both covariates with the combined t_ad_halt test,
# using the method in default_method.
is.in <- match_groups(
  condition, covariates, t_ad_halt,
  method = default_method
)
## Search method: heuristic4
## Initial group sizes: group1: 15 group2: 15 group3: 15
## Starting heuristic4 search.
## Number of subjects: group1:15, group2:15, group3:15; p/thresh ratio: 0.063320
##
Lookahead 1, number of best sets: 6
##
Lookahead 2, number of best sets: 18
##
Number of subjects: group1:15, group2:14, group3:15; p/thresh ratio: 0.126925
##
Lookahead 2, number of best sets: 3
## Number of subjects: group1:15, group2:14, group3:14; p/thresh ratio: 0.190600
##
Lookahead 2, number of best sets: 5
## Number of subjects: group1:15, group2:14, group3:13; p/thresh ratio: 0.269390
##
Lookahead 2, number of best sets: 1
##
Number of subjects: group1:15, group2:13, group3:13; p/thresh ratio: 0.469900
##
Lookahead 2, number of best sets: 1
##
Number of subjects: group1:15, group2:13, group3:12; p/thresh ratio: 0.507750
##
Lookahead 2, number of best sets: 11
##
## Found 11 solution(s) in 7 steps
##
## Random choices: step: 2, num_choices: 2
## Finished heuristic4 search in 357.95 seconds (wall click time passed:360.92).
## Eventual group sizes: group1: 15 group2: 12 group3: 11
## Removed subjects: group1: 0 group2: 3 group3: 4
## The p-value before matching: 0.012664
## The p-values after matching: 0.222261928685954, 0.225391352956842, 0.21875, 0.201370779344551, 0.223663369960827
# Kept/removed counts for each of the three groups.
print(table(condition = condition, is.in = is.in))
## is.in
## condition FALSE TRUE
## group1 0 15
## group2 3 12
## group3 4 11