## ----setup, include=FALSE--------------------------------------------------------------------------------------------- knitr::opts_chunk$set( warning = FALSE, message = FALSE, tidy = FALSE, fig.width = 8, fig.height = 5, dpi = 150, fig.retina = 2, fig.align = "center", out.width = "100%", out.extra = 'style="height:auto;"' ) options(width = 120) # width console output ## First, install the packages, if you have not done this already: #if (!require("restriktor")) install.packages("restriktor") ## Then, load the packages: #library(restriktor) # for the goric function # If you want to use restriktor from github: #if (!require("devtools")) install.packages("devtools") #library(devtools) #install_github("LeonardV/restriktor") #, force = TRUE library(restriktor) # for goric function # NB default iter is 1000 # als testen dan ws iter = iters gebruiken, met: # iters = 30 ## ----warning=F, message=F, eval=FALSE--------------------------------------------------------------------------------- # # If ANOVA model: # # # In practise you may want to increase the number of iter, say 2000, and use parallel computing # # future::plan(multisession, workers = 8) # windows machines # # future::plan(multicore, workers = 8) # unix machines # benchmarks_means <- benchmark(goric_object, model_type = "means", iter = 400) # benchmarks_means # plot(benchmarks_means) # # Use 'pop_es' to specify own null population(s). # # # ## If ANOVA or other model: # benchmarks_asymp <- benchmark(goric_object, iter = 400) # # 'model_type = "asymp"', the default # benchmarks_asymp # plot(benchmarks_asymp) # # Use 'pop_est' to specify own null population(s). # # # # print() options: # # output_type = c("rgw", "gw", "rlw", "ld", "all") # # hypo_rate_threshold = # # # # plot() options: # # output_type = c("rgw", "gw", "rlw", "ld") # # percentiles = NULL # # x_lim = c(, ) # # log_scale = FALSE/TRUE ## ----echo=FALSE, results = FALSE, message = FALSE, warning = FALSE---------------------------------------------------- n.coef <- 3 # Number of dummy variables (model without intercept) mu <- rep(0, n.coef) intercept <- 0 f <- .25 #c(0, .1, .25, .4) # According to Cohen 1992 #f2 = r2 / (1-r2) -> r2 = f2 - f2*r2 -> (1-f2)*r2 = f2 #r2 <- f^2 / (1-f^2) # NB correspondeert niet met: 0, .02, .15, .35 # Voor nu zo laten, lijken wel goede voorbeelden samplesize <- 3*40 # c(40, 40, 40) - geeft fout in restr, data ms dan gek? # 40 - geeft nu ineens 40 in totaal b.ratios <- c(3,2,1) sample <- NULL # Determine true / population beta coefficients in data generating process D_ <- matrix(rep(1:n.coef), nrow = samplesize) D <- as.factor(D_) sample$D <- D sample <- model.matrix(~sample$D-1) colnames(sample) <- paste0("D_", seq_len(ncol(sample))) sample <- as.data.frame(sample) # Determine mean values (betas) var.e <- 1 if (sum(abs(b.ratios)) == 0) { # all zeros (thus under H0) means_pop <- b.ratios } else { fun <- function (d) { means_pop = b.ratios*d (1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) - f } d <- uniroot(fun, lower=0, upper=100)$root # Construct means_pop means_pop <- b.ratios*d # Check #means_pop #[1] 0.9185587 0.6123724 0.3061862 #means_pop[1] - means_pop[2]; means_pop[2] - means_pop[3] # Check: ES = #(1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) # .25 } set.seed(123) # set seed: to obtain same results when you re-run it epsilon <- rnorm(samplesize, sd=sqrt(var.e)) sample$y <- as.matrix(sample[, 1:n.coef]) %*% matrix(means_pop, nrow = n.coef) + epsilon # Obtain fit fit <- lm(y ~ 0 + D, data=sample) #fit ## ----message = TRUE, warning = FALSE---------------------------------------------------------------------------------- # H1 vs complement (default) - H1 is true H1 <- "D1 > D2 > D3" # mu1 > mu2 > mu3 # Apply GORIC # set.seed(123) # set seed: to obtain same results when you re-run it results_1c <- goric(fit, hypotheses = list(H1)) results_1c ## ----message = F, warning = FALSE------------------------------------------------------------------------------------- # Benchmarks based on null set.seed(123) # set seed: to obtain same results when you re-run it benchmarks_1c <- benchmark(results_1c, iter = 200) #benchmarks_1c # use in R file print(benchmarks_1c, color = FALSE) # use in Rmd file, since Rmd cannot deal with colored text #plot(benchmarks_1c) plot(benchmarks_1c, log_scale = T) ## ----include = FALSE-------------------------------------------------------------------------------------------------- hypo_rate <- print(benchmarks_1c, hypo_rate_threshold = 1) hyporate <- hypo_rate$hypothesis_rate[[2]] ## ----message=F, warning=F, eval = F----------------------------------------------------------------------------------- # hypo_rate_2 <- print(benchmarks_1c, hypo_rate_threshold = 2) ## ----include = F------------------------------------------------------------------------------------------------------ hypo_rate_2 <- print(benchmarks_1c, hypo_rate_threshold = 2) ## --------------------------------------------------------------------------------------------------------------------- hypo_rate_2$hypothesis_rate[[2]] ## ----message = TRUE, warning = FALSE---------------------------------------------------------------------------------- plot(benchmarks_1c, log_scale = TRUE) ## ----message = F, warning = FALSE------------------------------------------------------------------------------------- # H1, H2, and unconstrained (default) - subset/overlap, that is, H1 is true H1 <- "D1 > D2 > D3" # mu1 > mu2 > mu3 H2 <- "D1 > D2" # H2: D1 > D2, D3 # mu1 > mu2, mu3 # Apply GORIC # set.seed(123) results_12u <- goric(fit, hypotheses = list(H1, H2)) ## ----message = F, warning = FALSE------------------------------------------------------------------------------------- results_12u ## ----message = F, warning = FALSE------------------------------------------------------------------------------------- round(results_12u$ratio.gw, 3) ## ----message = F, warning = FALSE------------------------------------------------------------------------------------- # Benchmarks set.seed(123) benchmarks_12u <- benchmark(results_12u, model_type = "means", iter = 400) #benchmarks_12u # R file print(benchmarks_12u, color = FALSE) # Rmd file # # Plots of benchmarks #plot(benchmarks_12u, output_type = "rgw") #plot(benchmarks_12u, output_type = "rgw", log_scale = TRUE) # #plot_out <- plot(benchmarks_12u) # save all plots in object plot_out #plot(plot_out$grobs$`H1 vs. H2`) # call separate plot # ## Plots of benchmarks with log10 transformation of x-axis #plot_out_log <- plot(benchmarks_12u, output_type = "rgw", log_scale = TRUE) # save all plots in object plot_out --> #plot(plot_out_log$grobs$`H1 vs. H2`) # call separate plot ## ----message = F, warning = FALSE, eval = FALSE----------------------------------------------------------------------- # #plot(benchmarks_12u, output_type = "rgw", log_scale = TRUE) ## ----echo=FALSE, message=FALSE, warning=FALSE, fig.width=12, fig.height=10, dpi=200, out.width="100%"----------------- bp <- plot(benchmarks_12u, output_type = "rgw", log_scale = TRUE) bp$plots <- lapply(bp$plots, function(g) { g + #@ggplot2::labs(title = NULL) + ggplot2::theme_bw(base_size = 13) + ggplot2::theme( legend.position = "bottom", legend.box = "vertical", legend.title = ggplot2::element_text(size = 12), legend.text = ggplot2::element_text(size = 11), axis.title = ggplot2::element_text(size = 13), axis.text = ggplot2::element_text(size = 11) ) }) bp ## ----echo = F, message = FALSE, warning = FALSE----------------------------------------------------------------------- b.ratios <- c(2.5,2.5,1) sample <- NULL # Determine true / population beta coefficients in data generating process D_ <- matrix(rep(1:n.coef), nrow = samplesize) D <- as.factor(D_) sample$D <- D sample <- model.matrix(~sample$D-1) colnames(sample) <- paste0("D_", seq_len(ncol(sample))) sample <- as.data.frame(sample) # Determine mean values (betas) var.e <- 1 if (sum(abs(b.ratios)) == 0) { # all zeros (thus under H0) means_pop <- b.ratios } else { fun <- function (d) { means_pop = b.ratios*d (1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) - f } d <- uniroot(fun, lower=0, upper=100)$root # Construct means_pop means_pop <- b.ratios*d # Check #means_pop #[1] 0.8838835 0.8838835 0.3535534 #means_pop[1] - means_pop[2]; means_pop[2] - means_pop[3] # Check: ES = #(1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) # .25 } set.seed(124) # set seed: to obtain same results when you re-run it epsilon <- rnorm(samplesize, sd=sqrt(var.e)) sample$y <- as.matrix(sample[, 1:n.coef]) %*% matrix(means_pop, nrow = n.coef) + epsilon # Obtain fit fit_border <- lm(y ~ 0 + D, data=sample) #fit_border ## ----message = TRUE, warning = FALSE---------------------------------------------------------------------------------- # H1 vs complement (default) - border (nl., mu1 = mu2 > mu3) is true H1 <- "D1 > D2 > D3" # mu1 > mu2 > mu3 # Apply GORIC # set.seed(123) results_1c_border <- goric(fit_border, hypotheses = list(H1)) results_1c_border ## ----message = F, warning = FALSE------------------------------------------------------------------------------------- #Default null (when using `model_type = "means"`) # Loglik benchmarks based on default null / no effect sizes, that is, # setting all three means equal in the population set.seed(123) benchmarks_1c_border <- benchmark(results_1c_border, model_type = "means", iter = 400) # loglik diff #print(benchmarks_1c_border, output_type = "ld") # in R file print(benchmarks_1c_border, output_type = "ld", color = FALSE) # in Rmd file plot(benchmarks_1c_border, output_type = "ld") # ratio loglik weights #print(benchmarks_1c_border, output_type = "rlw") # in R file print(benchmarks_1c_border, output_type = "rlw", color = FALSE) # in Rmd file #plot(benchmarks_1c_border, output_type = "rlw", x_lim = c(0, 2.5)) plot(benchmarks_1c_border, output_type = "rlw", log_scale = TRUE) ## ----message = F, warning = FALSE------------------------------------------------------------------------------------- # Specifying multiple null populations, that is, # using all possibilities of setting inequalities to equalities. # Here, we will use the default `model_type` (i.e., "asymp") which takes population parameter values (instead of population effect sizes) est <- coef(fit_border) pop_est <- matrix(c( mean(est[1:3]), mean(est[1:3]), mean(est[1:3]), mean(est[1:2]), mean(est[1:2]), est[3], mean(est[1:2]), est[2], mean(est[1:2]), est[1], mean(est[2:3]), mean(est[2:3]), est[1], est[2], est[3] ), byrow = TRUE, ncol = length(est)) rownames(pop_est) <- c("PE_123eq", "PE_12eq", "PE_13eq", "PE_23eq", "Observed") # set.seed(123) benchmarks_1c_border_allpos <- benchmark(results_1c_border, pop_est = pop_est, iter = 200) # # loglik difference #print(benchmarks_1c_border_allpos, output_type = "ld") # R file print(benchmarks_1c_border_allpos, output_type = "ld", color = FALSE) # Rmd file plot(benchmarks_1c_border_allpos, output_type = "ld") #plot(benchmarks_1c_border_allpos, output_type = "ld", x_lim = c(-.25,.25)) # # ratio of loglik weights #print(benchmarks_1c_border_allpos, output_type = "rlw") # R file print(benchmarks_1c_border_allpos, output_type = "rlw", color = FALSE) # Rmd file #plot(benchmarks_1c_border_allpos, output_type = "rlw") #plot(benchmarks_1c_border_allpos, output_type = "rlw", x_lim = c(0.950, 1.025)) plot(benchmarks_1c_border_allpos, output_type = "rlw", log_scale = TRUE) ## ----echo = F, message = FALSE, warning = FALSE----------------------------------------------------------------------- # Now, group size is 200 (instead of 40) samplesize_orig <- samplesize samplesize <- 3*200 b.ratios <- c(2.5,2.5,1) sample <- NULL # Determine true / population beta coefficients in data generating process D_ <- matrix(rep(1:n.coef), nrow = samplesize) D <- as.factor(D_) sample$D <- D sample <- model.matrix(~sample$D-1) colnames(sample) <- paste0("D_", seq_len(ncol(sample))) sample <- as.data.frame(sample) # Determine mean values (betas) var.e <- 1 if (sum(abs(b.ratios)) == 0) { # all zeros (thus under H0) means_pop <- b.ratios } else { fun <- function (d) { means_pop = b.ratios*d (1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) - f } d <- uniroot(fun, lower=0, upper=100)$root # Construct means_pop means_pop <- b.ratios*d # Check #means_pop #[1] 0.8838835 0.8838835 0.3535534 #means_pop[1] - means_pop[2]; means_pop[2] - means_pop[3] # Check: ES = #(1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) # .25 } set.seed(123) # set seed: to obtain same results when you re-run it epsilon <- rnorm(samplesize, sd=sqrt(var.e)) sample$y <- as.matrix(sample[, 1:n.coef]) %*% matrix(means_pop, nrow = n.coef) + epsilon # Obtain fit fit_border_200 <- lm(y ~ 0 + D, data=sample) #fit_border_200 # Return to original value samplesize <- samplesize_orig ## ----message = TRUE, warning = FALSE---------------------------------------------------------------------------------- # Now, group size is 200 (instead of 40) # H1 vs complement (default) - border (nl., mu1 = mu2 > mu3) is true H1 <- "D1 > D2 > D3" # mu1 > mu2 > mu3 # Apply GORIC # set.seed(123) results_1c_border_200 <- goric(fit_border_200, hypotheses = list(H1)) results_1c_border_200 ## ----message = F, warning = FALSE------------------------------------------------------------------------------------- # Specifying multiple null populations, that is, # using all possibilities of setting inequalities to equalities. # Here, we will use the default `model_type` (i.e., "asymp") which takes population parameter values (instead of population effect sizes) est <- coef(fit_border_200) pop_est <- matrix(c( mean(est[1:3]), mean(est[1:3]), mean(est[1:3]), mean(est[1:2]), mean(est[1:2]), est[3], mean(est[1:2]), est[2], mean(est[1:2]), est[1], mean(est[2:3]), mean(est[2:3]), est[1], est[2], est[3] ), byrow = TRUE, ncol = length(est)) rownames(pop_est) <- c("PE_123eq", "PE_12eq", "PE_13eq", "PE_23eq", "Observed") # set.seed(123) benchmarks_1c_border_allpos_200 <- benchmark(results_1c_border_200, pop_est = pop_est, iter = 200) # # loglik difference #print(benchmarks_1c_border_allpos_200, output_type = "ld") # R file print(benchmarks_1c_border_allpos_200, output_type = "ld", color = FALSE) # Rmd file plot(benchmarks_1c_border_allpos_200, output_type = "ld") # x_lim = c(, ) # # ratio of loglik weights #print(benchmarks_1c_border_allpos_200, output_type = "rlw") # R file print(benchmarks_1c_border_allpos_200, output_type = "rlw", color = FALSE) # Rmd file #plot(benchmarks_1c_border_allpos_200, output_type = "rlw") # x_lim = c(, ) plot(benchmarks_1c_border_allpos_200, output_type = "rlw", log_scale = T) ## ----message = F, warning = FALSE------------------------------------------------------------------------------------- # Specifying multiple null populations, that is, # using all possibilities of setting inequalities to equalities. # Here, we will use the default `model_type` (i.e., "asymp") which takes population parameter values (instead of population effect sizes) est <- coef(fit) pop_est <- matrix(c( mean(est[1:3]), mean(est[1:3]), mean(est[1:3]), mean(est[1:2]), mean(est[1:2]), est[3], mean(est[1:2]), est[2], mean(est[1:2]), est[1], mean(est[2:3]), mean(est[2:3]), est[1], est[2], est[3] ), byrow = TRUE, ncol = length(est)) rownames(pop_est) <- c("PE_123eq", "PE_12eq", "PE_13eq", "PE_23eq", "Observed") # set.seed(123) benchmarks_1c_allpos <- benchmark(results_1c, pop_est = pop_est, iter = 200) # # loglik difference #print(benchmarks_1c_allpos, output_type = "ld") # R file print(benchmarks_1c_allpos, output_type = "ld", color = FALSE) # Rmd file plot(benchmarks_1c_border_allpos_200, output_type = "ld") # x_lim = c(, ) # # ratio of loglik weights #print(benchmarks_1c_allpos, output_type = "rlw") # R file print(benchmarks_1c_allpos, output_type = "rlw", color = FALSE) # Rmd file #plot(benchmarks_1c_allpos, output_type = "rlw") # x_lim = c(, ) plot(benchmarks_1c_allpos, output_type = "rlw", log_scale = T) ## ----echo = F, message = FALSE, warning = FALSE----------------------------------------------------------------------- # Now, total sample size is 200 (instead of 40) samplesize_orig <- samplesize samplesize <- 3*200 b.ratios <- c(3,2,1) sample <- NULL # Determine true / population beta coefficients in data generating process D_ <- matrix(rep(1:n.coef), nrow = samplesize) D <- as.factor(D_) sample$D <- D sample <- model.matrix(~sample$D-1) colnames(sample) <- paste0("D_", seq_len(ncol(sample))) sample <- as.data.frame(sample) # Determine mean values (betas) var.e <- 1 if (sum(abs(b.ratios)) == 0) { # all zeros (thus under H0) means_pop <- b.ratios } else { fun <- function (d) { means_pop = b.ratios*d (1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) - f } d <- uniroot(fun, lower=0, upper=100)$root # Construct means_pop means_pop <- b.ratios*d # Check #means_pop #[1] 0.9185587 0.6123724 0.3061862 #means_pop[1] - means_pop[2]; means_pop[2] - means_pop[3] # Check: ES = #(1/sqrt(var.e)) * sqrt((1/n.coef) * sum((means_pop - mean(means_pop))^2)) # .25 } set.seed(123) # set seed: to obtain same results when you re-run it epsilon <- rnorm(samplesize, sd=sqrt(var.e)) sample$y <- as.matrix(sample[, 1:n.coef]) %*% matrix(means_pop, nrow = n.coef) + epsilon # Obtain fit fit_200 <- lm(y ~ 0 + D, data=sample) #fit_200 # Return to original value samplesize <- samplesize_orig ## ----message = TRUE, warning = FALSE---------------------------------------------------------------------------------- # Now, group size is 200 (instead of 40) # H1 vs complement (default) H1 <- "D1 > D2 > D3" # mu1 > mu2 > mu3 # Apply GORIC # set.seed(123) results_1c_200 <- goric(fit_200, hypotheses = list(H1)) results_1c_200 ## ----message = F, warning = FALSE------------------------------------------------------------------------------------- # Specifying multiple null populations, that is, # using all possibilities of setting inequalities to equalities. # Here, we will use the default `model_type` (i.e., "asymp") which takes population parameter values (instead of population effect sizes) est <- coef(fit_200) pop_est <- matrix(c( mean(est[1:3]), mean(est[1:3]), mean(est[1:3]), mean(est[1:2]), mean(est[1:2]), est[3], mean(est[1:2]), est[2], mean(est[1:2]), est[1], mean(est[2:3]), mean(est[2:3]), est[1], est[2], est[3] ), byrow = TRUE, ncol = length(est)) rownames(pop_est) <- c("PE_123eq", "PE_12eq", "PE_13eq", "PE_23eq", "Observed") # set.seed(123) benchmarks_1c_allpos_200 <- benchmark(results_1c_200, pop_est = pop_est, iter = 200) # # loglik difference #print(benchmarks_1c_allpos, output_type = "ld") # R file print(benchmarks_1c_allpos_200, output_type = "ld", color = FALSE) # Rmd file plot(benchmarks_1c_allpos_200, output_type = "ld") # x_lim = c(, ) # # ratio of loglik weights #print(benchmarks_1c_allpos_200, output_type = "rlw") # R file print(benchmarks_1c_allpos_200, output_type = "rlw", color = FALSE) # Rmd file #plot(benchmarks_1c_allpos_200, output_type = "rlw") # x_lim = c(, ) plot(benchmarks_1c_allpos_200, output_type = "rlw", log_scale = TRUE)