---
title: "Data management and disproportionality"
output: 
  rmarkdown::html_vignette:
    toc: true
    keep_md: true
vignette: >
  %\VignetteIndexEntry{template_main}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}

---

```{r setup, include = FALSE}
# Default knitr chunk options applied to every chunk of this vignette.
knitr::opts_chunk$set(
  # chunks and their source code are included in the rendered document
  include  = TRUE,
  echo     = TRUE,
  # source and output share one block; output lines are prefixed with "#>"
  collapse = TRUE,
  comment  = "#>",
  # warnings and messages are not shown in the rendered document
  warning  = FALSE,
  message  = FALSE
)
```

# Main (data management and disproportionality) script template

You may want to organize your research folder using small scripts,
each designed for a part of your analysis plan.

This approach is inspired by [Reproducible Research in R](https://mjrolland.github.io/reproducibility-checklist/cheatsheet.html).

-    /folder
-    ../data
-    ../scripts
-    ../etc.

The main script might be located in scripts, under the name "01_main.R".

-    ../scripts/01_main.R

If you haven't already, read `vignette("getting_started")`, `vignette("basic_workflow")` and `vignette("descriptive")`
for details on what this template does.

## Paths

You may create a separate script that defines your paths, and call it at the beginning of your main script.

```{r libraries}
library(vigicaen)
library(rlang)
library(here) # if you like the here syntax for file paths
library(dplyr)
```

```{r path_classic, eval = FALSE}
# ---- Paths ---- ####

# Folder the tables are read from; adapt it to your own setup.
path_base   <- "../vigibase_ecl/main"

# ---- Import ---- ####

# One dt_parquet() call per table, each with in_memory = FALSE.
# NOTE(review): presumably in_memory = FALSE avoids loading the whole table
# into RAM - confirm against the dt_parquet() documentation.
demo     <- dt_parquet(path_base, "demo", in_memory = FALSE)
adr      <- dt_parquet(path_base, "adr",  in_memory = FALSE)
drug     <- dt_parquet(path_base, "drug", in_memory = FALSE)
link     <- dt_parquet(path_base, "link", in_memory = FALSE)
out      <- dt_parquet(path_base, "out",  in_memory = FALSE)
srce     <- dt_parquet(path_base, "srce", in_memory = FALSE)
followup <- dt_parquet(path_base, "followup", in_memory = FALSE)

# Suspected duplicates table, used to deduplicate demo.
suspdup  <- dt_parquet(path_base, "suspdup", in_memory = FALSE)

# Runs the dictionary script. The path is relative, so it is resolved
# against the current working directory.
source("00_dict.R") # from template_dictionary
```

```{r test_datasets, include = FALSE}
# Hidden chunk: the underscore-suffixed example objects stand in for the
# real imports so that the vignette can be built.

# Case-level tables
adr      <- adr_
demo     <- demo_
drug     <- drug_
followup <- followup_
link     <- link_
out      <- out_
srce     <- srce_

# Dictionaries
meddra      <- meddra_
mp          <- mp_
smq_content <- smq_content_
smq_list    <- smq_list_
thg         <- thg_

# One-row suspected-duplicates table with no duplicate id recorded.
suspdup <- data.table::data.table(UMCReportId = 1, SuspectedduplicateReportId = NA)
```

```{r codes, include = FALSE, eval=TRUE, message = FALSE}
# Reaction and drug code lists taken from the example object `ex_`.
a_llt    <- ex_$a_llt
d_drecno <- ex_$d_drecno

# Codes returned by get_atc_code() for the ATC selection "L03AA",
# stored under the name "l03".
atc_drecno <- get_atc_code(
  atc_sel  = rlang::list2(l03 = "L03AA"),
  mp       = mp_,
  thg_data = thg_,
  vigilyze = TRUE
)
```

## Demo

```{r demo_dm, eval = TRUE}
# ---- Deduplicating, drugs, reactions ---- ####

# The steps below run in sequence on demo, in a single pipe.

demo <-
  demo |>
  # Deduplicating: drop reports whose id is listed as a suspected duplicate
  filter(!(UMCReportId %in% suspdup$SuspectedduplicateReportId)) |>
  # Drugs, from a list of drugs
  add_drug(d_code = d_drecno, drug_data = drug) |>
  # Drugs, from ATC
  add_drug(d_code = atc_drecno, drug_data = drug) |>
  # Reactions
  add_adr(a_code = a_llt, adr_data = adr)

# ---- Demographics ---- ####

# Age, sex

demo <-
  demo |>
  mutate(
    # AgeGroup values up to 4 are pooled into "<18"; values 5 to 8 each get
    # their own class. Values above 8 fall outside the breaks and become NA.
    # The second class is labelled "18-44" (AgeGroup 5 in the VigiBase
    # coding) so that it does not overlap with "45-64".
    age = cut(as.integer(AgeGroup),
              breaks = c(0, 4, 5, 6, 7, 8),
              include.lowest = TRUE, right = TRUE,
              labels = c("<18", "18-44", "45-64", "65-74", "75+")),

    # Only "1" and "2" are kept as 1 and 2. Any other value
    # (e.g. "-", "0", "9", or a missing Gender) becomes NA.
    sex = case_when(Gender == "1" ~ 1,
                    Gender == "2" ~ 2,
                    TRUE ~ NA_real_)
  )

# Death + outcome availability, follow-up, seriousness, year

demo <-
  demo |>
  mutate(
    # death: NA when the report has no row in `out`; otherwise TRUE if one
    # of its rows in `out` has Seriousness == "1", FALSE if none has.
    death = ifelse(
      UMCReportId %in% out$UMCReportId,
      UMCReportId %in% pull(filter(out, Seriousness == "1"), UMCReportId),
      NA
    ),

    # fup: 1 when the report is found in `followup`, 0 otherwise.
    fup = if_else(UMCReportId %in% followup$UMCReportId, 1, 0),

    # serious: built like death, from the rows of `out` with Serious == "Y".
    serious = ifelse(
      UMCReportId %in% out$UMCReportId,
      UMCReportId %in% pull(filter(out, Serious == "Y"), UMCReportId),
      NA
    ),

    # year: first four characters of FirstDateDatabase, converted to numeric.
    year = as.numeric(substr(FirstDateDatabase, 1, 4))
  )

# type of reporter, then explicit multi-level vars

# NOTE(review): if srce can hold several rows for one UMCReportId, the join
# below repeats the matching demo rows - confirm srce has one row per report.

demo <-
  demo |>
  left_join(
    transmute(srce, UMCReportId, type_reporter = Type),
    by = "UMCReportId"
  ) |>
  # Each code is declared next to its label; codes not listed become NA.
  mutate(
    Type = factor(
      Type,
      levels = c("1", "2", "3", "4", "5"),
      labels = c(
        "Spontaneous",
        "Report from study",
        "Other",
        "Not available to sender (unknown)",
        "PMS/Special monitoring"
      )
    ),
    type_reporter = factor(
      type_reporter,
      levels = c("1", "2", "3", "4", "5"),
      labels = c(
        "Physician",
        "Pharmacist",
        "Other Health Professional",
        "Lawyer",
        "Consumer or other non health professional"
      )
    ),
    Region = factor(
      Region,
      levels = c("1", "2", "3", "4", "5", "6"),
      labels = c(
        "African Region",
        "Region of the Americas",
        "South-East Asia Region",
        "European Region",
        "Eastern Mediterranean Region",
        "Western Pacific Region"
      )
    )
  )

# ---- Check ---- ####

# Checks the drug columns, the reaction columns and "fup".
check_dm(demo, cols = c(names(d_drecno), names(a_llt), "fup"))
```

## Link 

```{r link_drug_adr}
# Add the drug columns to link, using the d_drecno codes.
link <- add_drug(link, d_code = d_drecno, drug_data = drug)

# Add the reaction columns to link, using the a_llt codes.
link <- add_adr(link, a_code = a_llt, adr_data = adr)
```

## Basic models

```{r mods}
# ---- Bivariate ---- ####

# y and x receive the column names taken from a_llt and d_drecno.
rb <- compute_dispro(
  demo,
  y = names(a_llt),
  x = names(d_drecno)
)

# remove the hashes to save your results
## write.csv2(rb, here("outputs", "rb.csv"), row.names = FALSE)

# ---- Multivariate ---- ####

# Binomial glm of a_colitis on nivolumab, age and sex.
mod <- glm(
  a_colitis ~ nivolumab + age + sex,
  data   = demo,
  family = "binomial"
)

# The coefficient table of the model summary is handed to compute_or_mod(),
# which is given the Estimate and Std..Error columns.
rm <- compute_or_mod(
  summary(mod)$coefficients,
  estimate = Estimate,
  std_er = Std..Error
)

# remove the hashes to save your results
## write.csv2(rm, here("outputs", "rm.csv"), row.names = FALSE)
```

## Basic descriptive

```{r desc_demo}
# ---- General description ---- ####

# Columns of demo passed to desc_facvar(), with ncat_max set to 20.
r_desc <- desc_facvar(
  demo,
  vf = c(
    "age", "sex",
    "type_reporter",
    "Type",
    "year",
    names(d_drecno),
    names(a_llt),
    "serious", "death"
  ),
  ncat_max = 20
)

# The three calls below work on link, with the reaction names from a_llt
# and the drug names from d_drecno.

# ---- Time to onset ---- ####

r_tto <-
  link |>
  desc_tto(adr_s = names(a_llt), drug_s = names(d_drecno))

# ---- Dechallenge ---- ####

r_dch <-
  link |>
  desc_dch(adr_s = names(a_llt), drug_s = names(d_drecno))

# ---- Rechallenge ---- ####

# desc_rch() additionally receives demo through demo_data.
r_rch <-
  link |>
  desc_rch(
    demo_data = demo,
    adr_s = names(a_llt),
    drug_s = names(d_drecno)
  )
```