% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/correctness_check.R
\name{correctness_check}
\alias{correctness_check}
\title{Validate Data Against Correctness Rules}
\usage{
correctness_check(
  S_data,
  M_data,
  Result = FALSE,
  show_column = NULL,
  date_parser_fun = smart_to_gregorian_vec,
  golden_data = NULL,
  key_column = NULL,
  external_data = NULL,
  var_select = "all",
  batch_size = 1000,
  verbose = FALSE
)
}
\arguments{
\item{S_data}{A data frame containing the data to be validated.}

\item{M_data}{A data frame containing the validation rules. Expected columns (\code{Correctness_Error_Type} is optional):
\itemize{
  \item \code{VARIABLE}: The name of the variable to validate (must match column names in S_data)
  \item \code{Correctness_Rule}: The validation rule as an R expression (string)
  \item \code{TYPE}: The data type of the variable ("date", "numeric", or other)
  \item \code{Correctness_Error_Type}: (Optional) Classification of the error type
}}

\item{Result}{Logical. If \code{TRUE}, returns the detailed results for each row in S_data.
If \code{FALSE} (default), returns a summary of validation results.}

\item{show_column}{Character vector. When \code{Result=TRUE}, specifies additional columns from
S_data to include in the output.}

\item{date_parser_fun}{Function to convert date strings to Date objects. Default is \code{smart_to_gregorian_vec},
which should handle various date formats including Jalali dates.}

\item{golden_data}{Optional data frame or list containing reference data for validation.
Accessible within rules via the \code{GOLDEN} variable.}

\item{key_column}{Character string specifying the column name that links rows in S_data to
corresponding rows in golden_data. Required when comparing individual rows with golden_data.}

\item{external_data}{Optional list or data frame containing additional data for validation rules.}

\item{var_select}{Character vector or numeric indices specifying which variables from M_data to validate.
The default, \code{"all"}, validates every variable listed in M_data.}

\item{batch_size}{Integer. Number of rows to process in each batch (for efficiency). Default is \code{1000}.}

\item{verbose}{Logical. If \code{TRUE}, prints progress messages. Default is \code{FALSE}.}
}
\value{
If \code{Result=FALSE} (default): A data frame with one row per validated variable containing:
\itemize{
  \item VARIABLE: Variable name
  \item Condition_Met: Count of rows meeting the condition
  \item Condition_Not_Met: Count of rows not meeting the condition
  \item NA_Count: Count of rows where validation produced NA
  \item Total_Applicable: Count of non-NA validation results
  \item Total_Rows: Total number of rows in S_data
  \item Percent_Met: Percentage of applicable rows meeting the condition
  \item Percent_Not_Met: Percentage of applicable rows not meeting the condition
  \item Error_Type: Value from Correctness_Error_Type column in M_data
}

If \code{Result=TRUE}: A data frame with one row per row in S_data, containing:
\itemize{
  \item One column per validated variable with logical values (TRUE/FALSE/NA)
  \item Any additional columns specified in show_column
}
}
\description{
This function validates a data frame against a set of correctness rules specified in another data frame.
It allows for complex validation operations, comparison with reference data, and detailed reporting.
}
\details{
The function evaluates each rule specified in M_data against the corresponding data in S_data.
Rules are R expressions written as strings, evaluated in an environment where:

\itemize{
  \item Variables from S_data are available directly by name
  \item \code{val} refers to the current variable being validated
  \item \code{GOLDEN} provides access to reference data (when golden_data is provided)
}

Type conversion is applied to variables in S_data based on the TYPE column in M_data:
\itemize{
  \item "date": Values are converted using the date_parser_fun
  \item "numeric": Values are converted to numeric
  \item Other types: No conversion is applied
}

Special handling for date comparisons is provided, including automatic wrapping
of GOLDEN references when comparing dates.
}
\examples{
# External lookup table: drug names that the Prescription_drug rule checks against.
Authorized_drug <- data.frame(
  Drug_ID = seq_len(10),
  Drug_Name = c(
    "Atorvastatin", "Metformin", "Amlodipine", "Omeprazole",
    "Aspirin", "Levothyroxine", "Sertraline", "Pantoprazole",
    "Losartan", "ASA"
  ),
  stringsAsFactors = FALSE
)

# Reference (golden) records; passed below as golden_data.
golde <- data.frame(
  National_code = c("123", "456", "789", "545", "4454", "554", "665"),
  LastName = c(
    "Bahman", "Johnson", "Williams", "Brown", "Jones", "Garcia", "Miller"
  ),
  Certificate_Expiry = c(
    "1404-07-01", "2030-01-12", "2025-01-11", "1404-06-28", "2025-09-19",
    NA, NA
  ),
  Blood_type = c("A-", "B+", "AB", "A+", "O-", "O+", "AB-"),
  stringsAsFactors = FALSE
)

# Data set under validation; its column names are the VARIABLE entries of M_data.
S_data <- data.frame(
  National_code = c("123", "1456", "789", "545", "4454", "554"),
  LastName = c("Aliyar", "Johnson", "Williams", "Brown", "Jones", "Garcia"),
  VisitDate = c(
    "2025-09-23", "2021-01-10", "2021-01-03", "1404-06-28", "1404-07-28", NA
  ),
  Test_date = c(
    "1404-07-01", "2021-01-09", "2021-01-14", "1404-06-29", "2025-09-19", NA
  ),
  Certificate_validity = c(
    "2025-09-23", "2025-01-12", "2025-02-11", "1403-06-28", "2025-09-19", NA
  ),
  Systolic_Reading1 = c(110, NA, 145, 125, 114, NA),
  Systolic_Reading2 = c(125, 150, NA, 110, 100, NA),
  Prescription_drug = c(
    "Atorvastatin", "Metformin", "Amlodipine", "Omeprazole", "Aspirin",
    "Metoprolol"
  ),
  Blood_type = c("A-", "B+", "AB", "A+", "O-", "O+"),
  Height = c(178, 195, 165, NA, 155, 1.80),
  stringsAsFactors = FALSE
)

# Rule table: one row per variable of S_data. An empty string marks a
# variable that has no correctness rule in this example.
M_data <- data.frame(
  VARIABLE = c(
    "National_code", "Certificate_validity", "VisitDate", "Test_date",
    "LastName", "Systolic_Reading1", "Systolic_Reading2",
    "Prescription_drug", "Blood_type", "Height"
  ),
  Correctness_Rule = c(
    "National_code \%in\% GOLDEN$National_code",  # National_code
    "val <= GOLDEN$Certificate_Expiry",  # Certificate_validity
    "((val >= '1404-06-01' & val <= '1404-06-31') | val == as.Date('2021-01-02'))",  # VisitDate
    "val != VisitDate",  # Test_date
    "val \%in\% GOLDEN$LastName",  # LastName
    "",  # Systolic_Reading1
    "",  # Systolic_Reading2
    "val \%in\% Authorized_drug$Drug_Name",  # Prescription_drug
    "val \%in\% GOLDEN$Blood_type",  # Blood_type
    ""  # Height
  ),
  TYPE = c(
    "numeric", "date", "date", "date", "character",
    "numeric", "numeric", "character", "character", "numeric"
  ),
  Correctness_Error_Type = c(
    "Error", NA, "Warning", "Error", NA,
    NA, NA, NA, "Error", "Warning"
  ),
  stringsAsFactors = FALSE
)

# Summary output (Result = FALSE); National_code is the key that links
# rows of S_data to rows of the golden data.
result <- correctness_check(
  S_data        = S_data,
  M_data        = M_data,
  Result        = FALSE,
  golden_data   = golde,
  key_column    = "National_code",
  external_data = Authorized_drug
)

print(result)
# Row-level output (Result = TRUE), this time without key_column.
# If no key is selected, the golden data can still be used as a list of
# reference values, but rules that need a row-by-row match against it
# will evaluate to NA.
result <- correctness_check(
  S_data        = S_data,
  M_data        = M_data,
  Result        = TRUE,
  golden_data   = golde,
  external_data = Authorized_drug
)
print(result)

}
