% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tweet.R
\name{tweet}
\alias{tweet}
\title{Examine and summarize variables in a dataset}
\usage{
tweet(data, select_vars = NULL, top_n = 3, sort_by = "type")
}
\arguments{
\item{data}{The dataset to summarize, typically the output of the clean_the_nest function.}

\item{select_vars}{Optional vector of variable names to examine. If NULL, all variables will be summarized.}

\item{top_n}{Number of top categories to display for factor and character variables. Default is 3.}

\item{sort_by}{How to sort variables in the output. Options are "name" (alphabetical) or "type" (grouped by data type). Default is "type".}
}
\value{
A data frame with one row per variable, containing variable name, type, missingness, and type-specific statistics.
}
\description{
Provides a comprehensive summary of the variables in a dataset that has been cleaned with clean_the_nest. The function
examines variables by type and reports statistics appropriate to numeric, date, factor, and character variables.
For numeric variables, it shows min/max values, quartiles, and missing data counts. For date variables, it displays
the date range and the percentage of non-missing values. For factor and character variables, it shows the number of
unique levels, the frequency of the top levels, and missing data counts.
}
\examples{
# Load the example data and clean it with clean_the_nest before calling tweet
data(dx_data)
df_diag <- clean_the_nest(
  dx_data,
  drop_eggs = TRUE,
  data_type = "cases",
  id_var = "identity",
  diagnosis = "disease_name",
  lettername1 = "first_name",
  lettername2 = "surname",
  dob = "date_of_birth",
  medicare = "medicare_no",
  gender = "gender",
  postcode = "postcode",
  fn = "indigenous_status",
  onset_date = "diagnosis_date"
)

# Summarise every variable in the cleaned dataset
summary_df <- tweet(df_diag)

# Restrict the summary to a chosen set of variables
summary_df_subset <- tweet(
  df_diag,
  select_vars = c("age", "gender", "onset_date")
)

# Report the five most frequent categories instead of the default three
summary_df_detailed <- tweet(df_diag, top_n = 5)
}
