The explore package offers a simplified way to use popular data sets or to create synthetic data for experimenting/teaching/training.
This data set comes with the palmerpenguins package. It contains size measurements (flipper length, body mass, bill dimensions) for penguins in the Palmer Archipelago, together with each penguin's species, island, and sex.
library(dplyr)
library(explore)
data <- use_data_penguins()
glimpse(data)
#> Rows: 344
#> Columns: 8
#> $ species <fct> Adelie, Adelie, Adelie, Adelie, Adelie, Adelie, Adel~
#> $ island <fct> Torgersen, Torgersen, Torgersen, Torgersen, Torgerse~
#> $ bill_length_mm <dbl> 39.1, 39.5, 40.3, NA, 36.7, 39.3, 38.9, 39.2, 34.1, ~
#> $ bill_depth_mm <dbl> 18.7, 17.4, 18.0, NA, 19.3, 20.6, 17.8, 19.6, 18.1, ~
#> $ flipper_length_mm <int> 181, 186, 195, NA, 193, 190, 181, 195, 193, 190, 186~
#> $ body_mass_g <int> 3750, 3800, 3250, NA, 3450, 3650, 3625, 4675, 3475, ~
#> $ sex <fct> male, female, female, NA, female, male, female, male~
#> $ year <int> 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007, 2007~
This data set comes with the dplyr package. It contains data on 87 Star Wars characters.
data <- use_data_starwars()
glimpse(data)
#> Rows: 87
#> Columns: 14
#> $ name <chr> "Luke Skywalker", "C-3PO", "R2-D2", "Darth Vader", "Leia Or~
#> $ height <int> 172, 167, 96, 202, 150, 178, 165, 97, 183, 182, 188, 180, 2~
#> $ mass <dbl> 77.0, 75.0, 32.0, 136.0, 49.0, 120.0, 75.0, 32.0, 84.0, 77.~
#> $ hair_color <chr> "blond", NA, NA, "none", "brown", "brown, grey", "brown", N~
#> $ skin_color <chr> "fair", "gold", "white, blue", "white", "light", "light", "~
#> $ eye_color <chr> "blue", "yellow", "red", "yellow", "brown", "blue", "blue",~
#> $ birth_year <dbl> 19.0, 112.0, 33.0, 41.9, 19.0, 52.0, 47.0, NA, 24.0, 57.0, ~
#> $ sex <chr> "male", "none", "none", "male", "female", "male", "female",~
#> $ gender <chr> "masculine", "masculine", "masculine", "masculine", "femini~
#> $ homeworld <chr> "Tatooine", "Tatooine", "Naboo", "Tatooine", "Alderaan", "T~
#> $ species <chr> "Human", "Droid", "Droid", "Human", "Human", "Human", "Huma~
#> $ films <list> <"The Empire Strikes Back", "Revenge of the Sith", "Return~
#> $ vehicles <list> <"Snowspeeder", "Imperial Speeder Bike">, <>, <>, <>, "Imp~
#> $ starships <list> <"X-wing", "Imperial shuttle">, <>, <>, "TIE Advanced x1",~
This data set comes with the ggplot2 package. It contains the prices and other attributes of almost 54,000 diamonds.
data <- use_data_diamonds()
glimpse(data)
#> Rows: 53,940
#> Columns: 10
#> $ carat <dbl> 0.23, 0.21, 0.23, 0.29, 0.31, 0.24, 0.24, 0.26, 0.22, 0.23, 0.~
#> $ cut <ord> Ideal, Premium, Good, Premium, Good, Very Good, Very Good, Ver~
#> $ color <ord> E, E, E, I, J, J, I, H, E, H, J, J, F, J, E, E, I, J, J, J, I,~
#> $ clarity <ord> SI2, SI1, VS1, VS2, SI2, VVS2, VVS1, SI1, VS2, VS1, SI1, VS1, ~
#> $ depth <dbl> 61.5, 59.8, 56.9, 62.4, 63.3, 62.8, 62.3, 61.9, 65.1, 59.4, 64~
#> $ table <dbl> 55, 61, 65, 58, 58, 57, 57, 55, 61, 61, 55, 56, 61, 54, 62, 58~
#> $ price <int> 326, 326, 327, 334, 335, 336, 336, 337, 337, 338, 339, 340, 34~
#> $ x <dbl> 3.95, 3.89, 4.05, 4.20, 4.34, 3.94, 3.95, 4.07, 3.87, 4.00, 4.~
#> $ y <dbl> 3.98, 3.84, 4.07, 4.23, 4.35, 3.96, 3.98, 4.11, 3.78, 4.05, 4.~
#> $ z <dbl> 2.43, 2.31, 2.31, 2.63, 2.75, 2.48, 2.47, 2.53, 2.49, 2.39, 2.~
This data set comes with base R. The data set gives the measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are Iris setosa, versicolor, and virginica.
data <- use_data_iris()
glimpse(data)
#> Rows: 150
#> Columns: 5
#> $ Sepal.Length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.~
#> $ Sepal.Width <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.~
#> $ Petal.Length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.~
#> $ Petal.Width <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.~
#> $ Species <fct> setosa, setosa, setosa, setosa, setosa, setosa, setosa, s~
This data set comes with the ggplot2 package. It contains a subset of the fuel economy data that the EPA makes available on https://fueleconomy.gov/. It contains only models which had a new release every year between 1999 and 2008 - this was used as a proxy for the popularity of the car.
data <- use_data_mpg()
glimpse(data)
#> Rows: 234
#> Columns: 11
#> $ manufacturer <chr> "audi", "audi", "audi", "audi", "audi", "audi", "audi", "~
#> $ model <chr> "a4", "a4", "a4", "a4", "a4", "a4", "a4", "a4 quattro", "~
#> $ displ <dbl> 1.8, 1.8, 2.0, 2.0, 2.8, 2.8, 3.1, 1.8, 1.8, 2.0, 2.0, 2.~
#> $ year <int> 1999, 1999, 2008, 2008, 1999, 1999, 2008, 1999, 1999, 200~
#> $ cyl <int> 4, 4, 4, 4, 6, 6, 6, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 8, 8, ~
#> $ trans <chr> "auto(l5)", "manual(m5)", "manual(m6)", "auto(av)", "auto~
#> $ drv <chr> "f", "f", "f", "f", "f", "f", "f", "4", "4", "4", "4", "4~
#> $ cty <int> 18, 21, 20, 21, 16, 18, 18, 18, 16, 20, 19, 15, 17, 17, 1~
#> $ hwy <int> 29, 29, 31, 30, 26, 26, 27, 26, 25, 28, 27, 25, 25, 25, 2~
#> $ fl <chr> "p", "p", "p", "p", "p", "p", "p", "p", "p", "p", "p", "p~
#> $ class <chr> "compact", "compact", "compact", "compact", "compact", "c~
This data set comes with base R. The data was extracted from the 1974 Motor Trend US magazine, and comprises fuel consumption and 10 aspects of automobile design and performance for 32 automobiles (1973–74 models).
data <- use_data_mtcars()
glimpse(data)
#> Rows: 32
#> Columns: 11
#> $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8,~
#> $ cyl <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8,~
#> $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 140.8, 16~
#> $ hp <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 180, 180~
#> $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92,~
#> $ wt <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3.~
#> $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 22.90, 18~
#> $ vs <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,~
#> $ am <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,~
#> $ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3,~
#> $ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2,~
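This data set comes with base R. It describes the survival of passengers on the Titanic, broken down by class, sex, and age. With count = FALSE the data contains one row per person; with count = TRUE it is aggregated, and the column n holds the number of persons in each group.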
data <- use_data_titanic(count = FALSE)
glimpse(data)
#> Rows: 2,201
#> Columns: 4
#> $ Class <chr> "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd"~
#> $ Sex <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male~
#> $ Age <chr> "Child", "Child", "Child", "Child", "Child", "Child", "Child"~
#> $ Survived <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "~
data <- use_data_titanic(count = TRUE)
glimpse(data)
#> Rows: 32
#> Columns: 5
#> $ Class <chr> "1st", "2nd", "3rd", "Crew", "1st", "2nd", "3rd", "Crew", "1s~
#> $ Sex <chr> "Male", "Male", "Male", "Male", "Female", "Female", "Female",~
#> $ Age <chr> "Child", "Child", "Child", "Child", "Child", "Child", "Child"~
#> $ Survived <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "~
#> $ n <dbl> 0, 0, 35, 0, 0, 0, 17, 0, 118, 154, 387, 670, 4, 13, 89, 3, 5~
This data set is an incomplete collection of popular beers in Austria, Germany and Switzerland. The data were collected from various websites in 2023. Some of the collected data may be incorrect.
data <- use_data_beer()
glimpse(data)
#> Rows: 161
#> Columns: 11
#> $ name <chr> "Puntigamer Maerzen", "Puntigamer PR0,0ST", "Puntiga~
#> $ brand <chr> "Puntigamer", "Puntigamer", "Puntigamer", "Puntigame~
#> $ country <chr> "Austria", "Austria", "Austria", "Austria", "Austria~
#> $ year <dbl> 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023~
#> $ type <chr> "Rest", "Alkoholfrei", "Rest", "Rest", "Rest", "Rest~
#> $ color_dark <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1~
#> $ alcohol_vol_pct <dbl> 5.1, 0.0, 5.2, 6.0, 4.9, 5.2, 4.4, 0.5, 5.7, 5.3, 7.~
#> $ original_wort <dbl> 11.5, 5.1, 12.1, 13.8, 11.5, 11.9, 11.1, 7.0, 13.2, ~
#> $ energy_kcal_100ml <dbl> 40, 20, 43, 50, 42, 43, 42, 27, 48, 45, 58, 45, 43, ~
#> $ carb_g_100ml <dbl> 2.7, 4.4, 2.9, 3.6, 3.2, 3.0, 3.8, 5.7, 3.5, 3.3, 3.~
#> $ sugar_g_100ml <dbl> 0.0, 1.2, 0.0, 0.0, 0.0, 0.0, 0.0, 2.7, 0.0, 0.0, 0.~
The following functions create artificial data that can be used for unit testing or teaching.
data <- create_data_app(obs = 1000)
glimpse(data)
#> Rows: 1,000
#> Columns: 7
#> $ os <chr> "Android", "iOS", "Android", "iOS", "Other", "Android", "~
#> $ free <int> 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, ~
#> $ downloads <int> 5802, 5048, 4579, 3449, 2464, 11276, 4026, 6841, 10419, 5~
#> $ rating <dbl> 4, 4, 3, 4, 1, 4, 5, 5, 4, 1, 1, 4, 4, 5, 5, 4, 3, 4, 2, ~
#> $ type <chr> "Kids", "Media", "Other", "Shopping", "Connect", "Learn",~
#> $ updates <dbl> 63.00000, 58.00000, 62.00000, 44.00000, 24.00000, 75.0000~
#> $ screen_sizes <dbl> 3, 2, 3, 2, 1, 3, 1, 2, 2, 3, 1, 3, 2, 1, 3, 1, 4, 5, 3, ~
data <- create_data_buy(obs = 1000)
glimpse(data)
#> Rows: 1,000
#> Columns: 13
#> $ period <int> 202012, 202012, 202012, 202012, 202012, 202012, 202012~
#> $ buy <int> 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, ~
#> $ age <int> 39, 57, 55, 66, 71, 44, 64, 51, 70, 44, 58, 47, 68, 71~
#> $ city_ind <int> 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, ~
#> $ female_ind <int> 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, ~
#> $ fixedvoice_ind <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ~
#> $ fixeddata_ind <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
#> $ fixedtv_ind <int> 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, ~
#> $ mobilevoice_ind <int> 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, ~
#> $ mobiledata_prd <chr> "NO", "NO", "MOBILE STICK", "NO", "BUSINESS", "BUSINES~
#> $ bbi_speed_ind <int> 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, ~
#> $ bbi_usg_gb <int> 77, 49, 53, 44, 55, 93, 50, 64, 63, 87, 45, 45, 70, 79~
#> $ hh_single <int> 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, ~
data <- create_data_churn(obs = 1000)
glimpse(data)
#> Rows: 1,000
#> Columns: 9
#> $ price <dbl> 29, 27, 29, 11, 18, 21, 19, 13, 29, 22, 13, 27, 17, 11, 16,~
#> $ type <chr> "Premium", "Regular", "Premium", "Promo", "Promo", "Promo",~
#> $ usage <dbl> 63.0, 39.0, 87.0, 29.0, 22.5, 8.0, 56.0, 94.5, 46.0, 76.0, ~
#> $ shared <int> 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1,~
#> $ device <chr> "Computer", "Tablet", "Phone", "Tablet", "Computer", "Table~
#> $ newsletter <int> 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0,~
#> $ language <chr> "sp", "sp", "sp", "sp", "en", "en", "fr", "en", "en", "de",~
#> $ duration <int> 7, 47, 99, 33, 94, 17, 95, 92, 43, 16, 62, 14, 52, 20, 76, ~
#> $ churn <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,~
data <- create_data_esoteric(obs = 1000)
glimpse(data)
#> Rows: 1,000
#> Columns: 6
#> $ starsign <chr> "Leo", "Aquarius", "Virgo", "Pisces", "Aries", "Taurus~
#> $ chinese <chr> "Dragon", "Monkey", "Tiger", "Pig", "Pig", "Horse", "D~
#> $ moon <chr> "Waxing (+)", "Waxing (+)", "Waxing (+)", "Waning (-)"~
#> $ blood <chr> "A+", "AB+", "0+", "0+", "A+", "0+", "B+", "0+", "0-",~
#> $ fingers_crossed <int> 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, ~
#> $ success <int> 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, ~
data <- create_data_person(obs = 1000)
glimpse(data)
#> Rows: 1,000
#> Columns: 15
#> $ age <int> 46, 94, 66, 29, 82, 57, 65, 58, 29, 40, 84, 72, 24, ~
#> $ gender <chr> "Female", "Female", "Male", "Male", "Female", "Femal~
#> $ eye_color <chr> "Blue", "Green", "Brown", "Green", "Brown", "Brown",~
#> $ shoe_size <dbl> 45.2, 37.0, 45.0, 45.0, 39.0, 38.2, 41.2, 46.0, 40.0~
#> $ iq <dbl> 141, 71, 80, 74, 119, 95, 97, 135, 88, 140, 71, 126,~
#> $ education <int> 66, 41, 49, 49, 25, 68, 87, 46, 78, 14, 65, 62, 68, ~
#> $ income <dbl> 132.0, 95.0, 18.0, 54.0, 70.0, 128.0, 128.5, 32.0, 8~
#> $ handset <chr> "Apple", "Apple", "Apple", "Android", "Apple", "Andr~
#> $ pet <chr> "No", "Cat", "Other", "No", "Dog", "No", "Cat", "Dog~
#> $ favorite_pizza <chr> "Pepperoni", "Hawai", "Margaritha", "Carciofi", "Mar~
#> $ favorite_icecream <chr> "Lemon", "Strawberry", "Vanilla", "Vanilla", "Apple"~
#> $ likes_garlic <int> 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0~
#> $ likes_sushi <int> 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1~
#> $ likes_beatles <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0~
#> $ likes_beer <int> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1~
data <- create_data_random(obs = 1000)
glimpse(data)
#> Rows: 1,000
#> Columns: 12
#> $ id <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ~
#> $ target_ind <int> 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0,~
#> $ var_1 <int> 27, 59, 16, 85, 85, 48, 77, 30, 7, 44, 46, 34, 19, 51, 2, 7~
#> $ var_2 <int> 16, 14, 15, 51, 49, 62, 45, 6, 1, 22, 85, 27, 60, 61, 99, 1~
#> $ var_3 <int> 21, 94, 38, 63, 18, 66, 73, 50, 87, 83, 98, 67, 64, 5, 19, ~
#> $ var_4 <int> 30, 83, 59, 81, 29, 14, 89, 1, 57, 97, 27, 98, 4, 26, 26, 9~
#> $ var_5 <int> 25, 99, 72, 65, 24, 9, 30, 54, 78, 27, 32, 95, 49, 97, 85, ~
#> $ var_6 <int> 44, 40, 37, 53, 7, 72, 24, 84, 100, 11, 49, 68, 82, 77, 43,~
#> $ var_7 <int> 93, 59, 8, 85, 3, 81, 39, 14, 67, 62, 45, 81, 87, 99, 40, 3~
#> $ var_8 <int> 58, 49, 74, 23, 75, 82, 10, 28, 2, 60, 99, 85, 59, 34, 65, ~
#> $ var_9 <int> 80, 88, 24, 56, 90, 1, 16, 26, 77, 7, 90, 31, 89, 61, 46, 7~
#> $ var_10 <int> 31, 32, 87, 33, 13, 36, 93, 88, 82, 2, 63, 78, 72, 19, 58, ~
data <- create_data_unfair(obs = 1000)
glimpse(data)
#> Rows: 1,000
#> Columns: 22
#> $ age <int> 46, 94, 66, 29, 82, 57, 65, 58, 29, 40, 84, 72, 24, 87, 41~
#> $ gender <chr> "Female", "Female", "Male", "Male", "Female", "Female", "F~
#> $ eye_color <chr> "Blue", "Green", "Blue", "Blue", "Blue", "Brown", "Brown",~
#> $ shoe_size <dbl> 45.2, 37.0, 45.0, 45.0, 39.0, 38.2, 41.2, 46.0, 40.0, 42.0~
#> $ iq <dbl> 141, 71, 80, 74, 119, 95, 97, 135, 88, 140, 71, 126, 106, ~
#> $ education <int> 66, 41, 49, 49, 25, 68, 87, 46, 78, 14, 65, 62, 68, 16, 69~
#> $ income <dbl> 132.0, 95.0, 18.0, 54.0, 70.0, 128.0, 128.5, 32.0, 82.0, 9~
#> $ handset <chr> "Apple", "Apple", "Apple", "Android", "Apple", "Android", ~
#> $ pet <chr> "Other", "Cat", "Cat", "Dog", "Cat", "No", "Dog", "No", "N~
#> $ smoking <int> 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0~
#> $ name_arabic <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0~
#> $ outfit <chr> "Casual", "Casual", "Casual", "Alternative", "Elegant", "A~
#> $ glasses <int> 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1~
#> $ tatoos <int> 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0~
#> $ kids <int> 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0~
#> $ bad_debt <dbl> 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0~
#> $ credit_card <chr> "No", "Master", "Master", "No", "No", "Visa", "Visa", "Vis~
#> $ left_handed <int> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0~
#> $ skin_color <chr> "White", "Brown", "White", "White", "White", "White", "Bla~
#> $ religion <chr> "Christian", "No", "Christian", "No", "Christian", "No", "~
#> $ internet_gb <dbl> 0.000000, 60.609298, 260.437887, 55.199729, 0.000000, 179.~
#> $ target_ind <int> 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1~
Create an empty data set and add random variables.
data <- create_data_empty(obs = 1000) %>%
add_var_random_01("smoking", prob = c(0.8, 0.2)) %>%
add_var_random_cat("gender",
cat = c("female", "male", "diverse"),
prob = c(0.45, 0.45, 0.1)) %>%
add_var_random_dbl("internet_usage", min_val = 0, max_val = 1000) %>%
add_var_random_int("age", min_val = 18, max_val = 100) %>%
add_var_random_moon() %>%
add_var_random_starsign()
glimpse(data)
#> Rows: 1,000
#> Columns: 6
#> $ smoking <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ~
#> $ gender <chr> "male", "male", "female", "male", "female", "female", ~
#> $ internet_usage <dbl> 923.7630, 979.0669, 773.8658, 697.6332, 470.4925, 609.~
#> $ age <int> 84, 54, 44, 45, 60, 73, 60, 74, 62, 46, 81, 95, 58, 19~
#> $ random_moon <chr> "Waxing (+)", "Waning (-)", "Waning (-)", "Waxing (+)"~
#> $ random_starsign <chr> "Saggitarius", "Saggitarius", "Libra", "Pisces", "Pisc~