## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(dpi=300) ## ----results = "hide", message = FALSE---------------------------------------- # importing the package functions library(paleobuddy) ## ----------------------------------------------------------------------------- # we set a seed so the results are reproducible set.seed(1) # set the necessary parameters # initial number of species n0 <- 1 # speciation rate - approx. 1 speciation event every 4my # we are trying to create a big phylogeny so phytools can function better lambda <- 0.25 # extinction rate - approx. 1 extinction event every 10my mu <- 0.15 # maximum simulation time - species that die after this are considered extant tMax <- 50 # run the simulation sim <- bd.sim(n0, lambda, mu, tMax = tMax) # take a look at the way the result is organized sim ## ----------------------------------------------------------------------------- # draw simulation draw.sim(sim, showLabel = FALSE) ## ----------------------------------------------------------------------------- # there are currently not many customization options for phylogenies phy <- make.phylo(sim) # plot it with APE - hide tip labels since there are a lot so it looks cluttered ape::plot.phylo(phy, show.tip.label = FALSE) ape::axisPhylo() # plot the molecular phylogeny ape::plot.phylo(ape::drop.fossil(phy), show.tip.label = FALSE) ape::axisPhylo() ## ----------------------------------------------------------------------------- # set a seed set.seed(3) # create simulation # note nExtant, defining we want 200 or more extant species at the end sim <- bd.sim(n0, lambda, mu, tMax = tMax, nExtant = c(200, Inf)) # check the number of extant species paste0("Number of species alive at the end of the simulation: ", sum(sim$EXTANT)) ## ----------------------------------------------------------------------------- # might look a bit cluttered ape::plot.phylo(ape::drop.fossil(make.phylo(sim)), show.tip.label = FALSE) ape::axisPhylo() ## ----------------------------------------------------------------------------- # we set a seed so the results are reproducible set.seed(5) # set the necessary parameters # initial number of species n0 <- 1 # speciation rate - it can be any function of time! lambda <- function(t) { 0.1 + 0.001*t } # extinction rate - also can be any function of time mu <- function(t) { 0.03 * exp(-0.01*t) } # maximum simulation time - species that die after this are considered extant tMax <- 50 # run the simulation sim <- bd.sim(n0, lambda, mu, tMax = tMax) # check the resulting clade out ape::plot.phylo(make.phylo(sim), show.tip.label = FALSE) ape::axisPhylo() ## ----------------------------------------------------------------------------- # again set a seed set.seed(1) # set the sampling rate # using a simple case - there will be on average T occurrences, # per species, where T is the species duration rho <- 1 # bins - used to represent the uncertainty in fossil occurrence times bins <- seq(tMax, 0, -1) # this is a simple 1my bin vector, but one could use the GSA timescale # or something random, etc # run the sampling simulation only for the first 10 species for brevity's sake # returnAll = TRUE makes it so the occurrences are returned as binned as well # (e.g. an occurrence at time 42.34 is returned as between 42 and 41) fossils <- suppressMessages(sample.clade(sim = sim, rho = rho, tMax = tMax, S = 1:10, bins = bins, returnAll = TRUE)) # suppressing messages - the message is to inform the user how # many speciesleft no fossils. In this case, it was 0 # take a look at how the output is organized head(fossils) ## ----------------------------------------------------------------------------- # take only first 5 species with head simHead <- head(sim, 10) # draw longevities with fossil time points suppressMessages(draw.sim(simHead, fossils = fossils)) ## ----------------------------------------------------------------------------- # draw longevities with fossil time ranges suppressMessages(draw.sim(simHead, fossils = fossils[, -3], fossilsFormat = "ranges")) ## ----------------------------------------------------------------------------- # make a copy pFossils <- fossils[, -3] # change the extant column pFossils["Extant"][pFossils["Extant"] == FALSE] = "extant" pFossils["Extant"][pFossils["Extant"] == TRUE] = "extinct" # change column names colnames(pFossils) <- c("Species", "Status", "min_age", "max_age") # check it out head(pFossils) ## ----------------------------------------------------------------------------- per.capita <- function(faBins, laBins, bins) { # create vectors to hold species that were born before and die in interval i, # species who were born in i and die later, # and species who were born before and die later NbL <- NFt <- Nbt <- rep(0, length(bins)) # for each interval for (i in 1:length(bins)) { # number of species that were already around before i and are not seen again NbL[i] <- sum(faBins > bins[i] & laBins == bins[i]) # number of species that were first seen in i and are seen later NFt[i] <- sum(faBins == bins[i] & laBins < bins[i]) # number of species that were first seen before i and are seen after i Nbt[i] <- sum(faBins > bins[i] & laBins < bins[i]) } # calculate the total rates p <- log((NFt + Nbt) / Nbt) q <- log((NbL + Nbt) / Nbt) return(list(p = p, q = q)) } ## ----------------------------------------------------------------------------- set.seed(1) # constant lambda and mu lambda <- 0.15 mu <- 0.05 # nFinal means there will be at least 100 species in the simulation sim <- bd.sim(n0, lambda, mu, tMax, nFinal = c(100, Inf)) # sample--note the higher rho, since this method relies strongly on perfect sampling fossils <- suppressMessages(sample.clade(sim, rho = 10, tMax, bins = bins, returnTrue = FALSE)) ## ----------------------------------------------------------------------------- # get the species names ids <- unique(fossils$Species) # get the first appearance bins - the first time in bins where the fossil was seen (lower bound) faBins <- unlist(lapply(ids, function(i) max(fossils$MaxT[fossils$Species == i]))) # get the last appearance bins - last time in bins where the fossil was seen (upper bound) laBins <- unlist(lapply(ids, function(i) min(fossils$MinT[fossils$Species == i]))) # get the estimates pc <- per.capita(faBins, laBins, bins) # get a mean for each rate print(paste0("Speciation estimate: ", mean(pc$p[is.finite(pc$p)], na.rm = TRUE))) print(paste0("Extinction estimate: ", mean(pc$q[is.finite(pc$q)], na.rm = TRUE))) # and plots plot(x = rev(bins), y = pc$p, type = "l", xlab = "Time (Mya)", ylab = "Speciation rate") abline(h = lambda) plot(x = rev(bins), y = pc$q, type = "l", xlab = "Time (Mya)", ylab = "Extinction rate") abline(h = mu) ## ----------------------------------------------------------------------------- # set a seed set.seed(2) # parameters to set things up n0 <- 1 tMax <- 20 # speciation can be dependent on a time-series variable as well as time lambda <- function(t, env) { 0.01 * t + 0.01*exp(0.01*env) } # let us use the package's temperature data data(temp) # this could instead be data(co2), the other environmental # data frame supplied by paleobuddy # we can make extinction be age-dependent by creating a shape parameter mu <- 10 mShape <- 0.5 # this will make it so species durations are distributed # as a Weibull with scale 10 and shape 2 # run the simulation # we pass the shape and environmental parameters sim <- suppressMessages(bd.sim(n0, lambda, mu, tMax = tMax, mShape = mShape, envL = temp)) # note that lShape and envM also exist # the defaults for all of these customization options is NULL # check out the phylogeny ape::plot.phylo(make.phylo(sim), show.tip.label = FALSE) ape::axisPhylo() ## ----------------------------------------------------------------------------- # speciation may be a step function, presented # as a rates vector and a shift times vector lList <- c(0.1, 0.2, 0.05) lShifts <- c(0, 10, 15) # lShifts could also be c(tMax, tMax - 10, tMax - 15) for identical results # make.rate is the function bd.sim calls to create a # ratem but we will use it here to see how it looks lambda <- make.rate(lList, tMax = 20, rateShifts = lShifts) t <- seq(0, 20, 0.1) plot(t, rev(lambda(t)), type = 'l', main = "Step function speciation rate", xlab = "Time (Mya)", ylab = "Speciation rate", xlim = c(20, 0)) ## ----------------------------------------------------------------------------- # it is not possible to combine the lambda(t, env) and lList methods listed above # but we can create a step function dependent on environmental data with ifelse mu_t <- function(t, env) { ifelse(t < 10, env, ifelse(t < 15, env * 2, env / 2)) } # pass it to make.rate with environmental data mu <- make.rate(mu_t, tMax = tMax, envRate = temp) plot(t, rev(mu(t)), type = 'l', main = "Environmental step function extinction rate", xlab = "Time (Mya)", ylab = "Rate", xlim = c(20, 0)) ## ----eval=FALSE--------------------------------------------------------------- # # set seed again # set.seed(1) # # # age-dependent speciation with a step function # lList <- c(10, 5, 12) # lShifts <- c(0, 10, 15) # lShape <- 2 # # # age-dependent extinction with a step function of an environmental variable # q <- function(t, env) { # ifelse(t < 10, 2*env, # ifelse(t < 15, 1.4*env, env / 2)) # } # # # note shape can be time-dependent as well, though # # we advise for variation not to be too abrupt due # # to computational issues # mShape <- function(t) { # return(1.2 + 0.025*t) # } # # # run the simulation # sim <- suppressMessages(bd.sim(n0, lList, q, tMax = tMax, lShape = lShape, # mShape = mShape, # envM = temp, lShifts = lShifts)) # # # check out the phylogeny # ape::plot.phylo(make.phylo(sim), show.tip.label = FALSE) ## ----------------------------------------------------------------------------- # as an example, we will use a PERT distribution, # a hat-shaped distribution used in PyRate # preservation function dPERT <- function(t, s, e, sp, a = 3, b = 3, log = FALSE) { # check if it is a valid PERT if (e >= s) { message("There is no PERT with e >= s") return(rep(NaN, times = length(t))) } # find the valid and invalid times id1 <- which(t <= e | t >= s) id2 <- which(!(t <= e | t >= s)) t <- t[id2] # initialize result vector res <- vector() # if user wants a log function if (log) { # invalid times get -Inf res[id1] <- -Inf # valid times calculated with log res[id2] <- log(((s - t) ^ 2)*((-e + t) ^ 2)/((s - e) ^ 5*beta(a,b))) } # otherwise else{ res[id1] <- 0 res[id2] <- ((s - t) ^ 2)*((-e + t) ^ 2)/((s - e) ^ 5*beta(a,b)) } return(res) } # set seed set.seed(1) # generate a quick simulation sim <- bd.sim(n0, lambda = 0.1, mu = 0.05, tMax = 20) # sample for the first 10 species fossils <- suppressMessages(sample.clade(sim = sim, rho = 3, tMax = 20, adFun = dPERT)) # here we return true times of fossil occurrences # draw longevities with fossil occurrences draw.sim(sim, fossils = fossils) ## ----eval=FALSE--------------------------------------------------------------- # ## setup # # set seed # set.seed(123) # # # initial number of species # n0 <- 1 # # # number of extant species # tMax <- 10 # # # lambda varies with absolute time (exponential decay) # lambda <- function(t) { # 1 + exp(-t/2) # } # # # mu will vary with species age and environmental variables # data(co2) # data(temp) # # # its scale will depend on CO2, except for a mass extinction event # scale <- function(t, env) { # ifelse(t < 10, env/20, # ifelse(t < 11, 1/1.9, env/30)) # # ME between 10 and 11my from simulation start # # step function-like dependence on CO2 otherwise # } # mu <- make.rate(scale, tMax = tMax, envRate = co2) # # # its shape will vary with time and temperature # shape <- function(t, env) { # 0.5 + 2.5 * log(t + 1) / env # } # mu_shape <- make.rate(shape, tMax = tMax, envRate = temp) # # # finally, fossil sampling will vary linearly with absolute time # rho <- function(t) { # 0.1 + 0.1 * t # } # # ## plot diversification rates to visualize # # time vector # time <- seq(0, tMax, by = 0.01) # # # plotting lambda, mu scale, and mu shape # plot(time, rev(lambda(time)), type = "l", xlim = c(tMax, 0), ylim = c(0, 2), # xlab = "Time (Mya)", ylab = "Rate value", # main = "Speciation rate, extinction scale and shape through time", # col = "orange") # lines(time, rev(1/mu(time)), col = "red") # lines(time, rev(mu_shape(time)), col = "blue") # legend(x = 15, y = 0.7, lty = 1, col = c("orange", "red", "blue"), # legend = c("lambda", "1/(mu scale)", "mu shape")) # # note we plotted the inverse of scale, since Weibull scales are # # equivalent to exponential rates when shape is equal to 1 # # ## simulate # # birth-death simulation # sim <- suppressMessages(bd.sim(n0, lambda, mu, tMax, mShape = mu_shape)) # # # fossil record simulation - returning true sampling times # fossils <- suppressMessages(sample.clade(sim, rho, tMax, returnTrue = TRUE)) # # # draw BD simulation with fossils # draw.sim(sim, fossils = fossils, showLabel = FALSE) # # # draw phylogenetic tree # ape::plot.phylo(make.phylo(sim), show.tip.label = FALSE) ## ----------------------------------------------------------------------------- # set seed set.seed(1) # initial number of species n0 <- 1 # number of extant species at the end of the simulation N <- 10 # conditioning the simulation on the number of extant species # is also a new feature of version 1.1! # speciation, higher for state 1 lambda <- c(0.1, 0.2) # extinction, higher for state 0 mu <- c(0.06, 0.03) # number of traits and states for each--one binary trait nTraits <- 1 nStates <- 2 # initial value of the trait X0 <- 0 # trait transition matrix--it's a list # to allow for the case with multiple traits Q <- list(matrix(c(0, 0.1, 0.1, 0), nrow = 2, ncol = 2)) # run simulation sim <- bd.sim.traits(n0, lambda, mu, N = N, nTraits = nTraits, nStates = nStates, X0 = X0, Q = Q) # extract sim object and trait data frames traits <- sim$TRAITS sim <- sim$SIM # make phylogeny phy <- make.phylo(sim) # get trait values for all tips, using the new traits.summary function tip_traits <- traits.summary(sim, traits)$trait1 # plot phylogenetic tree coloring for the trait ape::plot.phylo(phy, tip.color = c("red", "blue")[tip_traits + 1]) ## ----------------------------------------------------------------------------- # set seed set.seed(1) # fossil sampling rate, higher for state 0 rho <- c(0.5, 0.1) # run fossil sampling fossils <- sample.clade.traits(sim, rho, max(sim$TS), traits, nStates = nStates) # extract fossil record (fossils$TRAITS would be the same as sim$TRAITS here) fossils <- fossils$FOSSILS # extract trait values for both fossils and extant species tip_traits <- traits.summary(sim, traits, fossils, selection = "sampled")$trait1 # we can visualize the simulation and fossils using draw.sim draw.sim(sim, traits, fossils, traitLegendPlacement = "bottomleft") # we can get a phylogeny including extant species and fossils, to # simulate a reconstructed phylogeny used in fossilized birth-death models phy <- make.phylo(sim, fossils, returnTrueExt = FALSE) # returnTrueExt being false means the tips representing the true extinction # will be dropped, and the last sampled fossil will be in its place # plot phylogenetic tree coloring for the trait ape::plot.phylo(phy, tip.color = c("red", "blue")[tip_traits + 1]) # here sampled ancestor fossils are represented as 0-length branches, # so that the tree could be used e.g. for a Beast2 analyses # some software prefer a degree-2 node, in which case one could set the # saFormat parameter of make.phylo to "node" ## ----------------------------------------------------------------------------- # set a seed set.seed(1) # simple simulation, starting with more than one species sim <- bd.sim(n0 = 2, lambda = 0.1, mu = 0, tMax = 20, nFinal = c(20, Inf)) # separate the lineages clades <- find.lineages(sim) # plot each phylogeny # clade 1 ape::plot.phylo(make.phylo(clades$clade_1$sim), show.tip.label = FALSE) # clade 2 ape::plot.phylo(make.phylo(clades$clade_2$sim), show.tip.label = FALSE)