11 Bringing it All Together

Prepare the data for metric calculation.

11.1 Metric Calculation

The following code chunk will produce more than 1,000 biological metrics.

metrics.df <- nest.df %>% 
  dplyr::mutate(
    rich_family = taxa_rich(.dataframe = .,
                            .key_col = uid,
                            .group_col = family,
                            .unnest_col = data),
    rich_genus = taxa_rich(.dataframe = .,
                           .key_col = uid,
                           .group_col = genus,
                           .unnest_col = data),
    rich_target_taxon = taxa_rich(.dataframe = .,
                                  .key_col = uid,
                                  .group_col = target_taxon,
                                  .unnest_col = data),
    gini_simpson_ept = taxa_div(.dataframe = .,
                                .key_col = uid,
                                .counts_col = total,
                                .group_col = target_taxon,
                                .filter = order %in% c("ephemeroptera",
                                                       "plecoptera",
                                                       "trichoptera"),
                                .job = "gini_simpson",
                                .unnest_col = data),
    simpson_ept = taxa_div(.dataframe = .,
                           .key_col = uid,
                           .counts_col = total,
                           .group_col = target_taxon,
                           .filter = order %in% c("ephemeroptera",
                                                  "plecoptera",
                                                  "trichoptera"),
                           .job = "simpson",
                           .unnest_col = data),
    shannon_ept = taxa_div(.dataframe = .,
                           .key_col = uid,
                           .counts_col = total,
                           .group_col = target_taxon,
                           .filter = order %in% c("ephemeroptera",
                                                  "plecoptera",
                                                  "trichoptera"),
                           .job = "shannon",
                           .base_log = 2,
                           .unnest_col = data),
    pct_ept = taxa_pct(.dataframe = .,
                       .key_col = uid,
                       .counts_col = total,
                       .filter = order %in% c("ephemeroptera",
                                              "plecoptera",
                                              "trichoptera"),
                       .unnest_col = data),
    pct_cote = taxa_pct(.dataframe = .,
                        .key_col = uid,
                        .counts_col = total,
                        .filter = order %in% c("coleoptera",
                                               "odonata",
                                               "trichoptera",
                                               "ephemeroptera"),
                        .unnest_col = data),
    dom_1_target_taxon = taxa_dom(.dataframe = .,
                                  .key_col = uid,
                                  .counts_col = total,
                                  .group_col = target_taxon,
                                  .dom_level = 1,
                                  .unnest_col = data),
    dom_5_target_taxon = taxa_dom(.dataframe = .,
                                  .key_col = uid,
                                  .counts_col = total,
                                  .group_col = target_taxon,
                                  .dom_level = 5,
                                  .unnest_col = data),
    tol_index = taxa_tol_index(.dataframe = .,
                               .key_col = uid,
                               .counts_col = total,
                               .tol_col = ptv,
                               .unnest_col = data)
  ) %>% 
  dplyr::bind_cols(
    taxa_seq(.dataframe = .,
             .key_col = uid,
             .counts_col = total,
             .filter_cols_vec = c("class", "order", "family",
                                  "ffg", "habit", "voltinism"),
             .group_col = target_taxon,
             .job = "rich",
             .unnest_col = data),
    taxa_seq(.dataframe = .,
             .key_col = uid,
             .counts_col = total,
             .filter_cols_vec = c("class", "order", "family",
                                  "ffg", "habit", "voltinism"),
             .group_col = target_taxon,
             .job = "pct_rich",
             .unnest_col = data),
    taxa_seq(.dataframe = .,
             .key_col = uid,
             .counts_col = total,
             .filter_cols_vec = c("class", "order", "family",
                                  "genus", "ffg", "habit", "voltinism"),
             .job = "pct",
             .unnest_col = data),
    taxa_seq(.dataframe = .,
             .key_col = uid,
             .counts_col = total,
             .filter_cols_vec = c("class", "order", "family",
                                  "ffg", "habit", "voltinism"),
             .group_col = target_taxon,
             .job = "simpson",
             .unnest_col = data),
  )

11.2 Metric Selection

11.2.3 Model

## == Workflow [trained] ==========================================================
## Preprocessor: Recipe
## Model: logistic_reg()
## 
## -- Preprocessor ----------------------------------------------------------------
## 5 Recipe Steps
## 
## * step_normalize()
## * step_nzv()
## * step_corr()
## * step_pca()
## * step_adasyn()
## 
## -- Model -----------------------------------------------------------------------
## 
## Call:  stats::glm(formula = formula, family = stats::binomial, data = data)
## 
## Coefficients:
## (Intercept)          PC1          PC2          PC3          PC4  
##    -0.94614     -0.54845      0.11392      0.02437     -0.01808  
## 
## Degrees of Freedom: 99 Total (i.e. Null);  95 Residual
## Null Deviance:       138.6 
## Residual Deviance: 60    AIC: 70