This vignette illustrates the most useful functions of yatah.
For this example, we use data from Zeller et al. (2014): the abundances of bacteria found in 199 stool samples.
abundances <- as_tibble(yatah::abundances)
print(abundances, max_extra_cols = 2)
#> # A tibble: 1,585 × 200
#>    lineages       `CCIS00146684ST-4-0` `CCIS00281083ST-3-0` `CCIS02124300ST-4-0`
#>    <chr>                         <dbl>                <dbl>                <dbl>
#>  1 k__Bacteria               100.                  99.8                   96.3  
#>  2 k__Viruses                  0.00697              0.128                  3.70 
#>  3 k__Bacteria|p…             66.2                 24.6                   74.2  
#>  4 k__Bacteria|p…             19.1                 74.4                   11.9  
#>  5 k__Bacteria|p…             12.1                  0.0428                 7.22 
#>  6 k__Bacteria|p…              1.86                 0.428                  0.765
#>  7 k__Bacteria|p…              0.758                0.388                  2.28 
#>  8 k__Viruses|p_…              0.00697              0.128                  3.70 
#>  9 k__Bacteria|p…              0.00155              0.00415                0    
#> 10 k__Bacteria|p…             62.4                 21.7                   62.3  
#> # ℹ 1,575 more rows
#> # ℹ 196 more variables: `CCIS02379307ST-4-0` <dbl>, `CCIS02856720ST-4-0` <dbl>,
#> #   …
taxonomy <- select(abundances, lineages)
taxonomy
#> # A tibble: 1,585 × 1
#>    lineages                                  
#>    <chr>                                     
#>  1 k__Bacteria                               
#>  2 k__Viruses                                
#>  3 k__Bacteria|p__Firmicutes                 
#>  4 k__Bacteria|p__Bacteroidetes              
#>  5 k__Bacteria|p__Actinobacteria             
#>  6 k__Bacteria|p__Verrucomicrobia            
#>  7 k__Bacteria|p__Proteobacteria             
#>  8 k__Viruses|p__Viruses_noname              
#>  9 k__Bacteria|p__Candidatus_Saccharibacteria
#> 10 k__Bacteria|p__Firmicutes|c__Clostridia   
#> # ℹ 1,575 more rows

Here, we have all the bacteria present, at every taxonomic rank. As we
are only interested in genera that belong to the
Gammaproteobacteria class, we filter() the
lineages with is_clade() and is_rank(). The
genus name can be extracted with get_last_clade().
gammap_genus <-
  taxonomy %>% 
  filter(is_clade(lineages, "Gammaproteobacteria"),
         is_rank(lineages, "genus")) %>% 
  mutate(genus = get_last_clade(lineages))
gammap_genus
#> # A tibble: 26 × 2
#>    lineages                                                                genus
#>    <chr>                                                                   <chr>
#>  1 k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteria… Esch…
#>  2 k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pasteurellales… Haem…
#>  3 k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteria… Ente…
#>  4 k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pseudomonadale… Pseu…
#>  5 k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteria… Ente…
#>  6 k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pasteurellales… Aggr…
#>  7 k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteria… Hafn…
#>  8 k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Pasteurellales… Acti…
#>  9 k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Xanthomonadale… Sino…
#> 10 k__Bacteria|p__Proteobacteria|c__Gammaproteobacteria|o__Enterobacteria… Citr…
#> # ℹ 16 more rows

It is useful to have a taxonomic table. taxtable() does
the job.
gammaprot_table <-
  gammap_genus %>% 
  pull(lineages) %>% 
  taxtable()
as_tibble(gammaprot_table)
#> # A tibble: 26 × 6
#>    kingdom  phylum         class               order             family    genus
#>    <chr>    <chr>          <chr>               <chr>             <chr>     <chr>
#>  1 Bacteria Proteobacteria Gammaproteobacteria Enterobacteriales Enteroba… Esch…
#>  2 Bacteria Proteobacteria Gammaproteobacteria Pasteurellales    Pasteure… Haem…
#>  3 Bacteria Proteobacteria Gammaproteobacteria Enterobacteriales Enteroba… Ente…
#>  4 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales   Pseudomo… Pseu…
#>  5 Bacteria Proteobacteria Gammaproteobacteria Enterobacteriales Enteroba… Ente…
#>  6 Bacteria Proteobacteria Gammaproteobacteria Pasteurellales    Pasteure… Aggr…
#>  7 Bacteria Proteobacteria Gammaproteobacteria Enterobacteriales Enteroba… Hafn…
#>  8 Bacteria Proteobacteria Gammaproteobacteria Pasteurellales    Pasteure… Acti…
#>  9 Bacteria Proteobacteria Gammaproteobacteria Xanthomonadales   Sinobact… Sino…
#> 10 Bacteria Proteobacteria Gammaproteobacteria Enterobacteriales Enteroba… Citr…
#> # ℹ 16 more rows

To build a tree, use taxtree() with a taxonomic table as
input. By default, it collapses ranks with only one subrank.
gammaprot_tree <- taxtree(gammaprot_table)
gammaprot_tree
#> 
#> Phylogenetic tree with 26 tips and 7 internal nodes.
#> 
#> Tip labels:
#>   Escherichia, Enterobacteriaceae_noname, Enterobacter, Hafnia, Citrobacter, Pantoea, ...
#> Node labels:
#>   Gammaproteobacteria, Enterobacteriaceae, Pasteurellaceae, Pseudomonadales, Moraxellaceae, Xanthomonadales, ...
#> 
#> Rooted; includes branch lengths.