Skip to contents

This tutorial explore a phyloseq version of the dataset from Tengeler et al. (2020) available in the mia package.

Load library

library("MicrobiotaProcess")
#> Warning: replacing previous import 'S4Arrays::makeNindexFromArrayViewport' by
#> 'DelayedArray::makeNindexFromArrayViewport' when loading 'SummarizedExperiment'
library("MiscMetabar")
library("ggplot2")
library("patchwork")
library("iNEXT")
?Tengeler2020

Import dataset in phyloseq format

data(Tengeler2020_pq)
ten <- Tengeler2020_pq
summary_plot_pq(ten)

Alpha-diversity analysis

hill_pq(ten, "patient_status", one_plot = TRUE)

res_inext <-
  iNEXT_pq(ten,
    datatype = "abundance",
    merge_sample_by = "patient_status_vs_cohort",
    nboot = 5
  )
ggiNEXT(res_inext)

accu_plot(
  ten,
  fact = "sample_name",
  add_nb_seq = TRUE,
  by.fact = TRUE,
  step = 100
) + theme(legend.position = c(.8, .6))
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 9
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 17
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 25
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 7
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 8
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 6
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 14
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 28
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 3
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 7
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 2
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 15
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 5

#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 5
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 3
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 6
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 3
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 4
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 6
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 3

#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 3
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 21
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 5
#> Warning in vegan::rarefy(as.matrix(unclass(x[i, ])), n, se = TRUE): most
#> observed count data have counts 1, but smallest count is 13
#> Warning: Removed 1 row containing missing values (`geom_line()`).

Explore taxonomy

library(metacoder)
heat_tree_pq(ten,
  node_size = n_obs,
  node_color = nb_sequences,
  node_label = taxon_names,
  tree_label = taxon_names,
  node_size_trans = "log10 area"
)

treemap_pq(ten, lvl1 = "Order", lvl2 = "Family")

Beta-diversity analysis : effect of patient status and cohort

circle_pq(ten, "patient_status")

upset_pq(ten, "patient_status_vs_cohort")

ggvenn_pq(clean_pq(ten, force_taxa_as_columns = TRUE),
  "cohort",
  rarefy_before_merging = TRUE
) +
  theme(legend.position = "none")

ten_control <- clean_pq(subset_samples(ten, patient_status == "Control"))
p_control <- heat_tree_pq(ten_control,
  node_size = n_obs,
  node_color = nb_sequences,
  node_label = taxon_names,
  tree_label = taxon_names,
  node_size_trans = "log10 area"
)

ten_ADHD <- clean_pq(subset_samples(ten, patient_status == "ADHD"))
p_ADHD <- heat_tree_pq(ten_ADHD,
  node_size = n_obs,
  node_color = nb_sequences,
  node_label = taxon_names,
  tree_label = taxon_names,
  node_size_trans = "log10 area"
)

p_control + ggtitle("Control") + p_ADHD + ggtitle("ADHD")

knitr::kable(track_wkflow(list(
  "All samples" = ten,
  "Control samples" = ten_control,
  "ADHD samples" = ten_ADHD
)))
nb_sequences nb_clusters nb_samples
All samples 485932 151 27
Control samples 239329 130 14
ADHD samples 246603 142 13
adonis_pq(ten, "cohort + patient_status")
#> Permutation test for adonis under reduced model
#> Terms added sequentially (first to last)
#> Permutation: free
#> Number of permutations: 999
#> 
#> vegan::adonis2(formula = .formula, data = metadata)
#>                Df SumOfSqs      R2      F Pr(>F)  
#> cohort          2   0.7922 0.11785 1.6626  0.071 .
#> patient_status  1   0.4503 0.06698 1.8898  0.069 .
#> Residual       23   5.4799 0.81517                
#> Total          26   6.7223 1.00000                
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ten@tax_table <- phyloseq::tax_table(cbind(
  ten@tax_table,
  "Species" = taxa_names(ten)
))

biplot_pq(subset_taxa_pq(ten, taxa_sums(ten) > 3000),
  merge_sample_by = "patient_status",
  fact = "patient_status",
  nudge_y = 0.4
)

multitax_bar_pq(ten, "Phylum", "Class", "Order", "patient_status")

multitax_bar_pq(ten, "Phylum", "Class", "Order", "patient_status",
  nb_seq = FALSE, log10trans = FALSE
)

Differential abundance analysis

plot_deseq2_pq(ten,
  contrast = c("patient_status", "ADHD", "Control"),
  taxolev = "Genus"
)
#> Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
#> design formula are characters, converting to factors

LEfSe <- diff_analysis(
  ten,
  classgroup = "patient_status",
  mlfun = "lda",
  ldascore = 2,
  p.adjust.methods = "bh"
)
library(ggplot2)
ggeffectsize(LEfSe) +
  scale_color_manual(values = c(
    "#00AED7",
    "#FD9347"
  )) +
  theme_bw()

Session information

sessionInfo()
#> R version 4.3.2 (2023-10-31)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 22.04.3 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
#> 
#> locale:
#>  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
#>  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
#>  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
#> [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
#> 
#> time zone: UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#>  [1] metacoder_0.3.6          iNEXT_3.0.0              patchwork_1.2.0         
#>  [4] MiscMetabar_0.7.8        dplyr_1.1.4              dada2_1.30.0            
#>  [7] Rcpp_1.0.12              ggplot2_3.4.4            phyloseq_1.46.0         
#> [10] MicrobiotaProcess_1.14.0
#> 
#> loaded via a namespace (and not attached):
#>   [1] libcoin_1.0-10              RColorBrewer_1.1-3         
#>   [3] shape_1.4.6                 jsonlite_1.8.8             
#>   [5] magrittr_2.0.3              TH.data_1.1-2              
#>   [7] modeltools_0.2-23           farver_2.1.1               
#>   [9] rmarkdown_2.25              GlobalOptions_0.1.2        
#>  [11] fs_1.6.3                    zlibbioc_1.48.0            
#>  [13] ragg_1.2.7                  vctrs_0.6.5                
#>  [15] multtest_2.58.0             memoise_2.0.1              
#>  [17] Rsamtools_2.18.0            RCurl_1.98-1.14            
#>  [19] ggtree_3.10.0               htmltools_0.5.7            
#>  [21] S4Arrays_1.2.0              ComplexUpset_1.3.3         
#>  [23] Rhdf5lib_1.24.2             SparseArray_1.2.4          
#>  [25] rhdf5_2.46.1                gridGraphics_0.5-1         
#>  [27] sass_0.4.8                  bslib_0.6.1                
#>  [29] desc_1.4.3                  plyr_1.8.9                 
#>  [31] sandwich_3.1-0              zoo_1.8-12                 
#>  [33] cachem_1.0.8                ggfittext_0.10.2           
#>  [35] GenomicAlignments_1.38.2    igraph_2.0.1.1             
#>  [37] lifecycle_1.0.4             iterators_1.0.14           
#>  [39] pkgconfig_2.0.3             Matrix_1.6-1.1             
#>  [41] R6_2.5.1                    fastmap_1.1.1              
#>  [43] GenomeInfoDbData_1.2.11     MatrixGenerics_1.14.0      
#>  [45] digest_0.6.34               aplot_0.2.2                
#>  [47] colorspace_2.1-0            ggnewscale_0.4.9           
#>  [49] ShortRead_1.60.0            S4Vectors_0.40.2           
#>  [51] DESeq2_1.42.0               textshaping_0.3.7          
#>  [53] GenomicRanges_1.54.1        hwriter_1.3.2.1            
#>  [55] vegan_2.6-4                 labeling_0.4.3             
#>  [57] fansi_1.0.6                 abind_1.4-5                
#>  [59] mgcv_1.9-0                  compiler_4.3.2             
#>  [61] withr_3.0.0                 BiocParallel_1.36.0        
#>  [63] highr_0.10                  ggsignif_0.6.4             
#>  [65] MASS_7.3-60                 DelayedArray_0.28.0        
#>  [67] biomformat_1.30.0           permute_0.9-7              
#>  [69] tools_4.3.2                 ape_5.7-1                  
#>  [71] glue_1.7.0                  treemapify_2.5.6           
#>  [73] nlme_3.1-163                rhdf5filters_1.14.1        
#>  [75] grid_4.3.2                  cluster_2.1.4              
#>  [77] reshape2_1.4.4              ade4_1.7-22                
#>  [79] generics_0.1.3              gtable_0.3.4               
#>  [81] tidyr_1.3.1                 ggVennDiagram_1.5.0        
#>  [83] data.table_1.15.0           coin_1.4-3                 
#>  [85] utf8_1.2.4                  XVector_0.42.0             
#>  [87] BiocGenerics_0.48.1         ggrepel_0.9.5              
#>  [89] foreach_1.5.2               pillar_1.9.0               
#>  [91] stringr_1.5.1               yulab.utils_0.1.4          
#>  [93] circlize_0.4.15             splines_4.3.2              
#>  [95] treeio_1.26.0               lattice_0.21-9             
#>  [97] survival_3.5-7              deldir_2.0-2               
#>  [99] tidyselect_1.2.0            locfit_1.5-9.8             
#> [101] pbapply_1.7-2               Biostrings_2.70.2          
#> [103] knitr_1.45                  gridExtra_2.3              
#> [105] IRanges_2.36.0              SummarizedExperiment_1.32.0
#> [107] ggtreeExtra_1.12.0          stats4_4.3.2               
#> [109] xfun_0.42                   Biobase_2.62.0             
#> [111] matrixStats_1.2.0           stringi_1.8.3              
#> [113] lazyeval_0.2.2              ggfun_0.1.4                
#> [115] yaml_2.3.8                  evaluate_0.23              
#> [117] codetools_0.2-19            interp_1.1-6               
#> [119] tibble_3.2.1                ggplotify_0.1.2            
#> [121] cli_3.6.2                   RcppParallel_5.1.7         
#> [123] systemfonts_1.0.5           munsell_0.5.0              
#> [125] jquerylib_0.1.4             GenomeInfoDb_1.38.6        
#> [127] png_0.1-8                   parallel_4.3.2             
#> [129] ggh4x_0.2.8                 pkgdown_2.0.7              
#> [131] latticeExtra_0.6-30         jpeg_0.1-10                
#> [133] bitops_1.0-7                ggstar_1.0.4               
#> [135] mvtnorm_1.2-4               tidytree_0.4.6             
#> [137] GA_3.2.4                    scales_1.3.0               
#> [139] purrr_1.0.2                 crayon_1.5.2               
#> [141] rlang_1.1.3                 multcomp_1.4-25

References

Tengeler, A.C., Dam, S.A., Wiesmann, M. et al. Gut microbiota from persons with attention-deficit/hyperactivity disorder affects the brain in mice. Microbiome 8, 44 (2020). https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-020-00816-x