library(MiscMetabar)
data(data_fungi)
Alpha diversity analysis
Hill number
Numerous metrics of diversity exist. Hill numbers 1 is a kind of general framework for alpha diversity index.
renyi_res <- vegan::renyi(data_fungi@otu_table)
head(renyi_res)
#> 0 0.25 0.5 1 2
#> A10-005-B_S188_MERGED.fastq.gz 4.204693 3.615323 3.044244 2.0754183 1.1561862
#> A10-005-H_S189_MERGED.fastq.gz 4.248495 3.150337 2.063712 0.9938545 0.6464533
#> A10-005-M_S190_MERGED.fastq.gz 3.988984 3.607773 3.308361 2.9304156 2.5781603
#> A12-007_S191_MERGED.fastq.gz 5.036953 4.423283 3.751946 2.6309045 1.8468497
#> A12-007-B_S2_MERGED.fastq.gz 3.850148 3.349461 2.874269 2.1881145 1.6533262
#> A15-004_S3_MERGED.fastq.gz 4.025352 3.890614 3.747555 3.4533352 2.9618789
#> 4 8 16 32
#> A10-005-B_S188_MERGED.fastq.gz 0.8007816 0.6864775 0.6407124 0.6200442
#> A10-005-H_S189_MERGED.fastq.gz 0.5146170 0.4469617 0.4172228 0.4037640
#> A10-005-M_S190_MERGED.fastq.gz 2.3022639 2.1015018 1.9744485 1.9110311
#> A12-007_S191_MERGED.fastq.gz 1.5920328 1.4947479 1.4421368 1.4109287
#> A12-007-B_S2_MERGED.fastq.gz 1.3979355 1.2774402 1.2106906 1.1733049
#> A15-004_S3_MERGED.fastq.gz 2.4997916 2.2378349 2.1045646 2.0377518
#> 64 Inf
#> A10-005-B_S188_MERGED.fastq.gz 0.6102022 0.6006678
#> A10-005-H_S189_MERGED.fastq.gz 0.3973550 0.3911464
#> A10-005-M_S190_MERGED.fastq.gz 1.8806976 1.8513117
#> A12-007_S191_MERGED.fastq.gz 1.3916365 1.3700776
#> A12-007-B_S2_MERGED.fastq.gz 1.1547034 1.1366612
#> A15-004_S3_MERGED.fastq.gz 2.0054156 1.9740810
Test for difference in diversity (hill number)
One way to keep into account for difference in the number of sequences per samples is to use a Tukey test on a linear model with the square roots of the number of sequence as the first explanatory variable of the linear model 2.
p <- MiscMetabar::hill_pq(data_fungi, fact = "Height")
p$plot_Hill_0
#> NULL
p$plot_tuckey
#> NULL
See also the tutorial of the microbiome package for an alternative using the non-parametric Kolmogorov-Smirnov test for two-group comparisons when there are no relevant covariates.
Alpha diversity using package MicrobiotaProcess
library("MicrobiotaProcess")
clean_pq(subset_samples_pq(data_fungi, !is.na(data_fungi@sam_data$Height))) %>%
as.MPSE() %>%
mp_cal_alpha() %>%
mp_plot_alpha(.group = "Height")
#> Warning in wilcox.test.default(c(4, 4, 5, 5, 4, 4, 3, 6, 3, 4, 3, 2, 3, :
#> cannot compute exact p-value with ties
#> Warning in wilcox.test.default(c(3, 4, 3, 5, 4, 6, 3, 5, 4, 4, 3, 3, 3, :
#> cannot compute exact p-value with ties
#> Warning in wilcox.test.default(c(3, 4, 3, 5, 4, 6, 3, 5, 4, 4, 3, 3, 3, :
#> cannot compute exact p-value with ties
#> Warning in wilcox.test.default(c(1.32966134885476, 1.242453324894,
#> 1.56071040904141, : cannot compute exact p-value with ties
#> Warning in wilcox.test.default(c(0.867563228481461, 1.32966134885476,
#> 0.867563228481461, : cannot compute exact p-value with ties
#> Warning in wilcox.test.default(c(0.867563228481461, 1.32966134885476,
#> 0.867563228481461, : cannot compute exact p-value with ties
Effect of samples variables on alpha diversity using automated model selection and multimodel inference with (G)LMs
From the help of glmulti package :
glmulti finds what are the n best models (the confidence set of models) among all possible models (the candidate set, as specified by the user). Models are fitted with the specified fitting function (default is glm) and are ranked with the specified Information Criterion (default is aicc). The best models are found either through exhaustive screening of the candidates, or using a genetic algorithm, which allows very large candidate sets to be adressed. The output can be used for model selection, variable selection, and multimodel inference.
library("glmulti")
formula <- "Hill_0 ~ Hill_1 + Abundance + Time + Height"
res_glmulti <-
glmutli_pq(data_fungi, formula = formula, level = 1)
#> Initialization...
#> TASK: Exhaustive screening of candidate set.
#> Fitting...
#> Completed.
res_glmulti
#> estimates unconditional_interval nb_model importance
#> Hill_1 3.062117997 1.868174e-01 8 1
#> Abundance 0.002959644 8.478374e-08 8 1
#> Time 0.789091999 2.443263e-01 8 1
#> HeightLow 6.884340946 3.444196e+01 8 1
#> HeightMiddle 0.339123798 3.727962e+01 8 1
#> alpha variable
#> Hill_1 8.570200e-01 Hill_1
#> Abundance 5.773492e-04 Abundance
#> Time 9.800932e-01 Time
#> HeightLow 1.163660e+01 HeightLow
#> HeightMiddle 1.210648e+01 HeightMiddle
ggplot(data = res_glmulti, aes(x = estimates, y = variable)) +
geom_point(
size = 2,
alpha = 1,
show.legend = FALSE
) +
geom_vline(
xintercept = 0,
linetype = "dotted",
linewidth = 1
) +
geom_errorbar(
aes(xmin = estimates - alpha, xmax = estimates + alpha),
width = 0.8,
position = position_dodge(width = 0.8),
alpha = 0.7,
show.legend = FALSE
) +
geom_label(aes(label = nb_model), nudge_y = 0.3, size = 3) +
xlab("Standardized estimates") +
ylab(formula)
ggplot(data = res_glmulti, aes(
x = importance,
y = as.factor(variable),
fill = estimates
)) +
geom_bar(
stat = "identity",
show.legend = FALSE,
alpha = 0.8
) +
xlim(c(0, 1)) +
geom_label(aes(label = nb_model, x = 0.1),
size = 3,
fill = "white"
) +
scale_fill_viridis_b() +
xlab("Importance") +
ylab(formula)
formula <- "Hill_0 ~ Abundance + Time + Height"
res_glmulti_interaction <-
glmutli_pq(data_fungi, formula = formula, level = 2)
#> Initialization...
#> TASK: Exhaustive screening of candidate set.
#> Fitting...
#>
#> After 50 models:
#> Best model: Hill_0~1+Abundance+Time+Time:Abundance+Height:Abundance+Height:Time
#> Crit= 1069.11608982306
#> Mean crit= 1218.19009955263
#> Completed.
res_glmulti_interaction
#> estimates unconditional_interval nb_model importance
#> HeightHigh:Time 0.1004073616 4.167750e-02 8 0.04216251
#> Abundance:HeightHigh 0.0001609310 8.984023e-08 8 0.09020701
#> HeightLow -0.7865687564 9.769200e+00 32 0.24714664
#> HeightMiddle -2.6419930721 2.789953e+01 32 0.24714664
#> HeightLow:Time -0.6511123699 1.599292e+00 32 0.55051517
#> HeightMiddle:Time -1.3322473025 3.078720e+00 32 0.55051517
#> Abundance:Time -0.0001068559 4.586032e-09 32 0.81587143
#> Abundance:HeightLow 0.0011137659 7.957713e-07 32 0.86967993
#> Abundance:HeightMiddle 0.0017155970 1.245718e-06 32 0.86967993
#> Abundance 0.0024839088 8.790126e-07 32 0.90902176
#> Time 2.7869220741 2.663548e+00 32 0.92512111
#> alpha variable
#> HeightHigh:Time 4.006348e-01 HeightHigh:Time
#> Abundance:HeightHigh 5.877776e-04 Abundance:HeightHigh
#> HeightLow 6.171172e+00 HeightLow
#> HeightMiddle 1.038989e+01 HeightMiddle
#> HeightLow:Time 2.491898e+00 HeightLow:Time
#> HeightMiddle:Time 3.449710e+00 HeightMiddle:Time
#> Abundance:Time 1.335247e-04 Abundance:Time
#> Abundance:HeightLow 1.759641e-03 Abundance:HeightLow
#> Abundance:HeightMiddle 2.198334e-03 Abundance:HeightMiddle
#> Abundance 1.847287e-03 Abundance
#> Time 3.214276e+00 Time
ggplot(data = res_glmulti_interaction, aes(x = estimates, y = variable)) +
geom_point(
size = 2,
alpha = 1,
show.legend = FALSE
) +
geom_vline(
xintercept = 0,
linetype = "dotted",
linewidth = 1
) +
geom_errorbar(
aes(xmin = estimates - alpha, xmax = estimates + alpha),
width = 0.8,
position = position_dodge(width = 0.8),
alpha = 0.7,
show.legend = FALSE
) +
geom_label(aes(label = nb_model), nudge_y = 0.3, size = 3) +
xlab("Standardized estimates") +
ylab(formula)
ggplot(data = res_glmulti_interaction, aes(
x = importance,
y = as.factor(variable),
fill = estimates
)) +
geom_bar(
stat = "identity",
show.legend = FALSE,
alpha = 0.8
) +
xlim(c(0, 1)) +
geom_label(aes(label = nb_model, x = 0.1),
size = 3,
fill = "white"
) +
scale_fill_viridis_b() +
xlab("Importance") +
ylab(formula)
Session information
sessionInfo()
#> R version 4.4.1 (2024-06-14)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Debian GNU/Linux 12 (bookworm)
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.11.0
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.11.0
#>
#> locale:
#> [1] LC_CTYPE=fr_FR.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=fr_FR.UTF-8
#> [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=fr_FR.UTF-8
#> [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=fr_FR.UTF-8
#> [9] LC_ADDRESS=fr_FR.UTF-8 LC_TELEPHONE=fr_FR.UTF-8
#> [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=fr_FR.UTF-8
#>
#> time zone: Europe/Paris
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] glmulti_1.0.8 leaps_3.2 rJava_1.0-11
#> [4] MicrobiotaProcess_1.16.1 MiscMetabar_0.10.2 purrr_1.0.2
#> [7] dplyr_1.1.4 dada2_1.32.0 Rcpp_1.0.13
#> [10] ggplot2_3.5.1 phyloseq_1.48.0
#>
#> loaded via a namespace (and not attached):
#> [1] libcoin_1.0-10 RColorBrewer_1.1-3
#> [3] rstudioapi_0.16.0 jsonlite_1.8.9
#> [5] magrittr_2.0.3 TH.data_1.1-2
#> [7] modeltools_0.2-23 farver_2.1.2
#> [9] rmarkdown_2.28 fs_1.6.4
#> [11] zlibbioc_1.50.0 ragg_1.3.3
#> [13] vctrs_0.6.5 multtest_2.60.0
#> [15] Rsamtools_2.20.0 ggtree_3.12.0
#> [17] htmltools_0.5.8.1 S4Arrays_1.4.1
#> [19] Rhdf5lib_1.26.0 gridGraphics_0.5-1
#> [21] SparseArray_1.4.8 rhdf5_2.48.0
#> [23] sass_0.4.9 bslib_0.8.0
#> [25] htmlwidgets_1.6.4 desc_1.4.3
#> [27] plyr_1.8.9 sandwich_3.1-1
#> [29] zoo_1.8-12 cachem_1.1.0
#> [31] GenomicAlignments_1.40.0 igraph_2.0.3
#> [33] lifecycle_1.0.4 iterators_1.0.14
#> [35] pkgconfig_2.0.3 Matrix_1.7-0
#> [37] R6_2.5.1 fastmap_1.2.0
#> [39] GenomeInfoDbData_1.2.12 MatrixGenerics_1.16.0
#> [41] aplot_0.2.3 digest_0.6.37
#> [43] ggnewscale_0.5.0 colorspace_2.1-1
#> [45] ShortRead_1.62.0 patchwork_1.3.0
#> [47] S4Vectors_0.42.1 textshaping_0.4.0
#> [49] GenomicRanges_1.56.1 hwriter_1.3.2.1
#> [51] vegan_2.6-8 labeling_0.4.3
#> [53] fansi_1.0.6 httr_1.4.7
#> [55] abind_1.4-8 mgcv_1.9-1
#> [57] compiler_4.4.1 withr_3.0.1
#> [59] BiocParallel_1.38.0 highr_0.11
#> [61] ggsignif_0.6.4 MASS_7.3-61
#> [63] DelayedArray_0.30.1 biomformat_1.32.0
#> [65] permute_0.9-7 tools_4.4.1
#> [67] ape_5.8 glue_1.8.0
#> [69] nlme_3.1-166 rhdf5filters_1.16.0
#> [71] grid_4.4.1 cluster_2.1.6
#> [73] reshape2_1.4.4 ade4_1.7-22
#> [75] generics_0.1.3 gtable_0.3.5
#> [77] tidyr_1.3.1 data.table_1.16.0
#> [79] coin_1.4-3 utf8_1.2.4
#> [81] XVector_0.44.0 BiocGenerics_0.50.0
#> [83] ggrepel_0.9.6 foreach_1.5.2
#> [85] pillar_1.9.0 stringr_1.5.1
#> [87] yulab.utils_0.1.7 splines_4.4.1
#> [89] gghalves_0.1.4 treeio_1.28.0
#> [91] lattice_0.22-6 survival_3.7-0
#> [93] deldir_2.0-4 tidyselect_1.2.1
#> [95] Biostrings_2.72.1 knitr_1.48
#> [97] gridExtra_2.3 IRanges_2.38.1
#> [99] SummarizedExperiment_1.34.0 ggtreeExtra_1.14.0
#> [101] stats4_4.4.1 xfun_0.48
#> [103] Biobase_2.64.0 matrixStats_1.4.1
#> [105] stringi_1.8.4 UCSC.utils_1.0.0
#> [107] lazyeval_0.2.2 ggfun_0.1.6
#> [109] yaml_2.3.10 evaluate_1.0.0
#> [111] codetools_0.2-20 interp_1.1-6
#> [113] tibble_3.2.1 ggplotify_0.1.2
#> [115] cli_3.6.3 RcppParallel_5.1.9
#> [117] systemfonts_1.1.0 munsell_0.5.1
#> [119] jquerylib_0.1.4 GenomeInfoDb_1.40.1
#> [121] png_0.1-8 parallel_4.4.1
#> [123] ggh4x_0.2.8 pkgdown_2.1.1
#> [125] latticeExtra_0.6-30 jpeg_0.1-10
#> [127] bitops_1.0-9 ggstar_1.0.4
#> [129] pwalign_1.0.0 viridisLite_0.4.2
#> [131] tidytree_0.4.6 mvtnorm_1.3-1
#> [133] scales_1.3.0 crayon_1.5.3
#> [135] rlang_1.1.4 multcomp_1.4-26