library(MiscMetabar)
data(data_fungi)
Permanova
data_fungi_woNA4height <- subset_samples(data_fungi, !is.na(data_fungi@sam_data$Height))
res_ado <- adonis_pq(data_fungi_woNA4height, "Tree_name+Height")
knitr::kable(res_ado)
Df | SumOfSqs | R2 | F | Pr(>F) | |
---|---|---|---|---|---|
Model | 63 | 36.92559 | 0.5881754 | 1.518899 | 0.001 |
Residual | 67 | 25.85431 | 0.4118246 | NA | NA |
Total | 130 | 62.77990 | 1.0000000 | NA | NA |
Graph Test
data_fungi_woNA4height <- subset_samples(data_fungi, !is.na(data_fungi@sam_data$Height))
graph_test_pq(data_fungi_woNA4height, "Height")
Circle of ASVs
circle_pq(data_fungi_woNA4height, "Height")
Compare two (group of) samples
Biplot
data_fungi_low_high <- subset_samples(
data_fungi,
data_fungi@sam_data$Height %in%
c("Low", "High")
)
data_fungi_low_high <- subset_taxa_pq(
data_fungi_low_high,
taxa_sums(data_fungi_low_high) > 5000
)
biplot_pq(data_fungi_low_high, fact = "Height", merge_sample_by = "Height")
#> Warning: Removed 1 row containing missing values or values outside the scale range
#> (`geom_rect()`).
Compare two (group of) samples with a table
compare_pairs_pq(data_fungi_low_high,
bifactor = "Height",
merge_sample_by = "Height",
modality = "Time"
)
#> # A tibble: 4 × 13
#> modality nb_ASV_High nb_ASV_Low nb_shared_ASV div_High div_Low nb_shared_seq
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0 12 16 9 1.8 1.37 57639
#> 2 5 20 18 14 1.95 1.98 76006
#> 3 10 11 13 10 1.18 1.25 47042
#> 4 15 17 19 12 2 2.04 161348
#> # ℹ 6 more variables: percent_shared_seq_High <dbl>,
#> # percent_shared_seq_Low <dbl>, percent_shared_ASV_High <dbl>,
#> # percent_shared_ASV_Low <dbl>, ratio_nb_High_Low <dbl>,
#> # ratio_div_High_Low <dbl>
Venn diagram
ggvenn_pq(data_fungi, fact = "Height") +
ggplot2::scale_fill_distiller(palette = "BuPu", direction = 1) +
labs(title = "Share number of ASV among Height in tree")
ggvenn_pq(data_fungi, fact = "Height", min_nb_seq = 5000) +
ggplot2::scale_fill_distiller(palette = "BuPu", direction = 1) +
labs(title = "Share number of ASV with more than 5000 seqs")
ggvenn_pq(data_fungi,
fact = "Height", taxonomic_rank = "Genus",
min_nb_seq = 100
) +
ggplot2::scale_fill_distiller(palette = "BuPu", direction = 1) +
labs(title = "Share number of Genus represented by at least one ASV with more than 100 seqs")
Upset plot
Venn diagram can quickly become complex to read when the number of modalities increase. One graphical solution is upset plot. MiscMetabar propose a solution based on the package ComplexUpset.
upset_pq(data_fungi, fact = "Height")
upset_pq(data_fungi, fact = "Time")
ComplexUpset
package allow powerful configuration of you
plot as you can see in the following figure.
upset_pq(
data_fungi,
fact = "Time",
width_ratio = 0.2,
annotations = list(
"Sequences per ASV \n (log10)" = (
ggplot(mapping = aes(y = log10(Abundance)))
+
geom_jitter(aes(
color =
Abundance
), na.rm = TRUE)
+
geom_violin(alpha = 0.5, na.rm = TRUE) +
theme(legend.key.size = unit(0.2, "cm")) +
theme(axis.text = element_text(size = 12))
),
"ASV per phylum" = (
ggplot(mapping = aes(fill = Phylum))
+
geom_bar() +
ylab("ASV per phylum") +
theme(legend.key.size = unit(0.2, "cm")) +
theme(axis.text = element_text(size = 12))
)
)
)
Change in abundance across a factor
Benchdamic
There is a lot of available methods. Please refer to R package benchdamic for a list of method and a implementation of a benchmark for your data.
Using Deseq2 package
data("GlobalPatterns", package = "phyloseq")
GP <- subset_samples(
GlobalPatterns,
GlobalPatterns@sam_data$SampleType %in% c("Soil", "Skin")
)
plot_deseq2_pq(GP, c("SampleType", "Soil", "Skin"), pval = 0.001)
Session information
sessionInfo()
#> R version 4.4.1 (2024-06-14)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Debian GNU/Linux 12 (bookworm)
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.11.0
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.11.0
#>
#> locale:
#> [1] LC_CTYPE=fr_FR.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=fr_FR.UTF-8
#> [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=fr_FR.UTF-8
#> [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=fr_FR.UTF-8
#> [9] LC_ADDRESS=fr_FR.UTF-8 LC_TELEPHONE=fr_FR.UTF-8
#> [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=fr_FR.UTF-8
#>
#> time zone: Europe/Paris
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] grid stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] MiscMetabar_0.9.3 purrr_1.0.2 dplyr_1.1.4 dada2_1.32.0
#> [5] Rcpp_1.0.13 ggplot2_3.5.1 phyloseq_1.48.0
#>
#> loaded via a namespace (and not attached):
#> [1] RColorBrewer_1.1-3 rstudioapi_0.16.0
#> [3] jsonlite_1.8.8 shape_1.4.6.1
#> [5] magrittr_2.0.3 farver_2.1.2
#> [7] rmarkdown_2.28 GlobalOptions_0.1.2
#> [9] fs_1.6.4 zlibbioc_1.50.0
#> [11] ragg_1.3.2 vctrs_0.6.5
#> [13] multtest_2.60.0 Rsamtools_2.20.0
#> [15] htmltools_0.5.8.1 S4Arrays_1.4.1
#> [17] ComplexUpset_1.3.3 Rhdf5lib_1.26.0
#> [19] SparseArray_1.4.8 rhdf5_2.48.0
#> [21] sass_0.4.9 bslib_0.8.0
#> [23] htmlwidgets_1.6.4 desc_1.4.3
#> [25] plyr_1.8.9 cachem_1.1.0
#> [27] GenomicAlignments_1.40.0 igraph_2.0.3
#> [29] lifecycle_1.0.4 ggnetwork_0.5.13
#> [31] iterators_1.0.14 pkgconfig_2.0.3
#> [33] Matrix_1.7-0 R6_2.5.1
#> [35] fastmap_1.2.0 GenomeInfoDbData_1.2.12
#> [37] MatrixGenerics_1.16.0 digest_0.6.37
#> [39] colorspace_2.1-1 ShortRead_1.62.0
#> [41] patchwork_1.2.0 S4Vectors_0.42.1
#> [43] DESeq2_1.44.0 textshaping_0.4.0
#> [45] GenomicRanges_1.56.1 hwriter_1.3.2.1
#> [47] vegan_2.6-8 labeling_0.4.3
#> [49] fansi_1.0.6 httr_1.4.7
#> [51] abind_1.4-5 mgcv_1.9-1
#> [53] compiler_4.4.1 withr_3.0.1
#> [55] venneuler_1.1-4 BiocParallel_1.38.0
#> [57] highr_0.11 MASS_7.3-61
#> [59] DelayedArray_0.30.1 biomformat_1.32.0
#> [61] permute_0.9-7 tools_4.4.1
#> [63] ape_5.8 glue_1.7.0
#> [65] nlme_3.1-165 rhdf5filters_1.16.0
#> [67] cluster_2.1.6 reshape2_1.4.4
#> [69] ade4_1.7-22 generics_0.1.3
#> [71] gtable_0.3.5 tidyr_1.3.1
#> [73] data.table_1.16.0 ggVennDiagram_1.5.2
#> [75] utf8_1.2.4 XVector_0.44.0
#> [77] BiocGenerics_0.50.0 foreach_1.5.2
#> [79] pillar_1.9.0 stringr_1.5.1
#> [81] circlize_0.4.16 rJava_1.0-11
#> [83] splines_4.4.1 lattice_0.22-6
#> [85] survival_3.7-0 deldir_2.0-4
#> [87] tidyselect_1.2.1 locfit_1.5-9.10
#> [89] Biostrings_2.72.1 pbapply_1.7-2
#> [91] knitr_1.48 gridExtra_2.3
#> [93] IRanges_2.38.1 SummarizedExperiment_1.34.0
#> [95] stats4_4.4.1 xfun_0.47
#> [97] Biobase_2.64.0 matrixStats_1.3.0
#> [99] stringi_1.8.4 UCSC.utils_1.0.0
#> [101] yaml_2.3.10 evaluate_0.24.0
#> [103] codetools_0.2-20 interp_1.1-6
#> [105] tibble_3.2.1 cli_3.6.3
#> [107] RcppParallel_5.1.9 systemfonts_1.1.0
#> [109] munsell_0.5.1 jquerylib_0.1.4
#> [111] GenomeInfoDb_1.40.1 png_0.1-8
#> [113] parallel_4.4.1 pkgdown_2.1.0
#> [115] latticeExtra_0.6-30 jpeg_0.1-10
#> [117] bitops_1.0-8 pwalign_1.0.0
#> [119] phyloseqGraphTest_0.1.1 scales_1.3.0
#> [121] crayon_1.5.3 rlang_1.1.4