Simplify taxonomy by removing some unused characters such as "k__"
Source: R/miscellanous.R
simplify_taxo.Rd
Internally used in clean_pq().
Usage
simplify_taxo(
physeq,
pattern_to_remove = c(".__", ".*:"),
ranks_for_pattern_to_remove = phyloseq::rank_names(physeq),
ranks_to_remove_space = phyloseq::rank_names(physeq)[!grepl("Species",
phyloseq::rank_names(physeq))],
ranks_to_remove_NA = phyloseq::rank_names(physeq),
pattern_to_NA = NULL,
ranks_for_pattern_to_NA = phyloseq::rank_names(physeq)
)
Arguments
- physeq
(required) a phyloseq-class object obtained using the phyloseq package.
- pattern_to_remove
(a vector of character) regex patterns passed to base::gsub(): the matched substring is deleted from the cell value and the rest of the string is kept (e.g. ".__" turns "k__Fungi" into "Fungi").
- ranks_for_pattern_to_remove
(character vector or NULL; default all ranks) column names in tax_table to which pattern_to_remove is applied. Pass NULL to skip this operation on all columns.
- ranks_to_remove_space
(character vector or NULL; default all ranks whose name does not contain "Species") column names from which ASCII spaces and non-breaking spaces (U+00A0) are stripped. Pass NULL to skip space removal entirely.
- ranks_to_remove_NA
(character vector or NULL; default all ranks) column names from which the literal string "NA" (case-sensitive) is removed. Pass NULL to skip this operation. Breaking change from v0.16: the old remove_NA = FALSE default is replaced by ranks_to_remove_NA, which defaults to all ranks; pass NULL to reproduce the old behaviour.
- pattern_to_NA
(character; default NULL): a regex; if an entire cell value matches, the whole cell is replaced with NA (nothing from the original value is kept). Designed for PR2-style placeholder unknowns such as Embryophyceae_X, Embryophyceae_XX, Embryophyceae_XXX, Embryophyceae_XXX_sp., or Mortierella_sp. Use "_X+$|_sp\\.$" to cover all such patterns: _X+$ catches rank-filler X's; _sp\\.$ catches any genus-only species placeholder.
- ranks_for_pattern_to_NA
(character vector or NULL; default all ranks) column names to which pattern_to_NA is applied. Pass NULL to skip this operation on all columns.
Value
A phyloseq-class object with simplified taxonomy
Examples
d_fm <- data_fungi_mini
d_fm@tax_table[, "Species"] <- paste0(rep(
c("s__", "s:"),
ntaxa(d_fm) / 2
), d_fm@tax_table[, "Species"])
# First column is the new vector of Species,
# second column is the column before simplification
cbind(
simplify_taxo(d_fm)@tax_table[, "Species"],
d_fm@tax_table[, "Species"]
)
#> Species Species
#> ASV7 "" "s__NA"
#> ASV8 "ostrea" "s:ostrea"
#> ASV12 "raduloides" "s__raduloides"
#> ASV18 "ostrea" "s:ostrea"
#> ASV25 "lachnopus" "s__lachnopus"
#> ASV26 "hirsutum" "s:hirsutum"
#> ASV27 "brasiliensis" "s__brasiliensis"
#> ASV29 "eyrei" "s:eyrei"
#> ASV32 "oblongisporum" "s__oblongisporum"
#> ASV34 "" "s:NA"
#> ASV35 "fomentarius" "s__fomentarius"
#> ASV41 "renati" "s:renati"
#> ASV42 "lachnopus" "s__lachnopus"
#> ASV46 "pellucida" "s:pellucida"
#> ASV47 "molaris" "s__molaris"
#> ASV48 "caryae" "s:caryae"
#> ASV49 "livescens" "s__livescens"
#> ASV50 "analogum" "s:analogum"
#> ASV53 "fomentarius" "s__fomentarius"
#> ASV54 "" "s:NA"
#> ASV58 "fomentarius" "s__fomentarius"
#> ASV59 "roseocremeum" "s:roseocremeum"
#> ASV61 "setigerum" "s__setigerum"
#> ASV62 "" "s:NA"
#> ASV63 "" "s__NA"
#> ASV64 "versicolor" "s:versicolor"
#> ASV67 "raduloides" "s__raduloides"
#> ASV68 "lachnopus" "s:lachnopus"
#> ASV71 "" "s__NA"
#> ASV72 "" "s:NA"
#> ASV75 "versiformis" "s__versiformis"
#> ASV77 "lachnopus" "s:lachnopus"
#> ASV82 "glandulosa" "s__glandulosa"
#> ASV83 "" "s:NA"
#> ASV85 "pubera" "s__pubera"
#> ASV91 "mesenterica" "s:mesenterica"
#> ASV93 "" "s__NA"
#> ASV94 "ostrea" "s:ostrea"
#> ASV99 "fomentarius" "s__fomentarius"
#> ASV100 "" "s:NA"
#> ASV101 "buckii" "s__buckii"
#> ASV104 "coralloides" "s:coralloides"
#> ASV105 "flaviporus" "s__flaviporus"
#> ASV107 "raduloides" "s:raduloides"
#> ASV108 "glandulosa" "s__glandulosa"
# Apply pattern_to_remove only to Genus and Species columns
cbind(
simplify_taxo(d_fm,
ranks_for_pattern_to_remove = c("Genus", "Species")
)@tax_table[, "Species"],
d_fm@tax_table[, "Species"]
)
#> Species Species
#> ASV7 "" "s__NA"
#> ASV8 "ostrea" "s:ostrea"
#> ASV12 "raduloides" "s__raduloides"
#> ASV18 "ostrea" "s:ostrea"
#> ASV25 "lachnopus" "s__lachnopus"
#> ASV26 "hirsutum" "s:hirsutum"
#> ASV27 "brasiliensis" "s__brasiliensis"
#> ASV29 "eyrei" "s:eyrei"
#> ASV32 "oblongisporum" "s__oblongisporum"
#> ASV34 "" "s:NA"
#> ASV35 "fomentarius" "s__fomentarius"
#> ASV41 "renati" "s:renati"
#> ASV42 "lachnopus" "s__lachnopus"
#> ASV46 "pellucida" "s:pellucida"
#> ASV47 "molaris" "s__molaris"
#> ASV48 "caryae" "s:caryae"
#> ASV49 "livescens" "s__livescens"
#> ASV50 "analogum" "s:analogum"
#> ASV53 "fomentarius" "s__fomentarius"
#> ASV54 "" "s:NA"
#> ASV58 "fomentarius" "s__fomentarius"
#> ASV59 "roseocremeum" "s:roseocremeum"
#> ASV61 "setigerum" "s__setigerum"
#> ASV62 "" "s:NA"
#> ASV63 "" "s__NA"
#> ASV64 "versicolor" "s:versicolor"
#> ASV67 "raduloides" "s__raduloides"
#> ASV68 "lachnopus" "s:lachnopus"
#> ASV71 "" "s__NA"
#> ASV72 "" "s:NA"
#> ASV75 "versiformis" "s__versiformis"
#> ASV77 "lachnopus" "s:lachnopus"
#> ASV82 "glandulosa" "s__glandulosa"
#> ASV83 "" "s:NA"
#> ASV85 "pubera" "s__pubera"
#> ASV91 "mesenterica" "s:mesenterica"
#> ASV93 "" "s__NA"
#> ASV94 "ostrea" "s:ostrea"
#> ASV99 "fomentarius" "s__fomentarius"
#> ASV100 "" "s:NA"
#> ASV101 "buckii" "s__buckii"
#> ASV104 "coralloides" "s:coralloides"
#> ASV105 "flaviporus" "s__flaviporus"
#> ASV107 "raduloides" "s:raduloides"
#> ASV108 "glandulosa" "s__glandulosa"
if (FALSE) { # \dontrun{
# Replace PR2 placeholder unknowns (_X, _XX, _XXX, _XXX_sp., Genus_sp.) with NA
simplify_taxo(pq_pr2, pattern_to_NA = "_X+$|_sp\\.$")
# Apply pattern_to_NA only to the Species column
simplify_taxo(pq_pr2,
pattern_to_NA = "_X+$|_sp\\.$",
ranks_for_pattern_to_NA = "Species"
)
} # }