Importing and formatting data from phyloseq objects
# Taxa Bar plots form Plyloseq Data
# Andrew McCracken
# 4/13/22
setwd("C:/Users/andre/Desktop/Pespeni Lab/SSWD Pycnopodia 16s Networks Manuscript/Taxa-bar-plots")
# Load Libraries (idk if you need them all but here we go)
library(microbiome)
## Loading required package: phyloseq
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.1.2
##
## microbiome R package (microbiome.github.com)
##
##
##
## Copyright (C) 2011-2020 Leo Lahti,
## Sudarshan Shetty et al. <microbiome.github.io>
##
## Attaching package: 'microbiome'
## The following object is masked from 'package:ggplot2':
##
## alpha
## The following object is masked from 'package:base':
##
## transform
library(phyloseq)
library(qiime2R)
library(knitr)
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.1.3
#install.packages("remotes")
#remotes::install_github("gmteunisse/Fantaxtic")
library(remotes)
## Warning: package 'remotes' was built under R version 4.1.3
library(fantaxtic)
## Loading required package: reshape2
library(reshape2)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble 3.1.5 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.0.2 v forcats 0.5.1
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x microbiome::alpha() masks ggplot2::alpha()
## x dplyr::filter() masks stats::filter()
## x dplyr::group_rows() masks kableExtra::group_rows()
## x dplyr::lag() masks stats::lag()
library(sjmisc)
## Warning: package 'sjmisc' was built under R version 4.1.3
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
##
## Attaching package: 'sjmisc'
## The following object is masked from 'package:purrr':
##
## is_empty
## The following object is masked from 'package:tidyr':
##
## replace_na
## The following object is masked from 'package:tibble':
##
## add_case
# import phyloseq files
physeq_all<-qza_to_phyloseq(
features="table.qza",
tree="rooted-tree.qza",
taxonomy="taxonomy.qza",
metadata = "pyc_manifest")
#Filter to Family Level
filter_fam <- tax_glom(physeq_all, "Family")
#normalize per individual sample.
norm_abund_fam <- transform_sample_counts(filter_fam, function(x) x / sum(x))
#Filter top taxa
#https://rdrr.io/github/gmteunisse/Fantaxtic/man/get_top_taxa.html
top_tax_merge <- get_top_taxa(norm_abund_fam, 15, relative = TRUE, discard_other = FALSE, other_label = "Other")
# Phyloseq Plot
#plot_bar(top_tax_merge, "animalID", "Abundance", fill="Family")
Converting Phyloseq object to Data.Frame for ggplot
########################################################
# convert to data table
setwd("C:/Users/andre/Desktop/Pespeni Lab/SSWD Pycnopodia 16s Networks Manuscript/Taxa-bar-plots")
source('phyloseq_to_df.R')
phylo_table_fam_all <- phyloseq_to_df(norm_abund_fam)
phylo_table_fam_15 <- phyloseq_to_df(top_tax_merge)
# Randomly sample 5 from HH, 5 SH, 5 SS
# Col 9-55 -> HH
# Col 56-75 -> SH
# Col 76-93 -> SS
subset_fam_15 <- cbind(phylo_table_fam_15[,6],
sample(x = phylo_table_fam_15[,9:55], size=5),
sample(x = phylo_table_fam_15[,56:75], size=5),
sample(x = phylo_table_fam_15[,76:93], size=5))
subset_fam_15_rotate <- rotate_df(subset_fam_15, rn = "sample", cn = TRUE)
fam_15_long <- pivot_longer(subset_fam_15,
col=2:16,
names_to = 'Sample',
values_to = 'Abundance')
names(fam_15_long)[1] <- 'taxa'
#Relative Abundance
group_by(fam_15_long, taxa)
## # A tibble: 240 x 3
## # Groups: taxa [16]
## taxa Sample Abundance
## <chr> <chr> <dbl>
## 1 Spirochaetaceae HH01_08 0.775
## 2 Spirochaetaceae HH01_19 0.850
## 3 Spirochaetaceae HH01_01 0.942
## 4 Spirochaetaceae HH02_13 0.593
## 5 Spirochaetaceae HH02_23 0.487
## 6 Spirochaetaceae SH13_08 0.320
## 7 Spirochaetaceae SH15_06 0.654
## 8 Spirochaetaceae SH15_08 0.593
## 9 Spirochaetaceae SH13_10 0.469
## 10 Spirochaetaceae SH13_05 0.219
## # ... with 230 more rows
ggplot(data=fam_15_long) + geom_bar(aes(x=Sample, y=Abundance, fill=taxa), stat="identity")