Home Page

Importing and formatting data from phyloseq objects

# Taxa Bar plots form Plyloseq Data
# Andrew McCracken
# 4/13/22

setwd("C:/Users/andre/Desktop/Pespeni Lab/SSWD Pycnopodia 16s Networks Manuscript/Taxa-bar-plots")

# Load Libraries (idk if you need them all but here we go) 
library(microbiome)
## Loading required package: phyloseq
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.1.2
## 
## microbiome R package (microbiome.github.com)
##     
## 
## 
##  Copyright (C) 2011-2020 Leo Lahti, 
##     Sudarshan Shetty et al. <microbiome.github.io>
## 
## Attaching package: 'microbiome'
## The following object is masked from 'package:ggplot2':
## 
##     alpha
## The following object is masked from 'package:base':
## 
##     transform
library(phyloseq)
library(qiime2R)
library(knitr)
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.1.3
#install.packages("remotes")
#remotes::install_github("gmteunisse/Fantaxtic")
library(remotes)
## Warning: package 'remotes' was built under R version 4.1.3
library(fantaxtic)
## Loading required package: reshape2
library(reshape2)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.5     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.0.2     v forcats 0.5.1
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x microbiome::alpha() masks ggplot2::alpha()
## x dplyr::filter()     masks stats::filter()
## x dplyr::group_rows() masks kableExtra::group_rows()
## x dplyr::lag()        masks stats::lag()
library(sjmisc)
## Warning: package 'sjmisc' was built under R version 4.1.3
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
## 
## Attaching package: 'sjmisc'
## The following object is masked from 'package:purrr':
## 
##     is_empty
## The following object is masked from 'package:tidyr':
## 
##     replace_na
## The following object is masked from 'package:tibble':
## 
##     add_case
# import phyloseq files
physeq_all<-qza_to_phyloseq(
  features="table.qza",
  tree="rooted-tree.qza",
  taxonomy="taxonomy.qza",
  metadata = "pyc_manifest")

#Filter to Family Level
filter_fam <- tax_glom(physeq_all, "Family")

#normalize per individual sample. 
norm_abund_fam <- transform_sample_counts(filter_fam, function(x) x / sum(x))

#Filter top taxa
#https://rdrr.io/github/gmteunisse/Fantaxtic/man/get_top_taxa.html
top_tax_merge <- get_top_taxa(norm_abund_fam, 15, relative = TRUE, discard_other = FALSE, other_label = "Other")


# Phyloseq Plot
#plot_bar(top_tax_merge, "animalID", "Abundance", fill="Family")

Converting Phyloseq object to Data.Frame for ggplot

########################################################
# convert to data table
setwd("C:/Users/andre/Desktop/Pespeni Lab/SSWD Pycnopodia 16s Networks Manuscript/Taxa-bar-plots")
source('phyloseq_to_df.R')
phylo_table_fam_all <- phyloseq_to_df(norm_abund_fam)
phylo_table_fam_15 <- phyloseq_to_df(top_tax_merge)


# Randomly sample 5 from HH, 5 SH, 5 SS
# Col 9-55 -> HH
# Col 56-75 -> SH
# Col 76-93 -> SS

subset_fam_15 <- cbind(phylo_table_fam_15[,6], 
                       sample(x = phylo_table_fam_15[,9:55], size=5),
                       sample(x = phylo_table_fam_15[,56:75], size=5),
                       sample(x = phylo_table_fam_15[,76:93], size=5))

subset_fam_15_rotate <- rotate_df(subset_fam_15, rn = "sample", cn = TRUE)


fam_15_long <- pivot_longer(subset_fam_15, 
                            col=2:16,  
                            names_to = 'Sample', 
                            values_to = 'Abundance')

names(fam_15_long)[1] <- 'taxa'

#Relative Abundance 
group_by(fam_15_long, taxa)
## # A tibble: 240 x 3
## # Groups:   taxa [16]
##    taxa            Sample  Abundance
##    <chr>           <chr>       <dbl>
##  1 Spirochaetaceae HH01_08     0.775
##  2 Spirochaetaceae HH01_19     0.850
##  3 Spirochaetaceae HH01_01     0.942
##  4 Spirochaetaceae HH02_13     0.593
##  5 Spirochaetaceae HH02_23     0.487
##  6 Spirochaetaceae SH13_08     0.320
##  7 Spirochaetaceae SH15_06     0.654
##  8 Spirochaetaceae SH15_08     0.593
##  9 Spirochaetaceae SH13_10     0.469
## 10 Spirochaetaceae SH13_05     0.219
## # ... with 230 more rows
ggplot(data=fam_15_long) + geom_bar(aes(x=Sample, y=Abundance, fill=taxa), stat="identity")