Introduction to twoStageLCA (Two Stage Linked Component Analysis)
Huan Chen
Jinrui Liu
Shreyash Sonthalia
Guangyan Li
Carlo Colantuoni
02 October, 2022
Source:vignettes/twoStageLCA.Rmd
twoStageLCA.Rmd
Install and load SJD
package
To install this package in R, run the following commands:
library(devtools)
install_github("CHuanSite/SJD")
Single-Cell RNAseq Example
First, install the ‘googleDrive’ package
install.packages('googledrive', repos = "http://cran.us.r-project.org")
#> Installing package into '/private/var/folders/hn/15_w90p96zn9px58h76x8sfr0000gn/T/Rtmpnf4wal/temp_libpath14ecf600e2706'
#> (as 'lib' is unspecified)
#>
#> The downloaded binary packages are in
#> /var/folders/hn/15_w90p96zn9px58h76x8sfr0000gn/T//RtmpW9ZsPG/downloaded_packages
Download RNA and explaination data from Google Drive,
library(googledrive)
## Data file
url_data <- "https://drive.google.com/file/d/1OQovDBPwRX_O2N1GSNY8fzJn-p3-fwQV/view?usp=sharing"
drive_download(url_data, overwrite = TRUE)
#> ! Using an auto-discovered, cached token.
#> To suppress this message, modify your code or options to clearly consent to
#> the use of a cached token.
#> See gargle's "Non-interactive auth" vignette for more details:
#> <https://gargle.r-lib.org/articles/non-interactive-auth.html>
#> ℹ The googledrive package is using a cached token for hzchenhuan@gmail.com.
#> File downloaded:
#> • data.zip <id: 1OQovDBPwRX_O2N1GSNY8fzJn-p3-fwQV>
#> Saved locally as:
#> • data.zip
unzip('data.zip')
## Explaination file
url_explaination <- 'https://drive.google.com/file/d/1S3HdygRCMvPttmVd9cix4GskWj1VPJaM/view?usp=sharing'
drive_download(url_explaination, overwrite = TRUE)
#> File downloaded:
#> • data_explaination.zip <id: 1S3HdygRCMvPttmVd9cix4GskWj1VPJaM>
#> Saved locally as:
#> • data_explaination.zip
unzip('data_explaination.zip')
Read data into R
library(tidyverse)
#> ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
#> ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
#> ✔ tibble 3.1.8 ✔ dplyr 1.0.9
#> ✔ tidyr 1.2.0 ✔ stringr 1.4.0
#> ✔ readr 2.1.2 ✔ forcats 0.5.1
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
#> ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## Read in files
inVitro_bulk = read.table('1_inVitro_Bulk_Cortecon.plog2_trimNord.txt', stringsAsFactors = FALSE, header = TRUE) %>% select(-1) %>% as.matrix
inVitro_sc = read.table('2_inVitro_SingleCell_scESCdifBifurc.CelSeq_trimNord.txt', stringsAsFactors = FALSE, header = TRUE) %>% select(-1) %>% as.matrix
inVivo_bulk = read.table('3_inVivo_Bulk_BrainSpan_RNAseq_Gene_DFC_noSVA_plog2_trimNord.txt', stringsAsFactors = FALSE, header = TRUE) %>% select(-1) %>% as.matrix
inVivo_sc = read.table('4_inVivo_SingleCell_CtxDevoSC4kTopoTypoTempo_plog2_trimNord.txt', stringsAsFactors = FALSE, header = TRUE) %>% select(-1) %>% as.matrix
## legends for the 4 datasets
inVitro_bulk_exp = read.table("1_inVitro_Bulk_Cortecon.pd.txt",stringsAsFactors = FALSE, header = T)
inVitro_sc_exp = read.table("2_inVitro_SingleCell_scESCdifBifurc.CelSeq.pd.txt", stringsAsFactors = FALSE, header = T)
inVivo_bulk_exp = read.table("3_inVivo_Bulk_BrainSpan.RNAseq.Gene.DFC.pd.txt", stringsAsFactors = FALSE, header = T)
inVivo_sc_exp = read.table("4_inVivo_SingleCell_CtxDevoSC4kTopoTypoTempo.pd.txt", stringsAsFactors = FALSE, header = T)
Conduct Two-stage linked component analysis
library(SJD)
## List of datasets and group assignment and number of components
dataset = list(inVitro_bulk, inVitro_sc, inVivo_bulk, inVivo_sc)
group = list(c(1,2,3,4), c(1,2), c(3,4), c(1,3), c(2,4), c(1), c(2), c(3), c(4))
comp_num = c(2,2,2,2,2,2,2,2,2)
## Output result
twoStageLCA_res = twoStageLCA(dataset, group, comp_num)
Visualize the result
# par(mfrow = c(2,2)) #, mai=c(0.6,0.6,0.6,0))
# common component
plot(t(twoStageLCA_res$score_list[[1]][[1]]), col = inVitro_bulk_exp$color, pch = 16, xlab = "PC1", ylab = "PC2", main = "common: inVitro_bulk", cex = 1, cex.axis = 1, cex.lab = 1, cex.main = 1)
plot(t(twoStageLCA_res$score_list[[2]][[1]]), col = inVitro_sc_exp$COLORby.DCX, pch = 16, xlab = "PC1", ylab = "PC2", main = "common: inVitro_sc", cex = 1, cex.axis = 1, cex.lab = 1, cex.main = 1)
plot(t(twoStageLCA_res$score_list[[3]][[1]]), col = inVivo_bulk_exp$color, pch = 16, xlab = "PC1", ylab = "PC2", main = "common: inVivo_bulk", cex = 1, cex.axis = 1, cex.lab = 1, cex.main = 1)
plot(t(twoStageLCA_res$score_list[[4]][[1]]), col = inVivo_sc_exp$COLORby.DCX, pch = 16, xlab = "PC1", ylab = "PC2", main = "common: inVivo_sc", cex = 1, cex.axis = 1, cex.lab = 1, cex.main = 1)
# par(mfrow = c(1,2)) #, mai=c(0.6,0.6,0.6,0))
## in vitro component
plot(t(twoStageLCA_res$score_list[[1]][[2]]), col = inVitro_bulk_exp$color, pch = 16, xlab = "PC1", ylab = "PC2", main = "common: inVitro_bulk", cex = 1, cex.axis = 1, cex.lab = 1, cex.main = 1)
plot(t(twoStageLCA_res$score_list[[2]][[2]]), col = inVitro_sc_exp$COLORby.DCX, pch = 16, xlab = "PC1", ylab = "PC2", main = "common: inVitro_sc", cex = 1, cex.axis = 1, cex.lab = 1, cex.main = 1)
## in vivo component
plot(t(twoStageLCA_res$score_list[[3]][[3]]), col = inVivo_bulk_exp$color, pch = 16, xlab = "PC1", ylab = "PC2", main = "common: inVivo_bulk", cex = 1, cex.axis = 1, cex.lab = 1, cex.main = 1)
plot(t(twoStageLCA_res$score_list[[4]][[3]]), col = inVivo_sc_exp$COLORby.DCX, pch = 16, xlab = "PC1", ylab = "PC2", main = "common: inVivo_sc", cex = 1, cex.axis = 1, cex.lab = 1, cex.main = 1)
## bulk component
plot(t(twoStageLCA_res$score_list[[1]][[4]]), col = inVitro_bulk_exp$color, pch = 16, xlab = "PC1", ylab = "PC2", main = "common: inVitro_bulk", cex = 1, cex.axis = 1, cex.lab = 1, cex.main = 1)
plot(t(twoStageLCA_res$score_list[[3]][[4]]), col = inVivo_bulk_exp$color, pch = 16, xlab = "PC1", ylab = "PC2", main = "common: inVivo_bulk", cex = 1, cex.axis = 1, cex.lab = 1, cex.main = 1)
## sc component
plot(t(twoStageLCA_res$score_list[[2]][[5]]), col = inVitro_sc_exp$COLORby.DCX, pch = 16, xlab = "PC1", ylab = "PC2", main = "common: inVitro_sc", cex = 1, cex.axis = 1, cex.lab = 1, cex.main = 1)
plot(t(twoStageLCA_res$score_list[[4]][[5]]), col = inVivo_sc_exp$COLORby.DCX, pch = 16, xlab = "PC1", ylab = "PC2", main = "common: inVivo_sc", cex = 1, cex.axis = 1, cex.lab = 1, cex.main = 1)
Reference
- Huan Chen, Brian Caffo, Genevieve Stein-O’Brien, Jinrui Liu, Ben Langmead, Carlo Colantuoni, Luo Xiao, Two-stage linked component analysis for joint decomposition of multiple biologically related data sets, Biostatistics, 2022;, kxac005, https://doi.org/10.1093/biostatistics/kxac005