Demo
Huan Chen
Jinrui Liu
Shreyash Sonthalia
Guangyan Li
Carlo Colantuoni
02 October, 2022
Source:vignettes/SJD.Rmd
SJD.Rmd
Below is a demo of installing and running the SJD package on simulated datasets. For an analytical vignette using real data, please refer to Real Data Analysis here.
Install and load the SJD package
To install this package in R, run the following commands:
# Install SJD from GitHub (install_github() is provided by devtools)
library(devtools)
install_github("CHuanSite/SJD")
# Attach SJD so that the functions used in the examples below
# (sepPCA(), concatPCA(), jointPCA(), twoStageLCA(), ...) can be called directly
library(SJD)
Simple example
# Simulate four datasets, each a 100 x 50 matrix of Uniform(1, 2) draws
dataset <- replicate(
  4,
  matrix(runif(5000, 1, 2), nrow = 100, ncol = 50),
  simplify = FALSE
)

## Specify the structure among the datasets:
## each entry of `group` lists the indices of the datasets in that group,
## and `comp_num` requests 2 components for each of the 9 groups
group <- list(
  c(1, 2, 3, 4),
  c(1, 2), c(3, 4), c(1, 3), c(2, 4),
  c(1), c(2), c(3), c(4)
)
comp_num <- rep(2, 9)

## Separate PCA, ICA, NMF
sepPCA_res <- sepPCA(dataset, comp_num)
sepICA_res <- sepICA(dataset, comp_num)
sepNMF_res <- sepNMF(dataset, comp_num)

## Concatenated PCA, ICA, NMF
concatPCA_res <- concatPCA(dataset, group, comp_num)
concatICA_res <- concatICA(dataset, group, comp_num)
concatNMF_res <- concatNMF(dataset, group, comp_num)

## Joint PCA, ICA, NMF
jointPCA_res <- jointPCA(dataset, group, comp_num)
jointICA_res <- jointICA(dataset, group, comp_num)
jointNMF_res <- jointNMF(dataset, group, comp_num)

## twoStageLCA
twoStageLCA_res <- twoStageLCA(dataset, group, comp_num)
To access the components:
# Show the structure of the extracted components
# (a list with one matrix per entry of `group`)
utils::str(concatPCA_res$linked_component_list)
#> List of 9
#> $ component_No.1: num [1:100, 1:2] -0.2649 0.0712 -0.0558 -0.1471 0.0252 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : chr [1:100] "gene_No.1" "gene_No.2" "gene_No.3" "gene_No.4" ...
#> .. ..$ : chr [1:2] "component_No.1_subcomp.1" "component_No.1_subcomp.2"
#> $ component_No.2: num [1:100, 1:2] 0.1692 -0.0312 0.1816 0.1296 0.0332 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : chr [1:100] "gene_No.1" "gene_No.2" "gene_No.3" "gene_No.4" ...
#> .. ..$ : chr [1:2] "component_No.2_subcomp.1" "component_No.2_subcomp.2"
#> $ component_No.3: num [1:100, 1:2] -0.1636 0.176 0.0312 -0.0658 0.0737 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : chr [1:100] "gene_No.1" "gene_No.2" "gene_No.3" "gene_No.4" ...
#> .. ..$ : chr [1:2] "component_No.3_subcomp.1" "component_No.3_subcomp.2"
#> $ component_No.4: num [1:100, 1:2] -0.1648 0.0688 -0.1521 -0.1165 -0.0381 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : chr [1:100] "gene_No.1" "gene_No.2" "gene_No.3" "gene_No.4" ...
#> .. ..$ : chr [1:2] "component_No.4_subcomp.1" "component_No.4_subcomp.2"
#> $ component_No.5: num [1:100, 1:2] -0.25047 0.07335 0.00609 -0.0993 0.04414 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : chr [1:100] "gene_No.1" "gene_No.2" "gene_No.3" "gene_No.4" ...
#> .. ..$ : chr [1:2] "component_No.5_subcomp.1" "component_No.5_subcomp.2"
#> $ component_No.6: num [1:100, 1:2] -0.1377 -0.0125 -0.1179 -0.0947 -0.1363 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : chr [1:100] "gene_No.1" "gene_No.2" "gene_No.3" "gene_No.4" ...
#> .. ..$ : chr [1:2] "component_No.6_subcomp.1" "component_No.6_subcomp.2"
#> $ component_No.7: num [1:100, 1:2] -0.02 -0.1373 -0.026 0.0252 0.1094 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : chr [1:100] "gene_No.1" "gene_No.2" "gene_No.3" "gene_No.4" ...
#> .. ..$ : chr [1:2] "component_No.7_subcomp.1" "component_No.7_subcomp.2"
#> $ component_No.8: num [1:100, 1:2] -0.1339 0.1261 0.1081 -0.1386 -0.0263 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : chr [1:100] "gene_No.1" "gene_No.2" "gene_No.3" "gene_No.4" ...
#> .. ..$ : chr [1:2] "component_No.8_subcomp.1" "component_No.8_subcomp.2"
#> $ component_No.9: num [1:100, 1:2] -0.193 -0.0162 0.1877 -0.0662 0.0562 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : chr [1:100] "gene_No.1" "gene_No.2" "gene_No.3" "gene_No.4" ...
#> .. ..$ : chr [1:2] "component_No.9_subcomp.1" "component_No.9_subcomp.2"
To access the scores:
# Show the structure of the scores: one sub-list per dataset, holding one
# entry per group (NA where the dataset is not a member of that group)
utils::str(concatPCA_res$score_list)
#> List of 4
#> $ dataset_No.1:List of 9
#> ..$ component_No.1: num [1:2, 1:50] -1.151 2.544 2.002 2.067 0.558 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : chr [1:2] "component_No.1_subcomp.1, PVE: 0.033331" "component_No.1_subcomp.2, PVE: 0.023812"
#> .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#> ..$ component_No.2: num [1:2, 1:50] -1.85 -1.87 -3.44 1.33 1.03 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : chr [1:2] "component_No.2_subcomp.1, PVE: 0.045869" "component_No.2_subcomp.2, PVE: 0.040222"
#> .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#> ..$ component_No.3: logi NA
#> ..$ component_No.4: num [1:2, 1:50] 1.335 0.536 2.912 0.739 -1.972 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : chr [1:2] "component_No.4_subcomp.1, PVE: 0.048891" "component_No.4_subcomp.2, PVE: 0.030486"
#> .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#> ..$ component_No.5: logi NA
#> ..$ component_No.6: num [1:2, 1:50] 2.542 0.162 2.593 -0.705 -0.928 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : chr [1:2] "component_No.6_subcomp.1, PVE: 0.056170" "component_No.6_subcomp.2, PVE: 0.052568"
#> .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#> ..$ component_No.7: logi NA
#> ..$ component_No.8: logi NA
#> ..$ component_No.9: logi NA
#> $ dataset_No.2:List of 9
#> ..$ component_No.1: num [1:2, 1:50] 0.114 -0.284 -0.345 0.303 2.094 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : chr [1:2] "component_No.1_subcomp.1, PVE: 0.031346" "component_No.1_subcomp.2, PVE: 0.021446"
#> .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#> ..$ component_No.2: num [1:2, 1:50] -0.747 -2.025 0.392 1.525 -0.448 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : chr [1:2] "component_No.2_subcomp.1, PVE: 0.035465" "component_No.2_subcomp.2, PVE: 0.031001"
#> .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#> ..$ component_No.3: logi NA
#> ..$ component_No.4: logi NA
#> ..$ component_No.5: num [1:2, 1:50] -0.149 -1.244 1.113 -1.311 1.151 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : chr [1:2] "component_No.5_subcomp.1, PVE: 0.031926" "component_No.5_subcomp.2, PVE: 0.030301"
#> .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#> ..$ component_No.6: logi NA
#> ..$ component_No.7: num [1:2, 1:50] 0.684 1.342 3.613 -0.611 3.838 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : chr [1:2] "component_No.7_subcomp.1, PVE: 0.051229" "component_No.7_subcomp.2, PVE: 0.049898"
#> .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#> ..$ component_No.8: logi NA
#> ..$ component_No.9: logi NA
#> $ dataset_No.3:List of 9
#> ..$ component_No.1: num [1:2, 1:50] -2.459 -2.215 -1.45 0.844 -2 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : chr [1:2] "component_No.1_subcomp.1, PVE: 0.024430" "component_No.1_subcomp.2, PVE: 0.028108"
#> .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#> ..$ component_No.2: logi NA
#> ..$ component_No.3: num [1:2, 1:50] 0.0673 -1.3937 -2.3066 0.3895 1.8 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : chr [1:2] "component_No.3_subcomp.1, PVE: 0.036413" "component_No.3_subcomp.2, PVE: 0.026953"
#> .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#> ..$ component_No.4: num [1:2, 1:50] -2.9 -1.97 -1 1.06 -2.43 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : chr [1:2] "component_No.4_subcomp.1, PVE: 0.024186" "component_No.4_subcomp.2, PVE: 0.037703"
#> .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#> ..$ component_No.5: logi NA
#> ..$ component_No.6: logi NA
#> ..$ component_No.7: logi NA
#> ..$ component_No.8: num [1:2, 1:50] 1.388 1.891 -1.599 0.279 2.413 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : chr [1:2] "component_No.8_subcomp.1, PVE: 0.051370" "component_No.8_subcomp.2, PVE: 0.050324"
#> .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#> ..$ component_No.9: logi NA
#> $ dataset_No.4:List of 9
#> ..$ component_No.1: num [1:2, 1:50] 0.815 1.733 -0.251 -2.307 -1.196 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : chr [1:2] "component_No.1_subcomp.1, PVE: 0.032123" "component_No.1_subcomp.2, PVE: 0.034866"
#> .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#> ..$ component_No.2: logi NA
#> ..$ component_No.3: num [1:2, 1:50] -2.585 -1.284 2.446 0.791 -1.365 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : chr [1:2] "component_No.3_subcomp.1, PVE: 0.045145" "component_No.3_subcomp.2, PVE: 0.045044"
#> .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#> ..$ component_No.4: logi NA
#> ..$ component_No.5: num [1:2, 1:50] 1.193 -1.185 0.26 3.479 0.334 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : chr [1:2] "component_No.5_subcomp.1, PVE: 0.045281" "component_No.5_subcomp.2, PVE: 0.039914"
#> .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#> ..$ component_No.6: logi NA
#> ..$ component_No.7: logi NA
#> ..$ component_No.8: logi NA
#> ..$ component_No.9: num [1:2, 1:50] 3.501 -1.402 -0.344 3.473 1.373 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : chr [1:2] "component_No.9_subcomp.1, PVE: 0.057037" "component_No.9_subcomp.2, PVE: 0.056014"
#> .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
Weighting data sets by different weights
The SJD package also allows users to specify a weight for each dataset during the analysis:
# Simulate four datasets, each a 100 x 50 matrix of Uniform(1, 2) draws
dataset <- replicate(
  4,
  matrix(runif(5000, 1, 2), nrow = 100, ncol = 50),
  simplify = FALSE
)

## Specify the structure among the datasets
group <- list(
  c(1, 2, 3, 4),
  c(1, 2), c(3, 4), c(1, 3), c(2, 4),
  c(1), c(2), c(3), c(4)
)
comp_num <- rep(2, 9)

## One weight per dataset, in the same order as `dataset`
weighting <- c(2, 1, 4, 3)

## Separate PCA, ICA, NMF
sepPCA_res <- sepPCA(dataset, comp_num, weighting)
sepICA_res <- sepICA(dataset, comp_num, weighting)
sepNMF_res <- sepNMF(dataset, comp_num, weighting)

## Concatenated PCA, ICA, NMF
concatPCA_res <- concatPCA(dataset, group, comp_num, weighting)
concatICA_res <- concatICA(dataset, group, comp_num, weighting)
concatNMF_res <- concatNMF(dataset, group, comp_num, weighting)

## Joint PCA, ICA, NMF
jointPCA_res <- jointPCA(dataset, group, comp_num, weighting)
jointICA_res <- jointICA(dataset, group, comp_num, weighting)
jointNMF_res <- jointNMF(dataset, group, comp_num, weighting)

## twoStageLCA
twoStageLCA_res <- twoStageLCA(dataset, group, comp_num, weighting)
Projecting new datasets onto extracted components
# Simulate four datasets, each a 100 x 50 matrix of Uniform(1, 2) draws
dataset <- replicate(
  4,
  matrix(runif(5000, 1, 2), nrow = 100, ncol = 50),
  simplify = FALSE
)

## Specify the structure among the datasets
group <- list(
  c(1, 2, 3, 4),
  c(1, 2), c(3, 4), c(1, 3), c(2, 4),
  c(1), c(2), c(3), c(4)
)
comp_num <- rep(2, 9)

## New dataset to be projected (same number of rows as the training datasets)
proj_dataset <- list(matrix(runif(5000, 1, 2), nrow = 100, ncol = 50))

## One logical vector per projected dataset, with one flag per entry of
## `group`; here groups 1, 2 and 6 are flagged (presumably the components
## the new dataset is projected onto)
proj_group <- list(seq_along(group) %in% c(1, 2, 6))

## Concatenated PCA with the projection functionality switched on
res_concatPCA <- concatPCA(
  dataset, group, comp_num,
  weighting = NULL,
  proj_dataset = proj_dataset,
  proj_group = proj_group
)