Demo

Below is a demo of installing and running the SJD package on simulated datasets. For an analytical vignette using real data, please refer to the Real Data Analysis vignette here.

Install and load `SJD` package

To install this package in R, run the following commands:

library(devtools)
install_github("CHuanSite/SJD")

library(SJD)

Simple example

# Simulate the datasets
dataset = list(matrix(runif(5000, 1, 2), nrow = 100, ncol = 50),
               matrix(runif(5000, 1, 2), nrow = 100, ncol = 50),
               matrix(runif(5000, 1, 2), nrow = 100, ncol = 50),
               matrix(runif(5000, 1, 2), nrow = 100, ncol = 50))
               
## Specify the structure among the datasets
group = list(c(1,2,3,4), c(1,2), c(3,4), c(1,3), c(2,4), c(1), c(2), c(3), c(4))
comp_num = c(2,2,2,2,2,2,2,2,2)

## Separate PCA, ICA, NMF
sepPCA_res = sepPCA(dataset, comp_num)
sepICA_res = sepICA(dataset, comp_num)
sepNMF_res = sepNMF(dataset, comp_num)

## Concatenated PCA, ICA, NMF
concatPCA_res = concatPCA(dataset, group, comp_num)
concatICA_res = concatICA(dataset, group, comp_num)
concatNMF_res = concatNMF(dataset, group, comp_num)

## Joint PCA, ICA, NMF
jointPCA_res = jointPCA(dataset, group, comp_num)
jointICA_res = jointICA(dataset, group, comp_num)
jointNMF_res = jointNMF(dataset, group, comp_num)

## twoStageLCA
twoStageLCA_res = twoStageLCA(dataset, group, comp_num)

To access the components

str(concatPCA_res$linked_component_list)
#> List of 9
#>  $ component_No.1: num [1:100, 1:2] -0.2649 0.0712 -0.0558 -0.1471 0.0252 ...
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ : chr [1:100] "gene_No.1" "gene_No.2" "gene_No.3" "gene_No.4" ...
#>   .. ..$ : chr [1:2] "component_No.1_subcomp.1" "component_No.1_subcomp.2"
#>  $ component_No.2: num [1:100, 1:2] 0.1692 -0.0312 0.1816 0.1296 0.0332 ...
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ : chr [1:100] "gene_No.1" "gene_No.2" "gene_No.3" "gene_No.4" ...
#>   .. ..$ : chr [1:2] "component_No.2_subcomp.1" "component_No.2_subcomp.2"
#>  $ component_No.3: num [1:100, 1:2] -0.1636 0.176 0.0312 -0.0658 0.0737 ...
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ : chr [1:100] "gene_No.1" "gene_No.2" "gene_No.3" "gene_No.4" ...
#>   .. ..$ : chr [1:2] "component_No.3_subcomp.1" "component_No.3_subcomp.2"
#>  $ component_No.4: num [1:100, 1:2] -0.1648 0.0688 -0.1521 -0.1165 -0.0381 ...
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ : chr [1:100] "gene_No.1" "gene_No.2" "gene_No.3" "gene_No.4" ...
#>   .. ..$ : chr [1:2] "component_No.4_subcomp.1" "component_No.4_subcomp.2"
#>  $ component_No.5: num [1:100, 1:2] -0.25047 0.07335 0.00609 -0.0993 0.04414 ...
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ : chr [1:100] "gene_No.1" "gene_No.2" "gene_No.3" "gene_No.4" ...
#>   .. ..$ : chr [1:2] "component_No.5_subcomp.1" "component_No.5_subcomp.2"
#>  $ component_No.6: num [1:100, 1:2] -0.1377 -0.0125 -0.1179 -0.0947 -0.1363 ...
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ : chr [1:100] "gene_No.1" "gene_No.2" "gene_No.3" "gene_No.4" ...
#>   .. ..$ : chr [1:2] "component_No.6_subcomp.1" "component_No.6_subcomp.2"
#>  $ component_No.7: num [1:100, 1:2] -0.02 -0.1373 -0.026 0.0252 0.1094 ...
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ : chr [1:100] "gene_No.1" "gene_No.2" "gene_No.3" "gene_No.4" ...
#>   .. ..$ : chr [1:2] "component_No.7_subcomp.1" "component_No.7_subcomp.2"
#>  $ component_No.8: num [1:100, 1:2] -0.1339 0.1261 0.1081 -0.1386 -0.0263 ...
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ : chr [1:100] "gene_No.1" "gene_No.2" "gene_No.3" "gene_No.4" ...
#>   .. ..$ : chr [1:2] "component_No.8_subcomp.1" "component_No.8_subcomp.2"
#>  $ component_No.9: num [1:100, 1:2] -0.193 -0.0162 0.1877 -0.0662 0.0562 ...
#>   ..- attr(*, "dimnames")=List of 2
#>   .. ..$ : chr [1:100] "gene_No.1" "gene_No.2" "gene_No.3" "gene_No.4" ...
#>   .. ..$ : chr [1:2] "component_No.9_subcomp.1" "component_No.9_subcomp.2"

To access the scores

str(concatPCA_res$score_list)
#> List of 4
#>  $ dataset_No.1:List of 9
#>   ..$ component_No.1: num [1:2, 1:50] -1.151 2.544 2.002 2.067 0.558 ...
#>   .. ..- attr(*, "dimnames")=List of 2
#>   .. .. ..$ : chr [1:2] "component_No.1_subcomp.1, PVE: 0.033331" "component_No.1_subcomp.2, PVE: 0.023812"
#>   .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#>   ..$ component_No.2: num [1:2, 1:50] -1.85 -1.87 -3.44 1.33 1.03 ...
#>   .. ..- attr(*, "dimnames")=List of 2
#>   .. .. ..$ : chr [1:2] "component_No.2_subcomp.1, PVE: 0.045869" "component_No.2_subcomp.2, PVE: 0.040222"
#>   .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#>   ..$ component_No.3: logi NA
#>   ..$ component_No.4: num [1:2, 1:50] 1.335 0.536 2.912 0.739 -1.972 ...
#>   .. ..- attr(*, "dimnames")=List of 2
#>   .. .. ..$ : chr [1:2] "component_No.4_subcomp.1, PVE: 0.048891" "component_No.4_subcomp.2, PVE: 0.030486"
#>   .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#>   ..$ component_No.5: logi NA
#>   ..$ component_No.6: num [1:2, 1:50] 2.542 0.162 2.593 -0.705 -0.928 ...
#>   .. ..- attr(*, "dimnames")=List of 2
#>   .. .. ..$ : chr [1:2] "component_No.6_subcomp.1, PVE: 0.056170" "component_No.6_subcomp.2, PVE: 0.052568"
#>   .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#>   ..$ component_No.7: logi NA
#>   ..$ component_No.8: logi NA
#>   ..$ component_No.9: logi NA
#>  $ dataset_No.2:List of 9
#>   ..$ component_No.1: num [1:2, 1:50] 0.114 -0.284 -0.345 0.303 2.094 ...
#>   .. ..- attr(*, "dimnames")=List of 2
#>   .. .. ..$ : chr [1:2] "component_No.1_subcomp.1, PVE: 0.031346" "component_No.1_subcomp.2, PVE: 0.021446"
#>   .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#>   ..$ component_No.2: num [1:2, 1:50] -0.747 -2.025 0.392 1.525 -0.448 ...
#>   .. ..- attr(*, "dimnames")=List of 2
#>   .. .. ..$ : chr [1:2] "component_No.2_subcomp.1, PVE: 0.035465" "component_No.2_subcomp.2, PVE: 0.031001"
#>   .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#>   ..$ component_No.3: logi NA
#>   ..$ component_No.4: logi NA
#>   ..$ component_No.5: num [1:2, 1:50] -0.149 -1.244 1.113 -1.311 1.151 ...
#>   .. ..- attr(*, "dimnames")=List of 2
#>   .. .. ..$ : chr [1:2] "component_No.5_subcomp.1, PVE: 0.031926" "component_No.5_subcomp.2, PVE: 0.030301"
#>   .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#>   ..$ component_No.6: logi NA
#>   ..$ component_No.7: num [1:2, 1:50] 0.684 1.342 3.613 -0.611 3.838 ...
#>   .. ..- attr(*, "dimnames")=List of 2
#>   .. .. ..$ : chr [1:2] "component_No.7_subcomp.1, PVE: 0.051229" "component_No.7_subcomp.2, PVE: 0.049898"
#>   .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#>   ..$ component_No.8: logi NA
#>   ..$ component_No.9: logi NA
#>  $ dataset_No.3:List of 9
#>   ..$ component_No.1: num [1:2, 1:50] -2.459 -2.215 -1.45 0.844 -2 ...
#>   .. ..- attr(*, "dimnames")=List of 2
#>   .. .. ..$ : chr [1:2] "component_No.1_subcomp.1, PVE: 0.024430" "component_No.1_subcomp.2, PVE: 0.028108"
#>   .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#>   ..$ component_No.2: logi NA
#>   ..$ component_No.3: num [1:2, 1:50] 0.0673 -1.3937 -2.3066 0.3895 1.8 ...
#>   .. ..- attr(*, "dimnames")=List of 2
#>   .. .. ..$ : chr [1:2] "component_No.3_subcomp.1, PVE: 0.036413" "component_No.3_subcomp.2, PVE: 0.026953"
#>   .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#>   ..$ component_No.4: num [1:2, 1:50] -2.9 -1.97 -1 1.06 -2.43 ...
#>   .. ..- attr(*, "dimnames")=List of 2
#>   .. .. ..$ : chr [1:2] "component_No.4_subcomp.1, PVE: 0.024186" "component_No.4_subcomp.2, PVE: 0.037703"
#>   .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#>   ..$ component_No.5: logi NA
#>   ..$ component_No.6: logi NA
#>   ..$ component_No.7: logi NA
#>   ..$ component_No.8: num [1:2, 1:50] 1.388 1.891 -1.599 0.279 2.413 ...
#>   .. ..- attr(*, "dimnames")=List of 2
#>   .. .. ..$ : chr [1:2] "component_No.8_subcomp.1, PVE: 0.051370" "component_No.8_subcomp.2, PVE: 0.050324"
#>   .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#>   ..$ component_No.9: logi NA
#>  $ dataset_No.4:List of 9
#>   ..$ component_No.1: num [1:2, 1:50] 0.815 1.733 -0.251 -2.307 -1.196 ...
#>   .. ..- attr(*, "dimnames")=List of 2
#>   .. .. ..$ : chr [1:2] "component_No.1_subcomp.1, PVE: 0.032123" "component_No.1_subcomp.2, PVE: 0.034866"
#>   .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#>   ..$ component_No.2: logi NA
#>   ..$ component_No.3: num [1:2, 1:50] -2.585 -1.284 2.446 0.791 -1.365 ...
#>   .. ..- attr(*, "dimnames")=List of 2
#>   .. .. ..$ : chr [1:2] "component_No.3_subcomp.1, PVE: 0.045145" "component_No.3_subcomp.2, PVE: 0.045044"
#>   .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#>   ..$ component_No.4: logi NA
#>   ..$ component_No.5: num [1:2, 1:50] 1.193 -1.185 0.26 3.479 0.334 ...
#>   .. ..- attr(*, "dimnames")=List of 2
#>   .. .. ..$ : chr [1:2] "component_No.5_subcomp.1, PVE: 0.045281" "component_No.5_subcomp.2, PVE: 0.039914"
#>   .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...
#>   ..$ component_No.6: logi NA
#>   ..$ component_No.7: logi NA
#>   ..$ component_No.8: logi NA
#>   ..$ component_No.9: num [1:2, 1:50] 3.501 -1.402 -0.344 3.473 1.373 ...
#>   .. ..- attr(*, "dimnames")=List of 2
#>   .. .. ..$ : chr [1:2] "component_No.9_subcomp.1, PVE: 0.057037" "component_No.9_subcomp.2, PVE: 0.056014"
#>   .. .. ..$ : chr [1:50] "subject_No.1" "subject_No.2" "subject_No.3" "subject_No.4" ...

Weighting data sets by different weights

The SJD package also allows users to specify a weight for each dataset during the analysis.

# Simulate the datasets
dataset = list(matrix(runif(5000, 1, 2), nrow = 100, ncol = 50),
               matrix(runif(5000, 1, 2), nrow = 100, ncol = 50),
               matrix(runif(5000, 1, 2), nrow = 100, ncol = 50),
               matrix(runif(5000, 1, 2), nrow = 100, ncol = 50))
               
## Specify the structure among the datasets
group = list(c(1,2,3,4), c(1,2), c(3,4), c(1,3), c(2,4), c(1), c(2), c(3), c(4))
comp_num = c(2,2,2,2,2,2,2,2,2)
weighting = c(2, 1, 4, 3)

## Separate PCA, ICA, NMF
sepPCA_res = sepPCA(dataset, comp_num, weighting)
sepICA_res = sepICA(dataset, comp_num, weighting)
sepNMF_res = sepNMF(dataset, comp_num, weighting)

## Concatenated PCA, ICA, NMF
concatPCA_res = concatPCA(dataset, group, comp_num, weighting)
concatICA_res = concatICA(dataset, group, comp_num, weighting)
concatNMF_res = concatNMF(dataset, group, comp_num, weighting)

## Joint PCA, ICA, NMF
jointPCA_res = jointPCA(dataset, group, comp_num, weighting)
jointICA_res = jointICA(dataset, group, comp_num, weighting)
jointNMF_res = jointNMF(dataset, group, comp_num, weighting)

## twoStageLCA
twoStageLCA_res = twoStageLCA(dataset, group, comp_num, weighting)

Projecting new data sets to extracted components

# Simulate the datasets
dataset = list(matrix(runif(5000, 1, 2), nrow = 100, ncol = 50),
               matrix(runif(5000, 1, 2), nrow = 100, ncol = 50),
               matrix(runif(5000, 1, 2), nrow = 100, ncol = 50),
               matrix(runif(5000, 1, 2), nrow = 100, ncol = 50))
               
## Specify the structure among the datasets
group = list(c(1,2,3,4), c(1,2), c(3,4), c(1,3), c(2,4), c(1), c(2), c(3), c(4))
comp_num = c(2,2,2,2,2,2,2,2,2)

## New data sets to be projected onto the extracted components
proj_dataset = list(matrix(runif(5000, 1, 2), nrow = 100, ncol = 50))
proj_group = list(c(TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE))

## Concatenated PCA with the projection functionality turned on
res_concatPCA = concatPCA(dataset, group, comp_num, weighting = NULL, proj_dataset = proj_dataset, proj_group = proj_group)

Huan Chen

Jinrui Liu

Shreyash Sonthalia

Guangyan Li

Carlo Colantuoni

02 October, 2022

Install and load `SJD` package

Simple example

Weighting data sets by different weights

Projecting new data sets to extracted components

Demo

Huan Chen

Jinrui Liu

Shreyash Sonthalia

Guangyan Li

Carlo Colantuoni

02 October, 2022

Install and load SJD package

Simple example

Weighting data sets by different weights

Projecting new data sets to extracted components

Install and load `SJD` package