Title: | Measuring Cell Type Similarity with Gene Ontology in Single-Cell RNA-Seq |
---|---|
Description: | Traditional methods for analyzing single cell RNA-seq datasets focus solely on gene expression, but this package introduces a novel approach that goes beyond this limitation. Using Gene Ontology terms as features, the package allows for the functional profile of cell populations, and comparison within and between datasets from the same or different species. Our approach enables the discovery of previously unrecognized functional similarities and differences between cell types and has demonstrated success in identifying cell types' functional correspondence even between evolutionarily distant species. |
Authors: | Yuyao Song [aut, cre, ctb], Irene Papatheodorou [aut, ths] |
Maintainer: | Yuyao Song <[email protected]> |
License: | GPL (>= 3) |
Version: | 0.2.1 |
Built: | 2025-02-14 04:14:28 UTC |
Source: | https://github.com/papatheodorou-group/scgoclust |
standard seurat analysis on GO_seurat object
analyzeGOSeurat( go_seurat_obj, cell_type_col, norm_log1p = TRUE, scale.factor = 10000, nfeatures = 2000, cluster_res = 1, min.dist = 0.3, ... )
analyzeGOSeurat( go_seurat_obj, cell_type_col, norm_log1p = TRUE, scale.factor = 10000, nfeatures = 2000, cluster_res = 1, min.dist = 0.3, ... )
go_seurat_obj |
go seurat object created by makeGOSeurat |
cell_type_col |
column name in mera.data storing cell type classes |
norm_log1p |
whether or not to perform data normalisation and log1p transformation, default TRUE |
scale.factor |
param for Seurat NormalizeData |
nfeatures |
param for Seurat FindVariableFeatures |
cluster_res |
resolution for Seurat FindClusters |
min.dist |
param for Seurat RunUMAP |
... |
additional params for all Seurat functions involved in this function |
standard analyzed GO seurat object until UMAP
library(scGOclust) library(httr) httr::set_config(httr::config(ssl_verifypeer = FALSE)) data(mmu_tbl) data(mmu_subset) go_seurat_obj = makeGOSeurat(ensembl_to_GO = mmu_tbl, seurat_obj = mmu_subset, feature_type = "external_gene_name") analyzeGOSeurat(go_seurat_obj = go_seurat_obj, cell_type_col = "cell_type_annotation")
library(scGOclust) library(httr) httr::set_config(httr::config(ssl_verifypeer = FALSE)) data(mmu_tbl) data(mmu_subset) go_seurat_obj = makeGOSeurat(ensembl_to_GO = mmu_tbl, seurat_obj = mmu_subset, feature_type = "external_gene_name") analyzeGOSeurat(go_seurat_obj = go_seurat_obj, cell_type_col = "cell_type_annotation")
calculate correlation between cell types represented by scaled GO, per-species
cellTypeGOCorr(cell_type_go, corr_method = "pearson")
cellTypeGOCorr(cell_type_go, corr_method = "pearson")
cell_type_go |
cell type GO table calculated via getCellTypeGO |
corr_method |
correlation method, choose among "pearson", "kendall", "spearman", default 'pearson' |
a dataframe with correlation between cell types
library(scGOclust) library(httr) httr::set_config(httr::config(ssl_verifypeer = FALSE)) data(mmu_tbl) data(mmu_subset) go_seurat_obj = makeGOSeurat(ensembl_to_GO = mmu_tbl, seurat_obj = mmu_subset, feature_type = "external_gene_name") cell_type_go = getCellTypeGO(go_seurat_obj = go_seurat_obj, cell_type_co = "cell_type_annotation") cellTypeGOCorr(cell_type_go = cell_type_go, corr_method = "pearson")
library(scGOclust) library(httr) httr::set_config(httr::config(ssl_verifypeer = FALSE)) data(mmu_tbl) data(mmu_subset) go_seurat_obj = makeGOSeurat(ensembl_to_GO = mmu_tbl, seurat_obj = mmu_subset, feature_type = "external_gene_name") cell_type_go = getCellTypeGO(go_seurat_obj = go_seurat_obj, cell_type_co = "cell_type_annotation") cellTypeGOCorr(cell_type_go = cell_type_go, corr_method = "pearson")
calculate cross-species correlation between cell types represented by scaled GO
crossSpeciesCellTypeGOCorr( species_1, species_2, cell_type_go_sp1, cell_type_go_sp2, corr_method = "pearson" )
crossSpeciesCellTypeGOCorr( species_1, species_2, cell_type_go_sp1, cell_type_go_sp2, corr_method = "pearson" )
species_1 |
name of species one |
species_2 |
name of species two |
cell_type_go_sp1 |
cell type GO table of species one calculated via getCellTypeGO |
cell_type_go_sp2 |
cell type GO table of species two calculated via getCellTypeGO |
corr_method |
correlation method, choose among "pearson", "kendall", "spearman", default 'pearson' |
correlation between cell types
library(scGOclust) library(httr) httr::set_config(httr::config(ssl_verifypeer = FALSE)) data(mmu_tbl) data(mmu_subset) data(dme_tbl) data(dme_subset) mmu_go_obj = makeGOSeurat(ensembl_to_GO = mmu_tbl, seurat_obj = mmu_subset, feature_type = "external_gene_name") dme_go_obj = makeGOSeurat(ensembl_to_GO = dme_tbl, seurat_obj = dme_subset, feature_type = "external_gene_name") mmu_cell_type_go = getCellTypeGO(go_seurat_obj = mmu_go_obj, cell_type_co = "cell_type_annotation") dme_cell_type_go = getCellTypeGO(go_seurat_obj = dme_go_obj, cell_type_co = "annotation") crossSpeciesCellTypeGOCorr(species_1 = 'mmusculus', species_2 = 'dmelanogaster', cell_type_go_sp1 = mmu_cell_type_go, cell_type_go_sp2 = dme_cell_type_go)
library(scGOclust) library(httr) httr::set_config(httr::config(ssl_verifypeer = FALSE)) data(mmu_tbl) data(mmu_subset) data(dme_tbl) data(dme_subset) mmu_go_obj = makeGOSeurat(ensembl_to_GO = mmu_tbl, seurat_obj = mmu_subset, feature_type = "external_gene_name") dme_go_obj = makeGOSeurat(ensembl_to_GO = dme_tbl, seurat_obj = dme_subset, feature_type = "external_gene_name") mmu_cell_type_go = getCellTypeGO(go_seurat_obj = mmu_go_obj, cell_type_co = "cell_type_annotation") dme_cell_type_go = getCellTypeGO(go_seurat_obj = dme_go_obj, cell_type_co = "annotation") crossSpeciesCellTypeGOCorr(species_1 = 'mmusculus', species_2 = 'dmelanogaster', cell_type_go_sp1 = mmu_cell_type_go, cell_type_go_sp2 = dme_cell_type_go)
Drosophila gut scRNA-seq data, 10X Chromium Subset to 45 cells per cell type as an example data
dme_subset
dme_subset
a 'Seurat' object
<https://flycellatlas.org/>
Drosophila EMSEMBL gene and GO annotation, subset to genes present in 'dme_subset'
dme_tbl
dme_tbl
a 'data.frame' object
<http://www.ensembl.org/>
get requested ensembl ID to GO mapping table
ensemblToGo( species, GO_type = "biological_process", GO_linkage_type = c("standard"), ... )
ensemblToGo( species, GO_type = "biological_process", GO_linkage_type = c("standard"), ... )
species |
species name matching ensembl biomaRt naming, such as hsapiens, mmusculus |
GO_type |
GO term type, choose among 'biological_process', 'molecular_function', 'cellular_component', default 'biological_process' |
GO_linkage_type |
GO annotation evidence codes to include. Default is 'standard', which means only including manually checked records (excluding IEA) and excluding those inferred from gene expression experiments to maximally suffice the species expression independence assumption. 'Stringent' means only including those with experimental evidence, also not from gene expression experiment, or from manual curation with evidence (excluding those from mass-annotation pipelines). Choose among 'experimental', 'phylogenetic', 'computational', 'author', 'curator', 'electronic', 'standard', stringent' |
... |
additional params for useEnsembl function called in this function |
a table with ensembl to GO terms mapping including requested linkage type
library(scGOclust) library(httr) httr::set_config(httr::config(ssl_verifypeer = FALSE)) ensemblToGo("mmusculus", GO_type = "biological_process", GO_linkage_type = 'stringent')
library(scGOclust) library(httr) httr::set_config(httr::config(ssl_verifypeer = FALSE)) ensemblToGo("mmusculus", GO_type = "biological_process", GO_linkage_type = 'stringent')
get per cell type average scaled vector of GO terms
getCellTypeGO(go_seurat_obj, cell_type_col, norm_log1p = TRUE)
getCellTypeGO(go_seurat_obj, cell_type_col, norm_log1p = TRUE)
go_seurat_obj |
go seurat object created by makeGOSeurat |
cell_type_col |
column name in mera.data storing cell type classes |
norm_log1p |
whether or not to perform data normalisation and log1p transformation, default TRUE |
a table of scaled GO representation per cell type (averaged)
library(scGOclust) library(httr) httr::set_config(httr::config(ssl_verifypeer = FALSE)) data(mmu_tbl) data(mmu_subset) go_seurat_obj = makeGOSeurat(ensembl_to_GO = mmu_tbl, seurat_obj = mmu_subset, feature_type = "external_gene_name") getCellTypeGO(go_seurat_obj = go_seurat_obj, cell_type_co = "cell_type_annotation")
library(scGOclust) library(httr) httr::set_config(httr::config(ssl_verifypeer = FALSE)) data(mmu_tbl) data(mmu_subset) go_seurat_obj = makeGOSeurat(ensembl_to_GO = mmu_tbl, seurat_obj = mmu_subset, feature_type = "external_gene_name") getCellTypeGO(go_seurat_obj = go_seurat_obj, cell_type_co = "cell_type_annotation")
record some global variables: pre-defined column name in biomaRt query and markers
create a seurat object with GO terms
makeGOSeurat(ensembl_to_GO, seurat_obj, feature_type = "ensembl_gene_id")
makeGOSeurat(ensembl_to_GO, seurat_obj, feature_type = "ensembl_gene_id")
ensembl_to_GO |
ensembl_to_go mapping table from function ensemblToGo |
seurat_obj |
count matrix with genes to cells |
feature_type |
feature type of count matrix, choose from ensembl_gene_id, external_gene_name, default ensembl_gene_id |
a seurat object with GO terms as features
library(scGOclust) library(httr) httr::set_config(httr::config(ssl_verifypeer = FALSE)) data(mmu_tbl) data(mmu_subset) makeGOSeurat(ensembl_to_GO = mmu_tbl, seurat_obj = mmu_subset, feature_type = "external_gene_name")
library(scGOclust) library(httr) httr::set_config(httr::config(ssl_verifypeer = FALSE)) data(mmu_tbl) data(mmu_subset) makeGOSeurat(ensembl_to_GO = mmu_tbl, seurat_obj = mmu_subset, feature_type = "external_gene_name")
Mouse stomach and intestine scRNA-seq data, microwell-seq Subset to 50 cells per cell type as an example data
mmu_subset
mmu_subset
a 'Seurat' object
<https://bis.zju.edu.cn/MCA/>
Mouse EMSEMBL gene and GO annotation, subset to genes present in 'mmu_subset'
mmu_tbl
mmu_tbl
a 'data.frame' object
<http://www.ensembl.org/>
plot clustered heatmap for cell type corr
plotCellTypeCorrHeatmap(corr_matrix, scale = NA, ...)
plotCellTypeCorrHeatmap(corr_matrix, scale = NA, ...)
corr_matrix |
correlation matrix from cellTypeGOCorr or crossSpeciesCellTypeGOCorr |
scale |
scale value by column, row, or default no scaling (NA) |
... |
params to pass to slanter::sheatmap |
a sheatmap heatmap
library(scGOclust) library(httr) httr::set_config(httr::config(ssl_verifypeer = FALSE)) data(mmu_tbl) data(mmu_subset) go_seurat_obj = makeGOSeurat(ensembl_to_GO = mmu_tbl, seurat_obj = mmu_subset, feature_type = "external_gene_name") cell_type_go = getCellTypeGO(go_seurat_obj = go_seurat_obj, cell_type_co = "cell_type_annotation") corr_matrix = cellTypeGOCorr(cell_type_go = cell_type_go, corr_method = "pearson") plotCellTypeCorrHeatmap(corr_matrix = corr_matrix, scale = "column")
library(scGOclust) library(httr) httr::set_config(httr::config(ssl_verifypeer = FALSE)) data(mmu_tbl) data(mmu_subset) go_seurat_obj = makeGOSeurat(ensembl_to_GO = mmu_tbl, seurat_obj = mmu_subset, feature_type = "external_gene_name") cell_type_go = getCellTypeGO(go_seurat_obj = go_seurat_obj, cell_type_co = "cell_type_annotation") corr_matrix = cellTypeGOCorr(cell_type_go = cell_type_go, corr_method = "pearson") plotCellTypeCorrHeatmap(corr_matrix = corr_matrix, scale = "column")
plot Sankey diagram for cell type links above a certain threshould
plotCellTypeSankey(corr_matrix, corr_threshould = 0.1, ...)
plotCellTypeSankey(corr_matrix, corr_threshould = 0.1, ...)
corr_matrix |
cell type corr matrix from crossSpeciesCellTypeGOCorr |
corr_threshould |
minimum corr value for positively related cell types, default 0.6 |
... |
additional params for sankeyNetwork |
a Sankey plot showing related cell types
library(scGOclust) library(httr) httr::set_config(httr::config(ssl_verifypeer = FALSE)) data(mmu_tbl) data(mmu_subset) go_seurat_obj = makeGOSeurat(ensembl_to_GO = mmu_tbl, seurat_obj = mmu_subset, feature_type = "external_gene_name") cell_type_go = getCellTypeGO(go_seurat_obj = go_seurat_obj, cell_type_co = "cell_type_annotation") corr_matrix = cellTypeGOCorr(cell_type_go = cell_type_go, corr_method = "pearson") plotCellTypeSankey(corr_matrix = corr_matrix, 0.1)
library(scGOclust) library(httr) httr::set_config(httr::config(ssl_verifypeer = FALSE)) data(mmu_tbl) data(mmu_subset) go_seurat_obj = makeGOSeurat(ensembl_to_GO = mmu_tbl, seurat_obj = mmu_subset, feature_type = "external_gene_name") cell_type_go = getCellTypeGO(go_seurat_obj = go_seurat_obj, cell_type_co = "cell_type_annotation") corr_matrix = cellTypeGOCorr(cell_type_go = cell_type_go, corr_method = "pearson") plotCellTypeSankey(corr_matrix = corr_matrix, 0.1)