Skip to content

hamidghaedi/scRNA_cell_annotation

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

6 Commits
 
 
 
 

Repository files navigation

scRNA cell annotation

In this repo, we will be doing:

(i) Assigning cell labels from reference data, i.e. automatic cell annotation of the bladder cancer dataset used in the scRNA-seq analysis, using reference data from Tabula Sapiens.

(ii) Assigning cell labels to sub-clusters of epithelial cells using gene sets (to be added)

(i) Assigning cell labels using a ref dataset

# Packages required for the reference-based annotation workflow
library(Seurat)
library(SeuratData)
library(SeuratDisk)

Download Tabula Sapiens for bladder

# Directory in which the downloaded file is saved (current working directory)
destination_dir <- getwd()

# URL of the file to be downloaded
file_url <- "https://ndownloader.figshare.com/files/27388874"

# File name used for the local copy
file_name <- "TS_Bladder.h5ad"

# Download the file only when no local copy exists yet, so re-running the
# script does not fetch it again
destination_file <- file.path(destination_dir, file_name)
if (!file.exists(destination_file)) {
  # R's default download timeout is short for a large file; raise it for this
  # call only and restore the previous setting afterwards
  old_options <- options(timeout = max(3600, getOption("timeout")))
  tryCatch(
    # mode = "wb": the .h5ad file is binary and must not be written in text mode
    download.file(file_url, destination_file, mode = "wb"),
    error = function(e) {
      # Remove a partial download so the next run does not mistake it for a
      # complete file
      unlink(destination_file)
      stop(e)
    },
    finally = options(old_options)
  )
}

Reading and processing ref data

in python:

# import libs
import glob
import gzip
import os
import shutil

import scanpy as sc
from scipy import io

# Set working directory
# NOTE(review): machine-specific absolute path; adjust to your own checkout
os.chdir("C://Users/qaedi/OneDrive - Queen's University/Documents/scRNA_cell_annotation")

# creating a dir
# os.makedirs is plain Python (the original `!mkdir` only works inside
# IPython/Jupyter) and does not fail when the directory already exists
os.makedirs('matrix_files', exist_ok=True)

# reading h5ad file
adata = sc.read_h5ad('TS_Bladder.h5ad')
print(adata)
#AnnData object with n_obs × n_vars = 21568 × 58833
#    obs: 'Annotation', 'Predictability', 'Manually Annotated', 'Donor', 'Method', 'Organ',
#'Compartment', 'Anatomical Information'
#    var: 'gene_symbol', 'ensembl_id', 'gene_length'
#    uns: 'Annotation_colors', 'Compartment_colors', 'Donor_colors', 'Manually Annotated_colors',
#'Method_colors', 'Organ_colors', 'Propagated.Annotationcollapsed_colors', '_scvi', 'donor_colors',
#'leiden', 'method_colors', 'neighbors', 'tissue_colors', 'umap'
#    obsm: 'X_umap'
#    layers: 'counts', 'raw_counts'
#    obsp: 'connectivities', 'distances'

# Ensuring that the raw count is the main matrix
adata.X = adata.layers['raw_counts']

# Export the per-cell annotations. The R step reads "ref_metadata.csv" and uses
# its "X" column as row names; an empty index label gives the index column an
# empty header, which read.csv names "X".
# NOTE(review): this export was missing from the original snippet; confirm it
# matches how ref_metadata.csv was produced.
adata.obs.to_csv('ref_metadata.csv', index_label='')

# generating files needed to create Seurat object
with open('matrix_files/barcodes.tsv', 'w') as f:
    for item in adata.obs_names:
        f.write(item + '\n')

with open('matrix_files/features.tsv', 'w') as f:
    for x in adata.var_names:
        f.write('\t'.join([x, x, 'Gene Expression']) + '\n')

# Transposed so that rows are the entries of var_names and columns the
# entries of obs_names
io.mmwrite('matrix_files/matrix', adata.X.T)

# gzipping the files
for file_path in glob.glob("matrix_files/*"):
    # Skip archives left over from a previous run instead of compressing them again
    if file_path.endswith(".gz"):
        continue
    with open(file_path, "rb") as file_in, gzip.open(file_path + ".gz", "wb") as file_out:
        # Stream in chunks rather than loading the whole file into memory
        shutil.copyfileobj(file_in, file_out)

Now we have a directory with all needed files to create a Seurat object:

library(Seurat)
library(SingleR)
library(patchwork)
library(cowplot)

# ---- Build the reference Seurat object ----

## Count matrix read from the files in matrix_files/
raw_data <- Read10X(data.dir = "matrix_files/")

## Per-cell metadata; the X column provides the row names
metadata <- read.csv("ref_metadata.csv")
rownames(metadata) <- metadata$X

## Combine counts and metadata into one object
su <- CreateSeuratObject(counts = raw_data, meta.data = metadata)

## Keep only the cells flagged as manually annotated
su <- subset(su, subset = Manually.Annotated == TRUE)

## Batch identifier: donor and method joined by "_"
su$batch_id <- paste(su$Donor, su$Method, sep = "_")

# ---- Explore sources of variation ----

## Log-normalize the counts
su <- NormalizeData(object = su, normalization.method = "LogNormalize")

## Select the 2000 most variable features
su <- FindVariableFeatures(
  su,
  selection.method = "vst",
  nfeatures = 2000,
  verbose = TRUE
)

## Scale, then run PCA
su <- ScaleData(su)
su <- RunPCA(su)

## PCA colored by batch id: all batches in one panel, then one panel per batch
pca_all_batches <- DimPlot(su, reduction = "pca", group.by = "batch_id")
pca_per_batch <- DimPlot(
  su,
  reduction = "pca",
  group.by = "batch_id",
  split.by = "batch_id"
)
pca_all_batches + pca_per_batch

PCA_batch_id.png

The PCA shows no obvious separation between batches, so we can proceed with the data as is.

## Convert the reference Seurat object into a SingleCellExperiment object
refSce <- as.SingleCellExperiment(x = su)

Reading and processing query data

# Load the saved query Seurat object
hsu <- readRDS("~/scRNA/github/harmonized_seurat.RDS")

# Store the current cell identities as a metadata column
hsu$clusters <- Idents(hsu)

# Rebuild a Seurat object from the RNA assay data and the existing metadata
query_counts <- GetAssayData(object = hsu, assay = "RNA")
query_su <- CreateSeuratObject(
  counts = query_counts,
  meta.data = hsu@meta.data
)

# Convert the query object to a SingleCellExperiment
querySce <- as.SingleCellExperiment(x = query_su)

Running singleR to predict cell types

# Predict a label for every query cell from the reference annotations
pred <- SingleR(
  test = querySce,
  ref = refSce,
  labels = refSce$Annotation,
  de.method = "wilcox"
)

# Checking rownames
# The labels are attached by position below, so stop (rather than just print
# FALSE) when the prediction rows do not line up with the query cells
stopifnot(identical(rownames(query_su@meta.data), rownames(pred)))

# Adding predicted classes to metadata
query_su$singleR_pred <- pred$pruned.labels

Prediction result visualization

library(pheatmap)

# Cross-tabulate cluster identity against the SingleR prediction
tab <- table(query_su$clusters, query_su$singleR_pred)

# White-to-blue palette with 101 steps
heat_colors <- colorRampPalette(c("white", "blue"))(101)

# Heatmap of the log2 counts, offset by 10
pheatmap(log2(tab + 10), color = heat_colors)

As shown in the plot below, there is substantial overlap between the Tabula Sapiens labels and our manual labels:

heatmap_manual_labels_TS_labels.png

About

Assigning cell labels w/o ref data (Tabula Sapiens)

Topics

Resources

Stars

Watchers

Forks