Skip to contents

Build SummarizedExperiment using a Seurat object

Usage

dataset_seurat(
  seurat_obj,
  count_assay,
  cell_id_col,
  cell_type_col,
  tpm_assay = NULL,
  name = "SimBu_dataset",
  spike_in_col = NULL,
  additional_cols = NULL,
  filter_genes = TRUE,
  variance_cutoff = 0,
  type_abundance_cutoff = 0,
  scale_tpm = TRUE
)

Arguments

seurat_obj

(mandatory) Seurat object containing the count data (and, optionally, TPM data) as assays

count_assay

(mandatory) name of assay in Seurat object which contains count data in 'counts' slot

cell_id_col

(mandatory) name of column in Seurat meta.data with unique cell ids

cell_type_col

(mandatory) name of column in Seurat meta.data with cell type name

tpm_assay

name of assay in Seurat object which contains TPM data in 'counts' slot

name

name of the dataset; will be used for new unique IDs of cells

spike_in_col

which column in annotation contains information on spike_in counts, which can be used to re-scale counts; mandatory for spike_in scaling factor in simulation

additional_cols

list of column names in annotation that should also be stored in the dataset object

filter_genes

boolean, if TRUE, removes all genes with 0 expression over all samples & genes with variance below variance_cutoff

variance_cutoff

numeric, is only applied if filter_genes is TRUE: removes all genes with variance below the chosen cutoff

type_abundance_cutoff

numeric, removes all cells whose cell type appears fewer times than the given value. This removes low-abundance cell types

scale_tpm

boolean, if TRUE (default) the cells in tpm_matrix will be scaled to sum up to 1e6

Value

Returns a SummarizedExperiment object

Examples

# Shared identifiers: 1000 genes (rows) and 300 cells (columns).
gene_ids <- paste0("gene-", seq_len(1000))
cell_ids <- paste0("cell-", seq_len(300))

# Draw two independent Poisson matrices (counts first, then the TPM source)
# and store both as sparse matrices.
counts <- Matrix::Matrix(
  matrix(stats::rpois(3e5, 5), ncol = 300),
  sparse = TRUE
)
tpm <- Matrix::Matrix(
  matrix(stats::rpois(3e5, 5), ncol = 300),
  sparse = TRUE
)

# Rescale every cell (column) of the TPM matrix so that it sums to 1e6.
cell_totals <- Matrix::colSums(tpm)
tpm <- Matrix::t(1e6 * Matrix::t(tpm) / cell_totals)

rownames(counts) <- gene_ids
colnames(counts) <- cell_ids
rownames(tpm) <- gene_ids
colnames(tpm) <- cell_ids

# Per-cell annotation: one row per cell, with the cell ID and its cell type.
annotation <- data.frame(
  "ID" = cell_ids,
  "cell_type" = rep(
    c(
      "T cells CD4", "T cells CD8", "Macrophages",
      "NK cells", "B cells", "Monocytes"
    ),
    times = c(50, 50, 100, 10, 70, 20)
  ),
  row.names = cell_ids
)

# Build the Seurat object from the counts, then attach the TPM values as a
# second assay named "tpm".
seurat_obj <- Seurat::CreateSeuratObject(
  counts = counts,
  assay = "counts",
  meta.data = annotation
)
tpm_assay <- Seurat::CreateAssayObject(counts = tpm)
seurat_obj[["tpm"]] <- tpm_assay

# Convert the Seurat object into a SimBu dataset.
ds_seurat <- SimBu::dataset_seurat(
  seurat_obj = seurat_obj,
  count_assay = "counts",
  cell_id_col = "ID",
  cell_type_col = "cell_type",
  tpm_assay = "tpm",
  name = "seurat_dataset"
)
#> Filtering genes...
#> Created dataset.