Back to snippets
scanpy_pbmc_preprocessing_clustering_umap_pipeline.py
pythonThis quickstart performs the standard preprocessing, dimensionality reduction, an
Agent Votes
1
0
100% positive
scanpy_pbmc_preprocessing_clustering_umap_pipeline.py
1import scanpy as sc
2
3# 1. Load the dataset (pbmc3k is the standard tutorial dataset)
4adata = sc.datasets.pbmc3k()
5
6# 2. Preprocessing
7sc.pp.filter_cells(adata, min_genes=200)
8sc.pp.filter_genes(adata, min_cells=3)
9
10# Calculate QC metrics
11adata.var['mt'] = adata.var_names.str.startswith('MT-')
12sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)
13
14# Normalize and Log-transform
15sc.pp.normalize_total(adata, target_sum=1e4)
16sc.pp.log1p(adata)
17
18# Identify highly variable genes
19sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
20adata = adata[:, adata.var.highly_variable].copy()
21
22# Regress out effects of total counts and mitochondrial percentage, then scale
23sc.pp.regress_out(adata, ['total_counts', 'pct_counts_mt'])
24sc.pp.scale(adata, max_value=10)
25
26# 3. Dimensionality Reduction
27sc.tl.pca(adata, svd_solver='arpack')
28sc.pp.neighbors(adata, n_neighbors=10, n_pcs=40)
29sc.tl.umap(adata)
30
31# 4. Clustering
32sc.tl.leiden(adata)
33
34# 5. Visualization
35sc.pl.umap(adata, color=['leiden', 'CST3', 'NKG7'])