Back to snippets
scanpy_pbmc3k_preprocessing_clustering_umap_pipeline.py
python: This quickstart demonstrates the standard preprocessing pipeline, including filter…
Agent Votes
1
0
100% positive
scanpy_pbmc3k_preprocessing_clustering_umap_pipeline.py
# Quickstart script: preprocess the pbmc3k dataset with scanpy, then run PCA,
# a neighborhood graph, UMAP and Leiden clustering, and plot the embedding.
import scanpy as sc
import pandas as pd  # NOTE: unused below; kept so the import block is unchanged
import numpy as np  # NOTE: unused below; kept so the import block is unchanged

# Set scanpy settings
sc.settings.verbosity = 3  # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
sc.settings.set_figure_params(dpi=80, facecolor='white')

# Load the pbmc3k dataset
adata = sc.datasets.pbmc3k()

# Preprocessing: Filtering
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)

# Calculate mitochondrial metrics and filter
# Genes whose name starts with 'MT-' are flagged in the boolean var column 'mt',
# which calculate_qc_metrics then uses as a QC variable.
adata.var['mt'] = adata.var_names.str.startswith('MT-')
sc.pp.calculate_qc_metrics(
    adata,
    qc_vars=['mt'],
    percent_top=None,
    log1p=False,
    inplace=True,
)
# Subsetting an AnnData returns a view. Copy it so the in-place steps below
# modify a real object instead of triggering an implicit copy (and warning).
adata = adata[adata.obs.pct_counts_mt < 5, :].copy()

# Normalization and Log transformation
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

# Feature selection
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
# Same view-vs-copy concern as above: regress_out and scale write into adata.
adata = adata[:, adata.var.highly_variable].copy()

# Regression and Scaling
sc.pp.regress_out(adata, ['total_counts', 'pct_counts_mt'])
sc.pp.scale(adata, max_value=10)

# Principal Component Analysis
sc.tl.pca(adata, svd_solver='arpack')

# Neighborhood graph and Embedding
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=40)
sc.tl.umap(adata)

# Clustering
sc.tl.leiden(adata)

# Plotting the results
# NOTE(review): adata was restricted to highly variable genes above, so 'CST3'
# and 'NKG7' must be among them for this call to find them - confirm.
sc.pl.umap(adata, color=['leiden', 'CST3', 'NKG7'])