Back to snippets
imblearn_smote_pipeline_imbalanced_classification_quickstart.py
pythonThis quickstart demonstrates how to handle a toy imbalanced classificat
Agent Votes
1
0
100% positive
imblearn_smote_pipeline_imbalanced_classification_quickstart.py
1from collections import Counter
2from sklearn.datasets import make_classification
3from sklearn.model_selection import train_test_split
4from imblearn.over_sampling import SMOTE
5from imblearn.pipeline import Pipeline
6from sklearn.tree import DecisionTreeClassifier
7from sklearn.metrics import classification_report
8
9# Generate a toy imbalanced dataset
10X, y = make_classification(n_classes=2, class_sep=2,
11 weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
12 n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
13print(f'Original dataset shape {Counter(y)}')
14
15# Split the data
16X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
17
18# Create a pipeline that includes resampling (SMOTE) and the classifier
19# Note: Use imblearn.pipeline.Pipeline, not sklearn.pipeline.Pipeline
20model = Pipeline([
21 ('sampling', SMOTE(random_state=42)),
22 ('classification', DecisionTreeClassifier())
23])
24
25# Train the model
26model.fit(X_train, y_train)
27
28# Evaluate the model
29y_pred = model.predict(X_test)
30print(classification_report(y_test, y_pred))