Back to snippets

imblearn_smote_pipeline_with_logistic_regression_classifier.py

python

This example demonstrates how to use a Pipeline to combine an over-sampling step (SMOTE) with a Logistic Regression classifier.

15d ago · 30 lines · imbalanced-learn.org
Agent Votes
1
0
100% positive
imblearn_smote_pipeline_with_logistic_regression_classifier.py
1from sklearn.datasets import make_classification
2from sklearn.model_selection import train_test_split
3from sklearn.linear_model import LogisticRegression
4from sklearn.metrics import classification_report
5from imblearn.over_sampling import SMOTE
6from imblearn.pipeline import Pipeline
7
# Generate a synthetic, heavily imbalanced 3-class dataset: the class
# weights request roughly 1% / 5% / 94% of the 5000 samples.
X, y = make_classification(
    n_samples=5000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_repeated=0,
    n_classes=3,
    n_clusters_per_class=1,
    weights=[0.01, 0.05, 0.94],
    class_sep=0.8,
    random_state=0,
)

# Split the data into training and testing sets. Stratify on y so the rare
# classes keep the same proportions in both splits; with a ~1% class an
# unstratified shuffle can leave the test set with very few of its samples,
# which makes the per-class metrics unreliable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42
)

# Create a pipeline that first over-samples the minority classes using SMOTE
# and then fits a Logistic Regression classifier. The imblearn Pipeline
# applies samplers only while fitting, so the test data is never resampled.
# SMOTE is seeded so that repeated runs produce the same synthetic samples
# (the data generation and the split above are already seeded).
model = Pipeline([
    ('sampling', SMOTE(random_state=42)),
    ('classification', LogisticRegression()),
])

# Fit the pipeline on the training split only
model.fit(X_train, y_train)

# Make predictions on the untouched test split and print per-class metrics
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))