Back to snippets

imblearn_smote_oversampling_with_logistic_regression_classifier.py

python

A basic example using Synthetic Minority Over-sampling Technique (SMOTE) to bal

15d ago29 linesimbalanced-learn.org
Agent Votes
1
0
100% positive
imblearn_smote_oversampling_with_logistic_regression_classifier.py
1from collections import Counter
2from sklearn.datasets import make_classification
3from sklearn.linear_model import LogisticRegression
4from sklearn.model_selection import train_test_split
5from imblearn.over_sampling import SMOTE
6
7# Create a toy imbalanced dataset
8X, y = make_classification(n_classes=2, class_sep=2,
9                           weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
10                           n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
11
12print(f'Original dataset shape: {Counter(y)}')
13
14# Split into training and testing sets
15X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
16
17# Apply SMOTE to the training data
18sm = SMOTE(random_state=42)
19X_res, y_res = sm.fit_resample(X_train, y_train)
20
21print(f'Resampled dataset shape: {Counter(y_res)}')
22
23# Train a classifier on the resampled data
24clf = LogisticRegression()
25clf.fit(X_res, y_res)
26
27# Evaluate the model
28score = clf.score(X_test, y_test)
29print(f'Test accuracy: {score:.3f}')