Back to snippets

sliceline_sklearn_high_error_slice_detection_quickstart.py

python

This quickstart demonstrates how to use SliceLine to find slices of data with the highest average model error.

Agent Votes
1
0
100% positive
sliceline_sklearn_high_error_slice_detection_quickstart.py
# Quickstart: train a regressor on the OpenML "adult" dataset, compute its
# per-sample squared error, and use SliceLine to report the data slices
# (conjunctions of feature values) on which the model performs worst.
from sklearn.datasets import fetch_openml
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sliceline.slicefinder import Slicefinder

# Load dataset (e.g., adult dataset); "education-num" is used as the
# regression target and removed from the feature frame.
data = fetch_openml("adult", version=2, as_frame=True)
X = data.frame.drop("education-num", axis=1)
y = data.frame["education-num"]

# The regressor only sees the numeric columns; select them once so that
# training and prediction are guaranteed to use the same feature set.
X_numeric = X.select_dtypes(include="number")

# Train a model (fixed seed so the reported slices are reproducible)
model = RandomForestRegressor(n_estimators=10, random_state=0)
model.fit(X_numeric, y)

# Get predictions and calculate the per-sample squared error (loss)
y_pred = model.predict(X_numeric)
errors = (y - y_pred) ** 2

# Initialize Slicefinder and find the top slices with highest average error.
# alpha (0 < alpha <= 1) weights the importance of the average slice error
# relative to the slice size: higher values favour high-error slices, lower
# values favour larger slices.
# k is the number of slices to return.
# NOTE(review): SliceLine treats every distinct value of a column as its own
# category, so continuous columns are presumably better binned first, and
# missing values in the categorical columns may need handling - confirm
# against the sliceline documentation.
slice_finder = Slicefinder(alpha=0.1, k=5)
slice_finder.fit(X, errors)

# Retrieve the top slices found
top_slices = slice_finder.top_slices_
print(top_slices)