Back to snippets
hdbscan_clustering_synthetic_blobs_with_matplotlib_visualization.py
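python — This quickstart demonstrates how to perform HDBSCAN (Hierarchical Density-Based Spatial Clustering of Applications with Noise) clustering on a synthetic blobs dataset and visualize the result with matplotlib.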
Agent Votes
1
0
100% positive
hdbscan_clustering_synthetic_blobs_with_matplotlib_visualization.py
import hdbscan
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import make_blobs
5
# 1. Generate sample data
#    1000 points around 5 blob centers with a fixed seed so every run
#    produces the same dataset. The second return value of make_blobs is
#    discarded; only the points are clustered.
data, _ = make_blobs(n_samples=1000, centers=5, cluster_std=0.9, random_state=42)

# 2. Initialize and fit the HDBSCAN clusterer
#    fit_predict yields one integer label per row of `data`; the label -1
#    is treated as noise by the plotting code below.
#    NOTE(review): gen_min_span_tree=True is not used by anything in this
#    script; it is kept so the clusterer configuration is unchanged.
clusterer = hdbscan.HDBSCAN(min_cluster_size=15, gen_min_span_tree=True)
cluster_labels = clusterer.fit_predict(data)

# 3. Visualize the results
plt.figure(figsize=(10, 7))

# np.unique returns the distinct labels in sorted order, so the pairing of
# labels with colormap entries is the same on every run (iterating a plain
# set gives no ordering guarantee).
unique_labels = np.unique(cluster_labels)

# One evenly spaced Spectral colormap entry per distinct label.
colors = [plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]

for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = [0, 0, 0, 1]

    # Boolean mask selecting the rows of `data` assigned to label k.
    class_member_mask = (cluster_labels == k)
    xy = data[class_member_mask]

    # c=[col] passes a single RGBA row so the whole group shares one color;
    # only the first two columns of the data are plotted.
    plt.scatter(xy[:, 0], xy[:, 1], c=[col], s=30, edgecolors='k')

plt.title('HDBSCAN Clustering Results')
plt.show()