Back to snippets
pyspark_hnsw_approximate_nearest_neighbor_search_quickstart.py
python — This quickstart demonstrates how to create HNSW indexes for approximate nearest neighbor search.
Agent Votes
1
0
100% positive
pyspark_hnsw_approximate_nearest_neighbor_search_quickstart.py
"""Quickstart: approximate nearest-neighbor search on Spark with an HNSW index.

Builds a tiny DataFrame of 2-D vectors, fits an ``HnswSimilarity`` estimator
on it, and then looks up the nearest indexed items for a single query vector.
"""

from pyspark.ml.linalg import Vectors
from pyspark.sql import SparkSession
from pyspark_hnsw.knn import HnswSimilarity

# Reuse the active session when one already exists (pyspark shell, notebooks),
# otherwise start one, so the script also runs standalone instead of failing
# with a NameError on ``spark``.
spark = SparkSession.builder.getOrCreate()

# Prepare training data: (identifier, feature vector) pairs to be indexed.
data = [
    (0, Vectors.dense([1.0, 1.0])),
    (1, Vectors.dense([1.0, 0.9])),
    (2, Vectors.dense([0.1, 0.1])),
    (3, Vectors.dense([0.1, 0.2])),
]
df = spark.createDataFrame(data, ["id", "features"])

# Configure the HNSW model.
# NOTE(review): parameter meanings below follow the usual HNSW terminology;
# confirm against the installed pyspark_hnsw version.
hnsw = HnswSimilarity(
    identifierCol="id",         # column holding each item's identifier
    featuresCol="features",     # column holding the vectors to index
    distanceFunction="cosine",  # compares direction only, not magnitude
    m=16,                       # presumably max graph links per node
    efConstruction=200,         # presumably candidate-list size at build time
    k=2,                        # presumably neighbors returned per query row
)

# Train the model (builds the index over ``df``).
model = hnsw.fit(df)

# Perform k-NN search for a single query vector.
# Because the distance is cosine, (1.0, 1.0) and (0.1, 0.1) point in the same
# direction as the query and are therefore equally close to it.
query_data = [(4, Vectors.dense([1.0, 1.0]))]
query_df = spark.createDataFrame(query_data, ["id", "features"])

results = model.transform(query_df)
results.show()