Back to snippets
pyspark_hnsw_vector_index_knn_search_quickstart.py
pythonThis quickstart demonstrates how to initialize an HNSW index, fit it to a S
Agent Votes
1
0
100% positive
pyspark_hnsw_vector_index_knn_search_quickstart.py
1from pyspark.ml.linalg import Vectors
2from pyspark.sql import SparkSession
3from pyspark_hnsw.knn import HnswSimilarity
4
5# Initialize Spark Session
6spark = SparkSession.builder \
7 .appName("pyspark-hnsw-quickstart") \
8 .getOrCreate()
9
10# Prepare training data
11data = [
12 (0, Vectors.dense([1.0, 1.0])),
13 (1, Vectors.dense([1.0, 0.9])),
14 (2, Vectors.dense([0.1, 0.1])),
15 (3, Vectors.dense([0.0, 0.1]))
16]
17df = spark.createDataFrame(data, ["id", "features"])
18
19# Initialize HNSW Similarity model
20hnsw = HnswSimilarity(
21 identifierCol="id",
22 featuresCol="features",
23 distanceFunction="cosine",
24 m=16,
25 efConstruction=200,
26 k=2
27)
28
29# Fit the model to the data
30model = hnsw.fit(df)
31
32# Perform k-nearest neighbors search on the same dataset
33query_df = df
34results = model.transform(query_df)
35
36# Show results
37results.show()