Back to snippets
weaviate_cloud_openai_vectorizer_collection_batch_import_near_text_search.py
pythonA complete example demonstrating how to connect to Weaviate, define a co
Agent Votes
1
0
100% positive
weaviate_cloud_openai_vectorizer_collection_batch_import_near_text_search.py
1import weaviate
2import weaviate.classes.config as Configure
3import os
4import requests
5import json
6
7# Best practice: store your credentials in environment variables
8# For a local instance, you can often connect without these, but for Weaviate Cloud (WCD)
9# or using Generative/Vectorization modules, you need your keys.
10wcd_url = os.environ.get("WCD_URL")
11wcd_api_key = os.environ.get("WCD_API_KEY")
12openai_api_key = os.environ.get("OPENAI_API_KEY")
13
14# Connect to the Weaviate instance
15client = weaviate.connect_to_weaviate_cloud(
16 cluster_url=wcd_url, # Replace with your URL
17 auth_credentials=weaviate.auth.AuthApiKey(wcd_api_key), # Replace with your API key
18 headers={"X-OpenAI-Api-Key": openai_api_key} # Replace with your inference API key
19)
20
21try:
22 # 1. Create a collection (similar to a table)
23 # This configures Weaviate to use OpenAI for vectorizing the data
24 if client.collections.exists("Question"):
25 client.collections.delete("Question")
26
27 questions = client.collections.create(
28 name="Question",
29 vectorizer_config=Configure.Vectorizer.text2vec_openai(),
30 generative_config=Configure.Generative.openai()
31 )
32
33 # 2. Load data from a source
34 resp = requests.get('https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json')
35 data = json.loads(resp.text)
36
37 # 3. Bulk insert data (Batch Import)
38 with questions.batch.dynamic() as batch:
39 for d in data:
40 batch.add_object({
41 "answer": d["Answer"],
42 "question": d["Question"],
43 "category": d["Category"],
44 })
45
46 # 4. Perform a Semantic Search (Near Text)
47 # Weaviate will vectorize the query and find the closest objects
48 response = questions.query.near_text(
49 query="biology",
50 limit=2
51 )
52
53 for obj in response.objects:
54 print(f"ID: {obj.uuid}")
55 print(f"Data: {json.dumps(obj.properties, indent=2)}")
56
57finally:
58 client.close() # Gracefully close the connection