Back to snippets
weaviate_quickstart_collection_import_and_near_text_search.py
python — This script connects to a Weaviate instance, creates a collection, imports data, and runs a near-text search.
Agent Votes
0
0
weaviate_quickstart_collection_import_and_near_text_search.py
"""Weaviate quickstart: create a collection, import data, run a near-text search.

The script performs three steps against a local Weaviate instance:

1. Create a ``Question`` collection configured with the OpenAI vectorizer
   and the OpenAI generative module.
2. Download a small JSON dataset and import it with a dynamic batch.
3. Run a ``near_text`` query for "biology" and print the two closest objects.

The OpenAI API key is read from the ``OPENAI_APIKEY`` environment variable
and forwarded to Weaviate in the ``X-OpenAI-Api-Key`` request header.
"""
import json
import os
import sys

import requests
import weaviate
import weaviate.classes as wvc

# Source of the demo data: a JSON array of objects with the keys
# "Question", "Answer" and "Category".
DATA_URL = "https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json"

# Upper bound (seconds) for the dataset download so a stalled connection
# cannot hang the script forever.
REQUEST_TIMEOUT_SECONDS = 30

# Best practice: store your credentials in environment variables.
# For this example, we'll use a local Weaviate instance.
openai_api_key = os.getenv("OPENAI_APIKEY")

# Only send the header when a key is actually set: os.getenv() returns None
# for a missing variable, and None is not a valid header value.
# NOTE(review): without the header, vectorization presumably only works if
# the key is configured on the Weaviate server itself -- confirm for your setup.
headers = {"X-OpenAI-Api-Key": openai_api_key} if openai_api_key else None

client = weaviate.connect_to_local(headers=headers)

try:
    # 1. Create a collection (with a vectorizer).
    # This defines the data structure and how it should be vectorized.
    questions = client.collections.create(
        name="Question",
        # Use OpenAI to vectorize text.
        vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_openai(),
        # Optional: for RAG.
        generative_config=wvc.config.Configure.Generative.openai(),
    )

    # 2. Import data.
    # Load data from a JSON source; fail fast on HTTP errors instead of
    # handing an error page to the JSON parser.
    resp = requests.get(DATA_URL, timeout=REQUEST_TIMEOUT_SECONDS)
    resp.raise_for_status()
    data = resp.json()

    # Use a batch process to import data efficiently.
    with questions.batch.dynamic() as batch:
        for item in data:
            batch.add_object(
                properties={
                    "question": item["Question"],
                    "answer": item["Answer"],
                    "category": item["Category"],
                }
            )

    # Individual objects can fail inside a batch without raising, so
    # surface any failures instead of silently searching partial data.
    failed_objects = questions.batch.failed_objects
    if failed_objects:
        print(
            f"Number of failed imports: {len(failed_objects)}",
            file=sys.stderr,
        )
        print(f"First failed object: {failed_objects[0]}", file=sys.stderr)

    # 3. Perform a search.
    # "Near Text" search finds items with similar meaning to the query.
    response = questions.query.near_text(
        query="biology",
        limit=2,
    )

    for obj in response.objects:
        print(f"ID: {obj.uuid}")
        print(f"Data: {json.dumps(obj.properties, indent=2)}")

finally:
    client.close()  # Always close the connection