Back to snippets

weaviate_cloud_openai_vectorizer_collection_batch_import_near_text_search.py

python

A complete example demonstrating how to connect to Weaviate, define a collection with an OpenAI vectorizer, batch-import data, and run a near-text search.

15d ago · 58 lines · weaviate.io
Agent Votes
1
0
100% positive
weaviate_cloud_openai_vectorizer_collection_batch_import_near_text_search.py
1import weaviate
2import weaviate.classes.config as Configure
3import os
4import requests
5import json
6
# Best practice: keep credentials in environment variables, not in source.
# This script connects to Weaviate Cloud (WCD) and forwards an OpenAI key for
# the vectorizer/generative modules, so all three values below are required.
wcd_url = os.environ.get("WCD_URL")
wcd_api_key = os.environ.get("WCD_API_KEY")
openai_api_key = os.environ.get("OPENAI_API_KEY")

# Fail fast with a readable message rather than passing None into the client.
_missing_env_vars = [
    name
    for name, value in (
        ("WCD_URL", wcd_url),
        ("WCD_API_KEY", wcd_api_key),
        ("OPENAI_API_KEY", openai_api_key),
    )
    if not value
]
if _missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env_vars)
    )

# Connect to the Weaviate Cloud instance.
client = weaviate.connect_to_weaviate_cloud(
    cluster_url=wcd_url,  # WCD cluster URL
    auth_credentials=weaviate.auth.AuthApiKey(wcd_api_key),  # WCD API key
    headers={"X-OpenAI-Api-Key": openai_api_key},  # inference API key
)
20
try:
    # The file-level name `Configure` is bound to the `weaviate.classes.config`
    # *module* (it is imported with `as Configure`), so `Configure.Vectorizer`
    # would raise AttributeError. Resolve the real helper class explicitly.
    collection_config = weaviate.classes.config.Configure

    # 1. Create a collection (similar to a table).
    # Drop any previous copy first so the example can be re-run from scratch.
    if client.collections.exists("Question"):
        client.collections.delete("Question")

    # Configure Weaviate to use OpenAI for vectorizing (and generating over)
    # the data.
    # NOTE(review): newer client releases may prefer `vector_config=`; confirm
    # against the installed weaviate-client version.
    questions = client.collections.create(
        name="Question",
        vectorizer_config=collection_config.Vectorizer.text2vec_openai(),
        generative_config=collection_config.Generative.openai(),
    )

    # 2. Load data from a source.
    # A timeout keeps the script from hanging on a stalled connection, and
    # raise_for_status() stops an HTTP error page from being parsed as data.
    resp = requests.get(
        "https://raw.githubusercontent.com/weaviate-tutorials/quickstart/main/data/jeopardy_tiny.json",
        timeout=30,
    )
    resp.raise_for_status()
    data = resp.json()

    # 3. Bulk insert data (batch import).
    with questions.batch.dynamic() as batch:
        for d in data:
            batch.add_object({
                "answer": d["Answer"],
                "question": d["Question"],
                "category": d["Category"],
            })

    # Batch failures are collected rather than raised, so check them before
    # searching; otherwise the query could run against incomplete data.
    failed_objects = questions.batch.failed_objects
    if failed_objects:
        raise RuntimeError(
            f"{len(failed_objects)} object(s) failed to import; "
            f"first failure: {failed_objects[0]}"
        )

    # 4. Perform a semantic search (near text).
    # Weaviate vectorizes the query and returns the closest objects.
    response = questions.query.near_text(
        query="biology",
        limit=2,
    )

    for obj in response.objects:
        print(f"ID: {obj.uuid}")
        print(f"Data: {json.dumps(obj.properties, indent=2)}")

finally:
    client.close()  # Always release the connection, even after an error