import requests
from datetime import datetime, timezone
import openai
from pinecone import Pinecone
# configure clients
pc = Pinecone(api_key="your-pinecone-api-key")
openai_client = openai.OpenAI(api_key="your-openai-api-key")
NEBULY_API_KEY = "your-nebuly-api-key"
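# NOTE: the API keys above are placeholders; replace them with your own keys
# (for example by reading them from environment variables) before running this.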
# input parameters
end_user = "TestUser"
input_message = "What is the capital of France?"
# Chain start
time_start = datetime.now(tz=timezone.utc)
nebuly_traces = []
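# time_start/time_end delimit the whole chain, while nebuly_traces collects one
# entry per intermediate step (LLM calls and retrievals) to send to Nebuly.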
# First call to an OpenAI model to generate the query for the retrieval source
system_prompt = (
    "You are the first step in a retrieval-generation chain. "
    "Given a user input, you should generate the input for the "
    "data retrieval source, a vector DB. The vector DB expects "
    "as input a query that retrieves the relevant information. "
    "Provide only the query for the vector DB in your response."
)
model = "gpt-4-turbo"
response = openai_client.chat.completions.create(
model=model,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": input_message},
],
)
# Let's store the input and output of the first call in the nebuly_traces list
nebuly_traces.append(
{
"input": input_message,
"output": response.choices[0].message.content,
"model": model,
"system_prompt": system_prompt,
"history": []
}
)
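# Each LLM step is traced with its input, output, model and system prompt so the
# full chain can be inspected step by step on the Nebuly platform.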
# Let's now call the VectorDB with the output of the previous call
query = response.choices[0].message.content
index_name = "data-retrieval"
index = pc.Index(index_name)
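# This assumes the "data-retrieval" index already exists and that each stored
# vector carries a "text" metadata field, which is used as the retrieved context.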
embed_model = "text-embedding-ada-002"
res = openai_client.embeddings.create(
input=[query],
model=embed_model
)
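# text-embedding-ada-002 returns 1536-dimensional vectors, so the Pinecone index
# must have been created with dimension 1536.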
# retrieve from Pinecone
xq = res.data[0].embedding
# get the most relevant contexts from the index
res = index.query(vector=xq, top_k=5, include_metadata=True)
# store the results in the nebuly_traces list
for match in res.matches:
    nebuly_traces.append(
        {
            "source": index_name,
            "input": query,
            "output": match["metadata"]["text"],
        }
    )
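# Each retrieved chunk becomes its own retrieval trace, tagged with the index
# name as its source.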
# Final call to the OpenAI model
system_prompt = (
    "You are a chatbot model used for QA use cases. "
    "You receive the user input and extra context from a retrieval source. "
    "You must use the given information to generate your answer."
)
input_format = "User input: {user_input}\nContext: {context}"
context = "\n".join([match["metadata"]["text"] for match in res.matches])
input_with_context = input_format.format(
user_input=input_message,
context=context
)
response = openai_client.chat.completions.create(
model=model,
messages=[
{
"role": "system",
"content": system_prompt
},
{
"role": "user",
"content": input_with_context
}
],
)
time_end = datetime.now(tz=timezone.utc)
# store the second LLM call in nebuly_traces
nebuly_traces.append(
    {
        "model": model,
        "input": input_with_context,
        "output": response.choices[0].message.content,
        "system_prompt": system_prompt,
        "history": []
    }
)
# send the interaction and its traces to the Nebuly platform
data = {
"interaction": {
"input": input_message,
"output": response.choices[0].message.content,
"time_start": time_start.isoformat(),
"time_end": time_end.isoformat(),
"history": [],
"end_user": end_user,
},
"traces": nebuly_traces,
"anonymize": False
}
url = "https://backend.nebuly.com/event-ingestion/api/v1/events/trace_interaction"
headers = {
"Authorization": f"Bearer {NEBULY_API_KEY}",
"Content-Type": "application/json"
}
nebuly_response = requests.post(url, json=data, headers=headers)
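# Optional: surface ingestion errors instead of failing silently.
print(nebuly_response.status_code, nebuly_response.text)
nebuly_response.raise_for_status()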