Integrations
Raw endpoints
For use cases that do not rely on a third-party SDK, or for situations where a high degree of customization is needed, we expose a raw endpoint that you can use to send the interaction.
This example shows how to track the traces in a simple RAG use case, using OpenAI and Pinecone, and send all the relevant data to the Nebuly platform. Further details on the endpoint definition can be found in the API reference section.
Python Example
import requests
from datetime import datetime, timezone
import openai
from pinecone import Pinecone
# --- Credentials ---
# Placeholder value; replace with your own key before running.
NEBULY_API_KEY = "your-nebuly-api-key"

# --- Service clients ---
openai_client = openai.OpenAI(api_key="your-openai-api-key")
pc = Pinecone(api_key="pinecone-api-key")

# --- Parameters of the interaction ---
input_message = "What is the capital of France?"
end_user = "TestUser"

# --- Start of the chain ---
# Every step of the chain adds its trace to this list.
nebuly_traces = []
# Timezone-aware UTC timestamp taken before the first model call.
time_start = datetime.now(tz=timezone.utc)
# Step 1: ask an OpenAI chat model to turn the user input into a query
# for the retrieval source.
system_prompt = (
    "You are the first step in a retrieval-generation chain. "
    "Given a user input you should generate the input for the "
    "data retrieval source, a vector DB. The vector DB expects "
    "as input a query that retrieves the relevant information. Please "
    "you must provide only the query for the VectorDB in your response."
)
model = "gpt-4-turbo"
query_messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": input_message},
]
response = openai_client.chat.completions.create(
    model=model,
    messages=query_messages,
)

# Record the input and output of this first call as a trace.
first_llm_trace = {
    "input": input_message,
    "output": response.choices[0].message.content,
    "model": model,
    "system_prompt": system_prompt,
    "history": [],
}
nebuly_traces.append(first_llm_trace)
# Step 2: query the vector DB with the output of the first model call.
query = response.choices[0].message.content
index_name = 'data-retrieval'
index = pc.Index(index_name)

# Embed the query text so it can be sent to the index as a vector.
embed_model = "text-embedding-ada-002"
embedding_response = openai_client.embeddings.create(
    input=[query],
    model=embed_model,
)
query_vector = embedding_response.data[0].embedding

# Retrieve the top 5 matches from Pinecone together with their metadata.
# NOTE: the name `res` is kept because the final prompt is built from
# `res.matches` further down in the script.
res = index.query(vector=query_vector, top_k=5, include_metadata=True)

# Record one retrieval trace per match.
# Assumes every match has a "text" entry in its metadata; confirm against
# how the index was populated.
nebuly_traces.extend(
    {
        "source": index_name,
        "input": query,
        "output": match["metadata"]["text"],
    }
    for match in res.matches
)
# Step 3: final call to the OpenAI chat model, which answers the user
# using the context returned by the retrieval step.
system_prompt = (
    "You are a chatbot model used for QA use cases. "
    "You receive the user input and an extra context from a Retrieval source. "
    "You must use the given information to generate the output."
)
input_format = "User input: {user_input}\nContext: {context}"
# Concatenate the text of every retrieved match, one per line.
context = "\n".join(match["metadata"]["text"] for match in res.matches)
input_with_context = input_format.format(
    user_input=input_message,
    context=context,
)
response = openai_client.chat.completions.create(
    model=model,
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": input_with_context},
    ],
)
# End of the chain: timestamp taken right after the final model call returns.
time_end = datetime.now(tz=timezone.utc)

# Record the second LLM call in nebuly_traces.
# The system prompt is stored under "system_prompt", the same key used by
# the first LLM trace in this example, so both traces share one schema.
# NOTE(review): confirm the exact field name against the Nebuly API reference.
nebuly_traces.append(
    {
        "model": model,
        "input": input_with_context,
        "output": response.choices[0].message.content,
        "system_prompt": system_prompt,
        "history": [],
    }
)
# Send the interaction and all collected traces to the Nebuly platform.
data = {
    # The interaction as seen by the end user: original input, final answer,
    # and the start/end timestamps of the whole chain in ISO 8601 format.
    "interaction": {
        "input": input_message,
        "output": response.choices[0].message.content,
        "time_start": time_start.isoformat(),
        "time_end": time_end.isoformat(),
        "history": [],
        "end_user": end_user,
    },
    # The intermediate steps (LLM calls and retrievals) recorded along the way.
    "traces": nebuly_traces,
    "anonymize": False,
}
url = "https://backend.nebuly.com/event-ingestion/api/v1/events/trace_interaction"
headers = {
    "Authorization": f"Bearer {NEBULY_API_KEY}",
    "Content-Type": "application/json",
}
# requests applies no timeout by default; set one so the script cannot hang
# indefinitely if the endpoint is unreachable.
nebuly_response = requests.post(url, json=data, headers=headers, timeout=30)
# Raise on 4xx/5xx responses instead of silently losing the interaction.
nebuly_response.raise_for_status()