Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.nebuly.com/llms.txt

Use this file to discover all available pages before exploring further.

Retrieve data for custom analysis

In this example, we demonstrate how to retrieve data from Nebuly endpoints and generate a CSV file suitable for further analysis. Specifically, we show how to use the get-interaction-aggregates and get-interactions endpoints to create a CSV where each row represents a user action within a topic, together with up to 10 sampled interactions for that action.
import os, csv, time
from typing import Dict, List
import requests
import pandas as pd
from tqdm import tqdm


# Nebuly external API configuration.
BASE = "https://backend.nebuly.com/api/external"
AGG   = "/get-interaction-aggregates"   # aggregate counts grouped by topic / user_action
LIST  = "/get-interactions"             # raw interaction listing

# Analysis window: only interactions inside [start, end] are considered.
time_range = {
    "start": "2024-01-01T00:00:00Z",  # Set here the date from which you want to get the topics
    "end": "2025-06-04T23:59:59Z"     # Set here the date to which you want to get the topics
}

# Fail fast when the API key is missing. Checking the env var directly is
# more robust than the previous comparison against the literal string
# "Bearer None" (which relied on f-string formatting of a None value).
_api_key = os.getenv("NEBULY_API_KEY")
if not _api_key:
    raise RuntimeError("Please set NEBULY_API_KEY in your environment")

HEADERS = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {_api_key}",
}

def post(endpoint: str, payload: Dict) -> Dict:
    """POST *payload* to a Nebuly endpoint and return the decoded JSON body.

    Retries up to 3 times on transient failures — server errors (5xx) and
    rate limiting (429) — with a simple linear back-off (2s, 3s). Any other
    non-2xx status raises immediately.

    Args:
        endpoint: Path appended to ``BASE`` (e.g. ``"/get-interactions"``).
        payload: JSON-serialisable request body.

    Returns:
        The parsed JSON response as a dict.

    Raises:
        RuntimeError: on a non-retryable HTTP error, or when all retry
            attempts are exhausted.
    """
    attempts = 3
    for attempt in range(attempts):
        r = requests.post(BASE + endpoint, json=payload, headers=HEADERS, timeout=30)
        if r.ok:
            return r.json()
        # 5xx = server-side trouble, 429 = rate limited: both are transient.
        if r.status_code >= 500 or r.status_code == 429:
            if attempt < attempts - 1:
                # Linear back-off; skip the sleep before the terminal raise.
                time.sleep(2 + attempt)
            continue
        raise RuntimeError(f"{r.status_code}: {r.text}")
    raise RuntimeError("Nebuly API repeatedly failed")

# 1️⃣  ────────────────────────────────────────────────────────────
# Retrieve every topic in the time window, then order them by interaction
# volume, busiest first.
_topic_payload = {
    "group_by": {"kind": "topic"},
    "variables": ["n_interactions", "n_users"],
    "filters": [],
    "time_range": time_range,
    "offset": 0,
    "limit": 100,
}
topic_resp = post(AGG, _topic_payload)

# Each entry has the shape:
# {"group_name": "topic 1", "n_interactions": x1, "n_users": y1, ...}
topics = sorted(
    topic_resp["data"],
    key=lambda row: row["n_interactions"],
    reverse=True,
)


# 2️⃣ & 3️⃣  ──────────────────────────────────────────────────────
# For every topic, pull its user actions, then attach up to 10 sampled
# interactions to each (topic, action) pair.
rows: List[Dict] = []
print("Fetching actions + samples …\n")
for topic_row in tqdm(topics, unit="topic"):
    topic_name = topic_row["group_name"]

    # a) user actions observed within this topic
    action_payload = {
        "group_by": {"kind": "user_action"},
        "variables": ["n_interactions", "n_users"],
        "filters": [{"kind": "topic", "values": [topic_name]}],
        "time_range": time_range,
        "offset": 0,
        "limit": 100,
    }
    for action_row in post(AGG, action_payload)["data"]:
        action_name = action_row["group_name"]

        # b) up to 10 interaction samples for this (topic, action) pair
        sample_payload = {
            "limit": 10,
            "filters": [
                {"kind": "topic", "values": [topic_name]},
                {"kind": "user_action", "values": [action_name]},
            ],
            "time_range": time_range,
        }
        sample_resp = post(LIST, sample_payload)
        samples = [
            {"input": item["input_text"], "output": item["output_text"]}
            for item in sample_resp["data"]
        ]

        rows.append(
            {
                "topic": topic_name,
                "action": action_name,
                "interactions": action_row["n_interactions"],
                "samples": samples,
            }
        )

# 4️⃣  ────────────────────────────────────────────────────────────
# Persist one row per (topic, action). Non-scalar cells (the "samples"
# list of dicts) are written as their Python repr by pandas.
pd.DataFrame(rows).to_csv(
    "nebuly_topic_actions.csv",
    index=False,
    quoting=csv.QUOTE_ALL,
)