# Documentation Index
# Fetch the complete documentation index at: https://docs.nebuly.com/llms.txt
# Use this file to discover all available pages before exploring further.
import os, csv, time
from typing import Dict, List
import requests
import pandas as pd
from tqdm import tqdm
BASE = "https://backend.nebuly.com/api/external"
AGG = "/get-interaction-aggregates"  # aggregate endpoint (group-by queries)
LIST = "/get-interactions"           # raw interaction listing endpoint

# Time window applied to every query below (ISO-8601, UTC).
time_range = {
    "start": "2024-01-01T00:00:00Z", # Set here the date from which you want to get the topics
    "end": "2025-06-04T23:59:59Z" # Set here the date to which you want to get the topics
}

# Fail fast on a missing *or empty* key. (Comparing the finished header
# to the literal "Bearer None" only caught an unset variable — an empty
# NEBULY_API_KEY produced "Bearer " and slipped through.)
_api_key = os.getenv("NEBULY_API_KEY")
if not _api_key:
    raise RuntimeError("Please set NEBULY_API_KEY in your environment")

HEADERS = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {_api_key}"
}
def post(endpoint: str, payload: Dict) -> Dict:
    """POST *payload* to the Nebuly API and return the decoded JSON body.

    Makes up to 3 attempts, backing off linearly (2s, 3s, 4s) on HTTP 5xx
    responses and on transient network failures. 4xx responses are not
    retried — they indicate a request error that retrying cannot fix.

    Raises:
        RuntimeError: on a non-5xx error response, or when all 3 attempts
            fail with server errors / network faults.
    """
    for attempt in range(3):
        try:
            r = requests.post(BASE + endpoint, json=payload, headers=HEADERS, timeout=30)
        except requests.RequestException:
            # Connection reset, DNS failure, read timeout, ... — these used
            # to escape the retry loop; treat them like a 5xx and retry.
            time.sleep(2 + attempt)
            continue
        if r.ok:
            return r.json()
        if r.status_code >= 500:
            time.sleep(2 + attempt)  # simple back-off
            continue
        # Client error (4xx): retrying won't help, surface it immediately.
        raise RuntimeError(f"{r.status_code}: {r.text}")
    raise RuntimeError("Nebuly API repeatedly failed")
# 1️⃣ ────────────────────────────────────────────────────────────
# Pull every topic (up to 100) and order them by interaction volume,
# busiest first.
topic_payload = {
    "group_by": {"kind": "topic"},
    "variables": ["n_interactions", "n_users"],
    "filters": [],
    "time_range": time_range,
    "offset": 0,
    "limit": 100,
}
topic_resp = post(AGG, topic_payload)

# Each entry has the shape:
#   {"group_name": "<topic>", "n_interactions": ..., "n_users": ..., ...}
topics = sorted(
    topic_resp["data"],
    key=lambda row: row["n_interactions"],
    reverse=True,
)
# 2️⃣ & 3️⃣ ──────────────────────────────────────────────────────
# For each topic, fetch its user actions and up to 10 sample
# interactions per action, accumulating one row per (topic, action).
rows: List[Dict] = []
print("Fetching actions + samples …\n")

for topic_row in tqdm(topics, unit="topic"):
    topic = topic_row["group_name"]

    # a) user actions observed under this topic
    actions_payload = {
        "group_by": {"kind": "user_action"},
        "variables": ["n_interactions", "n_users"],
        "filters": [{"kind": "topic", "values": [topic]}],
        "time_range": time_range,
        "offset": 0,
        "limit": 100,
    }
    actions_resp = post(AGG, actions_payload)

    for action_row in actions_resp["data"]:
        action = action_row["group_name"]

        # b) ~10 interaction samples for this (topic, action) pair
        list_payload = {
            "limit": 10,
            "filters": [
                {"kind": "topic", "values": [topic]},
                {"kind": "user_action", "values": [action]},
            ],
            "time_range": time_range,
        }
        int_resp = post(LIST, list_payload)

        samples = []
        for interaction in int_resp["data"]:
            samples.append(
                {"input": interaction["input_text"], "output": interaction["output_text"]}
            )

        rows.append(
            {
                "topic": topic,
                "action": action,
                "interactions": action_row["n_interactions"],
                "samples": samples,
            }
        )
# 4️⃣ ────────────────────────────────────────────────────────────
# Persist one CSV row per (topic, action) pair; every field is quoted
# because the `samples` column holds nested dicts with free text.
df = pd.DataFrame(rows)
df.to_csv("nebuly_topic_actions.csv", index=False, quoting=csv.QUOTE_ALL)