Code examples

Complete working examples for common use cases.

Switching from Anthropic or Bedrock?

Native provider SDKs use different tool-use payloads. The provider migration guide shows side-by-side OpenAI, Anthropic Messages, and Bedrock Converse examples you can flip between.
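
For orientation, here is the get_weather tool from the basic example below expressed in both formats. This is a sketch for comparison only; the migration guide has the full runnable versions.

# OpenAI Chat Completions tool shape (what the examples on this page use)
openai_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}

# The same tool in Anthropic Messages format: flat, with input_schema
anthropic_tool = {
    "name": "get_weather",
    "description": "Get current weather for a city.",
    "input_schema": {
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    },
}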

Basic tool-calling agent

A minimal agent loop with one tool defined (add as many as you like; Orqen prunes to the relevant ones). Orqen runs invisibly: just change the base_url.

from openai import OpenAI
import json

client = OpenAI(
    api_key="sk-orq-YOUR_KEY",
    base_url="https://api.orqen.app/v1",
)

TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get current weather for a city. Use when user asks about weather, temperature, or forecast.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    },
    # ... add more tools. Orqen will prune to the relevant ones automatically.
]

def run_agent(user_message: str) -> str:
    messages = [{"role": "user", "content": user_message}]

    for _ in range(10):   # max 10 rounds
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            tools=TOOLS,
            tool_choice="auto",
        )
        choice = response.choices[0]

        if not choice.message.tool_calls:
            return choice.message.content   # final answer

        # Append assistant turn and execute tools
        messages.append(choice.message)
        for tc in choice.message.tool_calls:
            result = execute_tool(tc.function.name, json.loads(tc.function.arguments))
            messages.append({
                "role": "tool",
                "tool_call_id": tc.id,
                "content": json.dumps(result),
            })

    return "Max rounds reached."

def execute_tool(name: str, args: dict) -> dict:
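    # Stub dispatcher: replace with real tool implementations.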
    if name == "get_weather":
        return {"temperature": 15.7, "conditions": "partly cloudy", "city": args["city"]}
    return {"error": f"Unknown tool: {name}"}

if __name__ == "__main__":
    print(run_agent("What's the weather in London today?"))
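
Note the ordering inside the loop: the assistant message carrying tool_calls must be appended before its matching tool results, otherwise the next request is rejected.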

Automatic model routing

Let Orqen pick the best model based on task complexity and your connected providers.

from openai import OpenAI

client = OpenAI(
    api_key="sk-orq-YOUR_KEY",
    base_url="https://api.orqen.app/v1",
)

# Simple query → Orqen routes to a fast, cheap model (e.g. Haiku or GPT-4o-mini)
response = client.chat.completions.create(
    model="orqen/auto",
    messages=[{"role": "user", "content": "What is 2 + 2?"}],
)

# Complex analysis → Orqen routes to a capable model (e.g. Sonnet or GPT-4o)
response = client.chat.completions.create(
    model="orqen/auto",
    messages=[{"role": "user", "content": "Analyse this codebase and suggest architectural improvements..."}],
    tools=[...],
)

# Always cheapest
response = client.chat.completions.create(model="orqen/cheap", messages=[...])

# Always fastest (by observed latency)
response = client.chat.completions.create(model="orqen/fast", messages=[...])

Streaming

Orqen supports streaming responses. Tool pruning happens before the stream begins, so the first token arrives without extra latency.

from openai import OpenAI

client = OpenAI(
    api_key="sk-orq-YOUR_KEY",
    base_url="https://api.orqen.app/v1",
)

stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Explain quantum computing in simple terms."}],
    tools=[...],
    stream=True,
)

for chunk in stream:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
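
If the model calls a tool mid-stream, the text deltas are replaced by tool-call fragments. Here is a minimal sketch of collecting them, assuming standard OpenAI streaming semantics (Orqen forwarding the deltas unchanged):

# Accumulate streamed tool calls; fragments are keyed by index.
tool_calls: dict[int, dict] = {}

for chunk in stream:
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta
    if delta.content:
        print(delta.content, end="", flush=True)
    for tc in delta.tool_calls or []:
        call = tool_calls.setdefault(tc.index, {"id": "", "name": "", "arguments": ""})
        if tc.id:
            call["id"] = tc.id
        if tc.function.name:
            call["name"] = tc.function.name
        if tc.function.arguments:
            call["arguments"] += tc.function.arguments

# Each entry now holds a complete id, name, and JSON arguments string.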

Inspecting pruning results

Every response includes headers showing what Orqen did. Access them via the raw response:

import httpx
import json

headers = {
    "Authorization": "Bearer sk-orq-YOUR_KEY",
    "Content-Type": "application/json",
}

body = {
    "model": "gpt-4o",
    "messages": [{"role": "user", "content": "What is the weather in Paris?"}],
    "tools": [...],  # your tools (e.g. 51 in bedrock_multi_tool_agent.py)
}

with httpx.Client() as client:
    r = client.post("https://api.orqen.app/v1/chat/completions", headers=headers, json=body)

print("Tools in:      ", r.headers.get("x-orqen-tools-input"))
print("Tools out:     ", r.headers.get("x-orqen-tools-output"))
print("Prune ratio:   ", r.headers.get("x-orqen-prune-ratio"))
print("Routing method:", r.headers.get("x-orqen-routing"))
print()
print(r.json()["choices"][0]["message"])
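
If you would rather stay in the OpenAI SDK instead of dropping to httpx, with_raw_response exposes the same headers alongside the parsed body (a sketch using the SDK's raw-response wrapper):

from openai import OpenAI

client = OpenAI(
    api_key="sk-orq-YOUR_KEY",
    base_url="https://api.orqen.app/v1",
)

raw = client.chat.completions.with_raw_response.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "What is the weather in Paris?"}],
    tools=[...],  # your tools
)

print("Prune ratio:", raw.headers.get("x-orqen-prune-ratio"))
completion = raw.parse()   # the usual ChatCompletion object
print(completion.choices[0].message)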

Error handling

from openai import OpenAI, APIStatusError, RateLimitError
import time

client = OpenAI(
    api_key="sk-orq-YOUR_KEY",
    base_url="https://api.orqen.app/v1",
)

def call_with_retry(messages, tools, max_retries=3):
    for attempt in range(max_retries):
        try:
            return client.chat.completions.create(
                model="gpt-4o",
                messages=messages,
                tools=tools,
            )
        except RateLimitError:
            # Orqen rate limit — wait and retry
            wait = 60 * (attempt + 1)
            print(f"Rate limited. Waiting {wait}s...")
            time.sleep(wait)
        except APIStatusError as e:
            if e.status_code == 503:
                # Orqen temporarily unavailable: retry after a short wait
                time.sleep(5)
            else:
                # 4xx auth/validation errors and anything unexpected: don't retry
                raise
    raise RuntimeError("Max retries exceeded")
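
Usage, assuming the TOOLS list from the basic agent example above:

response = call_with_retry(
    messages=[{"role": "user", "content": "What's the weather in London?"}],
    tools=TOOLS,
)
print(response.choices[0].message)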