Streaming - ARouter

ARouter supports streaming responses for all models. When streaming is enabled, tokens are delivered in real time as they’re generated. To enable streaming, set stream: true in your request body.

Python (OpenAI)
Node.js (OpenAI)
Go
cURL
fetch (raw)

from openai import OpenAI

client = OpenAI(
    base_url="https://api.arouter.ai/v1",
    api_key="lr_live_xxxx",
)

stream = client.chat.completions.create(
    model="openai/gpt-5.4",
    messages=[{"role": "user", "content": "How would you build the tallest building ever?"}],
    stream=True,
)

for chunk in stream:
    content = chunk.choices[0].delta.content
    if content:
        print(content, end="", flush=True)

# Final chunk includes usage stats
# Access via: stream.get_final_completion().usage

import OpenAI from "openai";

const client = new OpenAI({
  baseURL: "https://api.arouter.ai/v1",
  apiKey: "lr_live_xxxx",
});

const stream = await client.chat.completions.create({
  model: "openai/gpt-5.4",
  messages: [{ role: "user", content: "How would you build the tallest building ever?" }],
  stream: true,
});

for await (const chunk of stream) {
  const content = chunk.choices[0]?.delta?.content;
  if (content) process.stdout.write(content);

  // Final chunk includes usage stats
  if (chunk.usage) {
    console.log("\nUsage:", chunk.usage);
  }
}

stream, err := client.ChatCompletionStream(ctx, arouter.ChatCompletionRequest{
    Model: "openai/gpt-5.4",
    Messages: []arouter.Message{
        {Role: "user", Content: "How would you build the tallest building ever?"},
    },
})
if err != nil {
    log.Fatal(err)
}
defer stream.Close()

for {
    chunk, err := stream.Recv()
    if err == arouter.ErrStreamDone {
        break
    }
    if err != nil {
        log.Fatal(err)
    }
    fmt.Print(chunk.Choices[0].Delta.Content)
}

curl -N https://api.arouter.ai/v1/chat/completions \
  -H "Authorization: Bearer lr_live_xxxx" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "openai/gpt-5.4",
    "messages": [{"role": "user", "content": "How would you build the tallest building ever?"}],
    "stream": true
  }'

const response = await fetch('https://api.arouter.ai/v1/chat/completions', {
  method: 'POST',
  headers: {
    Authorization: 'Bearer lr_live_xxxx',
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    model: 'openai/gpt-5.4',
    messages: [{ role: 'user', content: 'How would you build the tallest building ever?' }],
    stream: true,
  }),
});

const reader = response.body?.getReader();
if (!reader) throw new Error('No response body');

const decoder = new TextDecoder();
let buffer = '';

try {
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    buffer += decoder.decode(value, { stream: true });

    while (true) {
      const lineEnd = buffer.indexOf('\n');
      if (lineEnd === -1) break;

      const line = buffer.slice(0, lineEnd).trim();
      buffer = buffer.slice(lineEnd + 1);

      if (line.startsWith('data: ')) {
        const data = line.slice(6);
        if (data === '[DONE]') break;

        try {
          const parsed = JSON.parse(data);
          const content = parsed.choices[0]?.delta?.content;
          if (content) process.stdout.write(content);
        } catch (e) {
          // ignore invalid JSON
        }
      }
    }
  }
} finally {
  reader.cancel();
}

Anthropic Streaming

The Anthropic SDK uses its own streaming format:

import anthropic

client = anthropic.Anthropic(
    base_url="https://api.arouter.ai",
    api_key="lr_live_xxxx",
)

with client.messages.stream(
    model="claude-sonnet-4.6",
    max_tokens=1024,
    messages=[{"role": "user", "content": "How would you build the tallest building ever?"}],
) as stream:
    for text in stream.text_stream:
        print(text, end="", flush=True)

Gemini Streaming

Gemini uses streamGenerateContent instead of generateContent:

import google.generativeai as genai

genai.configure(
    api_key="lr_live_xxxx",
    transport="rest",
    client_options={"api_endpoint": "https://api.arouter.ai"},
)

model = genai.GenerativeModel("gemini-2.5-flash")
response = model.generate_content("How would you build the tallest building ever?", stream=True)

for chunk in response:
    print(chunk.text, end="", flush=True)

SSE Format

Under the hood, streaming uses Server-Sent Events. Each content event looks like:

data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","model":"openai/gpt-5.4","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}

data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","model":"openai/gpt-5.4","choices":[{"index":0,"delta":{"content":" world"},"finish_reason":null}]}

data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","model":"openai/gpt-5.4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}

The final chunk before [DONE] contains usage data with an empty choices array:

data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","model":"openai/gpt-5.4","choices":[],"usage":{"prompt_tokens":10,"completion_tokens":20,"total_tokens":30,"prompt_tokens_details":{"cached_tokens":0},"completion_tokens_details":{"reasoning_tokens":0}}}

data: [DONE]

ARouter may occasionally send SSE comments (lines starting with :) to prevent connection timeouts. These can be safely ignored per the SSE specification.

Recommended SSE Client Libraries

Some SSE client implementations may not parse the payload correctly. We recommend:

eventsource-parser — lightweight SSE parser
OpenAI SDK — handles SSE, tool calls, and usage automatically
Vercel AI SDK — React/Next.js streaming helpers

Stream Cancellation

Streaming requests can be cancelled by aborting the connection. For supported providers, this immediately stops model processing.

Node.js (AbortController)
Python
fetch (AbortController)

import OpenAI from "openai";

const client = new OpenAI({
  baseURL: "https://api.arouter.ai/v1",
  apiKey: "lr_live_xxxx",
});

const controller = new AbortController();

try {
  const stream = await client.chat.completions.create(
    {
      model: "openai/gpt-5.4",
      messages: [{ role: "user", content: "Write a long story" }],
      stream: true,
    },
    { signal: controller.signal },
  );

  for await (const chunk of stream) {
    const content = chunk.choices[0]?.delta?.content;
    if (content) process.stdout.write(content);
  }
} catch (error) {
  if (error.name === "AbortError") {
    console.log("Stream cancelled");
  } else {
    throw error;
  }
}

// To cancel:
controller.abort();

import requests
from threading import Event, Thread

def stream_with_cancellation(prompt: str, cancel_event: Event):
    with requests.Session() as session:
        response = session.post(
            "https://api.arouter.ai/v1/chat/completions",
            headers={"Authorization": "Bearer lr_live_xxxx"},
            json={
                "model": "openai/gpt-5.4",
                "messages": [{"role": "user", "content": prompt}],
                "stream": True,
            },
            stream=True,
        )
        try:
            for line in response.iter_lines():
                if cancel_event.is_set():
                    response.close()
                    return
                if line:
                    print(line.decode(), end="", flush=True)
        finally:
            response.close()

cancel_event = Event()
t = Thread(target=lambda: stream_with_cancellation("Write a long story", cancel_event))
t.start()

# To cancel:
cancel_event.set()

const controller = new AbortController();

try {
  const response = await fetch('https://api.arouter.ai/v1/chat/completions', {
    method: 'POST',
    headers: {
      Authorization: 'Bearer lr_live_xxxx',
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model: 'openai/gpt-5.4',
      messages: [{ role: 'user', content: 'Write a long story' }],
      stream: true,
    }),
    signal: controller.signal,
  });

  // process stream...
} catch (error) {
  if (error.name === 'AbortError') {
    console.log('Stream cancelled');
  } else {
    throw error;
  }
}

// To cancel:
controller.abort();

Handling Errors During Streaming

ARouter handles errors differently depending on when they occur during the streaming process.

Errors Before Any Tokens Are Sent

If an error occurs before any tokens have been streamed, ARouter returns a standard JSON error response with the appropriate HTTP status code:

{
  "error": {
    "code": 400,
    "message": "Invalid model specified"
  }
}

Common HTTP status codes:

Code	Meaning
400	Bad Request — invalid parameters
401	Unauthorized — invalid API key
402	Payment Required — insufficient credits
429	Too Many Requests — rate limited
502	Bad Gateway — provider error
503	Service Unavailable — no available providers

Errors After Tokens Have Been Sent (Mid-Stream)

If an error occurs after some tokens have already been streamed, ARouter cannot change the HTTP status code (which is already 200 OK). Instead, the error is sent as an SSE event:

data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","model":"openai/gpt-5.4","error":{"code":"server_error","message":"Provider disconnected unexpectedly"},"choices":[{"index":0,"delta":{"content":""},"finish_reason":"error"}]}

Key characteristics:

The error appears at the top level alongside standard response fields
A choices array is included with finish_reason: "error" to terminate the stream
The HTTP status remains 200 OK since headers were already sent

Error Handling Code Examples

Python (OpenAI)
Node.js (OpenAI)
fetch (raw)

from openai import OpenAI, APIStatusError

client = OpenAI(
    base_url="https://api.arouter.ai/v1",
    api_key="lr_live_xxxx",
)

try:
    stream = client.chat.completions.create(
        model="openai/gpt-5.4",
        messages=[{"role": "user", "content": "Write a story"}],
        stream=True,
    )
    for chunk in stream:
        content = chunk.choices[0].delta.content
        if content:
            print(content, end="", flush=True)
except APIStatusError as e:
    print(f"\nError {e.status_code}: {e.message}")

import OpenAI from "openai";

const client = new OpenAI({
  baseURL: "https://api.arouter.ai/v1",
  apiKey: "lr_live_xxxx",
});

try {
  const stream = await client.chat.completions.create({
    model: "openai/gpt-5.4",
    messages: [{ role: "user", content: "Write a story" }],
    stream: true,
  });

  for await (const chunk of stream) {
    // Check for mid-stream errors
    if ("error" in chunk) {
      console.error(`Stream error: ${(chunk as any).error.message}`);
      if (chunk.choices?.[0]?.finish_reason === "error") {
        console.log("Stream terminated due to error");
      }
      break;
    }
    const content = chunk.choices[0]?.delta?.content;
    if (content) process.stdout.write(content);
  }
} catch (error) {
  if (error instanceof OpenAI.APIError) {
    console.error(`Error ${error.status}: ${error.message}`);
  } else {
    throw error;
  }
}

async function streamWithErrorHandling(prompt: string) {
  const response = await fetch('https://api.arouter.ai/v1/chat/completions', {
    method: 'POST',
    headers: {
      Authorization: 'Bearer lr_live_xxxx',
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model: 'openai/gpt-5.4',
      messages: [{ role: 'user', content: prompt }],
      stream: true,
    }),
  });

  // Check initial HTTP status for pre-stream errors
  if (!response.ok) {
    const error = await response.json();
    console.error(`Error: ${error.error.message}`);
    return;
  }

  const reader = response.body?.getReader();
  if (!reader) throw new Error('No response body');
  const decoder = new TextDecoder();
  let buffer = '';

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      buffer += decoder.decode(value, { stream: true });

      while (true) {
        const lineEnd = buffer.indexOf('\n');
        if (lineEnd === -1) break;
        const line = buffer.slice(0, lineEnd).trim();
        buffer = buffer.slice(lineEnd + 1);

        if (line.startsWith('data: ')) {
          const data = line.slice(6);
          if (data === '[DONE]') return;

          try {
            const parsed = JSON.parse(data);

            // Check for mid-stream error
            if (parsed.error) {
              console.error(`Stream error: ${parsed.error.message}`);
              return;
            }

            const content = parsed.choices[0]?.delta?.content;
            if (content) process.stdout.write(content);
          } catch (e) {
            // ignore parse errors
          }
        }
      }
    }
  } finally {
    reader.cancel();
  }
}

​Anthropic Streaming

​Gemini Streaming

​SSE Format

​Recommended SSE Client Libraries

​Stream Cancellation

​Handling Errors During Streaming

​Errors Before Any Tokens Are Sent

​Errors After Tokens Have Been Sent (Mid-Stream)

​Error Handling Code Examples

Anthropic Streaming

Gemini Streaming

SSE Format

Recommended SSE Client Libraries

Stream Cancellation

Handling Errors During Streaming

Errors Before Any Tokens Are Sent

Errors After Tokens Have Been Sent (Mid-Stream)

Error Handling Code Examples