ARouter supports streaming responses for all models. When streaming is enabled, tokens are delivered in real time as they’re generated.
To enable streaming, set stream: true in your request body.
Python (OpenAI)
Node.js (OpenAI)
Go
cURL
fetch (raw)
from openai import OpenAI

# Point the OpenAI SDK at ARouter's OpenAI-compatible endpoint.
client = OpenAI(
    base_url="https://api.arouter.ai/v1",
    api_key="lr_live_xxxx",
)

stream = client.chat.completions.create(
    model="openai/gpt-5.4",
    messages=[{"role": "user", "content": "How would you build the tallest building ever?"}],
    stream=True,
)

for chunk in stream:
    # The final usage chunk has an EMPTY choices array (see the SSE examples
    # below), so guard before indexing to avoid an IndexError on the last event.
    if chunk.choices:
        content = chunk.choices[0].delta.content
        if content:
            print(content, end="", flush=True)
    # Final chunk includes usage stats; it is delivered on the chunk itself.
    # (stream.get_final_completion() exists only on the .stream() helper,
    # not on the raw Stream returned by create(stream=True).)
    if chunk.usage:
        print(f"\nUsage: {chunk.usage}")
import OpenAI from "openai";

// OpenAI SDK pointed at ARouter's OpenAI-compatible endpoint.
const client = new OpenAI({
  baseURL: "https://api.arouter.ai/v1",
  apiKey: "lr_live_xxxx",
});

const stream = await client.chat.completions.create({
  model: "openai/gpt-5.4",
  messages: [{ role: "user", content: "How would you build the tallest building ever?" }],
  stream: true,
});

for await (const chunk of stream) {
  // Optional chaining keeps this safe on the final usage chunk, whose
  // choices array is empty.
  const text = chunk.choices[0]?.delta?.content;
  if (text) {
    process.stdout.write(text);
  }
  // Final chunk includes usage stats
  if (chunk.usage) {
    console.log("\nUsage:", chunk.usage);
  }
}
stream, err := client.ChatCompletionStream(ctx, arouter.ChatCompletionRequest{
	Model: "openai/gpt-5.4",
	Messages: []arouter.Message{
		{Role: "user", Content: "How would you build the tallest building ever?"},
	},
})
if err != nil {
	log.Fatal(err)
}
defer stream.Close()

for {
	chunk, err := stream.Recv()
	if err == arouter.ErrStreamDone {
		break
	}
	if err != nil {
		log.Fatal(err)
	}
	// The final usage chunk carries an empty Choices slice (see the SSE
	// examples below) -- guard to avoid an index-out-of-range panic.
	if len(chunk.Choices) > 0 {
		fmt.Print(chunk.Choices[0].Delta.Content)
	}
}
# -N disables curl's output buffering so SSE events are printed as they arrive.
curl -N https://api.arouter.ai/v1/chat/completions \
  -H "Authorization: Bearer lr_live_xxxx" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "openai/gpt-5.4",
    "messages": [{"role": "user", "content": "How would you build the tallest building ever?"}],
    "stream": true
  }'
const response = await fetch('https://api.arouter.ai/v1/chat/completions', {
  method: 'POST',
  headers: {
    Authorization: 'Bearer lr_live_xxxx',
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    model: 'openai/gpt-5.4',
    messages: [{ role: 'user', content: 'How would you build the tallest building ever?' }],
    stream: true,
  }),
});

const reader = response.body?.getReader();
if (!reader) throw new Error('No response body');

const decoder = new TextDecoder();
let buffer = '';

try {
  read: while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });

    // SSE events are newline-delimited; keep any trailing partial line
    // in the buffer for the next read.
    while (true) {
      const lineEnd = buffer.indexOf('\n');
      if (lineEnd === -1) break;
      const line = buffer.slice(0, lineEnd).trim();
      buffer = buffer.slice(lineEnd + 1);

      // Keep-alive comment lines start with ':' and don't match the
      // 'data: ' prefix, so they are skipped automatically.
      if (line.startsWith('data: ')) {
        const data = line.slice(6);
        // FIX: a bare 'break' only exited the inner line loop and left the
        // outer loop reading until the server closed the connection; break
        // the labeled outer loop to stop as soon as the stream completes.
        if (data === '[DONE]') break read;
        try {
          const parsed = JSON.parse(data);
          // The final usage chunk has an empty choices array; '?.' keeps
          // the access safe.
          const content = parsed.choices[0]?.delta?.content;
          if (content) process.stdout.write(content);
        } catch (e) {
          // ignore invalid JSON
        }
      }
    }
  }
} finally {
  reader.cancel();
}
Anthropic Streaming
The Anthropic SDK uses its own streaming format:
import anthropic

# Anthropic SDK pointed at ARouter.
# NOTE(review): unlike the OpenAI snippets, the base URL here has no /v1
# suffix -- the Anthropic SDK appends its own path.
client = anthropic.Anthropic(
    base_url="https://api.arouter.ai",
    api_key="lr_live_xxxx",
)

# messages.stream() is a context manager; exiting the block closes the stream.
with client.messages.stream(
    model="claude-sonnet-4.6",
    max_tokens=1024,
    messages=[{"role": "user", "content": "How would you build the tallest building ever?"}],
) as stream:
    # text_stream yields only the text deltas.
    for text in stream.text_stream:
        print(text, end="", flush=True)
Gemini Streaming
Gemini uses streamGenerateContent instead of generateContent:
import google.generativeai as genai

# Route Gemini SDK traffic through ARouter's REST endpoint.
genai.configure(
    api_key="lr_live_xxxx",
    transport="rest",
    client_options={"api_endpoint": "https://api.arouter.ai"},
)

model = genai.GenerativeModel("gemini-2.5-flash")

# stream=True switches the SDK to the streaming RPC and yields chunks.
response = model.generate_content("How would you build the tallest building ever?", stream=True)
for chunk in response:
    print(chunk.text, end="", flush=True)
Under the hood, streaming uses Server-Sent Events. Each content event looks like:
data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","model":"openai/gpt-5.4","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}
data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","model":"openai/gpt-5.4","choices":[{"index":0,"delta":{"content":" world"},"finish_reason":null}]}
data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","model":"openai/gpt-5.4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}
The final chunk before [DONE] contains usage data with an empty choices array:
data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","model":"openai/gpt-5.4","choices":[],"usage":{"prompt_tokens":10,"completion_tokens":20,"total_tokens":30,"prompt_tokens_details":{"cached_tokens":0},"completion_tokens_details":{"reasoning_tokens":0}}}
data: [DONE]
ARouter may occasionally send SSE comments (lines starting with :) to prevent connection timeouts. These can be safely ignored per the SSE specification.
Recommended SSE Client Libraries
Some SSE client implementations may not parse the payload correctly. We recommend using a spec-compliant SSE client library rather than hand-rolling a line parser.
Stream Cancellation
Streaming requests can be cancelled by aborting the connection. For supported providers, this immediately stops model processing.
import OpenAI from "openai";

const client = new OpenAI({
  baseURL: "https://api.arouter.ai/v1",
  apiKey: "lr_live_xxxx",
});

// Aborting this controller tears down the in-flight streaming request.
const controller = new AbortController();

try {
  const stream = await client.chat.completions.create(
    {
      model: "openai/gpt-5.4",
      messages: [{ role: "user", content: "Write a long story" }],
      stream: true,
    },
    { signal: controller.signal },
  );
  for await (const chunk of stream) {
    const text = chunk.choices[0]?.delta?.content;
    if (text) process.stdout.write(text);
  }
} catch (error) {
  // Only swallow deliberate cancellations; everything else propagates.
  if (error.name !== "AbortError") throw error;
  console.log("Stream cancelled");
}

// To cancel:
controller.abort();
import requests
from threading import Event, Thread


def stream_with_cancellation(prompt: str, cancel_event: Event):
    """Stream a completion, stopping early once cancel_event is set."""
    with requests.Session() as session:
        response = session.post(
            "https://api.arouter.ai/v1/chat/completions",
            headers={"Authorization": "Bearer lr_live_xxxx"},
            json={
                "model": "openai/gpt-5.4",
                "messages": [{"role": "user", "content": prompt}],
                "stream": True,
            },
            stream=True,
        )
        try:
            for line in response.iter_lines():
                # Bail out as soon as the caller signals cancellation;
                # the finally block closes the connection.
                if cancel_event.is_set():
                    return
                if line:
                    print(line.decode(), end="", flush=True)
        finally:
            # Closing the response drops the connection, which stops
            # generation on supported providers.
            response.close()


cancel_event = Event()
t = Thread(target=lambda: stream_with_cancellation("Write a long story", cancel_event))
t.start()

# To cancel:
cancel_event.set()
// Aborting the controller tears down the HTTP connection mid-stream.
const controller = new AbortController();

try {
  const response = await fetch('https://api.arouter.ai/v1/chat/completions', {
    method: 'POST',
    headers: {
      Authorization: 'Bearer lr_live_xxxx',
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model: 'openai/gpt-5.4',
      messages: [{ role: 'user', content: 'Write a long story' }],
      stream: true,
    }),
    signal: controller.signal,
  });
  // process stream...
} catch (error) {
  // Only swallow deliberate cancellations; everything else propagates.
  if (error.name !== 'AbortError') throw error;
  console.log('Stream cancelled');
}

// To cancel:
controller.abort();
Handling Errors During Streaming
ARouter handles errors differently depending on when they occur during the streaming process.
Errors Before Any Tokens Are Sent
If an error occurs before any tokens have been streamed, ARouter returns a standard JSON error response with the appropriate HTTP status code:
{
"error": {
"code": 400,
"message": "Invalid model specified"
}
}
Common HTTP status codes:
| Code | Meaning |
|---|---|
| 400 | Bad Request — invalid parameters |
| 401 | Unauthorized — invalid API key |
| 402 | Payment Required — insufficient credits |
| 429 | Too Many Requests — rate limited |
| 502 | Bad Gateway — provider error |
| 503 | Service Unavailable — no available providers |
Errors After Tokens Have Been Sent (Mid-Stream)
If an error occurs after some tokens have already been streamed, ARouter cannot change the HTTP status code (which is already 200 OK). Instead, the error is sent as an SSE event:
data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","model":"openai/gpt-5.4","error":{"code":"server_error","message":"Provider disconnected unexpectedly"},"choices":[{"index":0,"delta":{"content":""},"finish_reason":"error"}]}
Key characteristics:
- The error appears at the top level alongside standard response fields
- A `choices` array is included with `finish_reason: "error"` to terminate the stream
- The HTTP status remains 200 OK since headers were already sent
Error Handling Code Examples
Python (OpenAI)
Node.js (OpenAI)
fetch (raw)
from openai import OpenAI, APIStatusError

client = OpenAI(
    base_url="https://api.arouter.ai/v1",
    api_key="lr_live_xxxx",
)

try:
    stream = client.chat.completions.create(
        model="openai/gpt-5.4",
        messages=[{"role": "user", "content": "Write a story"}],
        stream=True,
    )
    for chunk in stream:
        # The final usage chunk has an EMPTY choices array -- guard before
        # indexing to avoid an IndexError on the last event.
        if chunk.choices:
            content = chunk.choices[0].delta.content
            if content:
                print(content, end="", flush=True)
except APIStatusError as e:
    # Raised for pre-stream errors (non-2xx status before any tokens sent).
    print(f"\nError {e.status_code}: {e.message}")
import OpenAI from "openai";

const client = new OpenAI({
  baseURL: "https://api.arouter.ai/v1",
  apiKey: "lr_live_xxxx",
});

try {
  const stream = await client.chat.completions.create({
    model: "openai/gpt-5.4",
    messages: [{ role: "user", content: "Write a story" }],
    stream: true,
  });

  for await (const chunk of stream) {
    // Mid-stream errors arrive as a top-level `error` field on the chunk,
    // since the HTTP status is already 200 at that point.
    if ("error" in chunk) {
      console.error(`Stream error: ${(chunk as any).error.message}`);
      if (chunk.choices?.[0]?.finish_reason === "error") {
        console.log("Stream terminated due to error");
      }
      break;
    }
    const text = chunk.choices[0]?.delta?.content;
    if (text) process.stdout.write(text);
  }
} catch (error) {
  // Pre-stream errors surface as an APIError carrying the HTTP status.
  if (!(error instanceof OpenAI.APIError)) throw error;
  console.error(`Error ${error.status}: ${error.message}`);
}
// Streams a completion, handling both pre-stream (HTTP status) and
// mid-stream (SSE `error` field) failures.
async function streamWithErrorHandling(prompt: string) {
  const response = await fetch('https://api.arouter.ai/v1/chat/completions', {
    method: 'POST',
    headers: {
      Authorization: 'Bearer lr_live_xxxx',
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model: 'openai/gpt-5.4',
      messages: [{ role: 'user', content: prompt }],
      stream: true,
    }),
  });

  // Pre-stream failures arrive as a plain JSON error with a non-2xx status.
  if (!response.ok) {
    const error = await response.json();
    console.error(`Error: ${error.error.message}`);
    return;
  }

  const reader = response.body?.getReader();
  if (!reader) throw new Error('No response body');

  const decoder = new TextDecoder();
  let pending = '';

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      pending += decoder.decode(value, { stream: true });

      // Consume every complete line; a trailing partial line stays buffered.
      let nl = pending.indexOf('\n');
      while (nl !== -1) {
        const line = pending.slice(0, nl).trim();
        pending = pending.slice(nl + 1);
        nl = pending.indexOf('\n');

        if (!line.startsWith('data: ')) continue;

        const data = line.slice(6);
        if (data === '[DONE]') return;

        try {
          const parsed = JSON.parse(data);
          // Mid-stream failures are delivered as a top-level `error` field
          // (the HTTP status is already 200 by then).
          if (parsed.error) {
            console.error(`Stream error: ${parsed.error.message}`);
            return;
          }
          const content = parsed.choices[0]?.delta?.content;
          if (content) process.stdout.write(content);
        } catch (e) {
          // ignore parse errors
        }
      }
    }
  } finally {
    reader.cancel();
  }
}