stream: true。
- Python (OpenAI)
- Node.js (OpenAI)
- Go
- cURL
- fetch (raw)
from openai import OpenAI

client = OpenAI(
    base_url="https://api.arouter.ai/v1",
    api_key="lr_live_xxxx",
)

stream = client.chat.completions.create(
    model="openai/gpt-5.4",
    messages=[{"role": "user", "content": "How would you build the tallest building ever?"}],
    stream=True,
)

for chunk in stream:
    # The final chunk before [DONE] carries usage stats and an EMPTY
    # `choices` list, so indexing `choices[0]` blindly raises IndexError.
    if not chunk.choices:
        if chunk.usage:
            print(f"\nUsage: {chunk.usage}")
        continue
    content = chunk.choices[0].delta.content
    if content:
        print(content, end="", flush=True)
import OpenAI from "openai";

const client = new OpenAI({
  baseURL: "https://api.arouter.ai/v1",
  apiKey: "lr_live_xxxx",
});

const stream = await client.chat.completions.create({
  model: "openai/gpt-5.4",
  messages: [{ role: "user", content: "How would you build the tallest building ever?" }],
  stream: true,
});

// Each chunk carries a content delta; the final chunk also carries usage stats.
for await (const chunk of stream) {
  const delta = chunk.choices[0]?.delta;
  if (delta?.content) process.stdout.write(delta.content);
  if (chunk.usage) console.log("\nUsage:", chunk.usage);
}
// Open a streaming chat completion; the SDK returns a stream handle and an error.
stream, err := client.ChatCompletionStream(ctx, arouter.ChatCompletionRequest{
Model: "openai/gpt-5.4",
Messages: []arouter.Message{
{Role: "user", Content: "How would you build the tallest building ever?"},
},
})
if err != nil {
log.Fatal(err)
}
// Always release the underlying connection when finished reading.
defer stream.Close()
for {
chunk, err := stream.Recv()
// ErrStreamDone signals normal end-of-stream, not a failure.
if err == arouter.ErrStreamDone {
break
}
if err != nil {
log.Fatal(err)
}
fmt.Print(chunk.Choices[0].Delta.Content)
}
# --no-buffer (-N) disables output buffering so SSE chunks print as they arrive.
curl --no-buffer https://api.arouter.ai/v1/chat/completions \
  --header "Authorization: Bearer lr_live_xxxx" \
  --header "Content-Type: application/json" \
  --data '{
"model": "openai/gpt-5.4",
"messages": [{"role": "user", "content": "How would you build the tallest building ever?"}],
"stream": true
}'
const response = await fetch('https://api.arouter.ai/v1/chat/completions', {
  method: 'POST',
  headers: {
    Authorization: 'Bearer lr_live_xxxx',
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    model: 'openai/gpt-5.4',
    messages: [{ role: 'user', content: 'How would you build the tallest building ever?' }],
    stream: true,
  }),
});

const reader = response.body?.getReader();
if (!reader) throw new Error('No response body');

const decoder = new TextDecoder();
let buffer = '';
try {
  // Labeled loop so [DONE] can terminate reading entirely.
  read: while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });

    // Drain every complete SSE line currently buffered.
    while (true) {
      const lineEnd = buffer.indexOf('\n');
      if (lineEnd === -1) break;
      const line = buffer.slice(0, lineEnd).trim();
      buffer = buffer.slice(lineEnd + 1);
      if (line.startsWith('data: ')) {
        const data = line.slice(6);
        // FIX: a plain `break` here only exited the inner line loop and the
        // outer loop kept reading; break the labeled loop to stop at [DONE].
        if (data === '[DONE]') break read;
        try {
          const parsed = JSON.parse(data);
          const content = parsed.choices[0]?.delta?.content;
          if (content) process.stdout.write(content);
        } catch (e) {
          // Non-JSON lines (e.g. SSE keep-alive comments) are ignored.
        }
      }
    }
  }
} finally {
  reader.cancel();
}
Anthropic 串流傳輸
Anthropic SDK 使用其自有的串流傳輸格式:

import anthropic
client = anthropic.Anthropic(
    base_url="https://api.arouter.ai",
    api_key="lr_live_xxxx",
)

# The SDK's streaming helper manages the SSE connection and
# yields plain text deltas via `text_stream`.
with client.messages.stream(
    model="claude-sonnet-4.6",
    max_tokens=1024,
    messages=[{"role": "user", "content": "How would you build the tallest building ever?"}],
) as stream:
    for piece in stream.text_stream:
        print(piece, end="", flush=True)
Gemini 串流傳輸
Gemini 使用 streamGenerateContent 而非 generateContent:
import google.generativeai as genai

genai.configure(
    api_key="lr_live_xxxx",
    transport="rest",
    client_options={"api_endpoint": "https://api.arouter.ai"},
)

model = genai.GenerativeModel("gemini-2.5-flash")
prompt = "How would you build the tallest building ever?"

# stream=True makes the SDK call streamGenerateContent under the hood.
response = model.generate_content(prompt, stream=True)
for part in response:
    print(part.text, end="", flush=True)
SSE 格式
底層串流傳輸使用 Server-Sent Events。每個內容事件的格式如下:

data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","model":"openai/gpt-5.4","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}
data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","model":"openai/gpt-5.4","choices":[{"index":0,"delta":{"content":" world"},"finish_reason":null}]}
data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","model":"openai/gpt-5.4","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}
[DONE] 之前的最後一個資料塊包含用量資料,且 choices 陣列為空:
data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","model":"openai/gpt-5.4","choices":[],"usage":{"prompt_tokens":10,"completion_tokens":20,"total_tokens":30,"prompt_tokens_details":{"cached_tokens":0},"completion_tokens_details":{"reasoning_tokens":0}}}
data: [DONE]
ARouter 可能偶爾發送 SSE 注釋(以 : 開頭的行)以防止連線逾時。根據 SSE 規範,這些注釋可以安全忽略。

推薦的 SSE 客戶端程式庫
部分 SSE 客戶端實作可能無法正確解析資料。我們推薦:

- eventsource-parser — 輕量級 SSE 解析器
- OpenAI SDK — 自動處理 SSE、工具呼叫和用量統計
- Vercel AI SDK — React/Next.js 串流傳輸輔助工具
取消串流請求
串流請求可透過中斷連線來取消。對於支援的提供商,這將立即停止模型處理。

- Node.js (AbortController)
- Python
- fetch (AbortController)
import OpenAI from "openai";

const client = new OpenAI({
  baseURL: "https://api.arouter.ai/v1",
  apiKey: "lr_live_xxxx",
});

const controller = new AbortController();

try {
  const stream = await client.chat.completions.create(
    {
      model: "openai/gpt-5.4",
      messages: [{ role: "user", content: "Write a long story" }],
      stream: true,
    },
    { signal: controller.signal },
  );
  for await (const chunk of stream) {
    const text = chunk.choices[0]?.delta?.content;
    if (text) process.stdout.write(text);
  }
} catch (error) {
  // Aborting the controller surfaces as AbortError; anything else is a real failure.
  if (error.name !== "AbortError") throw error;
  console.log("Stream cancelled");
}

// To cancel:
controller.abort();
import requests
from threading import Event, Thread
def stream_with_cancellation(prompt: str, cancel_event: Event):
    """Stream a chat completion, stopping as soon as *cancel_event* is set.

    Closing the response (via the ``finally`` block) drops the HTTP
    connection, which — for supported providers — halts generation upstream.
    """
    with requests.Session() as session:
        response = session.post(
            "https://api.arouter.ai/v1/chat/completions",
            headers={"Authorization": "Bearer lr_live_xxxx"},
            json={
                "model": "openai/gpt-5.4",
                "messages": [{"role": "user", "content": prompt}],
                "stream": True,
            },
            stream=True,
        )
        try:
            # Poll the cancellation flag between SSE lines.
            for line in response.iter_lines():
                if cancel_event.is_set():
                    return
                if line:
                    print(line.decode(), end="", flush=True)
        finally:
            response.close()
# Run the stream on a worker thread so the main thread can cancel it.
cancel_event = Event()
worker = Thread(
    target=stream_with_cancellation,
    args=("Write a long story", cancel_event),
)
worker.start()

# To cancel:
cancel_event.set()
const controller = new AbortController();

try {
  // Passing the signal lets controller.abort() tear down the request mid-stream.
  const response = await fetch('https://api.arouter.ai/v1/chat/completions', {
    method: 'POST',
    headers: {
      Authorization: 'Bearer lr_live_xxxx',
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model: 'openai/gpt-5.4',
      messages: [{ role: 'user', content: 'Write a long story' }],
      stream: true,
    }),
    signal: controller.signal,
  });
  // process stream...
} catch (error) {
  // Anything other than an abort is a genuine failure — rethrow it.
  if (error.name !== 'AbortError') throw error;
  console.log('Stream cancelled');
}

// To cancel:
controller.abort();
串流傳輸中的錯誤處理
ARouter 根據錯誤發生的時間,以不同方式處理串流傳輸中的錯誤。

傳送任何 Token 之前出現的錯誤
如果在開始串流傳輸之前發生錯誤,ARouter 會回傳帶有適當 HTTP 狀態碼的標準 JSON 錯誤回應:

{
"error": {
"code": 400,
"message": "Invalid model specified"
}
}
| 代碼 | 含義 |
|---|---|
| 400 | Bad Request — 參數無效 |
| 401 | Unauthorized — API key 無效 |
| 402 | Payment Required — 額度不足 |
| 429 | Too Many Requests — 已被限速 |
| 502 | Bad Gateway — 提供商錯誤 |
| 503 | Service Unavailable — 無可用提供商 |
已傳送部分 Token 後出現的錯誤(串流中途)
如果在已傳輸部分 token 後發生錯誤,ARouter 無法更改 HTTP 狀態碼(此時已為 200 OK)。錯誤將以 SSE 事件的形式傳送:

data: {"id":"chatcmpl-xxx","object":"chat.completion.chunk","model":"openai/gpt-5.4","error":{"code":"server_error","message":"Provider disconnected unexpectedly"},"choices":[{"index":0,"delta":{"content":""},"finish_reason":"error"}]}
- 錯誤出現在頂層,與標準回應欄位並列
- choices 陣列包含 finish_reason: "error" 以終止串流
- 由於回應標頭已傳送,HTTP 狀態保持 200 OK
錯誤處理程式碼範例
- Python (OpenAI)
- Node.js (OpenAI)
- fetch (raw)
from openai import OpenAI, APIStatusError

client = OpenAI(
    base_url="https://api.arouter.ai/v1",
    api_key="lr_live_xxxx",
)

try:
    stream = client.chat.completions.create(
        model="openai/gpt-5.4",
        messages=[{"role": "user", "content": "Write a story"}],
        stream=True,
    )
    for chunk in stream:
        # The final usage chunk has an empty `choices` list; indexing it
        # blindly raises IndexError, so skip chunks without choices.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content:
            print(content, end="", flush=True)
except APIStatusError as e:
    # Pre-stream errors arrive as regular HTTP error responses.
    print(f"\nError {e.status_code}: {e.message}")
import OpenAI from "openai";

const client = new OpenAI({
  baseURL: "https://api.arouter.ai/v1",
  apiKey: "lr_live_xxxx",
});

try {
  const stream = await client.chat.completions.create({
    model: "openai/gpt-5.4",
    messages: [{ role: "user", content: "Write a story" }],
    stream: true,
  });

  for await (const chunk of stream) {
    // Mid-stream failures arrive as an `error` field on the chunk itself
    // (the HTTP status is already 200 at that point).
    if ("error" in chunk) {
      console.error(`Stream error: ${(chunk as any).error.message}`);
      if (chunk.choices?.[0]?.finish_reason === "error") {
        console.log("Stream terminated due to error");
      }
      break;
    }
    const text = chunk.choices[0]?.delta?.content;
    if (text) process.stdout.write(text);
  }
} catch (error) {
  // Pre-stream failures throw a regular APIError carrying the HTTP status.
  if (!(error instanceof OpenAI.APIError)) throw error;
  console.error(`Error ${error.status}: ${error.message}`);
}
async function streamWithErrorHandling(prompt: string) {
  const response = await fetch('https://api.arouter.ai/v1/chat/completions', {
    method: 'POST',
    headers: {
      Authorization: 'Bearer lr_live_xxxx',
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model: 'openai/gpt-5.4',
      messages: [{ role: 'user', content: prompt }],
      stream: true,
    }),
  });

  // Pre-stream errors keep a non-2xx status and a JSON error body.
  if (!response.ok) {
    const error = await response.json();
    console.error(`Error: ${error.error.message}`);
    return;
  }

  const reader = response.body?.getReader();
  if (!reader) throw new Error('No response body');

  const decoder = new TextDecoder();
  let buffer = '';
  try {
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });

      // Drain every complete SSE line currently in the buffer.
      let newline: number;
      while ((newline = buffer.indexOf('\n')) !== -1) {
        const line = buffer.slice(0, newline).trim();
        buffer = buffer.slice(newline + 1);
        if (!line.startsWith('data: ')) continue;

        const payload = line.slice(6);
        if (payload === '[DONE]') return;
        try {
          const parsed = JSON.parse(payload);
          // Mid-stream errors ride inside the SSE payload with HTTP 200.
          if (parsed.error) {
            console.error(`Stream error: ${parsed.error.message}`);
            return;
          }
          const content = parsed.choices[0]?.delta?.content;
          if (content) process.stdout.write(content);
        } catch (e) {
          // Non-JSON lines (e.g. keep-alive comments) are ignored.
        }
      }
    }
  } finally {
    reader.cancel();
  }
}