import anthropic
# SDK client configured with a custom base URL (arouter.ai) and an API key.
client = anthropic.Anthropic(
    api_key="lr_live_xxxx",  # placeholder value; substitute a real key
    base_url="https://api.arouter.ai",
)

# Stand-in for a large reference text: the placeholder string repeated 50x.
# NOTE(review): prompt caching normally requires a minimum prefix length;
# confirm the real document is long enough to be cached.
long_document = 50 * "<document content here>"
# First request: the reference document is sent as a system text block that
# carries an "ephemeral" cache_control marker, followed by one user question.
summary_system = [
    {
        "type": "text",
        "text": f"Reference document:\n\n{long_document}",
        "cache_control": {"type": "ephemeral"},
    },
]
summary_question = {"role": "user", "content": "Summarize the key points."}

response = client.messages.create(
    model="claude-sonnet-4.6",
    max_tokens=1024,
    system=summary_system,
    messages=[summary_question],
)

print(response.usage)
# Sample output for the first call: tokens are written to the cache
# (cache_creation_input_tokens) and none are read from it.
# Usage(input_tokens=1500, output_tokens=80,
#       cache_creation_input_tokens=1024, cache_read_input_tokens=0)
# Second call: reads from the cache instead of writing to it.
# The system block is built from the same document text and cache_control
# marker as in the first request; only the user question differs.
followup_system = [
    {
        "type": "text",
        "text": f"Reference document:\n\n{long_document}",
        "cache_control": {"type": "ephemeral"},
    },
]
followup_question = {"role": "user", "content": "What's the main topic?"}

response2 = client.messages.create(
    model="claude-sonnet-4.6",
    max_tokens=1024,
    system=followup_system,
    messages=[followup_question],
)

print(response2.usage)
# Sample output for the second call: nothing is written to the cache and the
# previously cached tokens are reported under cache_read_input_tokens.
# Usage(input_tokens=476, output_tokens=40,
#       cache_creation_input_tokens=0, cache_read_input_tokens=1024)