The Context Consumption Comparison
# Same information: prose vs structured JSON
# Prose version (~150 tokens):
# Free-form prose rendering of one invoice extraction, including the
# extractor's own hedging about the due date. Written as adjacent string
# literals (one per line of text) so every line break is explicit; the value
# starts and ends with a newline.
prose_extraction = (
    "\n"
    "The invoice was issued by Acme Corporation on January 15th, 2024,\n"
    "with invoice number INV-2024-001847. The total amount is four thousand,\n"
    "nine hundred and ninety-nine dollars, due by February 14th, 2024.\n"
    "The invoice covers software licensing services. Overall I'm fairly confident\n"
    "in this extraction though the due date was somewhat unclear in the document.\n"
)
# Structured JSON (~60 tokens) — same information, 60% fewer tokens:
# Structured rendering of an invoice extraction: one key per extracted field,
# ISO-8601 date strings, a numeric amount, and a nested confidence object.
json_extraction = {
    "vendor": "Acme Corporation",
    "invoice_id": "INV-2024-001847",
    "date": "2024-01-15",
    "amount": 4999.00,
    "due_date": "2024-02-14",
    "service_type": "software_licensing",
    # Confidence travels with the data: an overall level plus the list of
    # fields the extractor was unsure about.
    "confidence": {
        "overall": "medium",
        "uncertain_fields": [
            {"field": "due_date"},
        ],
    },
}
# JSON is:
# - 60% fewer tokens
# - Directly parseable — no extraction step
# - Machine-verifiable via schema
# - Confidence embedded — no second API call for routing
Pipeline Efficiency
# ❌ Prose pipeline — multiple inefficient steps
async def prose_pipeline(document: bytes) -> dict:
    """Anti-pattern demo: extract via prose, then re-parse, then rate it.

    Awaits ``call_claude`` three times in sequence, feeding each result into
    the next prompt.

    Args:
        document: Raw invoice bytes, passed to the first call only.

    Returns:
        A dict with ``"data"`` (result of the JSON re-parsing call) and
        ``"confidence"`` (result of the confidence-rating call).
    """
    # Step 1: extraction as free-form prose (~150 token result).
    extraction_prompt = "Extract invoice data and summarize it in prose."
    prose = await call_claude(extraction_prompt, document)

    # Step 2: re-extract structure from the prose (adds ~150 tokens of
    # context, since the whole summary is embedded in the prompt).
    # NOTE(review): only the prose is passed here, not `document`; whether
    # the original document is still in context depends on `call_claude`,
    # which is defined elsewhere — confirm.
    parse_prompt = f"Parse this summary into JSON: {prose}"
    structured = await call_claude(parse_prompt)

    # Step 3: separate confidence check (embeds the step-2 result again).
    rating_prompt = f"Rate confidence of this extraction: {structured}"
    rating = await call_claude(rating_prompt)

    # Total: 3 API calls, large context.
    return {"data": structured, "confidence": rating}
# ✅ Structured pipeline — one efficient step
async def structured_pipeline(document: bytes) -> dict:
    """Extract invoice data with a single forced-tool call.

    Awaits ``call_claude`` once, offering ``invoice_schema_with_confidence``
    as the only tool and forcing the ``extract_invoice`` tool via
    ``tool_choice``; the response is handed to ``parse_tool_result``.

    Args:
        document: Raw invoice bytes, sent as the user content.

    Returns:
        Whatever ``parse_tool_result`` returns for the response (annotated
        as ``dict``).
    """
    # Force the model to answer through the extraction tool rather than prose.
    forced_tool = {"type": "tool", "name": "extract_invoice"}

    # Single call: total 1 API call, compact structured output.
    response = await call_claude(
        system="Extract invoice data. Include confidence in output.",
        user=document,
        tools=[invoice_schema_with_confidence],
        tool_choice=forced_tool,
    )

    # Confidence is requested as part of the same output, so no additional
    # call is made here; downstream steps get the parsed tool result directly.
    # NOTE(review): presumably `invoice_schema_with_confidence` is the
    # `extract_invoice` tool definition — it is defined elsewhere; confirm.
    return parse_tool_result(response)
Schema Design for Context Efficiency
# Efficient schema: no padding, no redundancy
# JSON Schema for a compact invoice extraction: no padding fields, no
# redundancy, confidence embedded alongside the data.
efficient_schema = {
    # `properties` and `required` only constrain object instances; without an
    # explicit type, a bare string or number would validate vacuously.
    "type": "object",
    "properties": {
        "vendor": {"type": "string"},
        "invoice_id": {"type": "string"},
        "date": {"type": "string"},
        "amount": {"type": "number"},
        # Nullable: a missing due date is null, which saves tokens compared
        # with an empty string.
        "due_date": {"type": ["string", "null"]},
        "confidence": {
            "type": "object",
            "properties": {
                "overall": {"type": "string", "enum": ["high", "medium", "low"]},
                "uncertain_fields": {
                    "type": "array",
                    "items": {
                        # Same reasoning as the root: declare the type so the
                        # per-item `properties` actually apply.
                        "type": "object",
                        "properties": {
                            "field": {"type": "string"},
                            "reason": {"type": "string"},
                        },
                    },
                },
            },
        },
    },
    # due_date is deliberately not required, so the schema never forces a
    # placeholder value into context.
    "required": ["vendor", "invoice_id", "date", "amount", "confidence"],
}
Key Takeaways
- JSON is 40-60% more compact than prose for the same information
- Structured output eliminates the parsing step — downstream steps get usable data directly
- Confidence embedded in schema — routing decision without extra API call
- Schema validation prevents retries — no reformatting turns filling up the context
- D4 structure + D5 efficiency = compound savings in API calls and context usage