{
  "context_concept": {
    "id": "ctx_context_window_finite",
    "name": "Why context windows are finite",
    "category": "context_window",
    "description": "Context windows are finite because the cost of self-attention in a transformer grows with sequence length (quadratically in the standard formulation), so serving very long contexts costs more compute, memory, and latency per request.",
    "how_it_works": "A larger context window is not free: it increases the cost and time to process a request, which is part of why providers publish a fixed maximum window size per model rather than an unbounded one.",
    "source_note": "General, model-agnostic architectural reasoning; not sourced to a specific document mirrored in this repo.",
    "created_at": "2026-07-02 08:26:58",
    "cite_as": "https://subagentcontext.com/api/context-concepts/ctx_context_window_finite"
  }
}