{
  "context_concepts": [
    {
      "id": "ctx_context_window_basics",
      "name": "What a context window is",
      "category": "context_window",
      "description": "The context window is the total amount of text (measured in tokens) a model can consider at once for a given request — the system prompt, conversation history, tool definitions, tool results, and any attached documents all share this one budget.",
      "how_it_works": "Every input token and every output token generated in a turn counts against the same window. Once a conversation's accumulated tokens approach the window's limit, older content must be trimmed, summarized, or dropped, or the request will fail or be truncated.",
      "source_note": "General mechanism described qualitatively; exact numeric token-limit figures per model/tier were not independently re-verified this session, so no specific number is asserted here.",
      "created_at": "2026-07-02 08:26:58",
      "cite_as": "https://subagentcontext.com/api/context-concepts/ctx_context_window_basics"
    },
    {
      "id": "ctx_context_window_finite",
      "name": "Why context windows are finite",
      "category": "context_window",
      "description": "Context windows are finite because attention computation over a transformer scales with sequence length, and serving very long contexts costs more compute, memory, and latency per request.",
      "how_it_works": "A larger context window is not free: it increases the cost and time to process a request, which is part of why providers publish a fixed maximum window size per model rather than an unbounded one.",
      "source_note": "General, model-agnostic architectural reasoning; not sourced to a specific document mirrored in this repo.",
      "created_at": "2026-07-02 08:26:58",
      "cite_as": "https://subagentcontext.com/api/context-concepts/ctx_context_window_finite"
    },
    {
      "id": "ctx_managing_long_conversations",
      "name": "Managing long conversations within a window",
      "category": "context_window",
      "description": "Long-running agent sessions (like a Cowork session running for many turns) accumulate tool calls, tool results, and conversation turns that can approach the context window's limit well before the conversation is actually finished.",
      "how_it_works": "Strategies include summarizing or compacting older turns, dropping large intermediate tool outputs once they are no longer needed, and being selective about how much of a large file or dataset is read into context at once versus fetched incrementally.",
      "source_note": "General agentic-session practice; consistent with this repo's own documented practice of scoping reads (e.g. Read tool offset/length) rather than loading entire large files.",
      "created_at": "2026-07-02 08:26:58",
      "cite_as": "https://subagentcontext.com/api/context-concepts/ctx_managing_long_conversations"
    },
    {
      "id": "ctx_tokens_vs_words",
      "name": "Tokens are not the same as words",
      "category": "context_window",
      "description": "Context window and prompt-caching limits are measured in tokens, not words or characters — a token is typically a sub-word unit, so the same block of English text might be roughly 1.3–1.5x as many tokens as words, and code, non-English text, or unusual formatting can tokenize less efficiently.",
      "how_it_works": "This is why a document that looks short by word count can still consume a large fraction of a context window, especially for code, tables, or dense structured data — actual token count is what should be checked, not word count.",
      "source_note": "General, widely-known tokenization fact; not tied to a specific mirrored source in this repo.",
      "created_at": "2026-07-02 08:27:09",
      "cite_as": "https://subagentcontext.com/api/context-concepts/ctx_tokens_vs_words"
    }
  ]
}