{
  "context_concepts": [
    {
      "id": "ctx_context_editing_tradeoffs",
      "name": "Trade-offs of aggressive context editing",
      "category": "context_editing",
      "description": "Summarizing or dropping older context saves budget but risks losing detail the model might need later — a fact mentioned once early in a session, if compacted away too aggressively, can leave a gap that only becomes apparent several turns later.",
      "how_it_works": "Effective context editing tends to preserve verbatim the pieces most likely to be needed again (decisions made, constraints stated, file paths, IDs) while more freely summarizing narrative or exploratory content that was only useful in the moment.",
      "source_note": "General practice reasoning; not sourced to a specific mirrored document.",
      "created_at": "2026-07-02 08:27:09",
      "cite_as": "https://subagentcontext.com/api/context-concepts/ctx_context_editing_tradeoffs"
    },
    {
      "id": "ctx_context_editing_what",
      "name": "What context editing / compaction is",
      "category": "context_editing",
      "description": "Context editing (sometimes called compaction) is the general practice of programmatically trimming, summarizing, or removing older or less-relevant content from a conversation's context so that a long-running session can continue without exceeding the context window.",
      "how_it_works": "A common approach: once context usage crosses a threshold, older tool results or conversation turns are summarized into a shorter form (or dropped entirely if no longer relevant) and replaced in place, freeing budget for new turns while preserving the gist of what came before.",
      "source_note": "General mechanism description; no single canonical Anthropic doc for this exact term was cited verbatim this session, so this row is phrased generally rather than quoting a specific API field or parameter name.",
      "created_at": "2026-07-02 08:27:09",
      "cite_as": "https://subagentcontext.com/api/context-concepts/ctx_context_editing_what"
    },
    {
      "id": "ctx_context_window_basics",
      "name": "What a context window is",
      "category": "context_window",
      "description": "The context window is the total amount of text (measured in tokens) a model can consider at once for a given request — the system prompt, conversation history, tool definitions, tool results, and any attached documents all share this one budget.",
      "how_it_works": "Every input token and every output token generated in a turn counts against the same window. Once a conversation's accumulated tokens approach the window's limit, older content must be trimmed, summarized, or dropped, or the request will fail or be truncated.",
      "source_note": "General mechanism described qualitatively; exact numeric token-limit figures per model/tier were not independently re-verified this session, so no specific number is asserted here.",
      "created_at": "2026-07-02 08:26:58",
      "cite_as": "https://subagentcontext.com/api/context-concepts/ctx_context_window_basics"
    },
    {
      "id": "ctx_context_window_finite",
      "name": "Why context windows are finite",
      "category": "context_window",
      "description": "Context windows are finite because the cost of a transformer's attention computation grows with sequence length (quadratically for standard self-attention), and serving very long contexts costs more compute, memory, and latency per request.",
      "how_it_works": "A larger context window is not free: it increases the cost and time to process a request, which is part of why providers publish a fixed maximum window size per model rather than an unbounded one.",
      "source_note": "General, model-agnostic architectural reasoning; not sourced to a specific document mirrored in this repo.",
      "created_at": "2026-07-02 08:26:58",
      "cite_as": "https://subagentcontext.com/api/context-concepts/ctx_context_window_finite"
    },
    {
      "id": "ctx_managing_long_conversations",
      "name": "Managing long conversations within a window",
      "category": "context_window",
      "description": "Long-running agent sessions (like a Cowork session running for many turns) accumulate tool calls, tool results, and conversation turns that can approach the context window's limit well before the conversation is actually finished.",
      "how_it_works": "Strategies include summarizing or compacting older turns, dropping large intermediate tool outputs once they are no longer needed, and being selective about how much of a large file or dataset is read into context at once versus fetched incrementally.",
      "source_note": "General agentic-session practice; consistent with this repo's own documented practice of scoping reads (e.g. Read tool offset/length) rather than loading entire large files.",
      "created_at": "2026-07-02 08:26:58",
      "cite_as": "https://subagentcontext.com/api/context-concepts/ctx_managing_long_conversations"
    },
    {
      "id": "ctx_prompt_caching_what",
      "name": "What prompt caching is",
      "category": "prompt_caching",
      "description": "Prompt caching lets a client mark a prefix of a prompt (e.g. a long system prompt, a set of tool definitions, or a large shared document) as cacheable, so that repeated requests reusing that same prefix are billed and processed more cheaply and with lower latency than reprocessing it from scratch each time.",
      "how_it_works": "On a cache hit, the model does not need to reprocess the cached prefix's tokens the same way it would a fully fresh prompt — the provider serves the cached computation, and only the new (non-cached) portion of the prompt is processed at full cost/latency.",
      "source_note": "Grounded in docs/docs/platform.claude.com/docs/en/build-with-claude/prompt-caching.md, mirrored in this repo per CLAUDE.md.",
      "created_at": "2026-07-02 08:27:03",
      "cite_as": "https://subagentcontext.com/api/context-concepts/ctx_prompt_caching_what"
    },
    {
      "id": "ctx_prompt_caching_when_useful",
      "name": "When prompt caching is most useful",
      "category": "prompt_caching",
      "description": "Prompt caching pays off most when the same large prefix (system prompt, tool schema, long reference document, or few-shot examples) is reused across many requests in a short time window, such as a multi-turn agent session or a batch of requests over the same document.",
      "how_it_works": "Each cached prefix has a limited time-to-live. As long as requests reusing that prefix keep arriving before it expires, they keep getting the caching benefit; if too much time passes between requests, the cache entry expires and the next request pays full cost to reprocess and re-cache it.",
      "source_note": "General usage pattern consistent with docs/docs/platform.claude.com/docs/en/build-with-claude/prompt-caching.md.",
      "created_at": "2026-07-02 08:27:03",
      "cite_as": "https://subagentcontext.com/api/context-concepts/ctx_prompt_caching_when_useful"
    },
    {
      "id": "ctx_tokens_vs_words",
      "name": "Tokens are not the same as words",
      "category": "context_window",
      "description": "Context window and prompt-caching limits are measured in tokens, not words or characters — a token is typically a sub-word unit, so a given block of English text might come to roughly 1.3–1.5x as many tokens as words, and code, non-English text, or unusual formatting can tokenize less efficiently.",
      "how_it_works": "This is why a document that looks short by word count can still consume a large fraction of a context window, especially for code, tables, or dense structured data — actual token count is what should be checked, not word count.",
      "source_note": "General, widely-known tokenization fact; not tied to a specific mirrored source in this repo.",
      "created_at": "2026-07-02 08:27:09",
      "cite_as": "https://subagentcontext.com/api/context-concepts/ctx_tokens_vs_words"
    }
  ]
}