{
  "context_concept": {
    "id": "ctx_prompt_caching_what",
    "name": "What prompt caching is",
    "category": "prompt_caching",
    "description": "Prompt caching lets a client mark a prefix of a prompt (e.g. a long system prompt, a set of tool definitions, or a large shared document) as cacheable, so that repeated requests reusing that same prefix cost less and complete with lower latency than they would if the prefix were reprocessed from scratch each time.",
    "how_it_works": "On a cache hit, the cached prefix's tokens are not reprocessed as they would be in a fully fresh prompt — the provider reuses the cached computation, and only the new (non-cached) portion of the prompt is processed at full cost and latency.",
    "source_note": "Grounded in docs/docs/platform.claude.com/docs/en/build-with-claude/prompt-caching.md, mirrored in this repo per CLAUDE.md.",
    "created_at": "2026-07-02 08:27:03",
    "cite_as": "https://subagentcontext.com/api/context-concepts/ctx_prompt_caching_what"
  }
}