{
  "context_concept": {
    "id": "ctx_tokens_vs_words",
    "name": "Tokens are not the same as words",
    "category": "context_window",
    "description": "Context window and prompt-caching limits are measured in tokens, not words or characters — a token is typically a sub-word unit, so the same block of English text might be roughly 1.3–1.5x as many tokens as words, and code, non-English text, or unusual formatting can tokenize less efficiently.",
    "how_it_works": "This is why a document that looks short by word count can still consume a large fraction of a context window, especially for code, tables, or dense structured data — actual token count is what should be checked, not word count.",
    "source_note": "General, widely-known tokenization fact; not tied to a specific mirrored source in this repo.",
    "created_at": "2026-07-02 08:27:09",
    "cite_as": "https://subagentcontext.com/api/context-concepts/ctx_tokens_vs_words"
  }
}