diff --git a/README-ru.md b/README-ru.md new file mode 100644 index 000000000..a5ecb8047 --- /dev/null +++ b/README-ru.md @@ -0,0 +1,235 @@ +[English](./README.md) | [中文](./README-zh.md) | [日本語](./README-ja.md) | [Русский](./README-ru.md) +# Learn Claude Code -- собираем nano-агента в стиле Claude Code с нуля +
+ +Сканируйте QR-код в WeChat, чтобы подписаться на нас, +или подпишитесь в X: [shareAI-Lab](https://x.com/baicai003) + +``` + THE AGENT PATTERN + ================= + + User --> messages[] --> LLM --> response + | + stop_reason == "tool_use"? + / \ + yes no + | | + execute tools return text + append results + loop back -----------------> messages[] + + + Это минимальный цикл. Он нужен любому AI-агенту для программирования. + В продакшене поверх него добавляются политики, разрешения и жизненный цикл. +``` + +**12 последовательных сессий: от простого цикла до изолированного автономного выполнения.** +**Каждая сессия добавляет ровно один механизм. У каждого механизма есть свой девиз.** + +> **s01**   *"One loop & Bash is all you need"* — один инструмент + один цикл = агент +> +> **s02**   *"Adding a tool means adding one handler"* — цикл не меняется; новые инструменты просто регистрируются в dispatch map +> +> **s03**   *"An agent without a plan drifts"* — сначала перечисли шаги, потом выполняй; доля завершённых задач резко растёт +> +> **s04**   *"Break big tasks down; each subtask gets a clean context"* — субагенты используют отдельный `messages[]`, поэтому основной диалог остаётся чистым +> +> **s05**   *"Load knowledge when you need it, not upfront"* — знания подмешиваются через `tool_result`, а не заранее через system prompt +> +> **s06**   *"Context will fill up; you need a way to make room"* — трёхслойное сжатие контекста позволяет вести бесконечные сессии +> +> **s07**   *"Break big goals into small tasks, order them, persist to disk"* — файловый граф задач с зависимостями закладывает основу для мультиагентной координации +> +> **s08**   *"Run slow operations in the background; the agent keeps thinking"* — долгие операции уходят в фон, а агент продолжает мыслить дальше +> +> **s09**   *"When the task is too big for one, delegate to teammates"* — постоянные напарники + асинхронные почтовые ящики +> +> **s10**   *"Teammates need shared communication rules"* — 
один паттерн request-response покрывает все переговоры +> +> **s11**   *"Teammates scan the board and claim tasks themselves"* — напарники сами смотрят на доску задач и забирают работу +> +> **s12**   *"Each works in its own directory, no interference"* — задачи управляют целями, worktree управляют каталогами, связь держится на ID + +--- + +## Базовый паттерн + +```python +def agent_loop(messages): + while True: + response = client.messages.create( + model=MODEL, system=SYSTEM, + messages=messages, tools=TOOLS, + ) + messages.append({"role": "assistant", + "content": response.content}) + + if response.stop_reason != "tool_use": + return + + results = [] + for block in response.content: + if block.type == "tool_use": + output = TOOL_HANDLERS[block.name](**block.input) + results.append({ + "type": "tool_result", + "tool_use_id": block.id, + "content": output, + }) + messages.append({"role": "user", "content": results}) +``` + +Каждая следующая сессия добавляет поверх этого цикла ещё один механизм, не меняя сам цикл. + +## Область проекта + +Этот репозиторий — учебный проект формата 0→1 по сборке mini/nano-агента в стиле Claude Code. +Ради ясной обучающей траектории здесь намеренно упрощены или опущены некоторые продакшен-механизмы: + +- полноценные event/hook-шины, например `PreToolUse`, `SessionStart/End`, `ConfigChange` + В `s12` есть только минимальный append-only поток lifecycle-событий для обучения. +- rule-based система разрешений и trust-workflows +- контроль жизненного цикла сессий (`resume`/`fork`) и более полный lifecycle worktree +- полные детали runtime MCP: transport, OAuth, subscribe/poll ресурсов + +Считайте JSONL mailbox-протокол команды в этом репозитории учебной реализацией, а не утверждением о внутренностях какого-либо конкретного продакшен-агента. 
+ +## Быстрый старт + +```sh +git clone https://github.com/shareAI-lab/learn-claude-code +cd learn-claude-code +pip install -r requirements.txt +cp .env.example .env # Отредактируйте .env и добавьте свой ANTHROPIC_API_KEY + +python agents/s01_agent_loop.py # Начинайте отсюда +python agents/s12_worktree_task_isolation.py # Полная конечная точка прогрессии +python agents/s_full.py # Итоговая версия: все механизмы вместе +``` + +### Веб-платформа + +Интерактивные визуализации, пошаговые схемы, просмотр исходников и документация. + +```sh +cd web && npm install && npm run dev # http://localhost:3000 +``` + +## Учебный путь + +``` +Этап 1: ЦИКЛ Этап 2: ПЛАНИРОВАНИЕ И ЗНАНИЯ +================== ================================= +s01 The Agent Loop [1] s03 TodoWrite [5] + while + stop_reason TodoManager + напоминание + | | + +-> s02 Tool Use [4] s04 Subagents [5] + dispatch map: name->handler отдельный messages[] на каждого дочернего агента + | + s05 Skills [5] + SKILL.md через tool_result + | + s06 Context Compact [5] + трёхслойное сжатие + +Этап 3: ПОСТОЯНСТВО Этап 4: КОМАНДЫ +================== ============================ +s07 Tasks [8] s09 Agent Teams [9] + файловый CRUD + граф зависимостей напарники + JSONL inbox + | | +s08 Background Tasks [6] s10 Team Protocols [12] + фоновые потоки + очередь уведомлений shutdown + approval FSM + | + s11 Autonomous Agents [14] + idle-cycle + auto-claim + | + s12 Worktree Isolation [16] + координация задач + изолированные каталоги выполнения + + [N] = число инструментов +``` + +## Архитектура + +``` +learn-claude-code/ +| +|-- agents/ # Python-реализации по шагам (s01-s12 + итоговый s_full) +|-- docs/{en,zh,ja,ru}/ # Документация с упором на ментальные модели (4 языка) +|-- web/ # Интерактивная учебная платформа (Next.js) +|-- skills/ # Skill-файлы для s05 ++-- .github/workflows/ci.yml # CI: typecheck + build +``` + +## Документация + +Формат с упором на ментальные модели: проблема, решение, ASCII-схема, минимальный код. 
+Доступно на [English](./docs/en/) | [中文](./docs/zh/) | [日本語](./docs/ja/) | [Русский](./docs/ru/). + +| Сессия | Тема | Девиз | +|--------|------|-------| +| [s01](./docs/ru/s01-the-agent-loop.md) | Цикл агента | *One loop & Bash is all you need* | +| [s02](./docs/ru/s02-tool-use.md) | Инструменты | *Adding a tool means adding one handler* | +| [s03](./docs/ru/s03-todo-write.md) | TodoWrite | *An agent without a plan drifts* | +| [s04](./docs/ru/s04-subagent.md) | Субагенты | *Break big tasks down; each subtask gets a clean context* | +| [s05](./docs/ru/s05-skill-loading.md) | Skills | *Load knowledge when you need it, not upfront* | +| [s06](./docs/ru/s06-context-compact.md) | Сжатие контекста | *Context will fill up; you need a way to make room* | +| [s07](./docs/ru/s07-task-system.md) | Система задач | *Break big goals into small tasks, order them, persist to disk* | +| [s08](./docs/ru/s08-background-tasks.md) | Фоновые задачи | *Run slow operations in the background; the agent keeps thinking* | +| [s09](./docs/ru/s09-agent-teams.md) | Команды агентов | *When the task is too big for one, delegate to teammates* | +| [s10](./docs/ru/s10-team-protocols.md) | Командные протоколы | *Teammates need shared communication rules* | +| [s11](./docs/ru/s11-autonomous-agents.md) | Автономные агенты | *Teammates scan the board and claim tasks themselves* | +| [s12](./docs/ru/s12-worktree-task-isolation.md) | Изоляция worktree и задач | *Each works in its own directory, no interference* | + +## Что дальше: от понимания к реальному использованию + +После этих 12 сессий вы будете понимать устройство агента изнутри. Применить это знание можно двумя путями: + +### Kode Agent CLI -- Open-Source CLI для coding-агента + +> `npm i -g @shareai-lab/kode` + +Поддержка Skill и LSP, готовность к Windows, подключение GLM / MiniMax / DeepSeek и других открытых моделей. Установили и пользуетесь. 
+
+GitHub: **[shareAI-lab/Kode-cli](https://github.com/shareAI-lab/Kode-cli)**
+
+### Kode Agent SDK -- встраивайте агентные возможности в своё приложение
+
+Официальный Claude Code Agent SDK под капотом общается с полноценным CLI-процессом: на каждого параллельного пользователя нужен отдельный терминальный процесс. Kode SDK — это отдельная библиотека без per-user process overhead, которую можно встраивать в бэкенды, расширения браузера, embedded-устройства и другие окружения.
+
+GitHub: **[shareAI-lab/Kode-agent-sdk](https://github.com/shareAI-lab/Kode-agent-sdk)**
+
+---
+
+## Родственный репозиторий: от *сессий по запросу* к *постоянно работающему ассистенту*
+
+Агент из этого репозитория — модель **использовал и закрыл**: открыли терминал, дали задачу, завершили работу, следующая сессия стартует с пустого контекста. Так устроена модель Claude Code.
+
+[OpenClaw](https://github.com/openclaw/openclaw) показал другой вариант: поверх того же agent core достаточно двух механизмов, чтобы агент превратился из «подтолкни и он шевельнётся» в «сам просыпается каждые 30 секунд и проверяет, нет ли работы»:
+
+- **Heartbeat** — каждые 30 секунд система отправляет агенту сообщение и просит проверить, есть ли что делать. Ничего нет? Спит дальше. Есть? Действует сразу.
+- **Cron** — агент может сам планировать будущие задачи и автоматически выполнять их в нужный момент.
+
+Добавьте маршрутизацию по IM-каналам (WhatsApp / Telegram / Slack / Discord, 13+ платформ), постоянную память контекста и систему личности Soul — и агент превратится из одноразового инструмента во всегда включённого персонального AI-ассистента.
+ +**[claw0](https://github.com/shareAI-lab/claw0)** — наш сопутствующий учебный репозиторий, который разбирает эти механизмы с нуля: + +``` +claw agent = agent core + heartbeat + cron + IM chat + memory + soul +``` + +``` +learn-claude-code claw0 +(ядро рантайма агента: (активный always-on ассистент: + цикл, инструменты, планирование, heartbeat, cron, IM-каналы, + команды, изоляция worktree) память, личность soul) +``` + +## Лицензия + +MIT + +--- + +**Модель и есть агент. Наша работа — дать ей инструменты и не мешать.** diff --git a/README.md b/README.md index 3e8bf90f5..7fcde89d0 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[English](./README.md) | [中文](./README-zh.md) | [日本語](./README-ja.md) +[English](./README.md) | [中文](./README-zh.md) | [日本語](./README-ja.md) | [Русский](./README-ru.md) # Learn Claude Code -- A nano Claude Code-like agent, built from 0 to 1
@@ -157,7 +157,7 @@ s08 Background Tasks [6] s10 Team Protocols [12] learn-claude-code/ | |-- agents/ # Python reference implementations (s01-s12 + s_full capstone) -|-- docs/{en,zh,ja}/ # Mental-model-first documentation (3 languages) +|-- docs/{en,zh,ja,ru}/ # Mental-model-first documentation (4 languages) |-- web/ # Interactive learning platform (Next.js) |-- skills/ # Skill files for s05 +-- .github/workflows/ci.yml # CI: typecheck + build @@ -166,7 +166,7 @@ learn-claude-code/ ## Documentation Mental-model-first: problem, solution, ASCII diagram, minimal code. -Available in [English](./docs/en/) | [中文](./docs/zh/) | [日本語](./docs/ja/). +Available in [English](./docs/en/) | [中文](./docs/zh/) | [日本語](./docs/ja/) | [Русский](./docs/ru/). | Session | Topic | Motto | |---------|-------|-------| diff --git a/docs/ru/s01-the-agent-loop.md b/docs/ru/s01-the-agent-loop.md new file mode 100644 index 000000000..bc6024a9d --- /dev/null +++ b/docs/ru/s01-the-agent-loop.md @@ -0,0 +1,114 @@ +# s01: Цикл агента + +`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12` + +> *"Одного цикла и Bash уже достаточно"* — один инструмент + один цикл = агент. + +## Проблема + +Языковая модель умеет рассуждать о коде, но не может *потрогать* реальный мир: читать файлы, запускать тесты, проверять ошибки. Без цикла каждый вызов инструмента требует вручную копировать результат обратно в диалог. Циклом становитесь вы. + +## Решение + +``` ++--------+ +-------+ +---------+ +| User | ---> | LLM | ---> | Tool | +| prompt | | | | execute | ++--------+ +---+---+ +----+----+ + ^ | + | tool_result | + +----------------+ + (цикл, пока stop_reason == "tool_use") +``` + +Весь поток управления держится на одном условии выхода. Цикл работает, пока модель продолжает вызывать инструменты. + +## Как это работает + +1. Запрос пользователя становится первым сообщением. + +```python +messages.append({"role": "user", "content": query}) +``` + +2. 
Отправляем `messages` и описания инструментов в LLM. + +```python +response = client.messages.create( + model=MODEL, system=SYSTEM, messages=messages, + tools=TOOLS, max_tokens=8000, +) +``` + +3. Добавляем ответ ассистента. Проверяем `stop_reason`: если модель не вызвала инструмент, работа закончена. + +```python +messages.append({"role": "assistant", "content": response.content}) +if response.stop_reason != "tool_use": + return +``` + +4. Выполняем каждый вызов инструмента, собираем результаты и добавляем их как сообщение пользователя. Затем возвращаемся к шагу 2. + +```python +results = [] +for block in response.content: + if block.type == "tool_use": + output = run_bash(block.input["command"]) + results.append({ + "type": "tool_result", + "tool_use_id": block.id, + "content": output, + }) +messages.append({"role": "user", "content": results}) +``` + +Если собрать всё в одну функцию: + +```python +def agent_loop(query): + messages = [{"role": "user", "content": query}] + while True: + response = client.messages.create( + model=MODEL, system=SYSTEM, messages=messages, + tools=TOOLS, max_tokens=8000, + ) + messages.append({"role": "assistant", "content": response.content}) + + if response.stop_reason != "tool_use": + return + + results = [] + for block in response.content: + if block.type == "tool_use": + output = run_bash(block.input["command"]) + results.append({ + "type": "tool_result", + "tool_use_id": block.id, + "content": output, + }) + messages.append({"role": "user", "content": results}) +``` + +Вот и весь агент меньше чем в 30 строк. Всё остальное в этом курсе наслоится сверху, не меняя сам цикл. 
+ +## Что изменилось + +| Компонент | До | После | +|-----------|----|--------| +| Цикл агента | отсутствует | `while True` + `stop_reason` | +| Инструменты | отсутствуют | `bash` (один инструмент) | +| Сообщения | отсутствуют | накапливаемый список | +| Управление потоком | отсутствует | `stop_reason != "tool_use"` | + +## Попробуйте + +```sh +cd learn-claude-code +python agents/s01_agent_loop.py +``` + +1. `Создай файл hello.py, который печатает "Hello, World!"` +2. `Покажи все Python-файлы в этом каталоге` +3. `Какая сейчас активная git-ветка?` +4. `Создай каталог test_output и запиши в него 3 файла` diff --git a/docs/ru/s02-tool-use.md b/docs/ru/s02-tool-use.md new file mode 100644 index 000000000..fef493923 --- /dev/null +++ b/docs/ru/s02-tool-use.md @@ -0,0 +1,97 @@ +# s02: Инструменты + +`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12` + +> *"Добавить инструмент — значит добавить один обработчик"* — цикл остаётся тем же; новый инструмент означает новый обработчик в карте диспетчеризации. + +## Проблема + +Когда у агента есть только `bash`, он запускает shell для всего подряд. `cat` непредсказуемо обрезает вывод, `sed` ломается на спецсимволах, а каждый вызов bash становится широкой и слабо ограниченной поверхностью атаки. Специализированные инструменты вроде `read_file` и `write_file` позволяют навязать песочницу по путям на уровне самого инструмента. + +Главная мысль: чтобы добавить инструмент, не нужно менять сам цикл. + +## Решение + +``` ++--------+ +-------+ +------------------+ +| User | ---> | LLM | ---> | Tool Dispatch | +| prompt | | | | { | ++--------+ +---+---+ | bash: run_bash | + ^ | read: run_read | + | | write: run_wr | + +-----------+ edit: run_edit | + tool_result | } | + +------------------+ + +Карта диспетчеризации — это словарь: {tool_name: handler_function}. +Один lookup заменяет любую if/elif-цепочку. +``` + +## Как это работает + +1. У каждого инструмента есть свой обработчик. 
Песочница по путям не даёт выйти за пределы рабочей директории. + +```python +def safe_path(p: str) -> Path: + path = (WORKDIR / p).resolve() + if not path.is_relative_to(WORKDIR): + raise ValueError(f"Path escapes workspace: {p}") + return path + +def run_read(path: str, limit: int = None) -> str: + text = safe_path(path).read_text() + lines = text.splitlines() + if limit and limit < len(lines): + lines = lines[:limit] + return "\n".join(lines)[:50000] +``` + +2. Карта диспетчеризации связывает имена инструментов с обработчиками. + +```python +TOOL_HANDLERS = { + "bash": lambda **kw: run_bash(kw["command"]), + "read_file": lambda **kw: run_read(kw["path"], kw.get("limit")), + "write_file": lambda **kw: run_write(kw["path"], kw["content"]), + "edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], + kw["new_text"]), +} +``` + +3. Внутри цикла мы просто находим обработчик по имени. Тело цикла не меняется со времён s01. + +```python +for block in response.content: + if block.type == "tool_use": + handler = TOOL_HANDLERS.get(block.name) + output = handler(**block.input) if handler \ + else f"Unknown tool: {block.name}" + results.append({ + "type": "tool_result", + "tool_use_id": block.id, + "content": output, + }) +``` + +Добавить инструмент = добавить обработчик + схему инструмента. Цикл остаётся нетронутым. + +## Что изменилось относительно s01 + +| Компонент | Было (s01) | Стало (s02) | +|-----------|------------|-------------| +| Инструменты | 1 (`bash`) | 4 (`bash`, `read`, `write`, `edit`) | +| Диспетчеризация | жёсткий вызов `bash` | словарь `TOOL_HANDLERS` | +| Безопасность путей | нет | песочница через `safe_path()` | +| Цикл агента | без изменений | без изменений | + +## Попробуйте + +```sh +cd learn-claude-code +python agents/s02_tool_use.py +``` + +1. `Прочитай файл requirements.txt` +2. `Создай файл greet.py с функцией greet(name)` +3. `Отредактируй greet.py и добавь docstring к функции` +4. 
`Снова прочитай greet.py и проверь, что правка применилась` diff --git a/docs/ru/s03-todo-write.md b/docs/ru/s03-todo-write.md new file mode 100644 index 000000000..98b021a66 --- /dev/null +++ b/docs/ru/s03-todo-write.md @@ -0,0 +1,94 @@ +# s03: План задач + +`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12` + +> *"Агент без плана быстро начинает блуждать"* — сначала перечисли шаги, потом выполняй. + +## Проблема + +На многошаговых задачах модель теряет нить. Она повторяет уже сделанное, пропускает шаги или уходит в сторону. В длинных диалогах это усиливается: системный промпт постепенно тонет в результатах инструментов. Рефакторинг из 10 шагов может пройти шаги 1-3, а потом модель начнёт импровизировать, потому что забыла пункты 4-10. + +## Решение + +``` ++--------+ +-------+ +---------+ +| User | ---> | LLM | ---> | Tools | +| prompt | | | | + todo | ++--------+ +---+---+ +----+----+ + ^ | + | tool_result | + +----------------+ + | + +-----------+-----------+ + | TodoManager state | + | [ ] task A | + | [>] task B <- doing | + | [x] task C | + +-----------------------+ + | + if rounds_since_todo >= 3: + inject into tool_result +``` + +## Как это работает + +1. `TodoManager` хранит элементы со статусами. Только один элемент одновременно может быть `in_progress`. + +```python +class TodoManager: + def update(self, items: list) -> str: + validated, in_progress_count = [], 0 + for item in items: + status = item.get("status", "pending") + if status == "in_progress": + in_progress_count += 1 + validated.append({"id": item["id"], "text": item["text"], + "status": status}) + if in_progress_count > 1: + raise ValueError("Only one task can be in_progress") + self.items = validated + return self.render() +``` + +2. Инструмент `todo` подключается в карту диспетчеризации так же, как и любой другой. + +```python +TOOL_HANDLERS = { + # ...base tools... + "todo": lambda **kw: TODO.update(kw["items"]), +} +``` + +3. 
Если модель 3 и более раунда не вызывает `todo`, в контекст подмешивается напоминание. + +```python +if rounds_since_todo >= 3 and messages: + last = messages[-1] + if last["role"] == "user" and isinstance(last.get("content"), list): + last["content"].insert(0, { + "type": "text", + "text": "Update your todos.", + }) +``` + +Ограничение «только один `in_progress`» заставляет модель фокусироваться последовательно. Напоминание создаёт ощущение внешней подотчётности. + +## Что изменилось относительно s02 + +| Компонент | Было (s02) | Стало (s03) | +|-----------|------------|-------------| +| Инструменты | 4 | 5 (`+todo`) | +| Планирование | нет | `TodoManager` со статусами | +| Напоминание | нет | `` после 3 раундов | +| Цикл агента | простой dispatch | `+ rounds_since_todo` counter | + +## Попробуйте + +```sh +cd learn-claude-code +python agents/s03_todo_write.py +``` + +1. `Отрефактори файл hello.py: добавь type hints, docstring и main guard` +2. `Создай Python-пакет с __init__.py, utils.py и tests/test_utils.py` +3. `Проверь все Python-файлы и исправь стилевые проблемы` diff --git a/docs/ru/s04-subagent.md b/docs/ru/s04-subagent.md new file mode 100644 index 000000000..47e895d7d --- /dev/null +++ b/docs/ru/s04-subagent.md @@ -0,0 +1,92 @@ +# s04: Субагенты + +`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12` + +> *"Разбивай крупную работу на части; каждой подзадаче — чистый контекст"* — у каждой подзадачи свой независимый `messages[]`, поэтому основной диалог не захламляется. + +## Проблема + +Пока агент работает, массив `messages` растёт. Каждое чтение файла, каждый вывод bash остаётся в контексте навсегда. На вопрос «Какой тестовый фреймворк использует этот проект?» может понадобиться прочитать 5 файлов, но родительскому агенту нужен только ответ: `pytest`. + +## Решение + +``` +Родительский агент Субагент ++------------------+ +------------------+ +| messages=[...] 
| | messages=[] | <-- чистый старт +| | dispatch | | +| tool: task | -----------> | while tool_use: | +| prompt="..." | | call tools | +| | сводка | append results | +| result = "..." | <----------- | return last text | ++------------------+ +------------------+ + +Контекст родителя остаётся чистым. Контекст субагента выбрасывается. +``` + +## Как это работает + +1. Родитель получает инструмент `task`. Дочерний агент получает все базовые инструменты, кроме `task`, чтобы не было рекурсивного бесконтрольного спавна. + +```python +PARENT_TOOLS = CHILD_TOOLS + [ + {"name": "task", + "description": "Spawn a subagent with fresh context.", + "input_schema": { + "type": "object", + "properties": {"prompt": {"type": "string"}}, + "required": ["prompt"], + }}, +] +``` + +2. Субагент стартует с `messages=[]` и крутит свой отдельный цикл. Родителю возвращается только финальный текст. + +```python +def run_subagent(prompt: str) -> str: + sub_messages = [{"role": "user", "content": prompt}] + for _ in range(30): # safety limit + response = client.messages.create( + model=MODEL, system=SUBAGENT_SYSTEM, + messages=sub_messages, + tools=CHILD_TOOLS, max_tokens=8000, + ) + sub_messages.append({"role": "assistant", + "content": response.content}) + if response.stop_reason != "tool_use": + break + results = [] + for block in response.content: + if block.type == "tool_use": + handler = TOOL_HANDLERS.get(block.name) + output = handler(**block.input) + results.append({"type": "tool_result", + "tool_use_id": block.id, + "content": str(output)[:50000]}) + sub_messages.append({"role": "user", "content": results}) + return "".join( + b.text for b in response.content if hasattr(b, "text") + ) or "(нет сводки)" +``` + +Вся история дочернего агента, включая десятки tool call, выбрасывается. Родитель получает один абзац сводки как обычный `tool_result`. 
+ +## Что изменилось относительно s03 + +| Компонент | Было (s03) | Стало (s04) | +|-----------|------------|-------------| +| Инструменты | 5 | 5 базовых + `task` у родителя | +| Контекст | один общий | изоляция родителя и ребёнка | +| Субагент | отсутствует | функция `run_subagent()` | +| Возвращаемое значение | N/A | только текст сводки | + +## Попробуйте + +```sh +cd learn-claude-code +python agents/s04_subagent.py +``` + +1. `Используй подзадачу, чтобы выяснить, какой тестовый фреймворк использует этот проект` +2. `Делегируй чтение всех .py-файлов и верни краткое описание каждого` +3. `Через task создай новый модуль, а затем проверь его отсюда` diff --git a/docs/ru/s05-skill-loading.md b/docs/ru/s05-skill-loading.md new file mode 100644 index 000000000..84c67ba17 --- /dev/null +++ b/docs/ru/s05-skill-loading.md @@ -0,0 +1,107 @@ +# s05: Навыки + +`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12` + +> *"Подгружай знания по мере необходимости, а не заранее"* — знания подмешиваются через `tool_result`, а не через системный промпт заранее. + +## Проблема + +Вы хотите, чтобы агент соблюдал доменные процессы: git-конвенции, паттерны тестирования, чеклисты код-ревью. Если сложить всё это в системный промпт, вы сожжёте кучу токенов на навыки, которые в конкретной задаче вообще не пригодятся. 10 навыков по 2000 токенов каждый — это 20 000 токенов, большая часть которых в текущем запросе просто лишняя. + +## Решение + +``` +Системный промпт (Слой 1 — всегда присутствует): ++--------------------------------------+ +| Ты агент по работе с кодом. | +| Доступные навыки: | +| - git: Помощники по git-процессу | ~100 токенов/навык +| - test: Практики тестирования | ++--------------------------------------+ + +Когда модель вызывает load_skill("git"): ++--------------------------------------+ +| tool_result (Слой 2 — по запросу): | +| | +| Полные инструкции по git... | ~2000 токенов +| Шаг 1: ... 
| +| | ++--------------------------------------+ +``` + +Слой 1: в системном промпте лежат только *имена* навыков, это дёшево. +Слой 2: *полное содержимое* навыка приходит через `tool_result`, только когда оно реально нужно. + +## Как это работает + +1. Каждый навык — это директория с файлом `SKILL.md` и YAML frontmatter. + +``` +skills/ + pdf/ + SKILL.md # ---\n name: pdf\n description: Process PDF files\n ---\n ... + code-review/ + SKILL.md # ---\n name: code-review\n description: Review code\n ---\n ... +``` + +2. `SkillLoader` сканирует `SKILL.md` и использует имя директории как идентификатор навыка. + +```python +class SkillLoader: + def __init__(self, skills_dir: Path): + self.skills = {} + for f in sorted(skills_dir.rglob("SKILL.md")): + text = f.read_text() + meta, body = self._parse_frontmatter(text) + name = meta.get("name", f.parent.name) + self.skills[name] = {"meta": meta, "body": body} + + def get_descriptions(self) -> str: + lines = [] + for name, skill in self.skills.items(): + desc = skill["meta"].get("description", "") + lines.append(f" - {name}: {desc}") + return "\n".join(lines) + + def get_content(self, name: str) -> str: + skill = self.skills.get(name) + if not skill: + return f"Error: Unknown skill '{name}'." + return f"\n{skill['body']}\n" +``` + +3. Слой 1 идёт в системный промпт. Слой 2 — это ещё один обычный обработчик инструмента. + +```python +SYSTEM = f"""Ты агент по работе с кодом в {WORKDIR}. +Доступные навыки: +{SKILL_LOADER.get_descriptions()}""" + +TOOL_HANDLERS = { + # ...base tools... + "load_skill": lambda **kw: SKILL_LOADER.get_content(kw["name"]), +} +``` + +Модель знает, какие навыки вообще доступны, но тянет полный текст только по необходимости. 
+ +## Что изменилось относительно s04 + +| Компонент | Было (s04) | Стало (s05) | +|-----------|------------|-------------| +| Инструменты | 5 (base + task) | 5 (base + `load_skill`) | +| Системный промпт | статическая строка | + описания навыков | +| Знания | отсутствуют | `skills/*/SKILL.md` | +| Инъекция | отсутствует | двухслойная: system + tool_result | + +## Попробуйте + +```sh +cd learn-claude-code +python agents/s05_skill_loading.py +``` + +1. `Какие навыки доступны?` +2. `Загрузи навык agent-builder и следуй его инструкциям` +3. `Мне нужен code review — сначала загрузи подходящий навык` +4. `Собери MCP-сервер с помощью навыка mcp-builder` diff --git a/docs/ru/s06-context-compact.md b/docs/ru/s06-context-compact.md new file mode 100644 index 000000000..19e244188 --- /dev/null +++ b/docs/ru/s06-context-compact.md @@ -0,0 +1,123 @@ +# s06: Сжатие контекста + +`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12` + +> *"Контекст всё равно заполнится; нужно уметь освобождать место"* — трёхслойная стратегия сжатия делает бесконечные сессии возможными. + +## Проблема + +Окно контекста конечно. Один `read_file` на файле в 1000 строк легко стоит ~4000 токенов. Прочитайте 30 файлов и выполните 20 bash-команд — и вы уже упрётесь в 100 000+ токенов. Без сжатия агент не сможет работать на больших кодовых базах. + +## Решение + +Три слоя, от самых мягких к самым агрессивным: + +``` +Каждый ход: ++------------------+ +| Результат инструмента | ++------------------+ + | + v +[Слой 1: микросжатие] (тихо, каждый ход) + Заменяет tool_result старше 3 ходов + на "[Previous: used {tool_name}]" + | + v +[Проверка: токенов > 50000?] + | | + no yes + | | + v v +продолжить [Слой 2: автосжатие] + Сохраняет transcript в .transcripts/ + LLM делает сводку разговора. + Все сообщения заменяются на [сводку]. + | + v + [Слой 3: инструмент сжатия] + Модель сама вызывает compact. + Используется та же сводка, что и в auto_compact. +``` + +## Как это работает + +1. 
**Слой 1 — микросжатие (`micro_compact`)**: перед каждым вызовом LLM старые результаты инструментов заменяются заглушками. + +```python +def micro_compact(messages: list) -> list: + tool_results = [] + for i, msg in enumerate(messages): + if msg["role"] == "user" and isinstance(msg.get("content"), list): + for j, part in enumerate(msg["content"]): + if isinstance(part, dict) and part.get("type") == "tool_result": + tool_results.append((i, j, part)) + if len(tool_results) <= KEEP_RECENT: + return messages + for _, _, part in tool_results[:-KEEP_RECENT]: + if len(part.get("content", "")) > 100: + part["content"] = f"[Previous: used {tool_name}]" + return messages +``` + +2. **Слой 2 — автосжатие (`auto_compact`)**: когда оценка токенов превышает порог, полный transcript сохраняется на диск, после чего LLM просится сделать сводку. + +```python +def auto_compact(messages: list) -> list: + # Save transcript for recovery + transcript_path = TRANSCRIPT_DIR / f"transcript_{int(time.time())}.jsonl" + with open(transcript_path, "w") as f: + for msg in messages: + f.write(json.dumps(msg, default=str) + "\n") + # LLM summarizes + response = client.messages.create( + model=MODEL, + messages=[{"role": "user", "content": + "Summarize this conversation for continuity..." + + json.dumps(messages, default=str)[:80000]}], + max_tokens=2000, + ) + return [ + {"role": "user", "content": f"[Compressed]\n\n{response.content[0].text}"}, + {"role": "assistant", "content": "Understood. Continuing."}, + ] +``` + +3. **Слой 3 — ручной `compact`**: инструмент `compact` запускает то же сжатие по требованию. + +4. Цикл объединяет все три слоя: + +```python +def agent_loop(messages: list): + while True: + micro_compact(messages) # Layer 1 + if estimate_tokens(messages) > THRESHOLD: + messages[:] = auto_compact(messages) # Layer 2 + response = client.messages.create(...) + # ... tool execution ... 
+ if manual_compact: + messages[:] = auto_compact(messages) # Layer 3 +``` + +Полная история не исчезает безвозвратно: она просто уходит из активного контекста в transcripts на диске. + +## Что изменилось относительно s05 + +| Компонент | Было (s05) | Стало (s06) | +|-----------|------------|-------------| +| Инструменты | 5 | 5 (base + `compact`) | +| Управление контекстом | отсутствует | трёхслойное сжатие | +| Микросжатие | отсутствует | старые результаты → placeholders | +| Автосжатие | отсутствует | срабатывает по порогу токенов | +| Transcripts | отсутствуют | сохраняются в `.transcripts/` | + +## Попробуйте + +```sh +cd learn-claude-code +python agents/s06_context_compact.py +``` + +1. `Прочитай по очереди каждый Python-файл в каталоге agents/` +2. `Продолжай читать файлы, пока сжатие не сработает автоматически` +3. `Используй инструмент compact, чтобы вручную сжать разговор` diff --git a/docs/ru/s07-task-system.md b/docs/ru/s07-task-system.md new file mode 100644 index 000000000..568e53938 --- /dev/null +++ b/docs/ru/s07-task-system.md @@ -0,0 +1,131 @@ +# s07: Система задач + +`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12` + +> *"Разбей большую цель на маленькие задачи, упорядочь их и сохрани на диск"* — файловый граф задач с зависимостями становится фундаментом для мультиагентной координации. + +## Проблема + +`TodoManager` из s03 — это плоский чеклист в памяти: без порядка, без зависимостей и почти без состояний. Но реальные цели имеют структуру: задача B зависит от A, задачи C и D можно делать параллельно, а задача E ждёт завершения и C, и D. + +Если связи между задачами не выражены явно, агент не понимает: + +- что уже готово к выполнению; +- что заблокировано; +- что можно делать параллельно. + +А поскольку список живёт только в памяти, сжатие контекста из s06 просто его сотрёт. + +## Решение + +Поднимаем чеклист до **графа задач**, который хранится на диске. 
Каждая задача — отдельный JSON-файл со статусом, зависимостями (`blockedBy`) и обратными связями (`blocks`). Такой граф в любой момент отвечает на три вопроса: + +- **Что готово?** — задачи со статусом `pending` и пустым `blockedBy`. +- **Что заблокировано?** — задачи, ожидающие незавершённых зависимостей. +- **Что уже завершено?** — `completed` задачи, которые автоматически разблокируют зависящие от них. + +``` +.tasks/ + task_1.json {"id":1, "status":"completed"} + task_2.json {"id":2, "blockedBy":[1], "status":"pending"} + task_3.json {"id":3, "blockedBy":[1], "status":"pending"} + task_4.json {"id":4, "blockedBy":[2,3], "status":"pending"} + +Граф задач (DAG): + +----------+ + +--> | task 2 | --+ + | | pending | | ++----------+ +----------+ +--> +----------+ +| task 1 | | task 4 | +| completed| --> +----------+ +--> | blocked | ++----------+ | task 3 | --+ +----------+ + | pending | + +----------+ + +Порядок: task 1 должна завершиться раньше 2 и 3 +Параллелизм: tasks 2 и 3 могут идти одновременно +Зависимости: task 4 ждёт и 2, и 3 +Статусы: pending -> in_progress -> completed +``` + +Этот граф задач становится опорной структурой для всего, что идёт после s07: фоновые задачи (s08), команды агентов (s09+) и изоляция через рабочие деревья (s12) читают и пишут в одну и ту же систему. + +## Как это работает + +1. **TaskManager**: один JSON-файл на задачу, CRUD и граф зависимостей. + +```python +class TaskManager: + def __init__(self, tasks_dir: Path): + self.dir = tasks_dir + self.dir.mkdir(exist_ok=True) + self._next_id = self._max_id() + 1 + + def create(self, subject, description=""): + task = {"id": self._next_id, "subject": subject, + "status": "pending", "blockedBy": [], + "blocks": [], "owner": ""} + self._save(task) + self._next_id += 1 + return json.dumps(task, indent=2) +``` + +2. **Разрешение зависимостей**: завершение задачи убирает её ID из `blockedBy` у всех остальных, автоматически разблокируя зависимые задачи. 
+ +```python +def _clear_dependency(self, completed_id): + for f in self.dir.glob("task_*.json"): + task = json.loads(f.read_text()) + if completed_id in task.get("blockedBy", []): + task["blockedBy"].remove(completed_id) + self._save(task) +``` + +3. **Переходы статусов и wiring зависимостей**: `update` обрабатывает переходы и связи. + +```python +def update(self, task_id, status=None, + add_blocked_by=None, add_blocks=None): + task = self._load(task_id) + if status: + task["status"] = status + if status == "completed": + self._clear_dependency(task_id) + self._save(task) +``` + +4. В карте диспетчеризации появляется четыре инструмента для задач. + +```python +TOOL_HANDLERS = { + # ...base tools... + "task_create": lambda **kw: TASKS.create(kw["subject"]), + "task_update": lambda **kw: TASKS.update(kw["task_id"], kw.get("status")), + "task_list": lambda **kw: TASKS.list_all(), + "task_get": lambda **kw: TASKS.get(kw["task_id"]), +} +``` + +Начиная с s07, для многошаговой работы граф задач становится стандартом. План задач из s03 остаётся удобным вариантом для коротких односеансовых чеклистов. + +## Что изменилось относительно s06 + +| Компонент | Было (s06) | Стало (s07) | +|-----------|------------|-------------| +| Инструменты | 5 | 8 (`task_create/update/list/get`) | +| Модель планирования | плоский чеклист в памяти | граф задач с зависимостями на диске | +| Связи | отсутствуют | рёбра `blockedBy` и `blocks` | +| Статусы | сделано / не сделано | `pending` -> `in_progress` -> `completed` | +| Постоянство | теряется при compact | переживает compact и рестарты | + +## Попробуйте + +```sh +cd learn-claude-code +python agents/s07_task_system.py +``` + +1. `Создай 3 задачи: "Подготовить проект", "Написать код", "Написать тесты". Свяжи их зависимостями по порядку.` +2. `Покажи все задачи и граф их зависимостей` +3. `Заверши задачу 1, а затем снова покажи список задач, чтобы увидеть разблокировку задачи 2` +4. 
`Создай доску задач для рефакторинга: parse -> transform -> emit -> test, где transform и emit могут идти параллельно после parse` diff --git a/docs/ru/s08-background-tasks.md b/docs/ru/s08-background-tasks.md new file mode 100644 index 000000000..967014180 --- /dev/null +++ b/docs/ru/s08-background-tasks.md @@ -0,0 +1,107 @@ +# s08: Фоновые задачи + +`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12` + +> *"Медленные операции можно увести в фон, пока агент думает дальше"* — демон-потоки запускают команды в фоне и подмешивают уведомления по завершении. + +## Проблема + +Некоторые команды работают минутами: `npm install`, `pytest`, `docker build`. В блокирующем цикле модель просто сидит и ждёт. Если пользователь говорит «установи зависимости и пока это идёт, создай config-файл», агент выполняет это последовательно, а не параллельно. + +## Решение + +``` +Основной поток Фоновый поток ++-----------------+ +-----------------+ +| цикл агента | | идёт subprocess | +| ... | | ... | +| [вызов LLM] <---+------ | enqueue(result) | +| ^слив очереди | +-----------------+ ++-----------------+ + +Линия времени: +Агент --[спавн A]--[спавн B]--[другая работа]---- + | | + v v + [A идёт] [B идёт] (параллельно) + | | + +-- результаты подмешиваются перед следующим вызовом LLM --+ +``` + +## Как это работает + +1. `BackgroundManager` хранит задачи и потокобезопасную очередь уведомлений. + +```python +class BackgroundManager: + def __init__(self): + self.tasks = {} + self._notification_queue = [] + self._lock = threading.Lock() +``` + +2. `run()` запускает демон-поток и сразу возвращает управление. + +```python +def run(self, command: str) -> str: + task_id = str(uuid.uuid4())[:8] + self.tasks[task_id] = {"status": "running", "command": command} + thread = threading.Thread( + target=self._execute, args=(task_id, command), daemon=True) + thread.start() + return f"Background task {task_id} started" +``` + +3. 
Когда subprocess завершается, результат попадает в очередь уведомлений. + +```python +def _execute(self, task_id, command): + try: + r = subprocess.run(command, shell=True, cwd=WORKDIR, + capture_output=True, text=True, timeout=300) + output = (r.stdout + r.stderr).strip()[:50000] + except subprocess.TimeoutExpired: + output = "Error: Timeout (300s)" + with self._lock: + self._notification_queue.append({ + "task_id": task_id, "result": output[:500]}) +``` + +4. Перед каждым вызовом LLM агент опустошает очередь уведомлений. + +```python +def agent_loop(messages: list): + while True: + notifs = BG.drain_notifications() + if notifs: + notif_text = "\n".join( + f"[bg:{n['task_id']}] {n['result']}" for n in notifs) + messages.append({"role": "user", + "content": f"<background_results>\n{notif_text}\n" + f"</background_results>"}) + messages.append({"role": "assistant", + "content": "Noted background results."}) + response = client.messages.create(...) +``` + +Сам агентный цикл остаётся однопоточным. В параллель уходит только I/O подзадач и subprocess. + +## Что изменилось относительно s07 + +| Компонент | Было (s07) | Стало (s08) | +|-----------|------------|-------------| +| Инструменты | 8 | 6 (base + `background_run` + `check`) | +| Выполнение | только блокирующее | блокирующее + фоновые потоки | +| Уведомления | отсутствуют | очередь очищается на каждом цикле | +| Параллельность | отсутствует | демон-потоки | + +## Попробуйте + +```sh +cd learn-claude-code +python agents/s08_background_tasks.py +``` + +1. `Запусти "sleep 5 && echo done" в фоне, а пока он работает, создай файл` +2. `Стартуй 3 фоновые задачи: "sleep 2", "sleep 4", "sleep 6". Затем проверь их статус.` +3. 
`Запусти pytest в фоне и продолжай параллельно делать другие вещи` diff --git a/docs/ru/s09-agent-teams.md b/docs/ru/s09-agent-teams.md new file mode 100644 index 000000000..c2177378a --- /dev/null +++ b/docs/ru/s09-agent-teams.md @@ -0,0 +1,129 @@ +# s09: Команды агентов + +`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12` + +> *"Когда задача слишком велика для одного, делегируй её напарникам"* — постоянные напарники и асинхронные почтовые ящики. + +## Проблема + +Субагенты из s04 — одноразовые: создали, поработали, вернули сводку, умерли. У них нет личности, нет памяти между вызовами. Фоновые задачи из s08 умеют запускать shell-команды, но не принимают LLM-управляемые решения. + +Для настоящей командной работы нужны: + +1. постоянные агенты, живущие дольше одного запроса; +2. идентичность и управление жизненным циклом; +3. канал связи между агентами. + +## Решение + +``` +Жизненный цикл напарника: + запуск -> РАБОТАЕТ -> ОЖИДАНИЕ -> РАБОТАЕТ -> ... -> ЗАВЕРШЕНИЕ + +Коммуникация: + .team/ + config.json <- состав команды + статусы + inbox/ + alice.jsonl <- append-only, drain-on-read + bob.jsonl + lead.jsonl + + +--------+ send("alice","bob","...") +--------+ + | alice | -----------------------------> | bob | + | loop | bob.jsonl << {json_line} | loop | + +--------+ +--------+ + ^ | + | BUS.read_inbox("alice") | + +---- alice.jsonl -> read + drain ---------+ +``` + +## Как это работает + +1. `TeammateManager` поддерживает `config.json` со списком участников команды. + +```python +class TeammateManager: + def __init__(self, team_dir: Path): + self.dir = team_dir + self.dir.mkdir(exist_ok=True) + self.config_path = self.dir / "config.json" + self.config = self._load_config() + self.threads = {} +``` + +2. `spawn()` создаёт нового участника и запускает его агентный цикл в отдельном потоке. 
+ +```python +def spawn(self, name: str, role: str, prompt: str) -> str: + member = {"name": name, "role": role, "status": "working"} + self.config["members"].append(member) + self._save_config() + thread = threading.Thread( + target=self._teammate_loop, + args=(name, role, prompt), daemon=True) + thread.start() + return f"Spawned teammate '{name}' (role: {role})" +``` + +3. `MessageBus` использует append-only JSONL-входящий ящик. `send()` дописывает строку, `read_inbox()` читает всё и очищает файл. + +```python +class MessageBus: + def send(self, sender, to, content, msg_type="message", extra=None): + msg = {"type": msg_type, "from": sender, + "content": content, "timestamp": time.time()} + if extra: + msg.update(extra) + with open(self.dir / f"{to}.jsonl", "a") as f: + f.write(json.dumps(msg) + "\n") + + def read_inbox(self, name): + path = self.dir / f"{name}.jsonl" + if not path.exists(): return "[]" + msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l] + path.write_text("") # drain + return json.dumps(msgs, indent=2) +``` + +4. Каждый напарник перед каждым LLM-вызовом проверяет входящий ящик и добавляет новые сообщения в контекст. + +```python +def _teammate_loop(self, name, role, prompt): + messages = [{"role": "user", "content": prompt}] + for _ in range(50): + inbox = BUS.read_inbox(name) + if inbox != "[]": + messages.append({"role": "user", + "content": f"{inbox}"}) + messages.append({"role": "assistant", + "content": "Noted inbox messages."}) + response = client.messages.create(...) + if response.stop_reason != "tool_use": + break + # execute tools, append results... 
+ self._find_member(name)["status"] = "idle" +``` + +## Что изменилось относительно s08 + +| Компонент | Было (s08) | Стало (s09) | +|-----------|------------|-------------| +| Инструменты | 6 | 9 (`+spawn/send/read_inbox`) | +| Агенты | один агент | lead + N напарников | +| Постоянство | отсутствует | `config.json` + JSONL-входящий ящик | +| Потоки | только фоновые команды | полноценный агентный цикл на поток | +| Жизненный цикл | fire-and-forget | `idle -> working -> idle` | +| Коммуникация | отсутствует | личные сообщения + broadcast | + +## Попробуйте + +```sh +cd learn-claude-code +python agents/s09_agent_teams.py +``` + +1. `Заспавнь alice как coder и bob как tester. Пусть alice отправит bob сообщение.` +2. `Разошли всем напарникам сообщение "status update: phase 1 complete"` +3. `Проверь, есть ли сообщения во входящем ящике лида` +4. `Введи /team, чтобы посмотреть состав команды и статусы` +5. `Введи /inbox, чтобы вручную проверить входящий ящик лидера` diff --git a/docs/ru/s10-team-protocols.md b/docs/ru/s10-team-protocols.md new file mode 100644 index 000000000..40683baec --- /dev/null +++ b/docs/ru/s10-team-protocols.md @@ -0,0 +1,104 @@ +# s10: Командные протоколы + +`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12` + +> *"Напарникам нужны общие правила общения"* — один паттерн «запрос-ответ» управляет всеми переговорами. + +## Проблема + +В s09 напарники умеют работать и общаться, но у них нет структурированной координации. + +**Shutdown**: если просто убить поток, файлы могут остаться наполовину записанными, а `config.json` — устаревшим. Нужен handshake: лидер просит завершиться, напарник либо соглашается и аккуратно выходит, либо отклоняет запрос и продолжает работу. + +**Plan approval**: если лидер говорит «отрефактори auth-модуль», напарник начинает сразу. Но для рискованных изменений лидер должен сначала посмотреть план. 
+ +Обе ситуации имеют одну и ту же форму: одна сторона отправляет запрос с уникальным ID, другая отвечает, ссылаясь на тот же ID. + +## Решение + +``` +Протокол завершения Протокол утверждения плана +=================== ========================= + +Лидер Напарник Напарник Лидер + | | | | + |--shutdown_req-->| |--plan_req------>| + | {req_id:"abc"} | | {req_id:"xyz"} | + | | | | + |<--shutdown_resp-| |<--plan_resp-----| + | {req_id:"abc", | | {req_id:"xyz", | + | approve:true} | | approve:true} | + +Общий FSM: + [pending] --approve--> [approved] + [pending] --reject---> [rejected] + +Трекеры: + shutdown_requests = {req_id: {target, status}} + plan_requests = {req_id: {from, plan, status}} +``` + +## Как это работает + +1. Лидер инициирует shutdown: генерирует `request_id` и отправляет запрос через входящий ящик. + +```python +shutdown_requests = {} + +def handle_shutdown_request(teammate: str) -> str: + req_id = str(uuid.uuid4())[:8] + shutdown_requests[req_id] = {"target": teammate, "status": "pending"} + BUS.send("lead", teammate, "Please shut down gracefully.", + "shutdown_request", {"request_id": req_id}) + return f"Shutdown request {req_id} sent (status: pending)" +``` + +2. Напарник получает запрос и отвечает approve/reject. + +```python +if tool_name == "shutdown_response": + req_id = args["request_id"] + approve = args["approve"] + shutdown_requests[req_id]["status"] = "approved" if approve else "rejected" + BUS.send(sender, "lead", args.get("reason", ""), + "shutdown_response", + {"request_id": req_id, "approve": approve}) +``` + +3. Approval плана устроен так же. Напарник отправляет план, создавая `request_id`, а лидер отвечает на тот же ID. 
+ +```python +plan_requests = {} + +def handle_plan_review(request_id, approve, feedback=""): + req = plan_requests[request_id] + req["status"] = "approved" if approve else "rejected" + BUS.send("lead", req["from"], feedback, + "plan_approval_response", + {"request_id": request_id, "approve": approve}) +``` + +Один FSM, два применения. Машина состояний `pending -> approved | rejected` подходит для любого протокола «запрос-ответ». + +## Что изменилось относительно s09 + +| Компонент | Было (s09) | Стало (s10) | +|-----------|------------|-------------| +| Инструменты | 9 | 12 (`+shutdown_req/resp +plan`) | +| Shutdown | только естественный выход | согласованный протокол «запрос-ответ» | +| Gating плана | отсутствует | submit/review с подтверждением | +| Корреляция | отсутствует | `request_id` на каждый запрос | +| FSM | отсутствует | `pending -> approved/rejected` | + +## Попробуйте + +```sh +cd learn-claude-code +python agents/s10_team_protocols.py +``` + +1. `Заспавнь alice как coder, а затем запроси её завершение` +2. `Покажи список напарников и проверь статус alice после одобрения shutdown` +3. `Заспавнь bob с рискованной задачей на рефакторинг. Посмотри его план и отклони его.` +4. `Заспавнь charlie, попроси его отправить план, а затем одобри его` +5. `Введи /team, чтобы следить за статусами` diff --git a/docs/ru/s11-autonomous-agents.md b/docs/ru/s11-autonomous-agents.md new file mode 100644 index 000000000..164c26c3c --- /dev/null +++ b/docs/ru/s11-autonomous-agents.md @@ -0,0 +1,140 @@ +# s11: Автономные агенты + +`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12` + +> *"Напарники сами смотрят на доску и забирают задачи"* — лидеру больше не нужно вручную раздавать каждую задачу. + +## Проблема + +В s09-s10 напарники работают только по прямому указанию. Лидеру приходится спавнить каждого с конкретным запросом. Есть 10 ничьих задач на доске? Значит, лидер назначит все 10 вручную. Такой подход не масштабируется. 
+ +Настоящая автономность выглядит иначе: напарники сами смотрят на доску задач, сами забирают ничьи задачи, выполняют их и затем ищут следующую работу. + +Есть ещё один тонкий момент: после сжатия контекста из s06 агент может забыть, кто он такой. Это исправляется повторным внедрением его идентичности. + +## Решение + +``` +Жизненный цикл напарника с idle-циклом: + ++-------+ +| запуск | ++---+---+ + | + v ++-------+ tool_use +-------+ +| РАБОТА| <------------- | LLM | ++---+---+ +-------+ + | + | stop_reason != tool_use (или вызван idle tool) + v ++--------+ +| ОЖИД. | опрос каждые 5 с, максимум 60 с ++---+----+ + | + +---> проверить inbox --> есть письмо? ---> РАБОТА + | + +---> проверить .tasks/ -> есть свободная? -> забрать -> РАБОТА + | + +---> тайм-аут 60 c --------------------> ЗАВЕРШЕНИЕ + +Повторная инъекция идентичности после compress: + if len(messages) <= 3: + messages.insert(0, identity_block) +``` + +## Как это работает + +1. Цикл напарника делится на две фазы: `WORK` и `IDLE`. Когда LLM перестаёт вызывать инструменты или сама вызывает `idle`, агент переходит в ожидание. + +```python +def _loop(self, name, role, prompt): + while True: + # -- WORK PHASE -- + messages = [{"role": "user", "content": prompt}] + for _ in range(50): + response = client.messages.create(...) + if response.stop_reason != "tool_use": + break + # execute tools... + if idle_requested: + break + + # -- IDLE PHASE -- + self._set_status(name, "idle") + resume = self._idle_poll(name, messages) + if not resume: + self._set_status(name, "shutdown") + return + self._set_status(name, "working") +``` + +2. В idle-фазе агент циклически опрашивает входящий ящик и доску задач. 
+ +```python +def _idle_poll(self, name, messages): + for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12 + time.sleep(POLL_INTERVAL) + inbox = BUS.read_inbox(name) + if inbox != "[]": + messages.append({"role": "user", + "content": f"{inbox}"}) + return True + unclaimed = scan_unclaimed_tasks() + if unclaimed: + claim_task(unclaimed[0]["id"], name) + messages.append({"role": "user", + "content": f"Task #{unclaimed[0]['id']}: " + f"{unclaimed[0]['subject']}"}) + return True + return False # timeout -> shutdown +``` + +3. Сканирование доски задач: ищем `pending`, без владельца и без блокировок. + +```python +def scan_unclaimed_tasks() -> list: + unclaimed = [] + for f in sorted(TASKS_DIR.glob("task_*.json")): + task = json.loads(f.read_text()) + if (task.get("status") == "pending" + and not task.get("owner") + and not task.get("blockedBy")): + unclaimed.append(task) + return unclaimed +``` + +4. Повторная инъекция идентичности: если сообщений слишком мало, значит, скорее всего, сработал compact, и агенту нужно напомнить, кто он. + +```python +if len(messages) <= 3: + messages.insert(0, {"role": "user", + "content": f"You are '{name}', role: {role}, " + f"team: {team_name}. Continue your work."}) + messages.insert(1, {"role": "assistant", + "content": f"I am {name}. Continuing."}) +``` + +## Что изменилось относительно s10 + +| Компонент | Было (s10) | Стало (s11) | +|-----------|------------|-------------| +| Инструменты | 12 | 14 (`+idle`, `+claim_task`) | +| Автономность | лидер раздаёт работу | самоорганизация | +| Фаза ожидания | отсутствует | опрос входящего ящика и доски задач | +| Захват задач | только вручную | автозахват свободных задач | +| Идентичность | только системный промпт | + повторное внедрение после compact | +| Тайм-аут | отсутствует | 60 секунд простоя -> автозавершение | + +## Попробуйте + +```sh +cd learn-claude-code +python agents/s11_autonomous_agents.py +``` + +1. `Создай на доске 3 задачи, затем заспавнь alice и bob. 
Посмотри, как они сами их заберут.` +2. `Заспавнь напарника-coder и позволь ему самому найти работу на доске задач` +3. `Создай задачи с зависимостями и посмотри, как напарники соблюдают порядок блокировок` +4. `Введи /tasks, чтобы посмотреть доску задач с владельцами` +5. `Введи /team, чтобы увидеть, кто сейчас работает, а кто простаивает` diff --git a/docs/ru/s12-worktree-task-isolation.md b/docs/ru/s12-worktree-task-isolation.md new file mode 100644 index 000000000..90658824a --- /dev/null +++ b/docs/ru/s12-worktree-task-isolation.md @@ -0,0 +1,120 @@ +# s12: Изоляция задач и рабочих деревьев + +`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]` + +> *"Каждый работает в своей директории и никому не мешает"* — задачи управляют целями, рабочие деревья управляют каталогами, а связываются через ID. + +## Проблема + +К s11 агенты уже умеют автоматически забирать и завершать задачи. Но все задачи всё ещё выполняются в одной общей директории. Если два агента одновременно рефакторят разные модули, они начнут сталкиваться: агент A редактирует `config.py`, агент B тоже редактирует `config.py`, unstaged changes смешиваются, а откатить это чисто уже нельзя. + +Доска задач знает *что* нужно сделать, но ничего не говорит о том, *где* это делать. Исправление — дать каждой задаче собственную директорию `git worktree`. Задачи управляют целями, рабочие деревья — контекстом выполнения. Связка идёт по ID задачи. 
+ +## Решение + +``` +Контур управления (.tasks/) Контур выполнения (.worktrees/) ++------------------+ +------------------------+ +| task_1.json | | auth-refactor/ | +| status: in_progress <------> branch: wt/auth-refactor +| worktree: "auth-refactor" | task_id: 1 | ++------------------+ +------------------------+ +| task_2.json | | ui-login/ | +| status: pending <------> branch: wt/ui-login +| worktree: "ui-login" | task_id: 2 | ++------------------+ +------------------------+ + | + index.json (реестр worktree) + events.jsonl (журнал жизненного цикла) + +Машины состояний: + Задача: pending -> in_progress -> completed + Worktree: absent -> active -> removed | kept +``` + +## Как это работает + +1. **Создаём задачу.** Сначала сохраняем цель. + +```python +TASKS.create("Implement auth refactor") +# -> .tasks/task_1.json status=pending worktree="" +``` + +2. **Создаём worktree и привязываем к задаче.** Если передать `task_id`, задача автоматически перейдёт в `in_progress`. + +```python +WORKTREES.create("auth-refactor", task_id=1) +# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD +# -> новая запись в index.json, а task_1.json получает worktree="auth-refactor" +``` + +Привязка обновляет состояние с обеих сторон: + +```python +def bind_worktree(self, task_id, worktree): + task = self._load(task_id) + task["worktree"] = worktree + if task["status"] == "pending": + task["status"] = "in_progress" + self._save(task) +``` + +3. **Выполняем команды внутри worktree.** `cwd` указывает на изолированную директорию. + +```python +subprocess.run(command, shell=True, cwd=worktree_path, + capture_output=True, text=True, timeout=300) +``` + +4. **Закрытие работы.** Есть два варианта: + +- `worktree_keep(name)` — сохранить директорию на потом. +- `worktree_remove(name, complete_task=True)` — удалить директорию, завершить связанную задачу и сгенерировать событие. Один вызов закрывает и окружение, и задачу. 
+ +```python +def remove(self, name, force=False, complete_task=False): + self._run_git(["worktree", "remove", wt["path"]]) + if complete_task and wt.get("task_id") is not None: + self.tasks.update(wt["task_id"], status="completed") + self.tasks.unbind_worktree(wt["task_id"]) + self.events.emit("task.completed", ...) +``` + +5. **Поток событий.** Каждый шаг жизненного цикла дописывается в `.worktrees/events.jsonl`: + +```json +{ + "event": "worktree.remove.after", + "task": {"id": 1, "status": "completed"}, + "worktree": {"name": "auth-refactor", "status": "removed"}, + "ts": 1730000000 +} +``` + +Генерируемые события: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`. + +После падения процесса состояние восстанавливается по `.tasks/` и `.worktrees/index.json` на диске. Память разговора эфемерна, файловое состояние — долговечно. + +## Что изменилось относительно s11 + +| Компонент | Было (s11) | Стало (s12) | +|-----------|------------|-------------| +| Координация | доска задач (`owner/status`) | доска задач + явная привязка к worktree | +| Область выполнения | общая директория | изолированная директория на задачу | +| Восстановление | только статус задач | статус задач + индекс рабочих деревьев | +| Закрытие | завершение задачи | завершение задачи + явный `keep/remove` | +| Наблюдаемость жизненного цикла | неявно через логи | явные события в `.worktrees/events.jsonl` | + +## Попробуйте + +```sh +cd learn-claude-code +python agents/s12_worktree_task_isolation.py +``` + +1. `Создай задачи для backend auth и frontend login page, затем покажи список задач` +2. `Создай worktree "auth-refactor" для задачи 1, а затем привяжи задачу 2 к новому worktree "ui-login"` +3. `Запусти "git status --short" внутри worktree "auth-refactor"` +4. `Сохрани worktree "ui-login", затем покажи список worktree и проверь события` +5. 
`Удали worktree "auth-refactor" с complete_task=true, затем покажи задачи, worktree и события` diff --git a/web/scripts/extract-content.ts b/web/scripts/extract-content.ts index 6e35badd9..b7d802fe9 100644 --- a/web/scripts/extract-content.ts +++ b/web/scripts/extract-content.ts @@ -103,7 +103,9 @@ function countLoc(lines: string[]): number { // docs/en/s01-the-agent-loop.md -> "en" // docs/zh/s01-the-agent-loop.md -> "zh" // docs/ja/s01-the-agent-loop.md -> "ja" -function detectLocale(relPath: string): "en" | "zh" | "ja" { +// docs/ru/s01-the-agent-loop.md -> "ru" +function detectLocale(relPath: string): "en" | "zh" | "ja" | "ru" { + if (relPath.startsWith("ru/") || relPath.startsWith("ru\\")) return "ru"; if (relPath.startsWith("zh/") || relPath.startsWith("zh\\")) return "zh"; if (relPath.startsWith("ja/") || relPath.startsWith("ja\\")) return "ja"; return "en"; @@ -215,11 +217,11 @@ function main() { }); } - // 4. Read doc files from locale subdirectories (en/, zh/, ja/) + // 4. Read doc files from locale subdirectories (en/, zh/, ja/, ru/) const docs: DocContent[] = []; if (fs.existsSync(DOCS_DIR)) { - const localeDirs = ["en", "zh", "ja"]; + const localeDirs = ["en", "zh", "ja", "ru"]; let totalDocFiles = 0; for (const locale of localeDirs) { @@ -245,7 +247,12 @@ function main() { const titleMatch = content.match(/^#\s+(.+)$/m); const title = titleMatch ? 
titleMatch[1] : filename; - docs.push({ version, locale: locale as "en" | "zh" | "ja", title, content }); + docs.push({ + version, + locale: locale as "en" | "zh" | "ja" | "ru", + title, + content, + }); } } diff --git a/web/src/app/[locale]/(learn)/[version]/diff/diff-content.tsx b/web/src/app/[locale]/(learn)/[version]/diff/diff-content.tsx index d6e21011e..609c86ad4 100644 --- a/web/src/app/[locale]/(learn)/[version]/diff/diff-content.tsx +++ b/web/src/app/[locale]/(learn)/[version]/diff/diff-content.tsx @@ -2,13 +2,18 @@ import { useMemo } from "react"; import Link from "next/link"; -import { useLocale } from "@/lib/i18n"; +import { useLocale, useTranslations } from "@/lib/i18n"; import { VERSION_META } from "@/lib/constants"; +import { + getTranslatedLayerLabel, + getTranslatedSessionTitle, + getTranslatedVersionField, +} from "@/lib/version-i18n"; import { Card, CardHeader, CardTitle } from "@/components/ui/card"; import { LayerBadge } from "@/components/ui/badge"; import { CodeDiff } from "@/components/diff/code-diff"; -import { ArrowLeft, Plus, Minus, FileCode, Wrench, Box, FunctionSquare } from "lucide-react"; -import type { AgentVersion, VersionDiff, VersionIndex } from "@/types/agent-data"; +import { ArrowLeft, FileCode, Wrench, Box, FunctionSquare } from "lucide-react"; +import type { VersionIndex } from "@/types/agent-data"; import versionData from "@/data/generated/versions.json"; const data = versionData as VersionIndex; @@ -19,22 +24,57 @@ interface DiffPageContentProps { export function DiffPageContent({ version }: DiffPageContentProps) { const locale = useLocale(); + const tVersion = useTranslations("version"); + const tLayer = useTranslations("layer_labels"); + const tSession = useTranslations("sessions"); + const tMeta = useTranslations("version_meta"); const meta = VERSION_META[version]; + const copy = + locale === "ru" + ? 
{ + versionNotFound: "Версия не найдена.", + backToTimeline: "Назад к пути", + backToVersion: "Назад к", + noPrevious: "Это первая версия, поэтому сравнивать пока не с чем.", + locDelta: "Изменение LOC", + newTools: "Новые инструменты", + newClasses: "Новые классы", + newFunctions: "Новые функции", + lines: "строк", + tools: "инструментов", + sourceDiff: "Дифф исходного кода", + } + : { + versionNotFound: "Version not found.", + backToTimeline: "Back to timeline", + backToVersion: "Back to", + noPrevious: "This is the first version -- there is no previous version to compare against.", + locDelta: "LOC Delta", + newTools: "New Tools", + newClasses: "New Classes", + newFunctions: "New Functions", + lines: "lines", + tools: "tools", + sourceDiff: "Source Code Diff", + }; const { currentVersion, prevVersion, diff } = useMemo(() => { const current = data.versions.find((v) => v.id === version); const prevId = meta?.prevVersion; const prev = prevId ? data.versions.find((v) => v.id === prevId) : null; - const d = data.diffs.find((d) => d.to === version); - return { currentVersion: current, prevVersion: prev, diff: d }; + const currentDiff = data.diffs.find((d) => d.to === version); + return { currentVersion: current, prevVersion: prev, diff: currentDiff }; }, [version, meta]); if (!meta || !currentVersion) { return (
-

Version not found.

- - Back to timeline +

{copy.versionNotFound}

+ + {copy.backToTimeline}
); @@ -48,12 +88,12 @@ export function DiffPageContent({ version }: DiffPageContentProps) { className="mb-6 inline-flex items-center gap-1 text-sm text-zinc-500 hover:text-zinc-700 dark:hover:text-zinc-300" > - Back to {meta.title} + {copy.backToVersion} {getTranslatedSessionTitle(tSession, version, meta.title)} -

{meta.title}

-

- This is the first version -- there is no previous version to compare against. -

+

+ {getTranslatedSessionTitle(tSession, version, meta.title)} +

+

{copy.noPrevious}

); } @@ -67,33 +107,42 @@ export function DiffPageContent({ version }: DiffPageContentProps) { className="mb-6 inline-flex items-center gap-1 text-sm text-zinc-500 hover:text-zinc-700 dark:hover:text-zinc-300" > - Back to {meta.title} + {copy.backToVersion} {getTranslatedSessionTitle(tSession, version, meta.title)} - {/* Header */}

- {prevMeta?.title || prevVersion.id} → {meta.title} + {prevMeta + ? getTranslatedSessionTitle(tSession, prevVersion.id, prevMeta.title) + : prevVersion.id}{" "} + → {getTranslatedSessionTitle(tSession, version, meta.title)}

- {prevVersion.id} ({prevVersion.loc} LOC) → {version} ({currentVersion.loc} LOC) + {prevVersion.id} ({prevVersion.loc} {tVersion("loc")}) → {version} ( + {currentVersion.loc} {tVersion("loc")})

- {/* Structural Diff */}
- LOC Delta + {copy.locDelta}
- = 0 ? "text-green-600 dark:text-green-400" : "text-red-600 dark:text-red-400"}> - {diff.locDelta >= 0 ? "+" : ""}{diff.locDelta} + = 0 + ? "text-green-600 dark:text-green-400" + : "text-red-600 dark:text-red-400" + } + > + {diff.locDelta >= 0 ? "+" : ""} + {diff.locDelta} - lines + {copy.lines}
@@ -101,7 +150,7 @@ export function DiffPageContent({ version }: DiffPageContentProps) {
- New Tools + {copy.newTools}
@@ -110,7 +159,10 @@ export function DiffPageContent({ version }: DiffPageContentProps) { {diff.newTools.length > 0 && (
{diff.newTools.map((tool) => ( - + {tool} ))} @@ -122,7 +174,7 @@ export function DiffPageContent({ version }: DiffPageContentProps) {
- New Classes + {copy.newClasses}
@@ -131,7 +183,10 @@ export function DiffPageContent({ version }: DiffPageContentProps) { {diff.newClasses.length > 0 && (
{diff.newClasses.map((cls) => ( - + {cls} ))} @@ -143,7 +198,7 @@ export function DiffPageContent({ version }: DiffPageContentProps) {
- New Functions + {copy.newFunctions}
@@ -152,7 +207,10 @@ export function DiffPageContent({ version }: DiffPageContentProps) { {diff.newFunctions.length > 0 && (
{diff.newFunctions.map((fn) => ( - + {fn} ))} @@ -161,35 +219,51 @@ export function DiffPageContent({ version }: DiffPageContentProps) {
- {/* Version Info Comparison */}
- {prevMeta?.title || prevVersion.id} -

{prevMeta?.subtitle}

+ + {prevMeta + ? getTranslatedSessionTitle(tSession, prevVersion.id, prevMeta.title) + : prevVersion.id} + +

+ {prevMeta + ? getTranslatedVersionField(tMeta, prevVersion.id, "subtitle", prevMeta.subtitle) + : ""} +

-

{prevVersion.loc} LOC

-

{prevVersion.tools.length} tools: {prevVersion.tools.join(", ")}

- {prevVersion.layer} +

{prevVersion.loc} {tVersion("loc")}

+

+ {prevVersion.tools.length} {copy.tools}: {prevVersion.tools.join(", ")} +

+ + {getTranslatedLayerLabel(tLayer, prevVersion.layer, prevVersion.layer)} +
- {meta.title} -

{meta.subtitle}

+ {getTranslatedSessionTitle(tSession, version, meta.title)} +

+ {getTranslatedVersionField(tMeta, version, "subtitle", meta.subtitle)} +

-

{currentVersion.loc} LOC

-

{currentVersion.tools.length} tools: {currentVersion.tools.join(", ")}

- {currentVersion.layer} +

{currentVersion.loc} {tVersion("loc")}

+

+ {currentVersion.tools.length} {copy.tools}: {currentVersion.tools.join(", ")} +

+ + {getTranslatedLayerLabel(tLayer, currentVersion.layer, currentVersion.layer)} +
- {/* Code Diff */}
-

Source Code Diff

+

{copy.sourceDiff}

({ version })); @@ -23,7 +28,9 @@ export default async function VersionPage({ if (!versionData || !meta) { return (
-

Version not found

+

+ {locale === "ru" ? "Версия не найдена" : "Version not found"} +

{version}

); @@ -32,6 +39,7 @@ export default async function VersionPage({ const t = getTranslations(locale, "version"); const tSession = getTranslations(locale, "sessions"); const tLayer = getTranslations(locale, "layer_labels"); + const tMeta = getTranslations(locale, "version_meta"); const layer = LAYERS.find((l) => l.id === meta.layer); const pathIndex = LEARNING_PATH.indexOf(version as typeof LEARNING_PATH[number]); @@ -49,26 +57,30 @@ export default async function VersionPage({ {version} -

{tSession(version) || meta.title}

+

+ {getTranslatedSessionTitle(tSession, version, meta.title)} +

{layer && ( - {tLayer(layer.id)} + + {getTranslatedLayerLabel(tLayer, layer.id, layer.label)} + )}

- {meta.subtitle} + {getTranslatedVersionField(tMeta, version, "subtitle", meta.subtitle)}

{versionData.loc} LOC {versionData.tools.length} {t("tools")} {meta.coreAddition && ( - {meta.coreAddition} + {getTranslatedVersionField(tMeta, version, "coreAddition", meta.coreAddition)} )}
{meta.keyInsight && (
- {meta.keyInsight} + {getTranslatedVersionField(tMeta, version, "keyInsight", meta.keyInsight)}
)} @@ -94,7 +106,11 @@ export default async function VersionPage({
{t("prev")}
- {prevVersion} - {tSession(prevVersion) || VERSION_META[prevVersion]?.title} + {prevVersion} - {getTranslatedSessionTitle( + tSession, + prevVersion, + VERSION_META[prevVersion]?.title || prevVersion + )}
@@ -109,7 +125,11 @@ export default async function VersionPage({
{t("next")}
- {tSession(nextVersion) || VERSION_META[nextVersion]?.title} - {nextVersion} + {getTranslatedSessionTitle( + tSession, + nextVersion, + VERSION_META[nextVersion]?.title || nextVersion + )} - {nextVersion}
diff --git a/web/src/app/[locale]/(learn)/compare/page.tsx b/web/src/app/[locale]/(learn)/compare/page.tsx index a38a4204e..57458cb0b 100644 --- a/web/src/app/[locale]/(learn)/compare/page.tsx +++ b/web/src/app/[locale]/(learn)/compare/page.tsx @@ -1,8 +1,13 @@ "use client"; import { useState, useMemo } from "react"; -import { useLocale, useTranslations } from "@/lib/i18n"; +import { useTranslations } from "@/lib/i18n"; import { LEARNING_PATH, VERSION_META } from "@/lib/constants"; +import { + getTranslatedLayerLabel, + getTranslatedSessionTitle, + getTranslatedVersionField, +} from "@/lib/version-i18n"; import { Card, CardHeader, CardTitle } from "@/components/ui/card"; import { LayerBadge } from "@/components/ui/badge"; import { CodeDiff } from "@/components/diff/code-diff"; @@ -15,7 +20,10 @@ const data = versionData as VersionIndex; export default function ComparePage() { const t = useTranslations("compare"); - const locale = useLocale(); + const tLayer = useTranslations("layer_labels"); + const tSession = useTranslations("sessions"); + const tMeta = useTranslations("version_meta"); + const tVersion = useTranslations("version"); const [versionA, setVersionA] = useState(""); const [versionB, setVersionB] = useState(""); @@ -68,10 +76,10 @@ export default function ComparePage() { onChange={(e) => setVersionA(e.target.value)} className="w-full rounded-lg border border-zinc-300 bg-white px-3 py-2 text-sm dark:border-zinc-600 dark:bg-zinc-800 dark:text-zinc-200" > - + {LEARNING_PATH.map((v) => ( ))} @@ -88,10 +96,10 @@ export default function ComparePage() { onChange={(e) => setVersionB(e.target.value)} className="w-full rounded-lg border border-zinc-300 bg-white px-3 py-2 text-sm dark:border-zinc-600 dark:bg-zinc-800 dark:text-zinc-200" > - + {LEARNING_PATH.map((v) => ( ))} @@ -105,24 +113,40 @@ export default function ComparePage() {
- {metaA?.title || versionA} -

{metaA?.subtitle}

+ + {metaA ? getTranslatedSessionTitle(tSession, versionA, metaA.title) : versionA} + +

+ {metaA ? getTranslatedVersionField(tMeta, versionA, "subtitle", metaA.subtitle) : ""} +

-

{infoA.loc} LOC

-

{infoA.tools.length} tools

- {metaA && {metaA.layer}} +

{infoA.loc} {tVersion("loc")}

+

{infoA.tools.length} {t("tools_label")}

+ {metaA && ( + + {getTranslatedLayerLabel(tLayer, metaA.layer, metaA.layer)} + + )}
- {metaB?.title || versionB} -

{metaB?.subtitle}

+ + {metaB ? getTranslatedSessionTitle(tSession, versionB, metaB.title) : versionB} + +

+ {metaB ? getTranslatedVersionField(tMeta, versionB, "subtitle", metaB.subtitle) : ""} +

-

{infoB.loc} LOC

-

{infoB.tools.length} tools

- {metaB && {metaB.layer}} +

{infoB.loc} {tVersion("loc")}

+

{infoB.tools.length} {t("tools_label")}

+ {metaB && ( + + {getTranslatedLayerLabel(tLayer, metaB.layer, metaB.layer)} + + )}
@@ -133,13 +157,13 @@ export default function ComparePage() {

- {metaA?.title || versionA} + {metaA ? getTranslatedSessionTitle(tSession, versionA, metaA.title) : versionA}

- {metaB?.title || versionB} + {metaB ? getTranslatedSessionTitle(tSession, versionB, metaB.title) : versionB}

@@ -235,7 +259,7 @@ export default function ComparePage() {

- {t("only_in")} {metaA?.title || versionA} + {t("only_in")} {metaA ? getTranslatedSessionTitle(tSession, versionA, metaA.title) : versionA}

{comparison.toolsOnlyA.length === 0 ? (

{t("none")}

@@ -267,7 +291,7 @@ export default function ComparePage() {

- {t("only_in")} {metaB?.title || versionB} + {t("only_in")} {metaB ? getTranslatedSessionTitle(tSession, versionB, metaB.title) : versionB}

{comparison.toolsOnlyB.length === 0 ? (

{t("none")}

diff --git a/web/src/app/[locale]/(learn)/layers/page.tsx b/web/src/app/[locale]/(learn)/layers/page.tsx index ceeee9245..e8fcb1866 100644 --- a/web/src/app/[locale]/(learn)/layers/page.tsx +++ b/web/src/app/[locale]/(learn)/layers/page.tsx @@ -3,7 +3,12 @@ import Link from "next/link"; import { useTranslations, useLocale } from "@/lib/i18n"; import { LAYERS, VERSION_META } from "@/lib/constants"; -import { Card, CardHeader, CardTitle } from "@/components/ui/card"; +import { + getTranslatedLayerLabel, + getTranslatedSessionTitle, + getTranslatedVersionField, +} from "@/lib/version-i18n"; +import { Card } from "@/components/ui/card"; import { LayerBadge } from "@/components/ui/badge"; import { cn } from "@/lib/utils"; import { ChevronRight } from "lucide-react"; @@ -30,6 +35,10 @@ const LAYER_HEADER_BG: Record = { export default function LayersPage() { const t = useTranslations("layers"); + const tLayer = useTranslations("layer_labels"); + const tSession = useTranslations("sessions"); + const tMeta = useTranslations("version_meta"); + const tVersion = useTranslations("version"); const locale = useLocale(); return ( @@ -56,14 +65,12 @@ export default function LayersPage() { LAYER_BORDER_CLASSES[layer.id] )} > - {/* Layer header */}

- L{index + 1} - {" "} - {layer.label} + L{index + 1}{" "} + {getTranslatedLayerLabel(tLayer, layer.id, layer.label)}

{t(layer.id)} @@ -71,28 +78,25 @@ export default function LayersPage() {

- {/* Version cards within this layer */}
{versionInfos.map(({ id, info, meta }) => ( - +
{id} - {layer.id} + + {getTranslatedLayerLabel(tLayer, layer.id, layer.id)} +

- {meta?.title || id} + {meta ? getTranslatedSessionTitle(tSession, id, meta.title) : id}

{meta?.subtitle && (

- {meta.subtitle} + {getTranslatedVersionField(tMeta, id, "subtitle", meta.subtitle)}

)}
@@ -102,12 +106,12 @@ export default function LayersPage() { />
- {info?.loc ?? "?"} LOC - {info?.tools.length ?? "?"} tools + {info?.loc ?? "?"} {tVersion("loc")} + {info?.tools.length ?? "?"} {tVersion("tools")}
{meta?.keyInsight && ( -

- {meta.keyInsight} +

+ {getTranslatedVersionField(tMeta, id, "keyInsight", meta.keyInsight)}

)}
@@ -116,11 +120,16 @@ export default function LayersPage() {
- {/* Composition indicator */} {index < LAYERS.length - 1 && (
- +
)} diff --git a/web/src/app/[locale]/layout.tsx b/web/src/app/[locale]/layout.tsx index 62e7dd60b..cbe174804 100644 --- a/web/src/app/[locale]/layout.tsx +++ b/web/src/app/[locale]/layout.tsx @@ -4,10 +4,11 @@ import { Header } from "@/components/layout/header"; import en from "@/i18n/messages/en.json"; import zh from "@/i18n/messages/zh.json"; import ja from "@/i18n/messages/ja.json"; +import ru from "@/i18n/messages/ru.json"; import "../globals.css"; -const locales = ["en", "zh", "ja"]; -const metaMessages: Record = { en, zh, ja }; +const locales = ["en", "zh", "ja", "ru"]; +const metaMessages: Record = { en, zh, ja, ru }; export function generateStaticParams() { return locales.map((locale) => ({ locale })); diff --git a/web/src/app/[locale]/page.tsx b/web/src/app/[locale]/page.tsx index 686d95615..629264e30 100644 --- a/web/src/app/[locale]/page.tsx +++ b/web/src/app/[locale]/page.tsx @@ -3,6 +3,11 @@ import Link from "next/link"; import { useTranslations, useLocale } from "@/lib/i18n"; import { LEARNING_PATH, VERSION_META, LAYERS } from "@/lib/constants"; +import { + getTranslatedLayerLabel, + getTranslatedSessionTitle, + getTranslatedVersionField, +} from "@/lib/version-i18n"; import { LayerBadge } from "@/components/ui/badge"; import { Card } from "@/components/ui/card"; import { cn } from "@/lib/utils"; @@ -39,6 +44,9 @@ function getVersionData(id: string) { export default function HomePage() { const t = useTranslations("home"); + const tLayer = useTranslations("layer_labels"); + const tSession = useTranslations("sessions"); + const tMeta = useTranslations("version_meta"); const locale = useLocale(); return ( @@ -170,10 +178,15 @@ export default function HomePage() {

- {meta.title} + {getTranslatedSessionTitle(tSession, versionId, meta.title)}

- {meta.keyInsight} + {getTranslatedVersionField( + tMeta, + versionId, + "keyInsight", + meta.keyInsight + )}

@@ -204,7 +217,9 @@ export default function HomePage() { />
-

{layer.label}

+

+ {getTranslatedLayerLabel(tLayer, layer.id, layer.label)} +

{layer.versions.length} {t("versions_in_layer")} @@ -218,7 +233,7 @@ export default function HomePage() { layer={layer.id} className="cursor-pointer transition-opacity hover:opacity-80" > - {vid}: {meta?.title} + {vid}: {meta ? getTranslatedSessionTitle(tSession, vid, meta.title) : vid} ); diff --git a/web/src/components/architecture/arch-diagram.tsx b/web/src/components/architecture/arch-diagram.tsx index 2d8fa9e5e..65ab9b373 100644 --- a/web/src/components/architecture/arch-diagram.tsx +++ b/web/src/components/architecture/arch-diagram.tsx @@ -1,6 +1,7 @@ "use client"; import { motion } from "framer-motion"; +import { useLocale } from "@/lib/i18n"; import { cn } from "@/lib/utils"; import { LAYERS } from "@/lib/constants"; import versionsData from "@/data/generated/versions.json"; @@ -18,6 +19,19 @@ const CLASS_DESCRIPTIONS: Record = { SharedBoard: "Cross-agent shared state coordination", }; +const CLASS_DESCRIPTIONS_RU: Record = { + TodoManager: "Планирование задач с видимыми ограничениями", + SkillLoader: "Динамическая подгрузка знаний из файлов SKILL.md", + ContextManager: "Трёхслойный конвейер сжатия контекста", + Task: "Файловая задача с зависимостями и постоянным состоянием", + TaskManager: "CRUD для файловых задач с зависимостями", + BackgroundTask: "Отдельная единица фонового выполнения", + BackgroundManager: "Неблокирующее выполнение и очередь уведомлений", + TeammateManager: "Жизненный цикл и координация команды агентов", + Teammate: "Идентичность и состояние отдельного агента", + SharedBoard: "Общая координация состояния между агентами", +}; + interface ArchDiagramProps { version: string; } @@ -103,10 +117,18 @@ function getNewClassNames(version: string): Set { } export function ArchDiagram({ version }: ArchDiagramProps) { + const locale = useLocale(); const allClasses = collectClassesUpTo(version); const newClassNames = getNewClassNames(version); const versionData = versionsData.versions.find((v) => v.id === version); const tools = versionData?.tools 
?? []; + const classDescriptions = + locale === "ru" ? CLASS_DESCRIPTIONS_RU : CLASS_DESCRIPTIONS; + const newLabel = locale === "ru" ? "НОВОЕ" : "NEW"; + const emptyLabel = + locale === "ru" + ? "В этой версии нет классов (только функции)" + : "No classes in this version (functions only)"; const reversed = [...allClasses].reverse(); @@ -181,7 +203,7 @@ export function ArchDiagram({ version }: ArchDiagramProps) { : "text-zinc-400 dark:text-zinc-500" )} > - {CLASS_DESCRIPTIONS[cls.name] || ""} + {classDescriptions[cls.name] || ""}

@@ -190,7 +212,7 @@ export function ArchDiagram({ version }: ArchDiagramProps) { {isNew && ( - NEW + {newLabel} )}
@@ -202,7 +224,7 @@ export function ArchDiagram({ version }: ArchDiagramProps) { {allClasses.length === 0 && (
- No classes in this version (functions only) + {emptyLabel}
)} diff --git a/web/src/components/architecture/design-decisions.tsx b/web/src/components/architecture/design-decisions.tsx index 5fa47faa4..72618567c 100644 --- a/web/src/components/architecture/design-decisions.tsx +++ b/web/src/components/architecture/design-decisions.tsx @@ -3,6 +3,7 @@ import { useState } from "react"; import { motion, AnimatePresence } from "framer-motion"; import { useTranslations, useLocale } from "@/lib/i18n"; +import { ANNOTATION_RU_OVERRIDES } from "@/data/annotation-ru"; import { ChevronDown } from "lucide-react"; import { cn } from "@/lib/utils"; @@ -26,6 +27,7 @@ interface Decision { alternatives: string; zh?: { title: string; description: string }; ja?: { title: string; description: string }; + ru?: { title: string; description: string; alternatives?: string }; } interface AnnotationFile { @@ -55,18 +57,29 @@ interface DesignDecisionsProps { function DecisionCard({ decision, locale, + override, }: { decision: Decision; locale: string; + override?: { title: string; description: string; alternatives: string }; }) { const [open, setOpen] = useState(false); const t = useTranslations("version"); const localized = - locale !== "en" ? (decision as unknown as Record)[locale] as { title?: string; description?: string } | undefined : undefined; + locale !== "en" + ? ((decision as unknown as Record)[locale] as { + title?: string; + description?: string; + alternatives?: string; + } | undefined) + : undefined; - const title = localized?.title || decision.title; - const description = localized?.description || decision.description; + const title = override?.title || localized?.title || decision.title; + const description = + override?.description || localized?.description || decision.description; + const alternatives = + override?.alternatives || localized?.alternatives || decision.alternatives; return (
@@ -100,13 +113,13 @@ function DecisionCard({ {description}

- {decision.alternatives && ( + {alternatives && (

{t("alternatives")}

- {decision.alternatives} + {alternatives}

)} @@ -138,7 +151,15 @@ export function DesignDecisions({ version }: DesignDecisionsProps) { animate={{ opacity: 1, y: 0 }} transition={{ delay: i * 0.05 }} > - + ))}
diff --git a/web/src/components/architecture/execution-flow.tsx b/web/src/components/architecture/execution-flow.tsx index 986560ce0..0a9fe3473 100644 --- a/web/src/components/architecture/execution-flow.tsx +++ b/web/src/components/architecture/execution-flow.tsx @@ -2,7 +2,7 @@ import { useEffect, useState } from "react"; import { motion } from "framer-motion"; -import { useTranslations } from "@/lib/i18n"; +import { useLocale, useTranslations } from "@/lib/i18n"; import { getFlowForVersion } from "@/data/execution-flows"; import type { FlowNode, FlowEdge } from "@/types/agent-data"; @@ -187,11 +187,12 @@ interface ExecutionFlowProps { export function ExecutionFlow({ version }: ExecutionFlowProps) { const t = useTranslations("version"); + const locale = useLocale(); const [flow, setFlow] = useState>(null); useEffect(() => { - setFlow(getFlowForVersion(version)); - }, [version]); + setFlow(getFlowForVersion(version, locale)); + }, [locale, version]); if (!flow) return null; diff --git a/web/src/components/architecture/message-flow.tsx b/web/src/components/architecture/message-flow.tsx index ff25fd55e..99502f9b3 100644 --- a/web/src/components/architecture/message-flow.tsx +++ b/web/src/components/architecture/message-flow.tsx @@ -2,26 +2,43 @@ import { useState, useEffect, useRef } from "react"; import { motion, AnimatePresence } from "framer-motion"; - -const FLOW_STEPS = [ - { role: "user", label: "user", color: "bg-blue-500" }, - { role: "assistant", label: "assistant", color: "bg-zinc-600" }, - { role: "tool_call", label: "tool_call", color: "bg-amber-500" }, - { role: "tool_result", label: "tool_result", color: "bg-emerald-500" }, - { role: "assistant", label: "assistant", color: "bg-zinc-600" }, - { role: "tool_call", label: "tool_call", color: "bg-amber-500" }, - { role: "tool_result", label: "tool_result", color: "bg-emerald-500" }, - { role: "assistant", label: "assistant (final)", color: "bg-zinc-600" }, -]; +import { useLocale } from "@/lib/i18n"; 
export function MessageFlow() { + const locale = useLocale(); const [count, setCount] = useState(0); const intervalRef = useRef | null>(null); + const labels = + locale === "ru" + ? { + user: "пользователь", + assistant: "ассистент", + toolCall: "вызов_инструмента", + toolResult: "результат_инструмента", + final: "ассистент (финал)", + } + : { + user: "user", + assistant: "assistant", + toolCall: "tool_call", + toolResult: "tool_result", + final: "assistant (final)", + }; + const flowSteps = [ + { role: "user", label: labels.user, color: "bg-blue-500" }, + { role: "assistant", label: labels.assistant, color: "bg-zinc-600" }, + { role: "tool_call", label: labels.toolCall, color: "bg-amber-500" }, + { role: "tool_result", label: labels.toolResult, color: "bg-emerald-500" }, + { role: "assistant", label: labels.assistant, color: "bg-zinc-600" }, + { role: "tool_call", label: labels.toolCall, color: "bg-amber-500" }, + { role: "tool_result", label: labels.toolResult, color: "bg-emerald-500" }, + { role: "assistant", label: labels.final, color: "bg-zinc-600" }, + ]; useEffect(() => { intervalRef.current = setInterval(() => { setCount((prev) => { - if (prev >= FLOW_STEPS.length) { + if (prev >= flowSteps.length) { setTimeout(() => setCount(0), 1500); return prev; } @@ -31,7 +48,7 @@ export function MessageFlow() { return () => { if (intervalRef.current) clearInterval(intervalRef.current); }; - }, []); + }, [flowSteps.length]); return (
@@ -45,7 +62,7 @@ export function MessageFlow() {
- {FLOW_STEPS.slice(0, count).map((step, i) => ( + {flowSteps.slice(0, count).map((step, i) => ( ("unified"); const changes = useMemo(() => diffLines(oldSource, newSource), [oldSource, newSource]); + const copy = + locale === "ru" + ? { unified: "Общий", split: "Рядом" } + : { unified: "Unified", split: "Split" }; return (
@@ -34,7 +40,7 @@ export function CodeDiff({ oldSource, newSource, oldLabel, newLabel }: CodeDiffP : "text-zinc-500 hover:text-zinc-700 dark:text-zinc-400" )} > - Unified + {copy.unified}
diff --git a/web/src/components/diff/whats-new.tsx b/web/src/components/diff/whats-new.tsx index 54d7ee36b..4389dd6a2 100644 --- a/web/src/components/diff/whats-new.tsx +++ b/web/src/components/diff/whats-new.tsx @@ -1,7 +1,7 @@ "use client"; import { motion } from "framer-motion"; -import { useTranslations } from "@/lib/i18n"; +import { useLocale, useTranslations } from "@/lib/i18n"; import { Card } from "@/components/ui/card"; interface WhatsNewProps { @@ -18,6 +18,11 @@ interface WhatsNewProps { export function WhatsNew({ diff }: WhatsNewProps) { const t = useTranslations("version"); const td = useTranslations("diff"); + const locale = useLocale(); + const copy = + locale === "ru" + ? { def: "функция", lines: "строк" } + : { def: "def", lines: "lines" }; if (!diff) { return null; @@ -100,7 +105,7 @@ export function WhatsNew({ diff }: WhatsNewProps) { {diff.newFunctions.map((fn) => (
  • - def{" "} + {copy.def}{" "} {fn}()
  • @@ -122,7 +127,7 @@ export function WhatsNew({ diff }: WhatsNewProps) { {td("loc_delta")}

    - +{diff.locDelta} lines + +{diff.locDelta} {copy.lines}

    diff --git a/web/src/components/layout/header.tsx b/web/src/components/layout/header.tsx index dd724d198..f8881011c 100644 --- a/web/src/components/layout/header.tsx +++ b/web/src/components/layout/header.tsx @@ -15,14 +15,16 @@ const NAV_ITEMS = [ const LOCALES = [ { code: "en", label: "EN" }, - { code: "zh", label: "中文" }, - { code: "ja", label: "日本語" }, + { code: "zh", label: "ZH" }, + { code: "ja", label: "JA" }, + { code: "ru", label: "RU" }, ]; export function Header() { const t = useTranslations("nav"); const pathname = usePathname(); const locale = useLocale(); + const brand = locale === "ru" ? "Изучаем Claude Code" : "Learn Claude Code"; const [mobileOpen, setMobileOpen] = useState(false); const [dark, setDark] = useState(() => { if (typeof window !== "undefined") { @@ -49,10 +51,9 @@ export function Header() {
    - Learn Claude Code + {brand} - {/* Desktop nav */} - {/* Mobile hamburger */}
    - {/* Mobile menu */} {mobileOpen && (
    {NAV_ITEMS.map((item) => ( diff --git a/web/src/components/simulator/agent-loop-simulator.tsx b/web/src/components/simulator/agent-loop-simulator.tsx index 8de470cd4..f97d47207 100644 --- a/web/src/components/simulator/agent-loop-simulator.tsx +++ b/web/src/components/simulator/agent-loop-simulator.tsx @@ -2,8 +2,9 @@ import { useRef, useEffect, useState } from "react"; import { AnimatePresence } from "framer-motion"; -import { useTranslations } from "@/lib/i18n"; +import { useLocale, useTranslations } from "@/lib/i18n"; import { useSimulator } from "@/hooks/useSimulator"; +import { SCENARIO_RU_OVERRIDES } from "@/data/scenario-ru"; import { SimulatorControls } from "./simulator-controls"; import { SimulatorMessage } from "./simulator-message"; import type { Scenario } from "@/types/agent-data"; @@ -29,8 +30,13 @@ interface AgentLoopSimulatorProps { export function AgentLoopSimulator({ version }: AgentLoopSimulatorProps) { const t = useTranslations("version"); + const locale = useLocale(); const [scenario, setScenario] = useState(null); const scrollRef = useRef(null); + const emptyState = + locale === "ru" + ? "Нажмите «Старт» или «Шаг», чтобы начать" + : "Press Play or Step to begin"; useEffect(() => { const loader = scenarioModules[version]; @@ -39,7 +45,31 @@ export function AgentLoopSimulator({ version }: AgentLoopSimulatorProps) { } }, [version]); - const sim = useSimulator(scenario?.steps ?? []); + const scenarioOverride = + locale === "ru" ? SCENARIO_RU_OVERRIDES[version] : undefined; + const description = + locale === "ru" + ? scenarioOverride?.description || + scenario?.ru?.description || + scenario?.description || + "" + : scenario?.description || ""; + const localizedSteps = scenario + ? locale === "ru" + ? 
scenario.steps.map((step, index) => ({ + ...step, + content: + scenarioOverride?.steps?.[index]?.content || + step.ru?.content || + step.content, + annotation: + scenarioOverride?.steps?.[index]?.annotation || + step.ru?.annotation || + step.annotation, + })) + : scenario.steps + : []; + const sim = useSimulator(localizedSteps); useEffect(() => { if (scrollRef.current) { @@ -56,7 +86,7 @@ export function AgentLoopSimulator({ version }: AgentLoopSimulatorProps) {

    {t("simulator")}

    - {scenario.description} + {description}

    @@ -81,7 +111,7 @@ export function AgentLoopSimulator({ version }: AgentLoopSimulatorProps) { > {sim.visibleSteps.length === 0 && (
    - Press Play or Step to begin + {emptyState}
    )} diff --git a/web/src/components/simulator/simulator-message.tsx b/web/src/components/simulator/simulator-message.tsx index 2984e3d49..1e8324e45 100644 --- a/web/src/components/simulator/simulator-message.tsx +++ b/web/src/components/simulator/simulator-message.tsx @@ -1,6 +1,7 @@ "use client"; import { motion } from "framer-motion"; +import { useLocale } from "@/lib/i18n"; import { cn } from "@/lib/utils"; import type { SimStep } from "@/types/agent-data"; import { User, Bot, Terminal, ArrowRight, AlertCircle } from "lucide-react"; @@ -47,8 +48,18 @@ const TYPE_CONFIG: Record< }; export function SimulatorMessage({ step, index }: SimulatorMessageProps) { + const locale = useLocale(); const config = TYPE_CONFIG[step.type] || TYPE_CONFIG.assistant_text; const Icon = config.icon; + const ruLabels: Record = { + User: "Пользователь", + Assistant: "Ассистент", + "Tool Call": "Вызов инструмента", + "Tool Result": "Результат инструмента", + System: "Система", + }; + const label = locale === "ru" ? (ruLabels[config.label] || config.label) : config.label; + const emptyText = locale === "ru" ? "(пусто)" : "(empty)"; return ( - {config.label} + {label} {step.toolName && ( {step.toolName} @@ -75,7 +86,7 @@ export function SimulatorMessage({ step, index }: SimulatorMessageProps) { {step.type === "tool_call" || step.type === "tool_result" ? (
    -          {step.content || "(empty)"}
    +          {step.content || emptyText}
             
    ) : step.type === "system_event" ? (
    diff --git a/web/src/components/timeline/timeline.tsx b/web/src/components/timeline/timeline.tsx
    index a30647b92..2fa4bb592 100644
    --- a/web/src/components/timeline/timeline.tsx
    +++ b/web/src/components/timeline/timeline.tsx
    @@ -4,6 +4,11 @@ import Link from "next/link";
     import { motion } from "framer-motion";
     import { useTranslations, useLocale } from "@/lib/i18n";
     import { LEARNING_PATH, VERSION_META, LAYERS } from "@/lib/constants";
    +import {
    +  getTranslatedLayerLabel,
    +  getTranslatedSessionTitle,
    +  getTranslatedVersionField,
    +} from "@/lib/version-i18n";
     import { LayerBadge } from "@/components/ui/badge";
     import { cn } from "@/lib/utils";
     import versionsData from "@/data/generated/versions.json";
    @@ -44,6 +49,9 @@ const MAX_LOC = Math.max(
     
     export function Timeline() {
       const t = useTranslations("timeline");
    +  const tLayer = useTranslations("layer_labels");
    +  const tSession = useTranslations("sessions");
    +  const tMeta = useTranslations("version_meta");
       const tv = useTranslations("version");
       const locale = useLocale();
     
    @@ -60,7 +68,9 @@ export function Timeline() {
                   
    -              {layer.label}
    +              
    +                {getTranslatedLayerLabel(tLayer, layer.id, layer.label)}
    +              
                 
    ))}
    @@ -114,14 +124,14 @@ export function Timeline() {
    {versionId} - {meta.coreAddition} + {getTranslatedVersionField(tMeta, versionId, "coreAddition", meta.coreAddition)}

    - {meta.title} + {getTranslatedSessionTitle(tSession, versionId, meta.title)} - {meta.subtitle} + {getTranslatedVersionField(tMeta, versionId, "subtitle", meta.subtitle)}

    @@ -149,7 +159,12 @@ export function Timeline() { {/* Key insight */} {meta.keyInsight && (

    - “{meta.keyInsight}” + “{getTranslatedVersionField( + tMeta, + versionId, + "keyInsight", + meta.keyInsight + )}”

    )} diff --git a/web/src/components/visualizations/s01-agent-loop.tsx b/web/src/components/visualizations/s01-agent-loop.tsx index 9e911889e..d45c35718 100644 --- a/web/src/components/visualizations/s01-agent-loop.tsx +++ b/web/src/components/visualizations/s01-agent-loop.tsx @@ -4,12 +4,12 @@ import { motion, AnimatePresence } from "framer-motion"; import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; import { StepControls } from "@/components/visualizations/shared/step-controls"; import { useSvgPalette } from "@/hooks/useDarkMode"; +import { useLocale } from "@/lib/i18n"; // -- Flowchart node definitions -- interface FlowNode { id: string; - label: string; x: number; y: number; w: number; @@ -18,28 +18,28 @@ interface FlowNode { } const NODES: FlowNode[] = [ - { id: "start", label: "Start", x: 160, y: 30, w: 120, h: 40, type: "rect" }, - { id: "api_call", label: "API Call", x: 160, y: 110, w: 120, h: 40, type: "rect" }, - { id: "check", label: "stop_reason?", x: 160, y: 200, w: 140, h: 50, type: "diamond" }, - { id: "execute", label: "Execute Tool", x: 160, y: 300, w: 120, h: 40, type: "rect" }, - { id: "append", label: "Append Result", x: 160, y: 380, w: 120, h: 40, type: "rect" }, - { id: "end", label: "Break / Done", x: 380, y: 200, w: 120, h: 40, type: "rect" }, + { id: "start", x: 160, y: 30, w: 120, h: 40, type: "rect" }, + { id: "api_call", x: 160, y: 110, w: 120, h: 40, type: "rect" }, + { id: "check", x: 160, y: 200, w: 140, h: 50, type: "diamond" }, + { id: "execute", x: 160, y: 300, w: 120, h: 40, type: "rect" }, + { id: "append", x: 160, y: 380, w: 120, h: 40, type: "rect" }, + { id: "end", x: 380, y: 200, w: 120, h: 40, type: "rect" }, ]; // Edges between nodes (SVG path data computed inline) interface FlowEdge { from: string; to: string; - label?: string; + labelKey?: "toolUse" | "endTurn"; } const EDGES: FlowEdge[] = [ { from: "start", to: "api_call" }, { from: "api_call", to: "check" }, - { from: "check", to: "execute", 
label: "tool_use" }, + { from: "check", to: "execute", labelKey: "toolUse" }, { from: "execute", to: "append" }, { from: "append", to: "api_call" }, - { from: "check", to: "end", label: "end_turn" }, + { from: "check", to: "end", labelKey: "endTurn" }, ]; // Which nodes light up at each step @@ -72,30 +72,100 @@ interface MessageBlock { colorClass: string; } -const MESSAGES_PER_STEP: (MessageBlock | null)[][] = [ - [], - [{ role: "user", detail: "Fix the login bug", colorClass: "bg-blue-500 dark:bg-blue-600" }], - [], - [{ role: "assistant", detail: "tool_use: read_file", colorClass: "bg-zinc-600 dark:bg-zinc-500" }], - [{ role: "tool_result", detail: "auth.ts contents...", colorClass: "bg-emerald-500 dark:bg-emerald-600" }], - [ - { role: "assistant", detail: "tool_use: edit_file", colorClass: "bg-zinc-600 dark:bg-zinc-500" }, - { role: "tool_result", detail: "file updated", colorClass: "bg-emerald-500 dark:bg-emerald-600" }, - ], - [{ role: "assistant", detail: "end_turn: Done!", colorClass: "bg-purple-500 dark:bg-purple-600" }], -]; - -// -- Step annotations -- +interface AgentLoopCopy { + title: string; + nodeLabels: Record; + edgeLabels: { + toolUse: string; + endTurn: string; + }; + messages: (MessageBlock | null)[][]; + stepInfo: { title: string; desc: string }[]; + empty: string; + length: string; + iteration: string; +} -const STEP_INFO = [ - { title: "The While Loop", desc: "Every agent is a while loop that keeps calling the model until it says 'stop'." }, - { title: "User Input", desc: "The loop starts when the user sends a message." }, - { title: "Call the Model", desc: "Send all messages to the LLM. It sees everything and decides what to do." }, - { title: "stop_reason: tool_use", desc: "The model wants to use a tool. The loop continues." }, - { title: "Execute & Append", desc: "Run the tool, append the result to messages[]. Feed it back." }, - { title: "Loop Again", desc: "Same code path, second iteration. The model decides to edit a file." 
}, - { title: "stop_reason: end_turn", desc: "The model is done. Loop exits. That's the entire agent." }, -]; +const COPY: Record = { + en: { + title: "The Agent While-Loop", + nodeLabels: { + start: "Start", + api_call: "API Call", + check: "stop_reason?", + execute: "Execute Tool", + append: "Append Result", + end: "Break / Done", + }, + edgeLabels: { + toolUse: "tool_use", + endTurn: "end_turn", + }, + messages: [ + [], + [{ role: "user", detail: "Fix the login bug", colorClass: "bg-blue-500 dark:bg-blue-600" }], + [], + [{ role: "assistant", detail: "tool_use: read_file", colorClass: "bg-zinc-600 dark:bg-zinc-500" }], + [{ role: "tool_result", detail: "auth.ts contents...", colorClass: "bg-emerald-500 dark:bg-emerald-600" }], + [ + { role: "assistant", detail: "tool_use: edit_file", colorClass: "bg-zinc-600 dark:bg-zinc-500" }, + { role: "tool_result", detail: "file updated", colorClass: "bg-emerald-500 dark:bg-emerald-600" }, + ], + [{ role: "assistant", detail: "end_turn: Done!", colorClass: "bg-purple-500 dark:bg-purple-600" }], + ], + stepInfo: [ + { title: "The While Loop", desc: "Every agent is a while loop that keeps calling the model until it says 'stop'." }, + { title: "User Input", desc: "The loop starts when the user sends a message." }, + { title: "Call the Model", desc: "Send all messages to the LLM. It sees everything and decides what to do." }, + { title: "stop_reason: tool_use", desc: "The model wants to use a tool. The loop continues." }, + { title: "Execute & Append", desc: "Run the tool, append the result to messages[]. Feed it back." }, + { title: "Loop Again", desc: "Same code path, second iteration. The model decides to edit a file." }, + { title: "stop_reason: end_turn", desc: "The model is done. Loop exits. That's the entire agent." 
}, + ], + empty: "[ empty ]", + length: "length", + iteration: "iter #2", + }, + ru: { + title: "Цикл агента", + nodeLabels: { + start: "Старт", + api_call: "Вызов API", + check: "stop_reason?", + execute: "Запуск инструмента", + append: "Добавить результат", + end: "Выход / готово", + }, + edgeLabels: { + toolUse: "tool_use", + endTurn: "end_turn", + }, + messages: [ + [], + [{ role: "user", detail: "Исправить баг логина", colorClass: "bg-blue-500 dark:bg-blue-600" }], + [], + [{ role: "assistant", detail: "tool_use: read_file", colorClass: "bg-zinc-600 dark:bg-zinc-500" }], + [{ role: "tool_result", detail: "содержимое auth.ts...", colorClass: "bg-emerald-500 dark:bg-emerald-600" }], + [ + { role: "assistant", detail: "tool_use: edit_file", colorClass: "bg-zinc-600 dark:bg-zinc-500" }, + { role: "tool_result", detail: "файл обновлён", colorClass: "bg-emerald-500 dark:bg-emerald-600" }, + ], + [{ role: "assistant", detail: "end_turn: Готово!", colorClass: "bg-purple-500 dark:bg-purple-600" }], + ], + stepInfo: [ + { title: "Цикл while", desc: "Любой агент по сути крутит while-цикл и вызывает модель, пока она не скажет остановиться." }, + { title: "Сообщение пользователя", desc: "Цикл начинается, когда пользователь отправляет сообщение." }, + { title: "Вызов модели", desc: "Все сообщения отправляются в LLM. Она видит весь контекст и решает, что делать дальше." }, + { title: "stop_reason: tool_use", desc: "Модель хочет вызвать инструмент. Цикл продолжается." }, + { title: "Выполнить и добавить", desc: "Инструмент запускается, результат дописывается в messages[] и подаётся назад модели." }, + { title: "Следующая итерация", desc: "Тот же кодовый путь, вторая итерация. Теперь модель решает править файл." }, + { title: "stop_reason: end_turn", desc: "Модель закончила. Цикл завершается. Это и есть весь агент." }, + ], + empty: "[ пусто ]", + length: "длина", + iteration: "итер. 
#2", + }, +}; // -- Helpers -- @@ -136,6 +206,8 @@ function edgePath(fromId: string, toId: string): string { // -- Component -- export default function AgentLoop({ title }: { title?: string }) { + const locale = useLocale(); + const copy = COPY[locale] || COPY.en; const { currentStep, totalSteps, @@ -153,17 +225,17 @@ export default function AgentLoop({ title }: { title?: string }) { // Build accumulated messages up to the current step const visibleMessages: MessageBlock[] = []; for (let s = 0; s <= currentStep; s++) { - for (const msg of MESSAGES_PER_STEP[s]) { + for (const msg of copy.messages[s]) { if (msg) visibleMessages.push(msg); } } - const stepInfo = STEP_INFO[currentStep]; + const stepInfo = copy.stepInfo[currentStep]; return (

    - {title || "The Agent While-Loop"} + {title || copy.title}

    @@ -228,7 +300,7 @@ export default function AgentLoop({ title }: { title?: string }) { }} transition={{ duration: 0.4 }} /> - {edge.label && ( + {edge.labelKey && ( - {edge.label} + {copy.edgeLabels[edge.labelKey]} )} @@ -292,7 +364,7 @@ export default function AgentLoop({ title }: { title?: string }) { animate={{ fill: isActive ? palette.activeNodeText : palette.nodeText }} transition={{ duration: 0.4 }} > - {node.label} + {copy.nodeLabels[node.id]} ); @@ -326,7 +398,7 @@ export default function AgentLoop({ title }: { title?: string }) { animate={{ fill: isActive ? palette.activeNodeText : palette.nodeText }} transition={{ duration: 0.4 }} > - {node.label} + {copy.nodeLabels[node.id]} ); @@ -344,7 +416,7 @@ export default function AgentLoop({ title }: { title?: string }) { initial={{ opacity: 0 }} animate={{ opacity: 1 }} > - iter #2 + {copy.iteration} )} @@ -365,7 +437,7 @@ export default function AgentLoop({ title }: { title?: string }) { exit={{ opacity: 0 }} className="py-8 text-center text-xs text-zinc-400 dark:text-zinc-600" > - [ empty ] + {copy.empty} )} {visibleMessages.map((msg, i) => ( @@ -391,7 +463,7 @@ export default function AgentLoop({ title }: { title?: string }) { {visibleMessages.length > 0 && (
    - length: {visibleMessages.length} + {copy.length}: {visibleMessages.length}
    )} diff --git a/web/src/components/visualizations/s02-tool-dispatch.tsx b/web/src/components/visualizations/s02-tool-dispatch.tsx index 8bdcc4e67..8acf4cee4 100644 --- a/web/src/components/visualizations/s02-tool-dispatch.tsx +++ b/web/src/components/visualizations/s02-tool-dispatch.tsx @@ -4,12 +4,12 @@ import { motion, AnimatePresence } from "framer-motion"; import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; import { StepControls } from "@/components/visualizations/shared/step-controls"; import { useSvgPalette } from "@/hooks/useDarkMode"; +import { useLocale } from "@/lib/i18n"; // -- Tool definitions -- interface ToolDef { name: string; - desc: string; color: string; activeColor: string; darkColor: string; @@ -19,7 +19,6 @@ interface ToolDef { const TOOLS: ToolDef[] = [ { name: "bash", - desc: "Execute shell commands", color: "border-orange-300 bg-orange-50", activeColor: "border-orange-500 bg-orange-100 ring-2 ring-orange-400", darkColor: "dark:border-zinc-700 dark:bg-zinc-800/50", @@ -27,7 +26,6 @@ const TOOLS: ToolDef[] = [ }, { name: "read_file", - desc: "Read file contents", color: "border-sky-300 bg-sky-50", activeColor: "border-sky-500 bg-sky-100 ring-2 ring-sky-400", darkColor: "dark:border-zinc-700 dark:bg-zinc-800/50", @@ -35,7 +33,6 @@ const TOOLS: ToolDef[] = [ }, { name: "write_file", - desc: "Create or overwrite a file", color: "border-emerald-300 bg-emerald-50", activeColor: "border-emerald-500 bg-emerald-100 ring-2 ring-emerald-400", darkColor: "dark:border-zinc-700 dark:bg-zinc-800/50", @@ -43,7 +40,6 @@ const TOOLS: ToolDef[] = [ }, { name: "edit_file", - desc: "Apply targeted edits", color: "border-violet-300 bg-violet-50", activeColor: "border-violet-500 bg-violet-100 ring-2 ring-violet-400", darkColor: "dark:border-zinc-700 dark:bg-zinc-800/50", @@ -55,24 +51,74 @@ const TOOLS: ToolDef[] = [ const ACTIVE_TOOL_PER_STEP: number[] = [-1, 0, 1, 2, 3, 4]; // Incoming request JSON per step -const REQUEST_PER_STEP: 
(string | null)[] = [ - null, - '{ name: "bash", input: { cmd: "ls -la" } }', - '{ name: "read_file", input: { path: "src/auth.ts" } }', - '{ name: "write_file", input: { path: "config.json" } }', - '{ name: "edit_file", input: { path: "index.ts" } }', - null, -]; +interface ToolDispatchCopy { + title: string; + incoming: string; + waiting: string; + allRoutes: string; + toolDescriptions: string[]; + requests: (string | null)[]; + stepInfo: { title: string; desc: string }[]; +} -// Step annotations -const STEP_INFO = [ - { title: "The Dispatch Map", desc: "A dictionary maps tool names to handler functions. The loop code never changes." }, - { title: "Route: bash", desc: "tool_call.name -> handlers['bash'](input). Name-based routing." }, - { title: "Route: read_file", desc: "Same pattern, different handler. Validate input, execute, return result." }, - { title: "Route: write_file", desc: "Every tool returns a tool_result that goes back into messages[]." }, - { title: "Route: edit_file", desc: "Adding a new tool = adding one entry to the dispatch map." }, - { title: "The Key Insight", desc: "The while loop stays the same. You only grow the dispatch map. That's it." }, -]; +const COPY: Record = { + en: { + title: "Tool Dispatch Map", + incoming: "Incoming:", + waiting: "waiting for tool_call...", + allRoutes: "All routes active", + toolDescriptions: [ + "Execute shell commands", + "Read file contents", + "Create or overwrite a file", + "Apply targeted edits", + ], + requests: [ + null, + '{ name: "bash", input: { cmd: "ls -la" } }', + '{ name: "read_file", input: { path: "src/auth.ts" } }', + '{ name: "write_file", input: { path: "config.json" } }', + '{ name: "edit_file", input: { path: "index.ts" } }', + null, + ], + stepInfo: [ + { title: "The Dispatch Map", desc: "A dictionary maps tool names to handler functions. The loop code never changes." }, + { title: "Route: bash", desc: "tool_call.name -> handlers['bash'](input). Name-based routing." 
}, + { title: "Route: read_file", desc: "Same pattern, different handler. Validate input, execute, return result." }, + { title: "Route: write_file", desc: "Every tool returns a tool_result that goes back into messages[]." }, + { title: "Route: edit_file", desc: "Adding a new tool = adding one entry to the dispatch map." }, + { title: "The Key Insight", desc: "The while loop stays the same. You only grow the dispatch map. That's it." }, + ], + }, + ru: { + title: "Карта диспетчеризации инструментов", + incoming: "Входящий вызов:", + waiting: "ожидание tool_call...", + allRoutes: "Все маршруты активны", + toolDescriptions: [ + "Выполняет команды shell", + "Читает содержимое файла", + "Создаёт или перезаписывает файл", + "Вносит точечные правки", + ], + requests: [ + null, + '{ name: "bash", input: { cmd: "ls -la" } }', + '{ name: "read_file", input: { path: "src/auth.ts" } }', + '{ name: "write_file", input: { path: "config.json" } }', + '{ name: "edit_file", input: { path: "index.ts" } }', + null, + ], + stepInfo: [ + { title: "Карта диспетчеризации", desc: "Словарь сопоставляет имена инструментов функциям-обработчикам. Сам цикл при этом не меняется." }, + { title: "Маршрут: bash", desc: "tool_call.name -> handlers['bash'](input). Маршрутизация идёт по имени." }, + { title: "Маршрут: read_file", desc: "Тот же шаблон, другой обработчик. Валидировать вход, выполнить, вернуть результат." }, + { title: "Маршрут: write_file", desc: "Каждый инструмент возвращает tool_result, который снова попадает в messages[]." }, + { title: "Маршрут: edit_file", desc: "Добавить новый инструмент = добавить ещё одну запись в карту диспетчеризации." }, + { title: "Ключевая мысль", desc: "while-цикл остаётся тем же. Растёт только карта диспетчеризации. И всё." 
}, + ], + }, +}; // SVG layout constants const SVG_WIDTH = 600; @@ -93,6 +139,8 @@ function getCardX(index: number): number { } export default function ToolDispatch({ title }: { title?: string }) { + const locale = useLocale(); + const copy = COPY[locale] || COPY.en; const { currentStep, totalSteps, @@ -105,21 +153,21 @@ export default function ToolDispatch({ title }: { title?: string }) { const palette = useSvgPalette(); const activeToolIdx = ACTIVE_TOOL_PER_STEP[currentStep]; - const request = REQUEST_PER_STEP[currentStep]; - const stepInfo = STEP_INFO[currentStep]; + const request = copy.requests[currentStep]; + const stepInfo = copy.stepInfo[currentStep]; const isAllActive = activeToolIdx === 4; return (

    - {title || "Tool Dispatch Map"} + {title || copy.title}

    {/* Incoming request display */}
    - Incoming: + {copy.incoming} {request && ( @@ -141,7 +189,7 @@ export default function ToolDispatch({ title }: { title?: string }) { animate={{ opacity: 0.6 }} className="text-xs text-zinc-400 dark:text-zinc-600" > - waiting for tool_call... + {copy.waiting} )} {isAllActive && ( @@ -151,7 +199,7 @@ export default function ToolDispatch({ title }: { title?: string }) { animate={{ opacity: 1 }} className="text-xs font-medium text-emerald-600 dark:text-emerald-400" > - All routes active + {copy.allRoutes} )} @@ -303,7 +351,7 @@ export default function ToolDispatch({ title }: { title?: string }) { animate={{ fill: isActive ? "rgba(255,255,255,0.8)" : palette.labelFill }} transition={{ duration: 0.4 }} > - {tool.desc} + {copy.toolDescriptions[i]} ); diff --git a/web/src/components/visualizations/s03-todo-write.tsx b/web/src/components/visualizations/s03-todo-write.tsx index 223327ea7..1cab7a4e2 100644 --- a/web/src/components/visualizations/s03-todo-write.tsx +++ b/web/src/components/visualizations/s03-todo-write.tsx @@ -3,6 +3,7 @@ import { motion, AnimatePresence } from "framer-motion"; import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; import { StepControls } from "@/components/visualizations/shared/step-controls"; +import { useLocale } from "@/lib/i18n"; // -- Task definitions -- @@ -14,59 +15,6 @@ interface Task { status: TaskStatus; } -// Snapshot of all 4 tasks at each step -const TASK_STATES: Task[][] = [ - // Step 0: all pending - [ - { id: 1, label: "Write auth tests", status: "pending" }, - { id: 2, label: "Fix mobile layout", status: "pending" }, - { id: 3, label: "Add error handling", status: "pending" }, - { id: 4, label: "Update config loader", status: "pending" }, - ], - // Step 1: still all pending (idle round 1) - [ - { id: 1, label: "Write auth tests", status: "pending" }, - { id: 2, label: "Fix mobile layout", status: "pending" }, - { id: 3, label: "Add error handling", status: "pending" }, - { id: 4, label: "Update config 
loader", status: "pending" }, - ], - // Step 2: still all pending (idle round 2) - [ - { id: 1, label: "Write auth tests", status: "pending" }, - { id: 2, label: "Fix mobile layout", status: "pending" }, - { id: 3, label: "Add error handling", status: "pending" }, - { id: 4, label: "Update config loader", status: "pending" }, - ], - // Step 3: NAG fires, task 1 moves to in_progress - [ - { id: 1, label: "Write auth tests", status: "in_progress" }, - { id: 2, label: "Fix mobile layout", status: "pending" }, - { id: 3, label: "Add error handling", status: "pending" }, - { id: 4, label: "Update config loader", status: "pending" }, - ], - // Step 4: task 1 done - [ - { id: 1, label: "Write auth tests", status: "done" }, - { id: 2, label: "Fix mobile layout", status: "pending" }, - { id: 3, label: "Add error handling", status: "pending" }, - { id: 4, label: "Update config loader", status: "pending" }, - ], - // Step 5: task 2 self-directed to in_progress - [ - { id: 1, label: "Write auth tests", status: "done" }, - { id: 2, label: "Fix mobile layout", status: "in_progress" }, - { id: 3, label: "Add error handling", status: "pending" }, - { id: 4, label: "Update config loader", status: "pending" }, - ], - // Step 6: tasks 2,3 done, task 4 in_progress - [ - { id: 1, label: "Write auth tests", status: "done" }, - { id: 2, label: "Fix mobile layout", status: "done" }, - { id: 3, label: "Add error handling", status: "done" }, - { id: 4, label: "Update config loader", status: "in_progress" }, - ], -]; - // Nag timer value at each step (out of 3) const NAG_TIMER_PER_STEP = [0, 1, 2, 3, 0, 0, 0]; const NAG_THRESHOLD = 3; @@ -74,16 +22,135 @@ const NAG_THRESHOLD = 3; // Whether the nag fires at this step const NAG_FIRES_PER_STEP = [false, false, false, true, false, false, false]; -// Step annotations -const STEP_INFO = [ - { title: "The Plan", desc: "TodoWrite gives the model a visible plan. All tasks start as pending." 
}, - { title: "Round 1 -- Idle", desc: "The model does work but doesn't touch its todos. The nag counter increments." }, - { title: "Round 2 -- Still Idle", desc: "Two rounds without progress. Pressure builds." }, - { title: "NAG!", desc: "Threshold reached! System message injected: 'You have pending tasks. Pick one up now!'" }, - { title: "Task Complete", desc: "The model completes the task. Timer stays at 0 -- working on todos resets the counter." }, - { title: "Self-Directed", desc: "Once the model learns the pattern, it picks up tasks voluntarily." }, - { title: "Mission Accomplished", desc: "Visible plan + nag pressure = reliable task completion." }, -]; +interface TodoWriteCopy { + title: string; + tasks: string[]; + stepInfo: { title: string; desc: string }[]; + statuses: Record; + nagTimer: string; + nagMessage: string; + columns: { + pending: string; + inProgress: string; + done: string; + }; + progress: string; + complete: string; +} + +const COPY: Record = { + en: { + title: "TodoWrite Nag System", + tasks: [ + "Write auth tests", + "Fix mobile layout", + "Add error handling", + "Update config loader", + ], + stepInfo: [ + { title: "The Plan", desc: "TodoWrite gives the model a visible plan. All tasks start as pending." }, + { title: "Round 1 -- Idle", desc: "The model does work but doesn't touch its todos. The nag counter increments." }, + { title: "Round 2 -- Still Idle", desc: "Two rounds without progress. Pressure builds." }, + { title: "NAG!", desc: "Threshold reached! System message injected: 'You have pending tasks. Pick one up now!'" }, + { title: "Task Complete", desc: "The model completes the task. Timer stays at 0 -- working on todos resets the counter." }, + { title: "Self-Directed", desc: "Once the model learns the pattern, it picks up tasks voluntarily." }, + { title: "Mission Accomplished", desc: "Visible plan + nag pressure = reliable task completion." 
 }, + ], + statuses: { + pending: "pending", + in_progress: "in progress", + done: "done", + }, + nagTimer: "Nag Timer", + nagMessage: 'SYSTEM: "You have pending tasks. Pick one up now!"', + columns: { + pending: "Pending", + inProgress: "In Progress", + done: "Done", + }, + progress: "Progress", + complete: "complete", + }, + ru: { + title: "Система напоминаний плана задач", + tasks: [ + "Написать auth-тесты", + "Исправить мобильную вёрстку", + "Добавить обработку ошибок", + "Обновить загрузчик конфига", + ], + stepInfo: [ + { title: "План", desc: "Инструмент плана задач даёт модели видимый план. Все задачи стартуют в статусе pending." }, + { title: "Раунд 1 -- без движения", desc: "Модель что-то делает, но не трогает todo. Счётчик напоминаний растёт." }, + { title: "Раунд 2 -- всё ещё без движения", desc: "Два раунда без прогресса. Давление усиливается." }, + { title: "НАПОМИНАНИЕ!", desc: "Порог достигнут. Внедряется системное сообщение: 'У тебя есть незавершённые задачи. Возьми одну сейчас!'" }, + { title: "Задача завершена", desc: "Модель закрывает задачу. Таймер остаётся на 0: работа с todo сбрасывает счётчик." }, + { title: "Самонаведение", desc: "Когда модель понимает паттерн, она начинает подхватывать задачи сама." }, + { title: "Миссия выполнена", desc: "Видимый план + давление напоминаний = надёжное завершение задач." }, + ], + statuses: { + pending: "ожидает", + in_progress: "в работе", + done: "готово", + }, + nagTimer: "Таймер напоминаний", + nagMessage: 'СИСТЕМА: "У тебя есть незавершённые задачи. 
Возьми одну сейчас!"', + columns: { + pending: "Ожидают", + inProgress: "В работе", + done: "Готово", + }, + progress: "Прогресс", + complete: "завершено", + }, +}; + +function getTaskStates(labels: string[]): Task[][] { + return [ + [ + { id: 1, label: labels[0], status: "pending" }, + { id: 2, label: labels[1], status: "pending" }, + { id: 3, label: labels[2], status: "pending" }, + { id: 4, label: labels[3], status: "pending" }, + ], + [ + { id: 1, label: labels[0], status: "pending" }, + { id: 2, label: labels[1], status: "pending" }, + { id: 3, label: labels[2], status: "pending" }, + { id: 4, label: labels[3], status: "pending" }, + ], + [ + { id: 1, label: labels[0], status: "pending" }, + { id: 2, label: labels[1], status: "pending" }, + { id: 3, label: labels[2], status: "pending" }, + { id: 4, label: labels[3], status: "pending" }, + ], + [ + { id: 1, label: labels[0], status: "in_progress" }, + { id: 2, label: labels[1], status: "pending" }, + { id: 3, label: labels[2], status: "pending" }, + { id: 4, label: labels[3], status: "pending" }, + ], + [ + { id: 1, label: labels[0], status: "done" }, + { id: 2, label: labels[1], status: "pending" }, + { id: 3, label: labels[2], status: "pending" }, + { id: 4, label: labels[3], status: "pending" }, + ], + [ + { id: 1, label: labels[0], status: "done" }, + { id: 2, label: labels[1], status: "in_progress" }, + { id: 3, label: labels[2], status: "pending" }, + { id: 4, label: labels[3], status: "pending" }, + ], + [ + { id: 1, label: labels[0], status: "done" }, + { id: 2, label: labels[1], status: "done" }, + { id: 3, label: labels[2], status: "done" }, + { id: 4, label: labels[3], status: "in_progress" }, + ], + ]; +} // -- Column component -- @@ -127,6 +194,8 @@ function KanbanColumn({ // -- Task card -- function TaskCard({ task }: { task: Task }) { + const locale = useLocale(); + const copy = COPY[locale] || COPY.en; const statusStyles: Record = { pending: "bg-zinc-100 text-zinc-600 dark:bg-zinc-800 
dark:text-zinc-400", in_progress: "bg-amber-100 text-amber-700 dark:bg-amber-900/40 dark:text-amber-300", @@ -156,7 +225,7 @@ function TaskCard({ task }: { task: Task }) { - {task.status.replace("_", " ")} + {copy.statuses[task.status]}
    @@ -169,6 +238,8 @@ function TaskCard({ task }: { task: Task }) { // -- Nag gauge -- function NagGauge({ value, max, firing }: { value: number; max: number; firing: boolean }) { + const locale = useLocale(); + const copy = COPY[locale] || COPY.en; const pct = Math.min((value / max) * 100, 100); const barColor = @@ -184,7 +255,7 @@ function NagGauge({ value, max, firing }: { value: number; max: number; firing:
    - Nag Timer + {copy.nagTimer} {value}/{max} @@ -219,6 +290,8 @@ function NagGauge({ value, max, firing }: { value: number; max: number; firing: // -- Main component -- export default function TodoWrite({ title }: { title?: string }) { + const locale = useLocale(); + const copy = COPY[locale] || COPY.en; const { currentStep, totalSteps, @@ -229,10 +302,10 @@ export default function TodoWrite({ title }: { title?: string }) { toggleAutoPlay, } = useSteppedVisualization({ totalSteps: 7, autoPlayInterval: 2500 }); - const tasks = TASK_STATES[currentStep]; + const tasks = getTaskStates(copy.tasks)[currentStep]; const nagValue = NAG_TIMER_PER_STEP[currentStep]; const nagFires = NAG_FIRES_PER_STEP[currentStep]; - const stepInfo = STEP_INFO[currentStep]; + const stepInfo = copy.stepInfo[currentStep]; const pendingTasks = tasks.filter((t) => t.status === "pending"); const inProgressTasks = tasks.filter((t) => t.status === "in_progress"); @@ -241,7 +314,7 @@ export default function TodoWrite({ title }: { title?: string }) { return (

    - {title || "TodoWrite Nag System"} + {title || copy.title}

    @@ -257,7 +330,7 @@ export default function TodoWrite({ title }: { title?: string }) { exit={{ opacity: 0, y: -8, height: 0 }} className="rounded-md border border-red-300 bg-red-50 px-3 py-2 text-center text-xs font-bold text-red-700 dark:border-red-700 dark:bg-red-950/30 dark:text-red-300" > - SYSTEM: "You have pending tasks. Pick one up now!" + {copy.nagMessage} )} @@ -266,19 +339,19 @@ export default function TodoWrite({ title }: { title?: string }) { {/* Kanban board */}
    - Progress: {doneTasks.length}/{tasks.length} complete + {copy.progress}: {doneTasks.length}/{tasks.length} {copy.complete}
    {tasks.map((t) => ( diff --git a/web/src/components/visualizations/s04-subagent.tsx b/web/src/components/visualizations/s04-subagent.tsx index d4b05b793..b54c8d284 100644 --- a/web/src/components/visualizations/s04-subagent.tsx +++ b/web/src/components/visualizations/s04-subagent.tsx @@ -3,6 +3,7 @@ import { motion, AnimatePresence } from "framer-motion"; import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; import { StepControls } from "@/components/visualizations/shared/step-controls"; +import { useLocale } from "@/lib/i18n"; interface MessageBlock { id: string; @@ -10,63 +11,95 @@ interface MessageBlock { color: string; } -const PARENT_BASE_MESSAGES: MessageBlock[] = [ - { id: "p1", label: "user: Build login + tests", color: "bg-blue-500" }, - { id: "p2", label: "assistant: Planning approach...", color: "bg-zinc-600" }, - { id: "p3", label: "tool_result: project structure", color: "bg-emerald-500" }, -]; - -const TASK_PROMPT: MessageBlock = { - id: "task", - label: "task: Write unit tests for auth", - color: "bg-purple-500", -}; - -const CHILD_WORK_MESSAGES: MessageBlock[] = [ - { id: "c1", label: "tool_use: read auth.ts", color: "bg-amber-500" }, - { id: "c2", label: "tool_use: write test.ts", color: "bg-amber-500" }, -]; - -const SUMMARY_BLOCK: MessageBlock = { - id: "summary", - label: "summary: 3 tests written, all passing", - color: "bg-teal-500", -}; +interface SubagentCopy { + title: string; + parentBaseMessages: MessageBlock[]; + taskPrompt: MessageBlock; + childWorkMessages: MessageBlock[]; + summaryBlock: MessageBlock; + steps: { title: string; description: string }[]; + parentProcess: string; + childProcess: string; + messagesFresh: string; + notSpawned: string; + cleanContext: string; + isolation: string; + compressing: string; + discarded: string; + taskPromptChip: string; + summaryChip: string; +} -const STEPS = [ - { - title: "Parent Context", - description: - "The parent agent has accumulated messages from the 
conversation.", +const COPY: Record = { + en: { + title: "Subagent Context Isolation", + parentBaseMessages: [ + { id: "p1", label: "user: Build login + tests", color: "bg-blue-500" }, + { id: "p2", label: "assistant: Planning approach...", color: "bg-zinc-600" }, + { id: "p3", label: "tool_result: project structure", color: "bg-emerald-500" }, + ], + taskPrompt: { id: "task", label: "task: Write unit tests for auth", color: "bg-purple-500" }, + childWorkMessages: [ + { id: "c1", label: "tool_use: read auth.ts", color: "bg-amber-500" }, + { id: "c2", label: "tool_use: write test.ts", color: "bg-amber-500" }, + ], + summaryBlock: { id: "summary", label: "summary: 3 tests written, all passing", color: "bg-teal-500" }, + steps: [ + { title: "Parent Context", description: "The parent agent has accumulated messages from the conversation." }, + { title: "Spawn Subagent", description: "Task tool creates a child with fresh messages[]. Only the task description is passed." }, + { title: "Independent Work", description: "The child has its own context. It doesn't see the parent's history." }, + { title: "Compress Result", description: "The child's full conversation compresses into one summary." }, + { title: "Return Summary", description: "Only the summary returns. The child's full context is discarded." }, + { title: "Clean Context", description: "The parent gets a clean summary without context bloat. This is fresh-context isolation via messages[]." }, + ], + parentProcess: "Parent Process", + childProcess: "Child Process", + messagesFresh: "messages[] (fresh)", + notSpawned: "not yet spawned", + cleanContext: "3 original + 1 summary = clean context", + isolation: "ISOLATION", + compressing: "Compressing full context into summary...", + discarded: "context discarded", + taskPromptChip: "task prompt", + summaryChip: "summary", }, - { - title: "Spawn Subagent", - description: - "Task tool creates a child with fresh messages[]. 
Only the task description is passed.", + ru: { + title: "Изоляция контекста сабагента", + parentBaseMessages: [ + { id: "p1", label: "user: Собрать логин + тесты", color: "bg-blue-500" }, + { id: "p2", label: "assistant: Планирую подход...", color: "bg-zinc-600" }, + { id: "p3", label: "tool_result: структура проекта", color: "bg-emerald-500" }, + ], + taskPrompt: { id: "task", label: "task: Написать unit-тесты для auth", color: "bg-purple-500" }, + childWorkMessages: [ + { id: "c1", label: "tool_use: read auth.ts", color: "bg-amber-500" }, + { id: "c2", label: "tool_use: write test.ts", color: "bg-amber-500" }, + ], + summaryBlock: { id: "summary", label: "summary: написаны 3 теста, всё проходит", color: "bg-teal-500" }, + steps: [ + { title: "Контекст родителя", description: "Родительский агент уже накопил сообщения из разговора." }, + { title: "Запуск сабагента", description: "Инструмент Task создаёт дочернего агента со свежим messages[]. Передаётся только описание задачи." }, + { title: "Независимая работа", description: "У дочернего агента свой собственный контекст. Историю родителя он не видит." }, + { title: "Сжать результат", description: "Весь диалог дочернего агента сжимается в одну сводку." }, + { title: "Вернуть сводку", description: "Назад возвращается только сводка. Полный контекст дочернего агента выбрасывается." }, + { title: "Чистый контекст", description: "Родитель получает чистую сводку без раздувания контекста. Это и есть изоляция через свежий messages[]." }, + ], + parentProcess: "Родительский процесс", + childProcess: "Дочерний процесс", + messagesFresh: "messages[] (свежий)", + notSpawned: "ещё не запущен", + cleanContext: "3 исходных + 1 сводка = чистый контекст", + isolation: "ИЗОЛЯЦИЯ", + compressing: "Сжимаем полный контекст в сводку...", + discarded: "контекст отброшен", + taskPromptChip: "описание задачи", + summaryChip: "сводка", }, - { - title: "Independent Work", - description: - "The child has its own context. 
It doesn't see the parent's history.", - }, - { - title: "Compress Result", - description: - "The child's full conversation compresses into one summary.", - }, - { - title: "Return Summary", - description: - "Only the summary returns. The child's full context is discarded.", - }, - { - title: "Clean Context", - description: - "The parent gets a clean summary without context bloat. This is fresh-context isolation via messages[].", - }, -]; +}; export default function SubagentIsolation({ title }: { title?: string }) { + const locale = useLocale(); + const copy = COPY[locale] || COPY.en; const { currentStep, totalSteps, @@ -75,23 +108,23 @@ export default function SubagentIsolation({ title }: { title?: string }) { reset, isPlaying, toggleAutoPlay, - } = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2500 }); + } = useSteppedVisualization({ totalSteps: copy.steps.length, autoPlayInterval: 2500 }); // Derive what to show in each container based on step const parentMessages: MessageBlock[] = (() => { - const base = [...PARENT_BASE_MESSAGES]; + const base = [...copy.parentBaseMessages]; if (currentStep >= 5) { - base.push(SUMMARY_BLOCK); + base.push(copy.summaryBlock); } return base; })(); const childMessages: MessageBlock[] = (() => { if (currentStep < 1) return []; - if (currentStep === 1) return [TASK_PROMPT]; - if (currentStep === 2) return [TASK_PROMPT, ...CHILD_WORK_MESSAGES]; - if (currentStep === 3) return [SUMMARY_BLOCK]; - return currentStep >= 4 ? [TASK_PROMPT, ...CHILD_WORK_MESSAGES] : []; + if (currentStep === 1) return [copy.taskPrompt]; + if (currentStep === 2) return [copy.taskPrompt, ...copy.childWorkMessages]; + if (currentStep === 3) return [copy.summaryBlock]; + return currentStep >= 4 ? [copy.taskPrompt, ...copy.childWorkMessages] : []; })(); const showChildEmpty = currentStep === 0; @@ -104,7 +137,7 @@ export default function SubagentIsolation({ title }: { title?: string }) { return (

    - {title || "Subagent Context Isolation"} + {title || copy.title}

    - Parent Process + {copy.parentProcess}
    @@ -146,7 +179,7 @@ export default function SubagentIsolation({ title }: { title?: string }) { transition={{ delay: 0.5 }} className="mt-3 rounded border border-blue-200 bg-white/60 px-2 py-1 text-center text-xs text-blue-600 dark:border-blue-700 dark:bg-blue-950/30 dark:text-blue-300" > - 3 original + 1 summary = clean context + {copy.cleanContext} )}
    @@ -161,7 +194,7 @@ export default function SubagentIsolation({ title }: { title?: string }) { className="rounded bg-zinc-200 px-2 py-1 text-center font-mono text-[10px] text-zinc-500 dark:bg-zinc-700 dark:text-zinc-400" style={{ writingMode: "vertical-rl", textOrientation: "mixed" }} > - ISOLATION + {copy.isolation}
    @@ -195,11 +228,11 @@ export default function SubagentIsolation({ title }: { title?: string }) { : "text-purple-700 dark:text-purple-300" }`} > - Child Process + {copy.childProcess}
    - messages[] (fresh) + {copy.messagesFresh}
    {showChildEmpty && ( @@ -209,7 +242,7 @@ export default function SubagentIsolation({ title }: { title?: string }) { className="flex h-24 items-center justify-center rounded-lg border border-dashed border-zinc-200 dark:border-zinc-700" > - not yet spawned + {copy.notSpawned} )} @@ -237,7 +270,7 @@ export default function SubagentIsolation({ title }: { title?: string }) { animate={{ opacity: 1, scale: 1 }} className="mt-3 rounded border border-amber-300 bg-amber-50 px-2 py-1 text-center text-xs text-amber-700 dark:border-amber-600 dark:bg-amber-900/20 dark:text-amber-300" > - Compressing full context into summary... + {copy.compressing} )} @@ -247,7 +280,7 @@ export default function SubagentIsolation({ title }: { title?: string }) { animate={{ opacity: 1 }} className="mt-3 rounded border border-red-200 bg-red-50 px-2 py-1 text-center text-xs text-red-500 dark:border-red-800 dark:bg-red-900/20 dark:text-red-400" > - context discarded + {copy.discarded} )}
    @@ -264,7 +297,7 @@ export default function SubagentIsolation({ title }: { title?: string }) { style={{ zIndex: 10 }} >
    - task prompt + {copy.taskPromptChip}
    )} @@ -281,7 +314,7 @@ export default function SubagentIsolation({ title }: { title?: string }) { style={{ zIndex: 10 }} >
    - summary + {copy.summaryChip}
    )} @@ -298,8 +331,8 @@ export default function SubagentIsolation({ title }: { title?: string }) { onReset={reset} isPlaying={isPlaying} onToggleAutoPlay={toggleAutoPlay} - stepTitle={STEPS[currentStep].title} - stepDescription={STEPS[currentStep].description} + stepTitle={copy.steps[currentStep].title} + stepDescription={copy.steps[currentStep].description} />
    diff --git a/web/src/components/visualizations/s05-skill-loading.tsx b/web/src/components/visualizations/s05-skill-loading.tsx index c0331644f..5c8f27a57 100644 --- a/web/src/components/visualizations/s05-skill-loading.tsx +++ b/web/src/components/visualizations/s05-skill-loading.tsx @@ -3,98 +3,169 @@ import { motion, AnimatePresence } from "framer-motion"; import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; import { StepControls } from "@/components/visualizations/shared/step-controls"; +import { useLocale } from "@/lib/i18n"; interface SkillEntry { name: string; - summary: string; fullTokens: number; - content: string[]; } const SKILLS: SkillEntry[] = [ { name: "/commit", - summary: "Create git commits following repo conventions", fullTokens: 320, - content: [ - "1. Run git status + git diff to see changes", - "2. Analyze all staged changes and draft message", - "3. Create commit with Co-Authored-By trailer", - "4. Run git status after commit to verify", - ], }, { name: "/review-pr", - summary: "Review pull requests for bugs and style", fullTokens: 480, - content: [ - "1. Fetch PR diff via gh pr view", - "2. Analyze changes file by file for issues", - "3. Check for bugs, security, and style problems", - "4. Post review comments with gh pr review", - ], }, { name: "/test", - summary: "Run and analyze test suites", fullTokens: 290, - content: [ - "1. Detect test framework from package.json", - "2. Run test suite and capture output", - "3. Analyze failures and suggest fixes", - "4. Re-run after applying fixes", - ], }, { name: "/deploy", - summary: "Deploy application to target environment", fullTokens: 350, - content: [ - "1. Verify all tests pass before deploy", - "2. Build production bundle", - "3. Push to deployment target via CI", - "4. 
Verify health check on deployed URL", - ], }, ]; const TOKEN_STATES = [120, 120, 440, 440, 780, 780]; const MAX_TOKEN_DISPLAY = 1000; -const STEPS = [ - { - title: "Layer 1: Compact Summaries", - description: - "All skills are summarized in the system prompt. Compact, always present.", - }, - { - title: "Skill Invocation", - description: - 'The model recognizes a skill invocation and triggers the Skill tool.', - }, - { - title: "Layer 2: Full Injection", - description: - "The full skill instructions are injected as a tool_result, not into the system prompt.", - }, - { - title: "In Context Now", - description: - "The detailed instructions appear as if a tool returned them. The model follows them precisely.", - }, - { - title: "Stack Skills", - description: - "Multiple skills can be loaded. Only summaries are permanent; full content comes and goes.", +interface SkillLoadingCopy { + title: string; + systemPrompt: string; + alwaysPresent: string; + availableSkills: string; + skillSummaries: string[]; + skillContent: string[][]; + userTypes: string; + toolResult: string; + mechanism: string; + layer1: string; + layer2: string; + layer1Desc: string; + layer2Desc: string; + tokens: string; + steps: { title: string; description: string }[]; +} + +const COPY: Record = { + en: { + title: "On-Demand Skill Loading", + systemPrompt: "System Prompt", + alwaysPresent: "always present", + availableSkills: "# Available Skills", + skillSummaries: [ + "Create git commits following repo conventions", + "Review pull requests for bugs and style", + "Run and analyze test suites", + "Deploy application to target environment", + ], + skillContent: [ + [ + "1. Run git status + git diff to see changes", + "2. Analyze all staged changes and draft message", + "3. Create commit with Co-Authored-By trailer", + "4. Run git status after commit to verify", + ], + [ + "1. Fetch PR diff via gh pr view", + "2. Analyze changes file by file for issues", + "3. 
Check for bugs, security, and style problems", + "4. Post review comments with gh pr review", + ], + [ + "1. Detect test framework from package.json", + "2. Run test suite and capture output", + "3. Analyze failures and suggest fixes", + "4. Re-run after applying fixes", + ], + [ + "1. Verify all tests pass before deploy", + "2. Build production bundle", + "3. Push to deployment target via CI", + "4. Verify health check on deployed URL", + ], + ], + userTypes: "User types:", + toolResult: "tool_result", + mechanism: + "The Skill tool returns content as a tool_result message. The model sees it in context and follows the instructions. No system prompt bloat.", + layer1: "LAYER 1", + layer2: "LAYER 2", + layer1Desc: "Always present, ~120 tokens", + layer2Desc: "On demand, ~300-500 tokens each", + tokens: "Tokens", + steps: [ + { title: "Layer 1: Compact Summaries", description: "All skills are summarized in the system prompt. Compact, always present." }, + { title: "Skill Invocation", description: "The model recognizes a skill invocation and triggers the Skill tool." }, + { title: "Layer 2: Full Injection", description: "The full skill instructions are injected as a tool_result, not into the system prompt." }, + { title: "In Context Now", description: "The detailed instructions appear as if a tool returned them. The model follows them precisely." }, + { title: "Stack Skills", description: "Multiple skills can be loaded. Only summaries are permanent; full content comes and goes." }, + { title: "Two-Layer Architecture", description: "Layer 1: always present, tiny. Layer 2: loaded on demand, detailed. Elegant separation." }, + ], }, - { - title: "Two-Layer Architecture", - description: - "Layer 1: always present, tiny. Layer 2: loaded on demand, detailed. 
Elegant separation.", + ru: { + title: "Загрузка навыков по требованию", + systemPrompt: "Системный промпт", + alwaysPresent: "всегда присутствует", + availableSkills: "# Доступные навыки", + skillSummaries: [ + "Создаёт git-коммиты по правилам репозитория", + "Проверяет pull request на баги и стиль", + "Запускает и анализирует тестовые наборы", + "Деплоит приложение в целевое окружение", + ], + skillContent: [ + [ + "1. Запустить git status + git diff и посмотреть изменения", + "2. Проанализировать staged-правки и набросать сообщение", + "3. Создать коммит с trailer Co-Authored-By", + "4. После коммита снова проверить git status", + ], + [ + "1. Получить diff PR через gh pr view", + "2. Разобрать изменения по файлам и найти проблемы", + "3. Проверить баги, безопасность и стиль", + "4. Отправить комментарии через gh pr review", + ], + [ + "1. Определить тестовый фреймворк по package.json", + "2. Запустить тесты и собрать вывод", + "3. Разобрать падения и предложить исправления", + "4. Повторно прогнать после правок", + ], + [ + "1. Убедиться, что перед деплоем все тесты проходят", + "2. Собрать production-бандл", + "3. Отправить в целевое окружение через CI", + "4. Проверить health-check на задеплоенном URL", + ], + ], + userTypes: "Пользователь вводит:", + toolResult: "tool_result", + mechanism: + "Инструмент Skill возвращает содержимое как сообщение tool_result. Модель видит его в контексте и следует инструкциям. Системный промпт не раздувается.", + layer1: "СЛОЙ 1", + layer2: "СЛОЙ 2", + layer1Desc: "Всегда в контексте, около 120 токенов", + layer2Desc: "Подгружается по запросу, по 300-500 токенов", + tokens: "Токены", + steps: [ + { title: "Слой 1: компактные сводки", description: "Все навыки кратко перечислены в системном промпте. Коротко и постоянно." }, + { title: "Вызов навыка", description: "Модель распознаёт вызов навыка и активирует инструмент Skill." 
}, + { title: "Слой 2: полная подгрузка", description: "Полные инструкции навыка приходят как tool_result, а не живут в системном промпте." }, + { title: "Теперь это в контексте", description: "Подробные инструкции выглядят так, будто их вернул инструмент. Модель следует им буквально." }, + { title: "Стек навыков", description: "Можно подгружать несколько навыков. Постоянны только сводки, полный контент приходит и уходит." }, + { title: "Двухслойная архитектура", description: "Слой 1: всегда присутствует и мал. Слой 2: загружается по требованию и содержит детали." }, + ], }, -]; +}; export default function SkillLoading({ title }: { title?: string }) { + const locale = useLocale(); + const copy = COPY[locale] || COPY.en; const { currentStep, totalSteps, @@ -103,7 +174,7 @@ export default function SkillLoading({ title }: { title?: string }) { reset, isPlaying, toggleAutoPlay, - } = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2500 }); + } = useSteppedVisualization({ totalSteps: copy.steps.length, autoPlayInterval: 2500 }); const tokenCount = TOKEN_STATES[currentStep]; const highlightedSkill = currentStep >= 1 && currentStep <= 3 ? 0 : currentStep >= 4 ? 1 : -1; @@ -114,7 +185,7 @@ export default function SkillLoading({ title }: { title?: string }) { return (

    - {title || "On-Demand Skill Loading"} + {title || copy.title}

    - System Prompt + {copy.systemPrompt} - always present + {copy.alwaysPresent}
    - # Available Skills + {copy.availableSkills}
    {SKILLS.map((skill, i) => { @@ -161,7 +232,7 @@ export default function SkillLoading({ title }: { title?: string }) { {skill.name} {" - "} - {skill.summary} + {copy.skillSummaries[i]} ); })} @@ -179,7 +250,7 @@ export default function SkillLoading({ title }: { title?: string }) { className="flex items-center gap-2 rounded-lg border border-blue-200 bg-blue-50 px-3 py-2 dark:border-blue-800 dark:bg-blue-950/30" > - User types: + {copy.userTypes} /commit @@ -194,7 +265,7 @@ export default function SkillLoading({ title }: { title?: string }) { className="flex items-center gap-2 rounded-lg border border-blue-200 bg-blue-50 px-3 py-2 dark:border-blue-800 dark:bg-blue-950/30" > - User types: + {copy.userTypes} /review-pr @@ -243,11 +314,11 @@ export default function SkillLoading({ title }: { title?: string }) {
    - tool_result + {copy.toolResult}
    - {SKILLS[0].content.map((line, i) => ( + {copy.skillContent[0].map((line, i) => (
    - tool_result + {copy.toolResult}
    - {SKILLS[1].content.map((line, i) => ( + {copy.skillContent[1].map((line, i) => ( - The Skill tool returns content as a tool_result message. - The model sees it in context and follows the instructions. - No system prompt bloat. + {copy.mechanism} )} @@ -334,18 +403,18 @@ export default function SkillLoading({ title }: { title?: string }) { >
    - LAYER 1 + {copy.layer1}
    - Always present, ~120 tokens + {copy.layer1Desc}
    - LAYER 2 + {copy.layer2}
    - On demand, ~300-500 tokens each + {copy.layer2Desc}
    @@ -356,7 +425,7 @@ export default function SkillLoading({ title }: { title?: string }) { {/* Token Gauge (vertical bar on the right) */}
    - Tokens + {copy.tokens}
    diff --git a/web/src/components/visualizations/s06-context-compact.tsx b/web/src/components/visualizations/s06-context-compact.tsx index 3c4270d32..5711fe8ee 100644 --- a/web/src/components/visualizations/s06-context-compact.tsx +++ b/web/src/components/visualizations/s06-context-compact.tsx @@ -4,6 +4,7 @@ import { useMemo } from "react"; import { motion, AnimatePresence } from "framer-motion"; import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; import { StepControls } from "@/components/visualizations/shared/step-controls"; +import { useLocale } from "@/lib/i18n"; type BlockType = "user" | "assistant" | "tool_result"; @@ -14,6 +15,30 @@ interface ContextBlock { tokens: number; } +interface CompactCopy { + title: string; + contextWindow: string; + tokenUsage: string; + user: string; + assistant: string; + toolResult: string; + largestBlocks: string; + largestBlocksDesc: string; + microCompact: string; + autoCompact: string; + summary: string; + compactSummary: string; + microCompactDesc: string; + autoCompactDesc: string; + compactDesc: string; + stage1: string; + stage1Mode: string; + stage2: string; + stage2Mode: string; + stage3: string; + stage3Mode: string; +} + const BLOCK_COLORS: Record = { user: "bg-blue-500", assistant: "bg-zinc-500 dark:bg-zinc-600", @@ -46,6 +71,57 @@ function generateBlocks(count: number, seed: number): ContextBlock[] { const MAX_TOKENS = 100000; const WINDOW_HEIGHT = 350; +const COPY: Record = { + en: { + title: "Three-Layer Context Compression", + contextWindow: "Context Window", + tokenUsage: "Token usage", + user: "user", + assistant: "assistant", + toolResult: "tool_result", + largestBlocks: "tool_results are the largest blocks", + largestBlocksDesc: + "File contents, command outputs, search results -- each one is thousands of tokens.", + microCompact: "MICRO-COMPACT", + autoCompact: "AUTO-COMPACT", + summary: "SUMMARY", + compactSummary: "COMPACT SUMMARY", + microCompactDesc: "Old tool_results shrunk to 
tiny summaries", + autoCompactDesc: "Full conversation compressed to summary block", + compactDesc: "Most aggressive compression -- near-empty context", + stage1: "Stage 1: Micro -- shrink old tool_results", + stage1Mode: "automatic", + stage2: "Stage 2: Auto -- summarize entire conversation", + stage2Mode: "at threshold", + stage3: "Stage 3: /compact -- user-triggered, deepest compression", + stage3Mode: "manual", + }, + ru: { + title: "Трёхслойное сжатие контекста", + contextWindow: "Окно контекста", + tokenUsage: "Использование токенов", + user: "пользователь", + assistant: "ассистент", + toolResult: "tool_result", + largestBlocks: "tool_result занимают больше всего места", + largestBlocksDesc: + "Содержимое файлов, вывод команд и результаты поиска — каждый такой блок съедает тысячи токенов.", + microCompact: "МИКРО-СЖАТИЕ", + autoCompact: "АВТО-СЖАТИЕ", + summary: "СВОДКА", + compactSummary: "КОРОТКАЯ СВОДКА", + microCompactDesc: "Старые tool_result ужаты до коротких сводок", + autoCompactDesc: "Весь разговор сжат в один summary-блок", + compactDesc: "Самое агрессивное сжатие — контекст почти пуст", + stage1: "Этап 1: Micro — сжать старые tool_result", + stage1Mode: "автоматически", + stage2: "Этап 2: Auto — сжать весь разговор в summary", + stage2Mode: "по порогу", + stage3: "Этап 3: /compact — ручной запуск самого глубокого сжатия", + stage3Mode: "вручную", + }, +} as const; + interface StepState { blocks: { id: string; type: BlockType; label: string; heightPx: number; compressed?: boolean }[]; tokenCount: number; @@ -53,7 +129,10 @@ interface StepState { compressionLabel: string | null; } -function computeStepState(step: number): StepState { +function computeStepState( + step: number, + copy: CompactCopy +): StepState { switch (step) { case 0: { const raw = generateBlocks(8, 0); @@ -101,7 +180,7 @@ function computeStepState(step: number): StepState { blocks, tokenCount, fillPercent: 60, - compressionLabel: "MICRO-COMPACT", + compressionLabel: 
copy.microCompact, }; } case 4: { @@ -119,7 +198,7 @@ function computeStepState(step: number): StepState { const summaryBlock = { id: "auto-summary", type: "assistant" as BlockType, - label: "SUMMARY", + label: copy.summary, heightPx: 40, compressed: false, }; @@ -131,7 +210,7 @@ function computeStepState(step: number): StepState { blocks: [summaryBlock, ...recentBlocks], tokenCount, fillPercent: 25, - compressionLabel: "AUTO-COMPACT", + compressionLabel: copy.autoCompact, }; } case 6: { @@ -139,7 +218,7 @@ function computeStepState(step: number): StepState { const compactBlock = { id: "compact-summary", type: "assistant" as BlockType, - label: "COMPACT SUMMARY", + label: copy.compactSummary, heightPx: 24, compressed: false, }; @@ -155,45 +234,85 @@ function computeStepState(step: number): StepState { } } -const STEPS = [ - { - title: "Growing Context", - description: - "The context window holds the conversation. Each API call adds more messages.", - }, - { - title: "Context Growing", - description: - "As the agent works, messages accumulate. The context window fills up.", - }, - { - title: "Approaching Limit", - description: - "Old tool_results are the biggest consumers. Micro-compact targets these first.", - }, - { - title: "Stage 1: Micro-Compact", - description: - "Replace old tool_results with short summaries. Automatic, transparent to the model.", - }, - { - title: "Still Growing", - description: - "Work continues. Context grows again toward the threshold...", - }, - { - title: "Stage 2: Auto-Compact", - description: - "Entire conversation summarized into a compact block. Triggered at token threshold.", - }, - { - title: "Stage 3: /compact", - description: - "User-triggered, most aggressive. Three layers of strategic forgetting enable infinite sessions.", - }, -]; +const STEPS_BY_LOCALE = { + en: [ + { + title: "Growing Context", + description: + "The context window holds the conversation. 
Each API call adds more messages.", + }, + { + title: "Context Growing", + description: + "As the agent works, messages accumulate. The context window fills up.", + }, + { + title: "Approaching Limit", + description: + "Old tool_results are the biggest consumers. Micro-compact targets these first.", + }, + { + title: "Stage 1: Micro-Compact", + description: + "Replace old tool_results with short summaries. Automatic, transparent to the model.", + }, + { + title: "Still Growing", + description: "Work continues. Context grows again toward the threshold...", + }, + { + title: "Stage 2: Auto-Compact", + description: + "Entire conversation summarized into a compact block. Triggered at token threshold.", + }, + { + title: "Stage 3: /compact", + description: + "User-triggered, most aggressive. Three layers of strategic forgetting enable infinite sessions.", + }, + ], + ru: [ + { + title: "Рост контекста", + description: + "Окно контекста хранит весь разговор. Каждый вызов API добавляет новые сообщения.", + }, + { + title: "Контекст продолжает расти", + description: + "Пока агент работает, сообщения накапливаются. Окно контекста постепенно заполняется.", + }, + { + title: "Приближение к лимиту", + description: + "Старые tool_result — самые тяжёлые куски контекста. Микро-сжатие бьёт по ним первым.", + }, + { + title: "Этап 1: Micro-Compact", + description: + "Старые tool_result заменяются короткими сводками. Это происходит автоматически и прозрачно для модели.", + }, + { + title: "Контекст снова растёт", + description: "Работа продолжается. Контекст снова движется к порогу...", + }, + { + title: "Этап 2: Auto-Compact", + description: + "Весь разговор сворачивается в компактный блок summary. Срабатывает по порогу токенов.", + }, + { + title: "Этап 3: /compact", + description: + "Запускается пользователем и сжимает сильнее всего. 
Три уровня стратегического забывания позволяют вести бесконечные сессии.", + }, + ], +} as const; export default function ContextCompact({ title }: { title?: string }) { + const locale = useLocale(); + const copy = locale === "ru" ? COPY.ru : COPY.en; + const steps = locale === "ru" ? STEPS_BY_LOCALE.ru : STEPS_BY_LOCALE.en; const { currentStep, totalSteps, @@ -202,9 +321,9 @@ export default function ContextCompact({ title }: { title?: string }) { reset, isPlaying, toggleAutoPlay, - } = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2500 }); + } = useSteppedVisualization({ totalSteps: steps.length, autoPlayInterval: 2500 }); - const state = useMemo(() => computeStepState(currentStep), [currentStep]); + const state = useMemo(() => computeStepState(currentStep, copy), [currentStep, copy]); const fillColor = state.fillPercent > 75 @@ -218,7 +337,7 @@ export default function ContextCompact({ title }: { title?: string }) { return (

    - {title || "Three-Layer Context Compression"} + {title || copy.title}

    - Context Window + {copy.contextWindow}
    - Token usage + {copy.tokenUsage} {state.tokenCount.toLocaleString()} / {MAX_TOKENS.toLocaleString()} @@ -317,15 +436,15 @@ export default function ContextCompact({ title }: { title?: string }) {
    - user + {copy.user}
    - assistant + {copy.assistant}
    - tool_result + {copy.toolResult}
    @@ -339,10 +458,10 @@ export default function ContextCompact({ title }: { title?: string }) { className="mt-3 rounded border border-amber-300 bg-amber-50 px-3 py-2 dark:border-amber-700 dark:bg-amber-900/20" >
    - tool_results are the largest blocks + {copy.largestBlocks}
    - File contents, command outputs, search results -- each one is thousands of tokens. + {copy.largestBlocksDesc}
    )} @@ -381,9 +500,9 @@ export default function ContextCompact({ title }: { title?: string }) { ? "text-blue-500 dark:text-blue-400" : "text-emerald-500 dark:text-emerald-400" }`}> - {currentStep === 3 && "Old tool_results shrunk to tiny summaries"} - {currentStep === 5 && "Full conversation compressed to summary block"} - {currentStep === 6 && "Most aggressive compression -- near-empty context"} + {currentStep === 3 && copy.microCompactDesc} + {currentStep === 5 && copy.autoCompactDesc} + {currentStep === 6 && copy.compactDesc}
    @@ -401,28 +520,28 @@ export default function ContextCompact({ title }: { title?: string }) {
    - Stage 1: Micro -- shrink old tool_results + {copy.stage1} - automatic + {copy.stage1Mode}
    - Stage 2: Auto -- summarize entire conversation + {copy.stage2} - at threshold + {copy.stage2Mode}
    - Stage 3: /compact -- user-triggered, deepest compression + {copy.stage3} - manual + {copy.stage3Mode}
    @@ -440,8 +559,8 @@ export default function ContextCompact({ title }: { title?: string }) { onReset={reset} isPlaying={isPlaying} onToggleAutoPlay={toggleAutoPlay} - stepTitle={STEPS[currentStep].title} - stepDescription={STEPS[currentStep].description} + stepTitle={steps[currentStep].title} + stepDescription={steps[currentStep].description} />
    diff --git a/web/src/components/visualizations/s07-task-system.tsx b/web/src/components/visualizations/s07-task-system.tsx index 5ace18ba4..607227f0d 100644 --- a/web/src/components/visualizations/s07-task-system.tsx +++ b/web/src/components/visualizations/s07-task-system.tsx @@ -5,6 +5,7 @@ import { motion } from "framer-motion"; import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; import { StepControls } from "@/components/visualizations/shared/step-controls"; import { useDarkMode, useSvgPalette } from "@/hooks/useDarkMode"; +import { useLocale } from "@/lib/i18n"; type TaskStatus = "pending" | "in_progress" | "completed" | "blocked"; @@ -22,51 +23,87 @@ interface StepInfo { } const TASKS: TaskNode[] = [ - { id: "T1", label: "T1: Setup DB", x: 80, y: 160, deps: [] }, - { id: "T2", label: "T2: API routes", x: 280, y: 80, deps: ["T1"] }, - { id: "T3", label: "T3: Auth module", x: 280, y: 240, deps: ["T1"] }, - { id: "T4", label: "T4: Integration", x: 480, y: 160, deps: ["T2", "T3"] }, - { id: "T5", label: "T5: Deploy", x: 650, y: 160, deps: ["T4"] }, + { id: "T1", label: "T1", x: 80, y: 160, deps: [] }, + { id: "T2", label: "T2", x: 280, y: 80, deps: ["T1"] }, + { id: "T3", label: "T3", x: 280, y: 240, deps: ["T1"] }, + { id: "T4", label: "T4", x: 480, y: 160, deps: ["T2", "T3"] }, + { id: "T5", label: "T5", x: 650, y: 160, deps: ["T4"] }, ]; const NODE_W = 140; const NODE_H = 50; -const STEP_INFO: StepInfo[] = [ - { - title: "File-Based Tasks", - description: - "Tasks are stored in JSON files on disk. They survive context compaction -- unlike in-memory state.", - }, - { - title: "Start T1", - description: - "Tasks without dependencies can start immediately. T1 has no blockers.", - }, - { - title: "T1 Complete", - description: "Completing T1 unblocks its dependents: T2 and T3.", - }, - { - title: "Parallel Work", - description: - "T2 and T3 have no dependency on each other. 
Both can run simultaneously.", - }, - { - title: "Partial Unblock", - description: - "T4 depends on BOTH T2 and T3. It waits for all blockers to complete.", - }, - { - title: "Fully Unblocked", - description: "All blockers resolved. T4 can now proceed.", +interface TaskSystemCopy { + title: string; + stepInfo: StepInfo[]; + taskLabels: Record; + statuses: Record; + blocked: string; + taskDag: string; + persisted: string; + survives: string; +} + +const COPY: Record = { + en: { + title: "Task Dependency Graph", + stepInfo: [ + { title: "File-Based Tasks", description: "Tasks are stored in JSON files on disk. They survive context compaction -- unlike in-memory state." }, + { title: "Start T1", description: "Tasks without dependencies can start immediately. T1 has no blockers." }, + { title: "T1 Complete", description: "Completing T1 unblocks its dependents: T2 and T3." }, + { title: "Parallel Work", description: "T2 and T3 have no dependency on each other. Both can run simultaneously." }, + { title: "Partial Unblock", description: "T4 depends on BOTH T2 and T3. It waits for all blockers to complete." }, + { title: "Fully Unblocked", description: "All blockers resolved. T4 can now proceed." }, + { title: "Graph Resolved", description: "The entire dependency graph is resolved. File-based persistence means this works across context compressions." }, + ], + taskLabels: { + T1: "T1: Setup DB", + T2: "T2: API routes", + T3: "T3: Auth module", + T4: "T4: Integration", + T5: "T5: Deploy", + }, + statuses: { + pending: "pending", + in_progress: "in_progress", + completed: "done", + blocked: "blocked", + }, + blocked: "Blocked: waiting on T3", + taskDag: "Task DAG", + persisted: "Persisted to disk", + survives: "survives context compaction", }, - { - title: "Graph Resolved", - description: - "The entire dependency graph is resolved. 
File-based persistence means this works across context compressions.", + ru: { + title: "Граф зависимостей задач", + stepInfo: [ + { title: "Файловые задачи", description: "Задачи хранятся в JSON-файлах на диске. В отличие от состояния в памяти, они переживают сжатие контекста." }, + { title: "Старт T1", description: "Задачи без зависимостей можно запускать сразу. У T1 нет блокеров." }, + { title: "T1 завершена", description: "Завершение T1 разблокирует зависимые задачи: T2 и T3." }, + { title: "Параллельная работа", description: "T2 и T3 не зависят друг от друга, поэтому могут идти одновременно." }, + { title: "Частичная разблокировка", description: "T4 зависит И от T2, и от T3. Она ждёт, пока завершатся все блокеры." }, + { title: "Полностью разблокирована", description: "Все блокеры сняты. Теперь T4 может продолжаться." }, + { title: "Граф разрешён", description: "Весь граф зависимостей закрыт. Файловая персистентность сохраняет это даже после сжатия контекста." }, + ], + taskLabels: { + T1: "T1: Настроить БД", + T2: "T2: API-маршруты", + T3: "T3: Модуль auth", + T4: "T4: Интеграция", + T5: "T5: Деплой", + }, + statuses: { + pending: "ожидает", + in_progress: "в работе", + completed: "готово", + blocked: "заблокировано", + }, + blocked: "Блокировано: ждём T3", + taskDag: "Граф задач", + persisted: "Сохранено на диск", + survives: "переживает сжатие контекста", }, -]; +}; function getTaskStatus(taskId: string, step: number): TaskStatus { const statusMap: Record = { @@ -169,16 +206,17 @@ function getStatusColor(status: TaskStatus) { } } -function getStatusLabel(status: TaskStatus): string { +function getStatusLabel(status: TaskStatus, locale: string): string { + const copy = COPY[locale] || COPY.en; switch (status) { case "pending": - return "pending"; + return copy.statuses.pending; case "in_progress": - return "in_progress"; + return copy.statuses.in_progress; case "completed": - return "done"; + return copy.statuses.completed; case "blocked": - return 
"blocked"; + return copy.statuses.blocked; } } @@ -193,6 +231,8 @@ function buildCurvePath( } export default function TaskSystem({ title }: { title?: string }) { + const locale = useLocale(); + const copy = COPY[locale] || COPY.en; const { currentStep, totalSteps, @@ -232,16 +272,16 @@ export default function TaskSystem({ title }: { title?: string }) { return result; }, []); - const stepInfo = STEP_INFO[currentStep]; + const stepInfo = copy.stepInfo[currentStep]; return (

    - {title || "Task Dependency Graph"} + {title || copy.title}

    - + { const status = getTaskStatus(task.id, currentStep); const colors = getStatusColor(status); - const statusLabel = getStatusLabel(status); + const statusLabel = getStatusLabel(status, locale); const isActive = status === "in_progress"; const isComplete = status === "completed"; @@ -369,7 +409,7 @@ export default function TaskSystem({ title }: { title?: string }) { fontWeight="600" fill={isDark ? colors.darkText : colors.text} > - {task.label} + {copy.taskLabels[task.id]} - Blocked: waiting on T3 + {copy.blocked} )} @@ -439,7 +479,7 @@ export default function TaskSystem({ title }: { title?: string }) { .tasks/tasks.json - Persisted to disk -- survives context compaction + {copy.persisted} -- {copy.survives}
    - pending + {copy.statuses.pending}
    - in_progress + {copy.statuses.in_progress}
    - completed + {copy.statuses.completed}
    - blocked + {copy.statuses.blocked}
    diff --git a/web/src/components/visualizations/s08-background-tasks.tsx b/web/src/components/visualizations/s08-background-tasks.tsx index 617b11741..eba2f4da1 100644 --- a/web/src/components/visualizations/s08-background-tasks.tsx +++ b/web/src/components/visualizations/s08-background-tasks.tsx @@ -4,48 +4,119 @@ import { motion, AnimatePresence } from "framer-motion"; import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; import { StepControls } from "@/components/visualizations/shared/step-controls"; import { useDarkMode, useSvgPalette } from "@/hooks/useDarkMode"; +import { useLocale } from "@/lib/i18n"; interface StepInfo { title: string; description: string; } -const STEP_INFO: StepInfo[] = [ - { - title: "Three Lanes", - description: - "The agent has a main thread and can spawn daemon background threads for parallel work.", - }, - { - title: "Main Thread Working", - description: - "The main agent loop runs as usual, processing user requests.", - }, - { - title: "Spawn Background", - description: - "Background tasks run as daemon threads. The main loop doesn't wait for them.", - }, - { - title: "Multiple Backgrounds", - description: "Multiple background tasks can run concurrently.", - }, - { - title: "Task Completes", - description: - "Background task finishes. 
Its result goes to the notification queue.", - }, - { - title: "Queue Fills", - description: - "Results accumulate in the queue, invisible to the model during this turn.", +interface BackgroundTasksCopy { + title: string; + stepInfo: StepInfo[]; + workLabels: { + mainLoop: string; + runTests: string; + lintCode: string; + lintResult: string; + testResult: string; + }; + laneLabels: { + main: string; + bg1: string; + bg2: string; + }; + time: string; + done: string; + llmCall: string; + notification: string; + queue: string; + queueDrained: string; + legend: { + main: string; + bg1: string; + bg2: string; + boundary: string; + }; + ariaLabel: string; +} + +const COPY: Record = { + en: { + title: "Background Task Lanes", + stepInfo: [ + { title: "Three Lanes", description: "The agent has a main thread and can spawn daemon background threads for parallel work." }, + { title: "Main Thread Working", description: "The main agent loop runs as usual, processing user requests." }, + { title: "Spawn Background", description: "Background tasks run as daemon threads. The main loop doesn't wait for them." }, + { title: "Multiple Backgrounds", description: "Multiple background tasks can run concurrently." }, + { title: "Task Completes", description: "Background task finishes. Its result goes to the notification queue." }, + { title: "Queue Fills", description: "Results accumulate in the queue, invisible to the model during this turn." }, + { title: "Drain Queue", description: "Just before the next LLM call, all queued notifications are injected as tool_results. Non-blocking, async." 
}, + ], + workLabels: { + mainLoop: "Main agent loop", + runTests: "Run tests", + lintCode: "Lint code", + lintResult: "Lint: 0 errors", + testResult: "Tests: 42 passed", + }, + laneLabels: { + main: "Main Thread", + bg1: "Background 1", + bg2: "Background 2", + }, + time: "time", + done: "done", + llmCall: "LLM API call", + notification: "Notification", + queue: "Queue", + queueDrained: "queue drained -- injected into next LLM call", + legend: { + main: "Main thread", + bg1: "Background 1", + bg2: "Background 2", + boundary: "LLM boundary", + }, + ariaLabel: "Background task lanes", }, - { - title: "Drain Queue", - description: - "Just before the next LLM call, all queued notifications are injected as tool_results. Non-blocking, async.", + ru: { + title: "Фоновые дорожки задач", + stepInfo: [ + { title: "Три дорожки", description: "У агента есть основной поток, а для параллельной работы он может поднимать фоновые демон-потоки." }, + { title: "Работает основной поток", description: "Основной цикл агента идёт как обычно и обрабатывает запрос пользователя." }, + { title: "Запуск фона", description: "Фоновые задачи работают как демон-потоки. Основной цикл их не ждёт." }, + { title: "Несколько фоновых задач", description: "Несколько фоновых задач могут выполняться одновременно." }, + { title: "Задача завершилась", description: "Фоновая задача заканчивается, и её результат попадает в очередь уведомлений." }, + { title: "Очередь заполняется", description: "Результаты копятся в очереди и не видны модели в текущем ходе." }, + { title: "Слить очередь", description: "Прямо перед следующим вызовом LLM все накопленные уведомления вставляются как tool_results. Неблокирующе и асинхронно." 
}, + ], + workLabels: { + mainLoop: "Основной цикл агента", + runTests: "Запуск тестов", + lintCode: "Линтинг кода", + lintResult: "Линт: 0 ошибок", + testResult: "Тесты: 42 пройдено", + }, + laneLabels: { + main: "Основной поток", + bg1: "Фон 1", + bg2: "Фон 2", + }, + time: "время", + done: "готово", + llmCall: "Вызов API LLM", + notification: "Очередь", + queue: "уведомлений", + queueDrained: "очередь слита -- вставлена в следующий вызов LLM", + legend: { + main: "Основной поток", + bg1: "Фон 1", + bg2: "Фон 2", + boundary: "Граница LLM", + }, + ariaLabel: "Фоновые дорожки задач", }, -]; +}; const LANE_Y = { main: 60, @@ -76,7 +147,7 @@ const WORK_BLOCKS: WorkBlock[] = [ startFraction: 0, endFraction: 1, color: "#8b5cf6", - label: "Main agent loop", + label: "mainLoop", appearsAtStep: 1, }, { @@ -84,7 +155,7 @@ const WORK_BLOCKS: WorkBlock[] = [ startFraction: 0.18, endFraction: 0.75, color: "#10b981", - label: "Run tests", + label: "runTests", appearsAtStep: 2, completesAtStep: 5, }, @@ -93,7 +164,7 @@ const WORK_BLOCKS: WorkBlock[] = [ startFraction: 0.35, endFraction: 0.58, color: "#3b82f6", - label: "Lint code", + label: "lintCode", appearsAtStep: 3, completesAtStep: 4, }, @@ -120,13 +191,13 @@ interface QueueCard { const QUEUE_CARDS: QueueCard[] = [ { id: "lint-result", - label: "Lint: 0 errors", + label: "lintResult", appearsAtStep: 4, drainsAtStep: 6, }, { id: "test-result", - label: "Tests: 42 passed", + label: "testResult", appearsAtStep: 5, drainsAtStep: 6, }, @@ -149,6 +220,8 @@ function getBlockEndFraction(block: WorkBlock, step: number): number { } export default function BackgroundTasks({ title }: { title?: string }) { + const locale = useLocale(); + const copy = COPY[locale] || COPY.en; const { currentStep, totalSteps, @@ -162,7 +235,7 @@ export default function BackgroundTasks({ title }: { title?: string }) { const isDark = useDarkMode(); const palette = useSvgPalette(); - const stepInfo = STEP_INFO[currentStep]; + const stepInfo = 
copy.stepInfo[currentStep]; const llmCallFraction = 0.82; const showLlmMarker = currentStep >= 5; @@ -170,11 +243,11 @@ export default function BackgroundTasks({ title }: { title?: string }) { return (

    - {title || "Background Task Lanes"} + {title || copy.title}

    - + - time + {copy.time} {/* Lane backgrounds and labels */} {( [ - { key: "main", y: LANE_Y.main, label: "Main Thread" }, - { key: "bg1", y: LANE_Y.bg1, label: "Background 1" }, - { key: "bg2", y: LANE_Y.bg2, label: "Background 2" }, + { key: "main", y: LANE_Y.main, label: copy.laneLabels.main }, + { key: "bg1", y: LANE_Y.bg1, label: copy.laneLabels.bg1 }, + { key: "bg2", y: LANE_Y.bg2, label: copy.laneLabels.bg2 }, ] as const ).map(({ key, y, label }) => ( @@ -321,7 +394,7 @@ export default function BackgroundTasks({ title }: { title?: string }) { animate={{ opacity: 1 }} transition={{ delay: 0.3 }} > - {block.label} + {copy.workLabels[block.label as keyof typeof copy.workLabels]} )} {isComplete && ( @@ -335,7 +408,7 @@ export default function BackgroundTasks({ title }: { title?: string }) { initial={{ opacity: 0 }} animate={{ opacity: 1 }} > - done + {copy.done} )} @@ -399,7 +472,7 @@ export default function BackgroundTasks({ title }: { title?: string }) { fontWeight="600" fill="white" > - LLM API call + {copy.llmCall} )} @@ -423,7 +496,7 @@ export default function BackgroundTasks({ title }: { title?: string }) { fontWeight="600" fill={palette.labelFill} > - Notification + {copy.notification} - Queue + {copy.queue} {/* Queue cards */} @@ -488,7 +561,7 @@ export default function BackgroundTasks({ title }: { title?: string }) { fontFamily="monospace" fill={isDark ? "#f59e0b" : "#92400e"} > - {card.label} + {copy.workLabels[card.label as keyof typeof copy.workLabels]} ); @@ -531,9 +604,9 @@ export default function BackgroundTasks({ title }: { title?: string }) { fontFamily="monospace" fill={isDark ? 
"#10b981" : "#065f46"} > - {card.label} - - + {copy.workLabels[card.label as keyof typeof copy.workLabels]} + + ); })} @@ -574,7 +647,7 @@ export default function BackgroundTasks({ title }: { title?: string }) { animate={{ opacity: 1 }} transition={{ delay: 0.6 }} > - queue drained -- injected into next LLM call + {copy.queueDrained} )} @@ -584,25 +657,25 @@ export default function BackgroundTasks({ title }: { title?: string }) {
    - Main thread + {copy.legend.main}
    - Background 1 + {copy.legend.bg1}
    - Background 2 + {copy.legend.bg2}
    - LLM boundary + {copy.legend.boundary}
    diff --git a/web/src/components/visualizations/s09-agent-teams.tsx b/web/src/components/visualizations/s09-agent-teams.tsx index bdb58874e..9d2a8eae1 100644 --- a/web/src/components/visualizations/s09-agent-teams.tsx +++ b/web/src/components/visualizations/s09-agent-teams.tsx @@ -4,6 +4,7 @@ import { motion, AnimatePresence } from "framer-motion"; import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; import { StepControls } from "@/components/visualizations/shared/step-controls"; import { useSvgPalette } from "@/hooks/useDarkMode"; +import { useLocale } from "@/lib/i18n"; // -- Layout constants -- const SVG_W = 560; @@ -12,9 +13,9 @@ const AGENT_R = 40; // Agent positions: inverted triangle (Lead top-center, Coder bottom-left, Reviewer bottom-right) const AGENTS = [ - { id: "lead", label: "Lead", cx: SVG_W / 2, cy: 70, inbox: "lead.jsonl" }, - { id: "coder", label: "Coder", cx: 140, cy: 230, inbox: "coder.jsonl" }, - { id: "reviewer", label: "Reviewer", cx: SVG_W - 140, cy: 230, inbox: "reviewer.jsonl" }, + { id: "lead", label: "lead", cx: SVG_W / 2, cy: 70, inbox: "lead.jsonl" }, + { id: "coder", label: "coder", cx: 140, cy: 230, inbox: "coder.jsonl" }, + { id: "reviewer", label: "reviewer", cx: SVG_W - 140, cy: 230, inbox: "reviewer.jsonl" }, ] as const; // Inbox tray dimensions, positioned below each agent circle @@ -36,15 +37,63 @@ function trayCenter(id: string) { } // Step configuration -const STEPS = [ - { title: "The Team", desc: "Teams use a leader-worker pattern. Each teammate has a file-based mailbox inbox." }, - { title: "Lead Assigns Work", desc: "Communication is async: write a message to the recipient's .jsonl inbox file." }, - { title: "Read Inbox", desc: "Teammates poll their inbox before each LLM call. New messages become context." }, - { title: "Independent Work", desc: "Each teammate runs its own agent loop independently." }, - { title: "Pass Result", desc: "Results flow through the same mailbox mechanism. 
All communication is via files." }, - { title: "Feedback Loop", desc: "The mailbox pattern supports any communication topology: linear, broadcast, round-robin." }, - { title: "File-Based Coordination", desc: "No shared memory, no locks. All coordination through append-only files. Simple, robust, debuggable." }, -]; +interface AgentTeamsCopy { + title: string; + agents: Record; + steps: { title: string; desc: string }[]; + teamConfig: string; + workers: string; + taskLogin: string; + resultDone: string; + feedback: string; +} + +const COPY: Record = { + en: { + title: "Agent Team Mailboxes", + agents: { + lead: "Lead", + coder: "Coder", + reviewer: "Reviewer", + }, + steps: [ + { title: "The Team", desc: "Teams use a leader-worker pattern. Each teammate has a file-based mailbox inbox." }, + { title: "Lead Assigns Work", desc: "Communication is async: write a message to the recipient's .jsonl inbox file." }, + { title: "Read Inbox", desc: "Teammates poll their inbox before each LLM call. New messages become context." }, + { title: "Independent Work", desc: "Each teammate runs its own agent loop independently." }, + { title: "Pass Result", desc: "Results flow through the same mailbox mechanism. All communication is via files." }, + { title: "Feedback Loop", desc: "The mailbox pattern supports any communication topology: linear, broadcast, round-robin." }, + { title: "File-Based Coordination", desc: "No shared memory, no locks. All coordination through append-only files. Simple, robust, debuggable." }, + ], + teamConfig: "team.config", + workers: "workers: [coder, reviewer]", + taskLogin: "task:login", + resultDone: "result:done", + feedback: "feedback", + }, + ru: { + title: "Почтовые ящики команды агентов", + agents: { + lead: "Лид", + coder: "Кодер", + reviewer: "Ревьюер", + }, + steps: [ + { title: "Команда", desc: "Команды работают по схеме лидер-исполнитель. У каждого участника есть файловый входящий ящик." 
}, + { title: "Лид назначает работу", desc: "Коммуникация асинхронная: сообщение записывается в .jsonl-файл входящих сообщений получателя." }, + { title: "Чтение ящика", desc: "Участники опрашивают свой входящий ящик перед каждым вызовом LLM. Новые сообщения становятся частью контекста." }, + { title: "Независимая работа", desc: "Каждый участник крутит свой собственный агентный цикл." }, + { title: "Передать результат", desc: "Результаты идут через тот же почтовый механизм. Всё общение проходит через файлы." }, + { title: "Петля обратной связи", desc: "Почтовый шаблон поддерживает любую топологию общения: линейную, broadcast и round-robin." }, + { title: "Координация через файлы", desc: "Никакой общей памяти и локов. Вся координация идёт через append-only файлы. Просто, надёжно и отлаживаемо." }, + ], + teamConfig: "team.config", + workers: "workers: [coder, reviewer]", + taskLogin: "задача:логин", + resultDone: "результат:готово", + feedback: "отзыв", + }, +}; // Helper: determine which agent glows at each step function agentGlows(agentId: string, step: number): boolean { @@ -132,14 +181,16 @@ function TraceLine({ from, to, strokeColor }: { from: string; to: string; stroke } export default function AgentTeams({ title }: { title?: string }) { - const vis = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2500 }); + const locale = useLocale(); + const copy = COPY[locale] || COPY.en; + const vis = useSteppedVisualization({ totalSteps: copy.steps.length, autoPlayInterval: 2500 }); const step = vis.currentStep; const palette = useSvgPalette(); return (

    - {title || "Agent Team Mailboxes"} + {title || copy.title}

    @@ -201,7 +252,7 @@ export default function AgentTeams({ title }: { title?: string }) { fontSize={12} fontWeight={700} > - {agent.label} + {copy.agents[agent.label]} {/* Inbox tray (file icon style) */} @@ -239,10 +290,10 @@ export default function AgentTeams({ title }: { title?: string }) { > - team.config + {copy.teamConfig} - workers: [coder, reviewer] + {copy.workers} )} @@ -256,7 +307,7 @@ export default function AgentTeams({ title }: { title?: string }) { fromY={agentById("lead").cy + AGENT_R} toX={agentById("coder").cx} toY={agentById("coder").cy + TRAY_OFFSET_Y + TRAY_H / 2} - label="task:login" + label={copy.taskLogin} /> )} @@ -270,7 +321,7 @@ export default function AgentTeams({ title }: { title?: string }) { fromY={agentById("coder").cy + TRAY_OFFSET_Y + TRAY_H / 2} toX={agentById("coder").cx} toY={agentById("coder").cy} - label="task:login" + label={copy.taskLogin} /> )} @@ -301,7 +352,7 @@ export default function AgentTeams({ title }: { title?: string }) { fontSize={8} fontWeight={600} > - result:done + {copy.resultDone} )} @@ -316,7 +367,7 @@ export default function AgentTeams({ title }: { title?: string }) { fromY={agentById("coder").cy} toX={agentById("reviewer").cx} toY={agentById("reviewer").cy + TRAY_OFFSET_Y + TRAY_H / 2} - label="result:done" + label={copy.resultDone} /> )} @@ -331,7 +382,7 @@ export default function AgentTeams({ title }: { title?: string }) { fromY={agentById("reviewer").cy + TRAY_OFFSET_Y + TRAY_H / 2} toX={agentById("reviewer").cx} toY={agentById("reviewer").cy} - label="result:done" + label={copy.resultDone} delay={0} /> @@ -383,8 +434,8 @@ export default function AgentTeams({ title }: { title?: string }) { onReset={vis.reset} isPlaying={vis.isPlaying} onToggleAutoPlay={vis.toggleAutoPlay} - stepTitle={STEPS[step].title} - stepDescription={STEPS[step].desc} + stepTitle={copy.steps[step].title} + stepDescription={copy.steps[step].desc} />
    diff --git a/web/src/components/visualizations/s10-team-protocols.tsx b/web/src/components/visualizations/s10-team-protocols.tsx index 11146191d..9361d8dfc 100644 --- a/web/src/components/visualizations/s10-team-protocols.tsx +++ b/web/src/components/visualizations/s10-team-protocols.tsx @@ -5,6 +5,7 @@ import { motion, AnimatePresence } from "framer-motion"; import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; import { StepControls } from "@/components/visualizations/shared/step-controls"; import { useSvgPalette } from "@/hooks/useDarkMode"; +import { useLocale } from "@/lib/i18n"; type Protocol = "shutdown" | "plan"; @@ -23,19 +24,69 @@ const ARROW_Y_GAP = 70; const REQUEST_ID = "req_abc"; // -- Shutdown protocol step definitions -- -const SHUTDOWN_STEPS = [ - { title: "Structured Protocols", desc: "Protocols define structured message exchanges with correlated request IDs." }, - { title: "Shutdown Request", desc: "The leader initiates shutdown. The request_id links the request to its response." }, - { title: "Teammate Decides", desc: "The teammate can accept or reject. It's not a forced kill -- it's a polite request." }, - { title: "Approved", desc: "Same request_id in the response. Teammate exits cleanly." }, -]; +interface TeamProtocolsCopy { + title: string; + shutdownSteps: { title: string; desc: string }[]; + planSteps: { title: string; desc: string }[]; + leader: string; + teammate: string; + shutdownProtocol: string; + planProtocol: string; + approve: string; + reject: string; + exit: string; + plan: string; + ok: string; +} -// -- Plan approval protocol step definitions -- -const PLAN_STEPS = [ - { title: "Plan Approval", desc: "Teammates in plan_mode must get approval before implementing changes." }, - { title: "Submit Plan", desc: "The teammate designs a plan and sends it to the leader for review." }, - { title: "Leader Reviews", desc: "Leader reviews and approves or rejects with feedback. Same request-response pattern." 
}, -]; +const COPY: Record = { + en: { + title: "FSM Team Protocols", + shutdownSteps: [ + { title: "Structured Protocols", desc: "Protocols define structured message exchanges with correlated request IDs." }, + { title: "Shutdown Request", desc: "The leader initiates shutdown. The request_id links the request to its response." }, + { title: "Teammate Decides", desc: "The teammate can accept or reject. It's not a forced kill -- it's a polite request." }, + { title: "Approved", desc: "Same request_id in the response. Teammate exits cleanly." }, + ], + planSteps: [ + { title: "Plan Approval", desc: "Teammates in plan_mode must get approval before implementing changes." }, + { title: "Submit Plan", desc: "The teammate designs a plan and sends it to the leader for review." }, + { title: "Leader Reviews", desc: "Leader reviews and approves or rejects with feedback. Same request-response pattern." }, + ], + leader: "Leader", + teammate: "Teammate", + shutdownProtocol: "Shutdown Protocol", + planProtocol: "Plan Approval Protocol", + approve: "approve", + reject: "reject", + exit: "exit", + plan: "Plan:", + ok: "OK", + }, + ru: { + title: "Командные протоколы FSM", + shutdownSteps: [ + { title: "Структурированные протоколы", desc: "Протоколы задают структурированный обмен сообщениями с согласованными идентификаторами запросов." }, + { title: "Запрос на завершение", desc: "Лидер инициирует shutdown. Идентификатор запроса связывает запрос с ответом." }, + { title: "Участник решает", desc: "Участник может принять или отклонить запрос. Это не принудительное завершение, а вежливая просьба." }, + { title: "Одобрено", desc: "В ответе используется тот же идентификатор запроса. Участник завершает работу чисто." }, + ], + planSteps: [ + { title: "Одобрение плана", desc: "Участники в plan_mode должны получить одобрение, прежде чем вносить изменения." }, + { title: "Отправка плана", desc: "Участник составляет план и отправляет его лидеру на проверку." 
}, + { title: "Лидер проверяет", desc: "Лидер проверяет и одобряет либо отклоняет с обратной связью. Тот же шаблон «запрос-ответ»." }, + ], + leader: "Лидер", + teammate: "Участник", + shutdownProtocol: "Протокол shutdown", + planProtocol: "Протокол одобрения плана", + approve: "одобрить", + reject: "отклонить", + exit: "выход", + plan: "План:", + ok: "OK", + }, +}; // Horizontal arrow between lifelines function SequenceArrow({ @@ -128,7 +179,7 @@ function SequenceArrow({ } // Decision diamond on a lifeline -function DecisionBox({ x, y }: { x: number; y: number }) { +function DecisionBox({ x, y, approve, reject }: { x: number; y: number; approve: string; reject: string }) { const size = 14; return ( - approve + {approve} - reject + {reject} ); @@ -183,10 +234,12 @@ function ActivationBar({ } export default function TeamProtocols({ title }: { title?: string }) { + const locale = useLocale(); + const copy = COPY[locale] || COPY.en; const [protocol, setProtocol] = useState("shutdown"); - const totalSteps = protocol === "shutdown" ? SHUTDOWN_STEPS.length : PLAN_STEPS.length; - const steps = protocol === "shutdown" ? SHUTDOWN_STEPS : PLAN_STEPS; + const totalSteps = protocol === "shutdown" ? copy.shutdownSteps.length : copy.planSteps.length; + const steps = protocol === "shutdown" ? copy.shutdownSteps : copy.planSteps; const vis = useSteppedVisualization({ totalSteps, autoPlayInterval: 2500 }); const step = vis.currentStep; @@ -197,13 +250,13 @@ export default function TeamProtocols({ title }: { title?: string }) { vis.reset(); }; - const leftLabel = protocol === "shutdown" ? "Leader" : "Leader"; - const rightLabel = protocol === "shutdown" ? "Teammate" : "Teammate"; + const leftLabel = copy.leader; + const rightLabel = copy.teammate; return (

    - {title || "FSM Team Protocols"} + {title || copy.title}

    {/* Protocol toggle */} @@ -216,7 +269,7 @@ export default function TeamProtocols({ title }: { title?: string }) { : "bg-zinc-100 text-zinc-600 hover:bg-zinc-200 dark:bg-zinc-800 dark:text-zinc-400 dark:hover:bg-zinc-700" }`} > - Shutdown Protocol + {copy.shutdownProtocol}
    @@ -317,6 +370,8 @@ export default function TeamProtocols({ title }: { title?: string }) { )} @@ -364,7 +419,7 @@ export default function TeamProtocols({ title }: { title?: string }) { fill="#ef4444" fontWeight={600} > - exit + {copy.exit} )} @@ -423,7 +478,7 @@ export default function TeamProtocols({ title }: { title?: string }) { strokeWidth={0.5} /> - Plan: + {copy.plan} 1. Add error handler @@ -468,7 +523,7 @@ export default function TeamProtocols({ title }: { title?: string }) { fill="white" fontWeight={700} > - OK + {copy.ok} )} diff --git a/web/src/components/visualizations/s11-autonomous-agents.tsx b/web/src/components/visualizations/s11-autonomous-agents.tsx index cfd175aa7..d11cce9ae 100644 --- a/web/src/components/visualizations/s11-autonomous-agents.tsx +++ b/web/src/components/visualizations/s11-autonomous-agents.tsx @@ -4,6 +4,7 @@ import { motion } from "framer-motion"; import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; import { StepControls } from "@/components/visualizations/shared/step-controls"; import { useSvgPalette } from "@/hooks/useDarkMode"; +import { useLocale } from "@/lib/i18n"; // -- FSM states and their layout positions (diamond: idle top, poll right, claim bottom, work left) -- type Phase = "idle" | "poll" | "claim" | "work"; @@ -13,11 +14,11 @@ const FSM_CY = 110; const FSM_R = 65; const FSM_STATE_R = 22; -const FSM_STATES: { id: Phase; label: string; angle: number }[] = [ - { id: "idle", label: "idle", angle: -Math.PI / 2 }, - { id: "poll", label: "poll", angle: 0 }, - { id: "claim", label: "claim", angle: Math.PI / 2 }, - { id: "work", label: "work", angle: Math.PI }, +const FSM_STATES: { id: Phase; angle: number }[] = [ + { id: "idle", angle: -Math.PI / 2 }, + { id: "poll", angle: 0 }, + { id: "claim", angle: Math.PI / 2 }, + { id: "work", angle: Math.PI }, ]; const FSM_TRANSITIONS: { from: Phase; to: Phase }[] = [ @@ -47,10 +48,10 @@ interface TaskRow { } const INITIAL_TASKS: TaskRow[] = [ - { id: 
"T1", name: "Fix auth bug", status: "unclaimed", owner: "-" }, - { id: "T2", name: "Add rate limiter", status: "unclaimed", owner: "-" }, - { id: "T3", name: "Write tests", status: "unclaimed", owner: "-" }, - { id: "T4", name: "Update API docs", status: "unclaimed", owner: "-" }, + { id: "T1", name: "T1", status: "unclaimed", owner: "-" }, + { id: "T2", name: "T2", status: "unclaimed", owner: "-" }, + { id: "T3", name: "T3", status: "unclaimed", owner: "-" }, + { id: "T4", name: "T4", status: "unclaimed", owner: "-" }, ]; // Agent positions around the task board (left panel) @@ -67,16 +68,110 @@ function agentPos(index: number) { } // -- Step definitions -- -const STEPS = [ - { title: "Self-Governing Agents", desc: "Autonomous agents need no coordinator. They govern themselves with an idle-poll-claim-work cycle." }, - { title: "Idle Timer", desc: "Each idle agent counts rounds. A timeout triggers self-directed task polling." }, - { title: "Poll Task Board", desc: "Timeout! The agent reads the task board looking for unclaimed work." }, - { title: "Claim Task", desc: "The agent writes its name to the task record. Atomic, no conflicts." }, - { title: "Work", desc: "The agent works on the claimed task using its own agent loop." }, - { title: "Independent Polling", desc: "Multiple agents poll and claim independently. No central coordinator needed." }, - { title: "Complete & Reset", desc: "Task done. Agent returns to idle. The cycle repeats." }, - { title: "Self-Organization", desc: "Three agents, zero coordination overhead. Polling + timeout = emergent organization." 
}, -]; +interface AutonomousAgentsCopy { + title: string; + steps: { title: string; desc: string }[]; + phaseLabels: Record; + taskNames: Record; + spatialView: string; + taskBoard: string; + unclaimed: string; + complete: string; + headers: { + task: string; + status: string; + owner: string; + }; + statuses: { + unclaimed: string; + active: string; + complete: string; + }; + fsmCycle: string; +} + +const COPY: Record = { + en: { + title: "Autonomous Agent Cycle", + steps: [ + { title: "Self-Governing Agents", desc: "Autonomous agents need no coordinator. They govern themselves with an idle-poll-claim-work cycle." }, + { title: "Idle Timer", desc: "Each idle agent counts rounds. A timeout triggers self-directed task polling." }, + { title: "Poll Task Board", desc: "Timeout! The agent reads the task board looking for unclaimed work." }, + { title: "Claim Task", desc: "The agent writes its name to the task record. Atomic, no conflicts." }, + { title: "Work", desc: "The agent works on the claimed task using its own agent loop." }, + { title: "Independent Polling", desc: "Multiple agents poll and claim independently. No central coordinator needed." }, + { title: "Complete & Reset", desc: "Task done. Agent returns to idle. The cycle repeats." }, + { title: "Self-Organization", desc: "Three agents, zero coordination overhead. Polling + timeout = emergent organization." 
}, + ], + phaseLabels: { + idle: "idle", + poll: "poll", + claim: "claim", + work: "work", + }, + taskNames: { + T1: "Fix auth bug", + T2: "Add rate limiter", + T3: "Write tests", + T4: "Update API docs", + }, + spatialView: "Spatial View", + taskBoard: "Task Board", + unclaimed: "unclaimed", + complete: "complete", + headers: { + task: "Task", + status: "Status", + owner: "Owner", + }, + statuses: { + unclaimed: "unclaimed", + active: "active", + complete: "complete", + }, + fsmCycle: "FSM Cycle", + }, + ru: { + title: "Цикл автономных агентов", + steps: [ + { title: "Самоуправляемые агенты", desc: "Автономным агентам не нужен координатор. Они управляют собой через цикл ожидания, опроса, захвата задач и работы." }, + { title: "Таймер ожидания", desc: "Каждый бездействующий агент считает раунды. Таймаут запускает самостоятельный опрос задач." }, + { title: "Опрос доски задач", desc: "Таймаут. Агент читает доску задач и ищет незахваченную работу." }, + { title: "Захват задачи", desc: "Агент записывает своё имя в запись задачи. Атомарно и без конфликтов." }, + { title: "Работа", desc: "Агент выполняет захваченную задачу в своём собственном цикле." }, + { title: "Независимый опрос", desc: "Несколько агентов независимо опрашивают и захватывают задачи. Централизованный координатор не нужен." }, + { title: "Завершить и сбросить", desc: "Задача выполнена. Агент возвращается в idle. Цикл повторяется." }, + { title: "Самоорганизация", desc: "Три агента, ноль накладных расходов на координацию. Polling + timeout дают самопроизвольную организацию." 
}, + ], + phaseLabels: { + idle: "ожидание", + poll: "опрос", + claim: "захват", + work: "работа", + }, + taskNames: { + T1: "Исправить баг auth", + T2: "Добавить rate limiter", + T3: "Написать тесты", + T4: "Обновить API-доки", + }, + spatialView: "Пространственный вид", + taskBoard: "Доска задач", + unclaimed: "свободно", + complete: "готово", + headers: { + task: "Задача", + status: "Статус", + owner: "Владелец", + }, + statuses: { + unclaimed: "свободна", + active: "в работе", + complete: "готово", + }, + fsmCycle: "Цикл FSM", + }, +}; // Per-step state for each agent interface AgentState { @@ -222,7 +317,9 @@ function FSMArrow({ from, to, active, inactiveStroke }: { from: Phase; to: Phase } export default function AutonomousAgents({ title }: { title?: string }) { - const vis = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2500 }); + const locale = useLocale(); + const copy = COPY[locale] || COPY.en; + const vis = useSteppedVisualization({ totalSteps: copy.steps.length, autoPlayInterval: 2500 }); const step = vis.currentStep; const palette = useSvgPalette(); @@ -234,13 +331,13 @@ export default function AutonomousAgents({ title }: { title?: string }) { return (

    - {title || "Autonomous Agent Cycle"} + {title || copy.title}

    {/* Left panel: spatial view with agents and task board */}
    -
    Spatial View
    +
    {copy.spatialView}
    {/* Task board (small table in center) */} - Task Board + {copy.taskBoard} - {tasks.filter((t) => t.status === "unclaimed").length} unclaimed + {tasks.filter((t) => t.status === "unclaimed").length} {copy.unclaimed} - {tasks.filter((t) => t.status === "complete").length} complete + {tasks.filter((t) => t.status === "complete").length} {copy.complete} {/* Agents */} @@ -337,15 +434,15 @@ export default function AutonomousAgents({ title }: { title?: string }) { - - - + + + {tasks.map((task) => ( - + @@ -367,7 +464,7 @@ export default function AutonomousAgents({ title }: { title?: string }) { {/* Right panel: FSM state machine diagram */}
    -
    FSM Cycle
    +
    {copy.fsmCycle}
    - {state.label} + {copy.phaseLabels[state.id]} ); @@ -439,7 +536,7 @@ export default function AutonomousAgents({ title }: { title?: string }) { {FSM_STATES.map((s) => (
    - {s.label} + {copy.phaseLabels[s.id]}
    ))}
    @@ -456,8 +553,8 @@ export default function AutonomousAgents({ title }: { title?: string }) { onReset={vis.reset} isPlaying={vis.isPlaying} onToggleAutoPlay={vis.toggleAutoPlay} - stepTitle={STEPS[step].title} - stepDescription={STEPS[step].desc} + stepTitle={copy.steps[step].title} + stepDescription={copy.steps[step].desc} /> diff --git a/web/src/components/visualizations/s12-worktree-task-isolation.tsx b/web/src/components/visualizations/s12-worktree-task-isolation.tsx index 1251c56ab..3ee469f6e 100644 --- a/web/src/components/visualizations/s12-worktree-task-isolation.tsx +++ b/web/src/components/visualizations/s12-worktree-task-isolation.tsx @@ -3,6 +3,7 @@ import { motion } from "framer-motion"; import { useSteppedVisualization } from "@/hooks/useSteppedVisualization"; import { StepControls } from "@/components/visualizations/shared/step-controls"; +import { useLocale } from "@/lib/i18n"; type TaskStatus = "pending" | "in_progress" | "completed"; @@ -142,6 +143,147 @@ const STEPS: StepState[] = [ }, ]; +const STEPS_RU: StepState[] = [ + { + title: "Боль общего рабочего каталога", + desc: "Две задачи уже активны, но обе правки попадут в один каталог и начнут конфликтовать.", + op: "task_create x2", + tasks: [ + { id: 1, subject: "Рефакторинг auth", status: "in_progress", worktree: "" }, + { id: 2, subject: "Полировка UI логина", status: "in_progress", worktree: "" }, + ], + worktrees: [], + lanes: [ + { name: "main", files: ["auth/service.py", "ui/Login.tsx"], highlight: true }, + { name: "wt/auth-refactor", files: [] }, + { name: "wt/ui-login", files: [] }, + ], + }, + { + title: "Выделяем полосу для задачи 1", + desc: "Создаём рабочее дерево и связываем его с задачей 1, чтобы появился явный владелец.", + op: "worktree_create(name='auth-refactor', task_id=1)", + tasks: [ + { id: 1, subject: "Рефакторинг auth", status: "in_progress", worktree: "auth-refactor" }, + { id: 2, subject: "Полировка UI логина", status: "in_progress", worktree: "" }, + ], + 
worktrees: [{ name: "auth-refactor", branch: "wt/auth-refactor", task: "#1", state: "active" }], + lanes: [ + { name: "main", files: ["ui/Login.tsx"] }, + { name: "wt/auth-refactor", files: ["auth/service.py"], highlight: true }, + { name: "wt/ui-login", files: [] }, + ], + }, + { + title: "Выделяем полосу для задачи 2", + desc: "Создание полосы и привязка к задаче могут идти отдельно. Здесь задача 2 связывается уже после создания полосы.", + op: "worktree_create(name='ui-login')\ntask_bind_worktree(task_id=2, worktree='ui-login')", + tasks: [ + { id: 1, subject: "Рефакторинг auth", status: "in_progress", worktree: "auth-refactor" }, + { id: 2, subject: "Полировка UI логина", status: "in_progress", worktree: "ui-login" }, + ], + worktrees: [ + { name: "auth-refactor", branch: "wt/auth-refactor", task: "#1", state: "active" }, + { name: "ui-login", branch: "wt/ui-login", task: "#2", state: "active" }, + ], + lanes: [ + { name: "main", files: [] }, + { name: "wt/auth-refactor", files: ["auth/service.py"] }, + { name: "wt/ui-login", files: ["ui/Login.tsx"], highlight: true }, + ], + }, + { + title: "Запускаем команды в изолированных полосах", + desc: "Каждая команда маршрутизируется по каталогу выбранной полосы, а не по общему корню репозитория.", + op: "worktree_run('auth-refactor', 'pytest tests/auth -q')", + tasks: [ + { id: 1, subject: "Рефакторинг auth", status: "in_progress", worktree: "auth-refactor" }, + { id: 2, subject: "Полировка UI логина", status: "in_progress", worktree: "ui-login" }, + ], + worktrees: [ + { name: "auth-refactor", branch: "wt/auth-refactor", task: "#1", state: "active" }, + { name: "ui-login", branch: "wt/ui-login", task: "#2", state: "active" }, + ], + lanes: [ + { name: "main", files: [] }, + { name: "wt/auth-refactor", files: ["auth/service.py", "tests/auth/test_login.py"], highlight: true }, + { name: "wt/ui-login", files: ["ui/Login.tsx", "ui/Login.css"] }, + ], + }, + { + title: "Одну полосу оставляем, другую закрываем", + desc: 
"На завершении можно смешивать решения: ui-login остаётся для следующего шага, а auth-refactor удаляется и закрывает задачу 1.", + op: "worktree_keep('ui-login')\nworktree_remove('auth-refactor', complete_task=true)\nworktree_events(limit=10)", + tasks: [ + { id: 1, subject: "Рефакторинг auth", status: "completed", worktree: "" }, + { id: 2, subject: "Полировка UI логина", status: "in_progress", worktree: "ui-login" }, + ], + worktrees: [ + { name: "auth-refactor", branch: "wt/auth-refactor", task: "#1", state: "removed" }, + { name: "ui-login", branch: "wt/ui-login", task: "#2", state: "kept" }, + ], + lanes: [ + { name: "main", files: [] }, + { name: "wt/auth-refactor", files: [] }, + { name: "wt/ui-login", files: ["ui/Login.tsx"], highlight: true }, + ], + }, + { + title: "Изоляция + координация + события", + desc: "Общая доска хранит единую правду, worktree-lane изолируют выполнение, а события дают проверяемый side-channel-трейс.", + op: "task_list + worktree_list + worktree_events", + tasks: [ + { id: 1, subject: "Рефакторинг auth", status: "completed", worktree: "" }, + { id: 2, subject: "Полировка UI логина", status: "in_progress", worktree: "ui-login" }, + ], + worktrees: [ + { name: "auth-refactor", branch: "wt/auth-refactor", task: "#1", state: "removed" }, + { name: "ui-login", branch: "wt/ui-login", task: "#2", state: "kept" }, + ], + lanes: [ + { name: "main", files: [] }, + { name: "wt/auth-refactor", files: [] }, + { name: "wt/ui-login", files: ["ui/Login.tsx"], highlight: true }, + ], + }, +]; + +const STATUS_LABELS: Record> = { + en: { + pending: "pending", + in_progress: "in_progress", + completed: "completed", + }, + ru: { + pending: "в очереди", + in_progress: "в работе", + completed: "завершена", + }, +}; + +const COPY = { + en: { + title: "Worktree Task Isolation", + board: "Task Board (.tasks)", + worktrees: "Worktree Index (.worktrees/index.json)", + lanes: "Execution Lanes", + noWorktrees: "no worktrees yet", + noChanges: "(no changes)", + 
worktreeLabel: "worktree", + taskLabel: "task", + }, + ru: { + title: "Изоляция задач через worktree", + board: "Доска задач (.tasks)", + worktrees: "Индекс worktree (.worktrees/index.json)", + lanes: "Исполняющие lane", + noWorktrees: "пока нет worktree", + noChanges: "(без изменений)", + worktreeLabel: "worktree", + taskLabel: "задача", + }, +} as const; + function statusClass(status: TaskStatus): string { if (status === "completed") return "bg-emerald-100 text-emerald-700 dark:bg-emerald-900/30 dark:text-emerald-300"; if (status === "in_progress") return "bg-amber-100 text-amber-700 dark:bg-amber-900/30 dark:text-amber-300"; @@ -156,13 +298,17 @@ function worktreeClass(state: WorktreeRow["state"]): string { } export default function WorktreeTaskIsolation({ title }: { title?: string }) { - const vis = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2600 }); - const step = STEPS[vis.currentStep]; + const locale = useLocale(); + const copy = locale === "ru" ? COPY.ru : COPY.en; + const steps = locale === "ru" ? STEPS_RU : STEPS; + const vis = useSteppedVisualization({ totalSteps: steps.length, autoPlayInterval: 2600 }); + const step = steps[vis.currentStep]; + const statusLabels = locale === "ru" ? STATUS_LABELS.ru : STATUS_LABELS.en; return (

    - {title || "Worktree Task Isolation"} + {title || copy.title}

    @@ -173,7 +319,7 @@ export default function WorktreeTaskIsolation({ title }: { title?: string }) {
    - Task Board (.tasks) + {copy.board}
    {step.tasks.map((task) => ( @@ -187,12 +333,12 @@ export default function WorktreeTaskIsolation({ title }: { title?: string }) {
    #{task.id} - {task.status} + {statusLabels[task.status]}
    {task.subject}
    - worktree: {task.worktree || "-"} + {copy.worktreeLabel}: {task.worktree || "-"}
    ))} @@ -201,12 +347,12 @@ export default function WorktreeTaskIsolation({ title }: { title?: string }) {
    - Worktree Index (.worktrees/index.json) + {copy.worktrees}
    {step.worktrees.length === 0 && (
    - no worktrees yet + {copy.noWorktrees}
    )} {step.worktrees.map((wt) => ( @@ -219,7 +365,7 @@ export default function WorktreeTaskIsolation({ title }: { title?: string }) { >
    {wt.name}
    {wt.branch}
    -
    task: {wt.task}
    +
    {copy.taskLabel}: {wt.task}
    ))}
    @@ -227,7 +373,7 @@ export default function WorktreeTaskIsolation({ title }: { title?: string }) {
    - Execution Lanes + {copy.lanes}
    {step.lanes.map((lane) => ( @@ -245,7 +391,7 @@ export default function WorktreeTaskIsolation({ title }: { title?: string }) {
    {lane.name}
    {lane.files.length === 0 ? ( -
    (no changes)
    +
    {copy.noChanges}
    ) : ( lane.files.map((f) =>
    {f}
    ) )} diff --git a/web/src/components/visualizations/shared/step-controls.tsx b/web/src/components/visualizations/shared/step-controls.tsx index cd0beaa2c..36e054d04 100644 --- a/web/src/components/visualizations/shared/step-controls.tsx +++ b/web/src/components/visualizations/shared/step-controls.tsx @@ -2,6 +2,7 @@ import { Play, Pause, SkipBack, SkipForward, RotateCcw } from "lucide-react"; import { cn } from "@/lib/utils"; +import { useLocale } from "@/lib/i18n"; interface StepControlsProps { currentStep: number; @@ -28,6 +29,23 @@ export function StepControls({ stepDescription, className, }: StepControlsProps) { + const locale = useLocale(); + const copy = locale === "ru" + ? { + reset: "Сброс", + prev: "Предыдущий шаг", + pause: "Пауза", + autoplay: "Автовоспроизведение", + next: "Следующий шаг", + } + : { + reset: "Reset", + prev: "Previous step", + pause: "Pause", + autoplay: "Auto-play", + next: "Next step", + }; + return (
    {/* Annotation */} @@ -46,7 +64,7 @@ export function StepControls({ @@ -54,14 +72,14 @@ export function StepControls({ onClick={onPrev} disabled={currentStep === 0} className="rounded-md p-1.5 text-zinc-500 hover:bg-zinc-100 hover:text-zinc-700 disabled:opacity-30 dark:text-zinc-400 dark:hover:bg-zinc-800 dark:hover:text-zinc-200" - title="Previous step" + title={copy.prev} > @@ -69,7 +87,7 @@ export function StepControls({ onClick={onNext} disabled={currentStep === totalSteps - 1} className="rounded-md p-1.5 text-zinc-500 hover:bg-zinc-100 hover:text-zinc-700 disabled:opacity-30 dark:text-zinc-400 dark:hover:bg-zinc-800 dark:hover:text-zinc-200" - title="Next step" + title={copy.next} > diff --git a/web/src/data/annotation-ru.ts b/web/src/data/annotation-ru.ts new file mode 100644 index 000000000..54fe3bf4f --- /dev/null +++ b/web/src/data/annotation-ru.ts @@ -0,0 +1,201 @@ +export interface AnnotationRuOverride { + title: string; + description: string; + alternatives: string; +} + +export const ANNOTATION_RU_OVERRIDES: Record< + string, + Record +> = { + s02: { + "four-tools-not-twenty": { + title: `Почему именно четыре инструмента`, + description: `Эти четыре инструмента: bash, read_file, write_file и edit_file. Вместе они покрывают примерно 95% типичных задач по разработке. Bash отвечает за запуск команд и произвольное выполнение. Read_file даёт точное чтение файлов с привязкой к строкам. Write_file создаёт или полностью перезаписывает файл. Edit_file делает точечную замену строк. Большее число инструментов увеличило бы когнитивную нагрузку на модель: ей пришлось бы чаще выбирать между похожими вариантами и чаще ошибаться. Меньше инструментов также означает меньше схем, меньше пограничных случаев и меньше кода сопровождения.`, + alternatives: `Можно было бы сразу добавить специализированные инструменты вроде list_directory, search_files или http_request, и более поздние версии так и делают. Но на этом этапе bash уже покрывает эти сценарии. 
Переход от одного инструмента в v0 к четырём в v1 нужен прежде всего ради структурированного ввода-вывода для файловых операций, где quoting и escaping в bash часто сбивают модель.`, + }, + "model-as-agent": { + title: `Модель и есть агент`, + description: `Базовый цикл агента предельно прост: пока True, вызываем LLM; если она вернула блоки tool_use, выполняем их и возвращаем результаты; если вернулся только текст, останавливаемся. Здесь нет ни роутера, ни дерева решений, ни workflow-движка. Что делать, когда остановиться и как восстанавливаться после ошибки, решает сама модель. Код вокруг — это всего лишь обвязка между моделью и инструментами. В этом и состоит принцип: агентное поведение возникает из модели, а не из фреймворка.`, + alternatives: `Многие агентные фреймворки добавляют сложные слои оркестрации: ReAct-циклы с разбором Thought/Action/Observation, цепочки в духе LangChain, декомпозицию целей как в AutoGPT. Все они исходят из предположения, что модели нужна внешняя опора, чтобы стать агентом. Наш подход исходит из обратного: модель уже умеет быть агентом, ей просто нужны инструменты для действия в мире.`, + }, + "explicit-tool-schemas": { + title: `JSON Schema для каждого инструмента`, + description: `У каждого инструмента есть строгая JSON-схема входных параметров. Например, edit_file требует exact-строки old_string и new_string, а не regex-паттерны. Это убирает целый класс ошибок: модель не может передать некорректный ввод, потому что API валидирует схему до выполнения. Заодно намерение модели становится однозначным: если она вызывает edit_file с конкретными строками, нет двусмысленности в том, что именно она хочет поменять.`, + alternatives: `Некоторые агентные системы позволяют модели выдавать свободный текст, который потом разбирается регулярками или эвристиками, например код из markdown-блоков. Это хрупко: модель чуть изменит формат, и парсер сломается. 
JSON Schema жертвует гибкостью ради надёжности.`, + }, + }, + s03: { + "visible-planning": { + title: `Делаем план видимым через TodoWrite`, + description: `Вместо того чтобы позволять модели планировать молча внутри chain-of-thought, мы заставляем её выносить план наружу через инструмент TodoWrite. У каждого пункта есть явный статус: pending, in_progress или completed. Это даёт три преимущества: пользователь видит намерение агента до начала работы; разработчик может отлаживать поведение агента, глядя на состояние плана; сам агент может вернуться к плану в следующих ходах, даже если ранний контекст уже ушёл из окна.`, + alternatives: `Модель могла бы планировать внутренне, как в v0 и v1. Это работает, но такой план невидим и эфемерен: как только мысли уходят за пределы контекста, план исчезает. Можно опираться и на extended thinking Claude, но он недоступен для инспекции ни пользователю, ни инструментам ниже по стеку.`, + }, + "single-in-progress": { + title: `Только одна задача может быть in_progress`, + description: `TodoWrite принудительно ограничивает систему так, что одновременно статус in_progress может быть только у одной задачи. Если модель хочет начать вторую, она обязана сначала завершить или бросить текущую. Это защищает от тонкого режима деградации: когда модель пытается мультитаскать и чередует работу над несколькими пунктами, она теряет состояние и выдаёт полуготовый результат. Последовательный фокус стабильно даёт качество выше, чем параллельная суета.`, + alternatives: `Разрешить несколько задач in_progress выглядело бы гибче: агент мог бы переключаться между ними по ситуации. На практике LLM плохо держат такой context switch, путают детали и смешивают состояния. Ограничение на один активный пункт — это полезный guardrail, а не искусственная жёсткость.`, + }, + "max-twenty-items": { + title: `Не больше 20 пунктов плана`, + description: `TodoWrite ограничивает план двадцатью пунктами. Это осознанная защита от over-planning. 
Без ограничений модель склонна дробить задачу на всё более мелкие шаги и получать планы на 50 пунктов, где каждый шаг тривиален. Такие планы хрупки: если ломается шаг 15, оставшиеся 35 часто уже невалидны. Короткий план, обычно до 20 пунктов, держит правильный уровень абстракции и легче адаптируется, когда реальность расходится с исходной идеей.`, + alternatives: `Без лимита модель получила бы полную свободу, но на практике это приводит к абсурдно детализированным планам. Динамический лимит, зависящий от сложности задачи, звучит умнее, но добавляет сложность. Фиксированный порог в 20 — простая эвристика, которая хорошо работает на реальных задачах: большинство инженерных работ укладываются в 5-15 осмысленных шагов.`, + }, + }, + s04: { + "context-isolation": { + title: `Субагент получает свежий контекст, а не общую историю`, + description: `Когда родительский агент порождает субагента через Task, тот стартует с чистой историей сообщений, где есть только system prompt и описание делегированной подзадачи. Он НЕ наследует разговор родителя. В этом и состоит изоляция контекста: субагент может полностью сосредоточиться на своём узком задании, а не продираться через сотни сообщений из более широкого диалога. Назад родителю возвращается один tool_result, который сворачивает десятки потенциальных ходов субагента в один компактный ответ.`, + alternatives: `Полное копирование контекста родителя дало бы субагенту больше информации, но одновременно залило бы его нерелевантными деталями. Контекстное окно конечно: если забить его историей родителя, останется меньше места на собственную работу субагента. Подходы с форком истории — компромисс, но они всё равно тратят токены на шум.`, + }, + "tool-filtering": { + title: `Explore-агенты не могут писать файлы`, + description: `Если субагент запускается в режиме Explore, он получает только read-only инструменты: ограниченный bash, read_file и поиск. Вызывать write_file или edit_file он не может. 
Это реализация принципа наименьших привилегий: агенту с задачей "найди все места использования функции X" не нужен доступ на запись. Убирая write-инструменты, мы исключаем случайные правки во время исследования и одновременно сужаем пространство выбора, так что модель принимает решения точнее.`, + alternatives: `Проще всего было бы дать всем субагентам полный набор инструментов, но это нарушает least privilege. Система запроса прав у родителя добавила бы сложность и задержку. Статическая фильтрация инструментов по роли — практичная середина: просто реализуется и эффективно снижает риск ошибок.`, + }, + "no-recursive-task": { + title: `Субагенты не могут порождать своих субагентов`, + description: `Инструмент Task не входит в набор инструментов субагента. Субагент обязан завершить свою работу сам и не может делегировать дальше. Это защищает от бесконечных каскадов делегирования: без ограничения агент мог бы запустить субагента, тот — ещё одного, и все они по кругу перевыражали бы одну и ту же задачу разными словами, сжигая токены без продвижения. Одного уровня делегирования хватает для подавляющего большинства сценариев.`, + alternatives: `Рекурсивное делегирование с ограничением по глубине позволило бы обрабатывать глубоко вложенные задачи, но резко усложнило бы модель исполнения и создало риск runaway token consumption. На практике одного уровня почти всегда достаточно. Более сложная многоуровневая координация появляется позже через persistent teams, а не через рекурсивный spawn.`, + }, + }, + s05: { + "tool-result-injection": { + title: `Навыки попадают в контекст через tool_result, а не через системный промпт`, + description: `Когда агент вызывает навык, содержимое файла SKILL.md возвращается как tool_result внутри пользовательского сообщения, а не встраивается в системный промпт. Это сознательная оптимизация под кэширование. Системный промпт остаётся статичным между ходами, значит провайдер API может его кэшировать. 
Если бы содержимое навыка жило в системном промпте, каждая новая подгрузка навыка ломала бы кэш. Помещая динамические знания в tool_result, мы сохраняем дорогой системный промпт кэшируемым и всё равно доставляем знания в контекст.`, + alternatives: `Встраивать навыки в системный промпт проще и даёт им больший приоритет во внимании модели. Но это ломает кэширование промпта и постепенно раздувает системный промпт по мере накопления навыков. Подход через tool_result лучше для кэша ценой немного меньшего приоритета внимания.`, + }, + "lazy-loading": { + title: `Навыки загружаются по требованию, а не заранее`, + description: `На старте загружаются не сами навыки, а только их имена и описания из frontmatter. Полное тело SKILL.md попадает в контекст только тогда, когда агент сам решает, что конкретный навык ему нужен, и вызывает инструмент загрузки навыка. Это делает исходный промпт компактным и сфокусированным. Агенту, который чинит Python-баг, не нужен заранее загруженный навык по деплою Kubernetes: он только съест окно контекста и может внести нерелевантные инструкции.`, + alternatives: `Предварительная загрузка всех навыков гарантировала бы постоянную доступность всех знаний, но бессмысленно тратила бы токены и быстрее упиралась бы в лимиты контекста. Рекомендательная схема с подтверждением человеком добавляет задержку. Ленивый подход позволяет модели самостоятельно забрать знания тогда, когда они действительно нужны.`, + }, + "frontmatter-body-split": { + title: `YAML frontmatter + Markdown-тело в SKILL.md`, + description: `Каждый файл SKILL.md состоит из двух частей: YAML frontmatter с метаданными и Markdown-тела с реальными инструкциями. Frontmatter работает как реестр навыков: именно его система показывает, когда агент спрашивает, какие навыки доступны. Тело — это полезная нагрузка, которая загружается по требованию. 
Благодаря этому можно перечислить 100 навыков, читая только несколько байт метаданных на каждый, и не тянуть в контекст 100 полноценных инструкций по несколько тысяч токенов.`, + alternatives: `Разделить метаданные и тело по двум файлам вроде skill.yaml + skill.md тоже можно, но это удваивает число файлов. Хранить метаданные прямо в Markdown-заголовках или комментариях значит каждый раз парсить весь файл. Frontmatter — уже устоявшаяся конвенция, которая держит метаданные и содержимое рядом, но отделяет их структурно.`, + }, + }, + s06: { + "three-layer-compression": { + title: `Трёхслойная стратегия сжатия`, + description: `Управление контекстом строится на трёх слоях с разным соотношением цены и пользы. Первое, microcompact, работает почти бесплатно на каждом ходу: урезает старые tool_result и вычищает громоздкий вывод команд, который уже не нужен. Второе, auto_compact, включается при превышении порога токенов: оно вызывает LLM для генерации сводки разговора. Это дороже, но резко сокращает размер контекста. Третье, manual compact, запускается пользователем в моменты явного "начнём с чистого листа". Такая слоистость позволяет дешёвой чистке идти постоянно, а дорогому summarization — только когда оно действительно окупается.`, + alternatives: `Одна общая стратегия сжатия, например всегда суммировать на 80% окна, была бы проще, но расточительна: чаще всего microcompact сам удерживает контекст в порядке. Скользящее окно с отбрасыванием старых сообщений дёшево, но выкидывает важный смысл. Трёхслойный подход даёт лучшую эффективность по токенам: дешёвая уборка постоянно, дорогая сводка редко.`, + }, + "min-savings-threshold": { + title: `MIN_SAVINGS = 20 000 токенов перед сжатием`, + description: `Auto_compact запускается только тогда, когда ожидаемая экономия, то есть разница между текущим объёмом токенов и размером будущей сводки, превышает 20 000 токенов. Сжатие не бесплатно: сама сводка расходует токены, плюс нужен отдельный API-вызов для её генерации. 
Если разговор весит всего 25 000 токенов, можно сэкономить 5 000, но заплатить за вызов и получить менее связное представление, чем исходная история. Порог в 20K гарантирует, что выигрыш заметно перекрывает накладные расходы.`, + alternatives: `Порог в процентах, например сжимать при 80% заполнения окна, подстраивается под разные размеры контекста, но не учитывает фиксированную стоимость генерации сводки. Фиксированный порог в 10K сделал бы систему агрессивнее, но часто без пользы. Значение 20K выбрано эмпирически: именно здесь компрессия почти всегда окупает потерю качества.`, + }, + "summary-replaces-all": { + title: `Сводка заменяет всю историю, а не только её часть`, + description: `Когда срабатывает auto_compact, система генерирует сводку и заменяет ею ВСЮ историю сообщений. Она не хранит рядом со сводкой ещё и последние N сообщений. Это убирает тонкую проблему когерентности: если оставить свежие сообщения плюс сводку старых, модель увидит две версии пересекающегося содержания. Сводка может говорить "мы выбрали подход X", а последние сообщения всё ещё показывать сомнения и обсуждение, создавая противоречивые сигналы. Одна чистая сводка даёт один связный нарратив.`, + alternatives: `Хранить последние 5-10 сообщений вместе со сводкой кажется полезным, потому что сохраняет свежие детали. Но это создаёт пересечение и делает суммарный размер контекста менее предсказуемым. Некоторые системы действительно используют "sliding window + summary", но для этого нужно очень аккуратно настраивать область перекрытия.`, + }, + "transcript-archival": { + title: `Полный диалог архивируется на диск в JSONL`, + description: `Хотя рабочий контекст в памяти сжимается, полная несжатая история разговора дописывается в JSONL-файл на диске. Каждое сообщение, каждый вызов инструмента, каждый результат сохраняются полностью. Это означает, что сжатие является потерянной операцией только для in-memory контекста, но не для постоянной записи. 
Постфактум можно анализировать полный лог: отлаживать поведение агента, считать расход токенов, извлекать обучающие данные. Формат JSONL append-only, поэтому он безопасен для конкурентных записей и удобен для потоковой обработки.`, + alternatives: `Не архивировать вообще — значит экономить диск, но сильно усложнять отладку: когда агент ошибётся, вы уже не увидите, что произошло 200 сообщений назад, потому что это место было сжато. Хранение в SQLite дало бы удобные запросы, но добавило бы зависимость. JSONL — самый простой формат, который одновременно поддерживает append-only запись и построчную обработку.`, + }, + }, + s08: { + "notification-bus": { + title: `threading.Queue как шина уведомлений`, + description: `Результаты фоновых задач передаются через threading.Queue, а не через прямые callbacks. Когда фоновый поток завершает работу, он кладёт уведомление в очередь. Основной цикл агента опрашивает эту очередь перед каждым вызовом LLM. Такая развязка важна: фоновому потоку не нужно знать ничего о состоянии или тайминге основного цикла. Он просто оставляет сообщение в очереди и идёт дальше. Основной цикл заберёт его в удобный момент: не посреди API-вызова и не посреди выполнения инструмента. Никаких гонок и никакого callback hell.`, + alternatives: `Прямые callbacks доставляли бы результат быстрее, но тут же создавали бы проблемы thread safety: callback мог бы сработать ровно в момент, когда основной поток собирает запрос. Event-driven модели вроде asyncio тоже работают, но добавляют сложность. Очередь — простейшая и надёжная примитивная форма межпоточного взаимодействия.`, + }, + "daemon-threads": { + title: `Фоновые задачи работают как демон-потоки`, + description: `Потоки фоновых задач создаются с daemon=True. В Python это означает, что при завершении главного потока демон-потоки завершаются автоматически. 
Так устраняется частая проблема: основной агент уже закончил работу и должен выйти, но фоновый поток всё ещё висит на длинном API-вызове или застрял в цикле, из-за чего процесс не завершится никогда. С демон-потоками выход остаётся чистым: заканчивается главный поток, умирают все демон-потоки, завершается процесс. Никаких зомби-процессов и дополнительного кода очистки.`, + alternatives: `Не-демон-потоки с явным join и очисткой дают больше контроля над shutdown, но требуют аккуратного управления жизненным циклом. Параллелизм на процессах через multiprocessing обеспечивает лучшую изоляцию, но дороже по накладным расходам. Демон-потоки — прагматичный выбор: минимум кода и корректное поведение в обычном случае.`, + }, + "attachment-format": { + title: `Структурированный формат уведомлений с type-тегами`, + description: `Уведомления от фоновых задач имеют явную структуру вида {"type": "attachment", "attachment": {status, result, ...}}, а не просто строки текста. Type-tag позволяет основному циклу по-разному обрабатывать разные виды сигналов: attachment можно внедрить в разговор как tool_result, а status_update — только отобразить как прогресс. Машиночитаемый формат также открывает путь к программной фильтрации и UI-рендерингу: показывать только ошибки, скрывать шумные обновления прогресса, рисовать статус как progress bar, а не как сырой текст.`, + alternatives: `Обычные строковые уведомления проще, но теряют структуру. Тогда главному циклу пришлось бы каждый раз парсить свободный текст и гадать, что именно произошло. Классовая иерархия вроде StatusNotification и ResultNotification выглядела бы по-питоновски, но хуже переносится между языками и форматами сериализации. JSON-подобная структура универсальнее.`, + }, + }, + s09: { + "teammate-vs-subagent": { + title: `Постоянные напарники против одноразовых субагентов`, + description: `В s04 субагенты эфемерны: их порождают, они делают одну задачу, возвращают результат и умирают вместе со всем накопленным знанием. 
В s09 напарники — это постоянные потоки со своей идентичностью, ролью и конфигурацией. Напарник может выполнить задачу A, потом получить задачу B и принести в неё всё, чему уже научился. Такие агенты накапливают знания о проекте, понимают устоявшиеся паттерны и не вынуждены заново читать одни и те же файлы при каждой новой задаче.`, + alternatives: `Одноразовые субагенты проще и дают почти идеальную изоляцию контекста: одна задача не загрязняет другую. Но цена — постоянное переобучение с нуля. Рассматривался промежуточный вариант с субагентами и общей memory/knowledge base, но он добавляет сложность без полного выигрыша от устойчивой identity и состояния.`, + }, + "file-based-team-config": { + title: `Конфиг команды хранится в .teams/{name}/config.json`, + description: `Структура команды — имена участников, роли и agent ID — хранится в JSON-конфиге, а не в памяти какого-то одного агента. Любой агент может узнать состав команды, просто прочитав этот файл. Не нужен ни discovery service, ни общая память. Если агент падает и стартует заново, он читает конфиг и сразу понимает, кто ещё есть в команде. Это полностью соответствует философии s07: файловая система и есть слой координации.`, + alternatives: `In-memory registry быстрее, но не переживает рестарт процессов и требует центрального процесса-хранителя. Сервис discovery по типу DNS или отдельного сервера надёжнее на большом масштабе, но избыточен для локальной multi-agent системы. Файловый конфиг — самый простой вариант, который работает между независимыми процессами.`, + }, + "tool-filtering-by-role": { + title: `У teammates подмножество инструментов, у лидера — полный набор`, + description: `Лидер команды получает ALL_TOOLS, включая spawn, send, read_inbox и прочую координацию, а напарники — TEAMMATE_TOOLS, то есть сокращённый набор, сфокусированный на исполнении задач. 
Это жёстко разделяет роли: напарники занимаются непосредственной работой — кодом, тестами, исследованием, а лидер занимается координацией — созданием задач, распределением работы и коммуникацией. Если дать напарникам координационные инструменты, они смогут создавать свои подкоманды и перекидывать задачи друг другу, ломая целостность общего плана.`, + alternatives: `Выдать одинаковые инструменты всем агентам проще и вроде бы демократичнее, но на практике это быстро превращается в координационный хаос: несколько агентов одновременно пытаются управлять друг другом и выдают конфликтующие назначения. Статическая фильтрация по роли предсказуема и легко объяснима.`, + }, + }, + s10: { + "jsonl-inbox": { + title: `JSONL-входящий ящик вместо общей памяти`, + description: `У каждого напарника есть собственный файл входящих сообщений — JSONL внутри директории команды. Отправить сообщение значит дописать одну JSON-строку в ящик получателя. Прочитать сообщения значит прочитать файл и помнить, до какой строки мы уже дошли. JSONL по природе append-only, поэтому конкурентные писатели не ломают данные друг друга. Такой механизм работает между процессами без общей памяти, mutex и сложного IPC. Он ещё и crash-safe: если процесс упадёт в середине append, худший сценарий — одна битая строка, которую читатель может пропустить.`, + alternatives: `Общая память вроде Python multiprocessing.Queue быстрее, но не годится, если агенты живут как отдельные независимые процессы. Брокер сообщений вроде Redis или RabbitMQ решает задачу надёжно, но добавляет инфраструктуру. Unix domain sockets тоже сработают, но их сложнее отлаживать, потому что там нет человекочитаемого лога. 
JSONL-файлы — самый простой способ получить persistence, межпроцессное общение и отладочность сразу.`, + }, + "five-message-types": { + title: `Ровно пять типов сообщений покрывают всю координацию`, + description: `Система сообщений поддерживает ровно пять типов: message для point-to-point общения, broadcast для объявлений на всю команду, shutdown_request для корректного завершения, shutdown_response для ответа на shutdown и plan_approval_response для одобрения или отклонения плана лидером. Эти пять категорий напрямую соответствуют базовым паттернам координации: прямое общение, широковещательные сигналы, управление жизненным циклом и approval workflows.`, + alternatives: `Можно было бы оставить один универсальный тип сообщения с кучей метаполей. Это было бы гибче, но усложнило бы проверку корректности протоколов. Можно было бы сделать и десяток специализированных типов, но тогда выросла бы когнитивная нагрузка на модель. Пять типов — это точка, где у каждого сообщения есть ясная и непересекающаяся роль.`, + }, + "inbox-before-api-call": { + title: `Входящий ящик проверяется перед каждым вызовом LLM`, + description: `Напарники читают свой входящий ящик в начале каждой итерации агентного цикла, до обращения к LLM API. Это даёт максимальную отзывчивость к входящим сигналам: запрос на shutdown замечается в пределах одного цикла, обычно за секунды, а не после завершения текущей задачи, что может занять минуты. Проверка входящего ящика дешева: это чтение маленького файла и проверка, появились ли новые строки. На фоне LLM-вызова с тысячами токенов и секундной задержкой это почти бесплатно. Такой порядок ещё и позволяет входящим сообщениям повлиять на следующий вызов модели сразу же.`, + alternatives: `Проверять входящий ящик после каждого выполнения инструмента было бы ещё отзывчивее, но это добавило бы накладные расходы к каждому вызову инструмента, а они происходят чаще, чем вызовы LLM. Отдельный поток-наблюдатель для входящего ящика сделал бы систему сложнее. 
Проверка один раз на итерацию LLM — практичный баланс между отзывчивостью и простотой.`, + }, + }, + s11: { + "polling-not-events": { + title: `Непривязанные задачи ищутся polling'ом, а не через события`, + description: `Автономные teammates примерно раз в секунду опрашивают общую task board в поисках свободных задач, а не ждут event-driven уведомлений. Polling фундаментально проще pub/sub-модели: не нужно управлять подписками, маршрутизировать события и ловить баги с пропущенными уведомлениями. В файловой системе polling — это просто "прочитай список файлов". Такая операция дешёвая и работает независимо от количества одновременно живущих агентов. Интервал в одну секунду даёт хороший баланс между отзывчивостью и нагрузкой на файловую систему.`, + alternatives: `Event-driven подход с file watchers или pub/sub-каналом дал бы латентность в миллисекундах, а не секундах. Но file watchers платформозависимы и ненадёжны на сетевых файловых системах, а message broker добавляет инфраструктуру. В системе, где сами задачи обычно идут минуты, разница между 1 секундой и 10 миллисекундами почти ничего не меняет.`, + }, + "idle-timeout": { + title: `60-секундный idle-тайм-аут перед самозавершением`, + description: `Если у автономного teammate нет ни задач, ни сообщений во входящих, он ждёт до 60 секунд, а затем сам завершает работу. Это защищает от зомби-агентов, которые могут висеть вечно в ожидании работы, которая уже не придёт: например, если лидер забыл отправить shutdown или оставшиеся задачи заблокированы внешними событиями. Окно в 60 секунд достаточно длинное, чтобы короткий промежуток между завершением одной задачи и появлением другой не приводил к раннему shutdown, но и достаточно короткое, чтобы простаивающие агенты не тратили ресурсы бесконечно.`, + alternatives: `Отсутствие тайм-аута чревато вечными зомби-процессами. Слишком короткий тайм-аут вроде 5 секунд даёт ложные срабатывания, пока лидер просто думает или печатает. 
Heartbeat-система, где лидер регулярно пингует teammates, тоже работает, но усложняет протокол. Фиксированные 60 секунд дают хороший дефолтный баланс.`, + }, + "identity-after-compression": { + title: `После сжатия контекста нужно заново внедрять identity teammate`, + description: `Когда auto_compact сжимает разговор, итоговая сводка теряет критические метаданные: имя teammate, название команды и agent_id. Без них агент не сможет забирать задачи, потому что ownership привязан к имени, не сможет читать inbox, потому что он ключуется по agent_id, и не сможет корректно идентифицировать себя в переписке. Поэтому после каждого auto_compact система заново вставляет в разговор структурированный блок identity: "ты [name] из команды [team], твой agent_id = [id], твой inbox лежит по пути [path]". Это минимальный контекст, который нужен агенту, чтобы остаться работоспособным после потери памяти.`, + alternatives: `Можно было бы держать identity в system prompt, который переживает compression, но это нарушило бы cache-friendly подход со статичным системным промптом из s05. Можно пытаться заставить LLM всегда включать identity в summary, но это ненадёжно: модель может забыть. Явная пост-компрессионная реинъекция детерминирована и потому безопаснее.`, + }, + }, + s12: { + "shared-board-isolated-lanes": { + title: `Общая доска задач + изолированные полосы выполнения`, + description: `Доска задач остаётся общей и централизованной в .tasks/, а реальные правки файлов происходят в отдельных worktree-директориях на каждую задачу. Такое разделение сохраняет глобальную видимость — кто чем владеет и что уже завершено — и при этом не вынуждает всех работать в одном мутирующем каталоге. Координация остаётся простой, потому что доска одна, а исполнение остаётся безопасным, потому что каждая lane изолирована.`, + alternatives: `Один общий workspace проще, но быстро приводит к коллизиям правок и смешанному git-состоянию. 
Полностью независимые task store для каждой lane избегают столкновений, но ломают видимость на уровне команды и усложняют планирование.`, + }, + "index-file-lifecycle": { + title: `Явный индекс жизненного цикла worktree`, + description: `Файл .worktrees/index.json хранит имя worktree, путь, ветку, task_id и статус. Благодаря этому lifecycle-состояние можно инспектировать и восстанавливать даже после сжатия контекста или рестарта процесса. Тот же индекс становится детерминированным источником данных для list/status/remove операций.`, + alternatives: `Если опираться только на git worktree list, исчезнут локальные метаданные о привязке к задаче и кастомных lifecycle-статусах. Держать всё лишь в памяти проще по коду, но это сразу убивает recoverability после падений и перезапусков.`, + }, + "lane-cwd-routing-and-reentry-guard": { + title: `Маршрутизация cwd по lane + защита от повторного входа`, + description: `Команды попадают в директорию нужного worktree через worktree_run(name, command) и параметр cwd. Защита от повторного входа не даёт случайно начать работу внутри уже активного worktree-контекста, сохраняя однозначную ownership-модель жизненного цикла.`, + alternatives: `Глобальная мутация cwd реализуется быстро, но легко протекает между параллельными задачами и загрязняет контекст выполнения. Тихий повторный вход в уже активный worktree делает ownership неоднозначным и осложняет teardown.`, + }, + "event-stream-observability": { + title: `Append-only поток lifecycle-событий`, + description: `События жизненного цикла дописываются в .worktrees/events.jsonl: worktree.create.*, worktree.remove.* и task.completed. Это превращает скрытые переходы состояния в наблюдаемые и запрашиваемые записи, а сбои делает явными через события *.failed вместо тихого исчезновения.`, + alternatives: `Опираться только на консольные логи легче, но они плохо подходят для аудита и хрупки в длинных сессиях. 
Полноценная event-bus инфраструктура мощнее, но тяжеловесна для учебной базовой реализации.`, + }, + "task-worktree-closeout": { + title: `Задача и workspace закрываются одним действием`, + description: `Вызов worktree_remove(..., complete_task=true) позволяет одним шагом убрать изолированную директорию и пометить связанную задачу как completed. Завершение остаётся явным переходом состояния через инструменты worktree_keep и worktree_remove, а не скрытой автоматической уборкой. Это уменьшает риск висячего состояния, когда задача уже помечена как done, а временная lane всё ещё активна, или наоборот.`, + alternatives: `Полностью ручное закрытие даёт максимум гибкости, но усиливает операционный drift. Полностью автоматическое удаление после любого completion рискует снести рабочее окружение до финальной проверки.`, + }, + "event-stream-side-channel": { + title: `Поток событий — канал наблюдаемости, а не источник истины`, + description: `Lifecycle-события улучшают аудируемость системы, но истинное состояние по-прежнему живёт в файлах задач и worktree. События нужно читать как трассу переходов, а не как замену основной state machine.`, + alternatives: `Если оставить только логи, структурированные переходы будут скрыты. Если же сделать события единственным источником состояния, появится риск рассинхронизации там, где не определены replay и repair-семантики.`, + }, + }, +}; diff --git a/web/src/data/annotations/s01.json b/web/src/data/annotations/s01.json index 80902db18..70d35f96c 100644 --- a/web/src/data/annotations/s01.json +++ b/web/src/data/annotations/s01.json @@ -8,11 +8,16 @@ "alternatives": "We could have started with a richer toolset (file I/O, HTTP, database), but that would obscure the core insight: an LLM with a shell is already a general-purpose agent. 
Starting minimal also makes it obvious what each subsequent version actually adds.", "zh": { "title": "为什么仅靠 Bash 就够了", - "description": "Bash 能读写文件、运行任意程序、在进程间传递数据、管理文件系统。任何额外的工具(read_file、write_file 等)都只是 bash 已有能力的子集。增加工具并不会解锁新能力,只会增加模型需要理解的接口。模型只需学习一个工具的 schema,实现代码不超过 100 行。这就是最小可行 agent:一个工具,一个循环。" + "description": "Bash 能读写文件、运行任意程序、在进程间传递数据、管理文件系统。任何额外的工具(read_file、write_file 等)都只是 bash 已有能力的子集。增加工具并不解锁新能力,只会增加模型需要理解的接口。模型只需学习一个工具的 schema,代码实现保持在 100 行以内。这就是最小可行 agent:一个工具,一个循环。" }, "ja": { "title": "Bash だけで十分な理由", "description": "Bash はファイルの読み書き、任意のプログラムの実行、プロセス間のデータパイプ、ファイルシステムの管理が可能です。追加のツール(read_file、write_file など)は bash が既に提供している機能の部分集合に過ぎません。ツールを増やしても新しい能力は得られず、モデルが理解すべきインターフェースが増えるだけです。モデルが学習するスキーマは1つだけで、実装は100行以内に収まります。これが最小限の実用的エージェント:1つのツール、1つのループです。" + }, + "ru": { + "title": "Почему одного Bash достаточно", + "description": "Bash умеет читать и писать файлы, запускать произвольные программы, передавать данные между процессами и управлять файловой системой. Любой дополнительный инструмент вроде read_file или write_file был бы лишь подмножеством того, что Bash уже даёт. Добавление новых инструментов не открывает принципиально новых возможностей, а только увеличивает поверхность для путаницы. Модели нужно выучить меньше схем, а реализация остаётся короче 100 строк. Это и есть минимально жизнеспособный агент: один инструмент, один цикл.", + "alternatives": "Можно было бы начать с более богатого набора инструментов: файлового I/O, HTTP, базы данных. Но тогда размылся бы главный вывод: LLM с shell уже является агентом общего назначения. Минимальный старт ещё и делает очевидным, что именно добавляет каждая следующая версия." 
} }, { @@ -26,7 +31,12 @@ }, "ja": { "title": "再帰プロセス生成によるサブエージェント機構", - "description": "エージェントが `python v0.py \"subtask\"` を実行すると、新しい LLM コンテキストを持つ完全に新しいプロセスが生成されます。この子プロセスは事実上サブエージェントです:独自のシステムプロンプト、会話履歴、タスクフォーカスを持ちます。完了すると、親プロセスは stdout で結果を受け取ります。これはフレームワークなしのサブエージェント委任です——共有メモリもメッセージパッシングもなく、stdin/stdout だけです。各子プロセスは親のコンテキストを参照できないため、関心の分離が自然に実現されます。" + "description": "エージェントが `python v0.py \"subtask\"` を実行すると、新しい LLM コンテキストを持つ完全に新しいプロセスが生成されます。この子プロセスは事実上サブエージェントです:独自のシステムプロンプト、会話履歴、タスクフォーカスを持ちます。完了すると、親プロセスは stdout で結果を受け取ります。これがフレームワークなしのサブエージェント委任です——共有メモリもメッセージパッシングもなく、stdin/stdout だけです。各子プロセスは親のコンテキストを参照できないため、関心の分離が自然に実現されます。" + }, + "ru": { + "title": "Рекурсивный запуск процессов как механизм субагентов", + "description": "Когда агент выполняет `python v0.py \"subtask\"`, он создаёт полностью новый процесс со свежим контекстом LLM. Этот дочерний процесс фактически и есть субагент: у него свой system prompt, своя история диалога и свой фокус на задаче. Когда он заканчивает работу, родитель получает результат через stdout. Это делегирование субагенту без всякого фреймворка, только на семантике Unix-процессов. Каждый дочерний процесс естественным образом изолирует свою область ответственности, потому что буквально не видит контекст родителя.", + "alternatives": "Система субагентов на уровне фреймворка, как Task tool в v3, дала бы больше контроля над тем, какими инструментами может пользоваться субагент и как возвращаются результаты. Но для v0 цель именно в том, чтобы показать: запуск процесса это самая примитивная форма агентной делегации, без общей памяти и без обмена сообщениями, только stdin/stdout." 
} }, { @@ -41,6 +51,11 @@ "ja": { "title": "計画フレームワークなし——モデルが全てを決定", "description": "プランナーもタスクキューも状態マシンもありません。システムプロンプトがモデルに問題の取り組み方を伝え、モデルがこれまでの会話に基づいて次に実行する bash コマンドを決定します。これは意図的な設計です:このレベルでは計画レイヤーの追加は時期尚早な抽象化です。モデルの思考の連鎖そのものが計画です。エージェントループはモデルがツールの呼び出しを止めるまで、次の行動を問い続けるだけです。" + }, + "ru": { + "title": "Без отдельного планировщика: решение принимает модель", + "description": "Здесь нет ни планировщика, ни очереди задач, ни конечного автомата. System prompt подсказывает модели, как подходить к задачам, а сама модель по истории диалога решает, какую команду Bash запускать следующей. Это сделано намеренно: на таком уровне дополнительный слой планирования был бы преждевременной абстракцией. Ход рассуждений модели и есть план. Сам агентский цикл просто продолжает спрашивать модель, что делать дальше, пока она не перестанет запрашивать инструменты.", + "alternatives": "В более поздних версиях, начиная с v2, появляется явное планирование через TodoWrite. Но v0 доказывает, что неявного планирования через рассуждение модели достаточно для многих задач. Отдельный планировщик становится нужен только тогда, когда важна внешняя видимость намерений агента." } } ] diff --git a/web/src/data/annotations/s07.json b/web/src/data/annotations/s07.json index c51909450..dd00b695a 100644 --- a/web/src/data/annotations/s07.json +++ b/web/src/data/annotations/s07.json @@ -13,6 +13,11 @@ "ja": { "title": "タスクをメモリではなく JSON ファイルとして保存", "description": "タスクはメモリ内ではなく .tasks/ ディレクトリに JSON ファイルとして永続化されます。3つの重要な利点があります:(1) プロセスのクラッシュ後もタスクが存続する――エージェントがタスク途中でクラッシュしても、再起動時にタスクボードはディスク上に残っています。(2) 複数のエージェントが同じタスクディレクトリを読み書きでき、共有メモリなしにマルチエージェント連携が可能になります。(3) 人間がデバッグのためにタスクファイルを検査・手動編集できます。ファイルシステムが共有データベースになります。" + }, + "ru": { + "title": "Задачи хранятся как JSON-файлы, а не в памяти", + "description": "Задачи сохраняются как JSON-файлы в каталоге `.tasks/`, а не живут только в оперативной памяти. 
Это даёт три важных преимущества: (1) задачи переживают падение процесса — если агент умирает посреди работы, после перезапуска task board всё ещё лежит на диске; (2) несколько агентов могут читать и писать в один и тот же каталог задач, координируясь без общей памяти; (3) человек может открыть и вручную поправить файлы задач при отладке. Файловая система становится общей базой данных.", + "alternatives": "Хранение в памяти, как в `TodoWrite` из v2, проще и быстрее, но теряет состояние при падении и не подходит для нескольких процессов. Настоящая база данных вроде SQLite или Redis дала бы ACID-гарантии и лучшую конкуренцию за записи, но добавила бы зависимость и операционную сложность. Файлы здесь выступают как уровень персистентности без внешних зависимостей, который работает везде." } }, { @@ -22,11 +27,16 @@ "alternatives": "Simple priority ordering (high/medium/low) doesn't capture 'task B literally cannot start until task A finishes.' A centralized coordinator that assigns tasks in order would work but creates a single point of failure and bottleneck. 
Declarative dependencies let each agent independently determine what it can work on by reading the task files.", "zh": { "title": "任务具有 blocks/blockedBy 依赖字段", - "description": "每个任务可以声明它阻塞哪些任务(下游依赖)以及它被哪些任务阻塞(上游依赖)。Agent 不会开始有未解决 blockedBy 依赖的任务。这对多代理协调至关重要:当 Agent A 在编写数据库 schema、Agent B 需要写查询时,Agent B 的任务被 Agent A 的任务阻塞。没有依赖关系,两个 agent 可能同时开始,而 Agent B 会针对一个尚不存在的 schema 工作。" + "description": "每个任务可以声明它阻塞哪些任务(下游依赖)以及它被哪些任务阻塞(上游依赖)。Agent 不会开始有未解决 blockedBy 依赖的任务。这对多代理协调至关重要:当 Agent A 在编写数据库 schema、Agent B 需要写查询时,Agent B 的任务被 Agent A 的任务阻塞。没有依赖关系,两个 agent 可能同时开始,而 Agent B 会对一个尚不存在的 schema 工作。" }, "ja": { "title": "タスクに blocks/blockedBy 依存関係フィールド", - "description": "各タスクは、自分がブロックするタスク(下流の依存先)と、自分をブロックするタスク(上流の依存元)を宣言できます。エージェントは未解決の blockedBy 依存がある タスクを開始しません。これはマルチエージェント連携に不可欠です:エージェント A がデータベーススキーマを書いていてエージェント B がそれに対するクエリを書く必要がある場合、B のタスクは A のタスクにブロックされます。依存関係がなければ両エージェントが同時に開始し、B はまだ存在しないスキーマに対して作業することになります。" + "description": "各タスクは、自分がブロックするタスク(下流の依存先)と、自分をブロックするタスク(上流の依存元)を宣言できます。エージェントは未解決の blockedBy 依存があるタスクを開始しません。これはマルチエージェント連携に不可欠です:エージェント A がデータベーススキーマを書いていてエージェント B がそれに対するクエリを書く必要がある場合、B のタスクは A のタスクにブロックされます。依存関係がなければ両エージェントが同時に開始し、B はまだ存在しないスキーマに対して作業することになります。" + }, + "ru": { + "title": "У задач есть поля зависимостей blocks и blockedBy", + "description": "Каждая задача может явно указать, какие другие задачи она блокирует, и какими задачами заблокирована сама. Агент не начнёт задачу, пока у неё остаются незакрытые зависимости в `blockedBy`. Это критично для координации нескольких агентов: если агент A пишет схему базы данных, а агент B должен писать запросы к ней, задача B будет заблокирована задачей A. Без зависимостей оба агента могли бы стартовать одновременно, и B работал бы против схемы, которой ещё не существует.", + "alternatives": "Простая сортировка по приоритету вроде high/medium/low не выражает условие «задача B буквально не может начаться, пока не завершится задача A». 
Централизованный координатор тоже решил бы это, но стал бы узким местом и единой точкой отказа. Декларативные зависимости позволяют каждому агенту самостоятельно понять, что ему доступно для работы, просто читая файлы задач." } }, { @@ -40,7 +50,12 @@ }, "ja": { "title": "Task を主線にしつつ Todo も併存", - "description": "TaskManager は Todo のメンタルモデルを拡張し、本コースでは s07 以降のデフォルトになる。どちらもステータス付き作業項目を扱うが、TaskManager にはファイル永続化(クラッシュ耐性)、依存関係追跡(blocks/blockedBy)、owner、マルチプロセス協調がある。Todo は短く直線的な単発作業では引き続き有効。" + "description": "TaskManager は Todo のメンタルモデルを拡張し、本コースでは s07 以降のデフォルトになります。どちらもステータス付き作業項目を扱いますが、TaskManager にはファイル永続化(クラッシュ耐性)、依存関係追跡(blocks/blockedBy)、owner、マルチプロセス協調があります。Todo は短く直線的な単発作業では引き続き有効です。" + }, + "ru": { + "title": "Task становится основой курса, но Todo всё ещё полезен", + "description": "TaskManager расширяет ментальную модель `Todo` и начиная с `s07` становится основным рабочим процессом в этом курсе. Оба инструмента отслеживают задачи со статусами, но TaskManager добавляет файловую персистентность, зависимость `blocks/blockedBy`, поля владения и координацию между несколькими процессами. Todo всё ещё хорошо подходит для коротких, линейных и одноразовых задач, где тяжёлая координация не нужна.", + "alternatives": "Если использовать только Todo, модель останется минимальной, но плохо подойдёт для долгих или совместных задач. Если использовать только Task вообще везде, выиграем в единообразии, но для крошечных одноразовых задач это может оказаться избыточно тяжёлым." } }, { @@ -50,11 +65,16 @@ "alternatives": "Blind overwrite writes are simpler but can corrupt coordination state under parallel execution. 
A database with optimistic locking would enforce stronger safety, but the course keeps file-based state for zero-dependency teaching.", "zh": { "title": "持久化仍需要写入纪律", - "description": "文件持久化能降低上下文丢失,但不会自动消除并发写入风险。写任务状态前应先重读 JSON、校验 `status/blockedBy` 是否符合预期,再原子写回,避免不同 agent 悄悄覆盖彼此状态。" + "description": "文件持久化能降低上下文丢失,但并不会自动消除并发写入风险。写任务状态前应先重读 JSON、校验 `status/blockedBy` 是否符合预期,再原子写回,避免不同 agent 悄悄覆盖彼此状态。" }, "ja": { "title": "耐久性には書き込み規律が必要", - "description": "ファイル永続化だけでは並行書き込み競合は防げない。更新前に JSON を再読込し、`status/blockedBy` を検証して原子的に保存することで、他エージェントの遷移上書きを防ぐ。" + "description": "ファイル永続化だけでは並行書き込み競合は防げない。更新前に JSON を再読込みし、`status/blockedBy` を検証して原子的に保存することで、他エージェントの遷移上書きを防ぐ。" + }, + "ru": { + "title": "Надёжность хранения требует дисциплины записи", + "description": "Файловая персистентность уменьшает потерю контекста, но сама по себе не устраняет риск конкурентных записей. Перед обновлением состояния задачи нужно заново прочитать JSON, проверить ожидаемые поля статуса и зависимостей, а затем сохранить изменения атомарно. Это не даёт одному агенту тихо затереть переход состояния, который уже сделал другой агент.", + "alternatives": "Слепая перезапись проще, но при параллельной работе она легко ломает общее координационное состояние. База данных с optimistic locking дала бы более сильные гарантии, но курс специально оставляет файловое состояние как вариант без внешних зависимостей." } } ] diff --git a/web/src/data/execution-flows.ts b/web/src/data/execution-flows.ts index 72ce54dd0..167741258 100644 --- a/web/src/data/execution-flows.ts +++ b/web/src/data/execution-flows.ts @@ -310,6 +310,89 @@ export const EXECUTION_FLOWS: Record = { }, }; -export function getFlowForVersion(version: string): FlowDefinition | null { - return EXECUTION_FLOWS[version] ?? 
null;
+const RU_LABELS: Record<string, string> = {
+  "User Input": "Ввод пользователя",
+  "LLM Call": "Вызов LLM",
+  "tool_use?": "tool_use?",
+  "Execute Bash": "Выполнить Bash",
+  "Append Result": "Добавить результат",
+  Output: "Ответ",
+  "Tool Dispatch": "Диспетчеризация инструмента",
+  "Create Todos": "Создать todo",
+  "Execute Tool": "Выполнить инструмент",
+  "task tool?": "инструмент task?",
+  "Spawn Subagent\n(fresh messages[])": "Запустить субагента\n(свежий messages[])",
+  "Subagent Loop": "Цикл субагента",
+  "load_skill?": "load_skill?",
+  "Read SKILL.md": "Прочитать SKILL.md",
+  "Inject via\ntool_result": "Вставить через\ntool_result",
+  "Over token\nlimit?": "Лимит токенов\nпревышен?",
+  "Compress Context": "Сжать контекст",
+  "task_manager?": "task_manager?",
+  "CRUD Task\n(file-based)": "CRUD задач\n(через файлы)",
+  "Check\nDependencies": "Проверить\nзависимости",
+  "Background?": "Фоновый режим?",
+  "Spawn Thread": "Запустить поток",
+  "Notification\nQueue": "Очередь\nуведомлений",
+  "LLM Call\n(team lead)": "Вызов LLM\n(лид команды)",
+  "Team tool?": "Инструмент команды?",
+  "Spawn\nTeammate": "Запустить\nнапарника",
+  "Send Message\n(JSONL inbox)": "Отправить сообщение\n(JSONL inbox)",
+  "Teammate Agent\n(own loop)": "Агент-напарник\n(свой цикл)",
+  "Protocol?": "Протокол?",
+  "Shutdown\nRequest": "Запрос на\nостановку",
+  "FSM:\npending->approved": "FSM:\npending->approved",
+  "Teammate\nreceives request_id": "Напарник\nполучает request_id",
+  "Check Inbox": "Проверить inbox",
+  "Idle Cycle": "Холостой цикл",
+  "Poll Tasks\n+ Auto-Claim": "Опрос задач\n+ авто-захват",
+  "worktree tool?": "Инструмент worktree?",
+  "Task Board\\n(.tasks)": "Доска задач\\n(.tasks)",
+  "Allocate / Enter\\nWorktree": "Выделить / войти\\nв worktree",
+  "Run in\\nIsolated Dir": "Запуск в\\nизолированном каталоге",
+  "Closeout:\\nworktree_keep / remove": "Завершение:\\nworktree_keep / remove",
+  "Emit Lifecycle Events\\n(side-channel)": "Публиковать события\\nжизненного цикла",
+ 
"Optional Read\\nworktree_events": "Необязательное чтение\\nworktree_events", +}; + +const RU_EDGE_LABELS: Record = { + yes: "да", + no: "нет", + task: "task", + other: "другое", + skill: "skill", + bg: "bg", + fg: "fg", + spawn: "spawn", + shutdown: "shutdown", + "task ops": "операции task", + "create/bind": "создать/bind", + "run/status": "run/status", + "allocate lane": "выделить lane", + "task result": "результат task", + "emit create": "событие create", + "create result": "результат create", + "run/status result": "результат run/status", + "emit closeout": "событие closeout", + "closeout result": "результат closeout", + "optional query": "необязательный запрос", + "events result": "результат events", +}; + +export function getFlowForVersion(version: string, locale = "en"): FlowDefinition | null { + const flow = EXECUTION_FLOWS[version] ?? null; + if (!flow || locale !== "ru") { + return flow; + } + + return { + nodes: flow.nodes.map((node) => ({ + ...node, + label: RU_LABELS[node.label] || node.label, + })), + edges: flow.edges.map((edge) => ({ + ...edge, + label: edge.label ? RU_EDGE_LABELS[edge.label] || edge.label : edge.label, + })), + }; } diff --git a/web/src/data/generated/docs.json b/web/src/data/generated/docs.json index 12d520c67..9e0b820f6 100644 --- a/web/src/data/generated/docs.json +++ b/web/src/data/generated/docs.json @@ -3,216 +3,288 @@ "version": "s01", "locale": "en", "title": "s01: The Agent Loop", - "content": "# s01: The Agent Loop\n\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"One loop & Bash is all you need\"* -- one tool + one loop = an agent.\n\n## Problem\n\nA language model can reason about code, but it can't *touch* the real world -- can't read files, run tests, or check errors. Without a loop, every tool call requires you to manually copy-paste results back. 
You become the loop.\n\n## Solution\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n (loop until stop_reason != \"tool_use\")\n```\n\nOne exit condition controls the entire flow. The loop runs until the model stops calling tools.\n\n## How It Works\n\n1. User prompt becomes the first message.\n\n```python\nmessages.append({\"role\": \"user\", \"content\": query})\n```\n\n2. Send messages + tool definitions to the LLM.\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. Append the assistant response. Check `stop_reason` -- if the model didn't call a tool, we're done.\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n4. Execute each tool call, collect results, append as a user message. 
Loop back to step 2.\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\nAssembled into one function:\n\n```python\ndef agent_loop(query):\n messages = [{\"role\": \"user\", \"content\": query}]\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\nThat's the entire agent in under 30 lines. Everything else in this course layers on top -- without changing the loop.\n\n## What Changed\n\n| Component | Before | After |\n|---------------|------------|--------------------------------|\n| Agent loop | (none) | `while True` + stop_reason |\n| Tools | (none) | `bash` (one tool) |\n| Messages | (none) | Accumulating list |\n| Control flow | (none) | `stop_reason != \"tool_use\"` |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. 
`Create a directory called test_output and write 3 files in it`\n" + "content": "# s01: The Agent Loop\r\n\r\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\r\n\r\n> *\"One loop & Bash is all you need\"* -- one tool + one loop = an agent.\r\n\r\n## Problem\r\n\r\nA language model can reason about code, but it can't *touch* the real world -- can't read files, run tests, or check errors. Without a loop, every tool call requires you to manually copy-paste results back. You become the loop.\r\n\r\n## Solution\r\n\r\n```\r\n+--------+ +-------+ +---------+\r\n| User | ---> | LLM | ---> | Tool |\r\n| prompt | | | | execute |\r\n+--------+ +---+---+ +----+----+\r\n ^ |\r\n | tool_result |\r\n +----------------+\r\n (loop until stop_reason != \"tool_use\")\r\n```\r\n\r\nOne exit condition controls the entire flow. The loop runs until the model stops calling tools.\r\n\r\n## How It Works\r\n\r\n1. User prompt becomes the first message.\r\n\r\n```python\r\nmessages.append({\"role\": \"user\", \"content\": query})\r\n```\r\n\r\n2. Send messages + tool definitions to the LLM.\r\n\r\n```python\r\nresponse = client.messages.create(\r\n model=MODEL, system=SYSTEM, messages=messages,\r\n tools=TOOLS, max_tokens=8000,\r\n)\r\n```\r\n\r\n3. Append the assistant response. Check `stop_reason` -- if the model didn't call a tool, we're done.\r\n\r\n```python\r\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\r\nif response.stop_reason != \"tool_use\":\r\n return\r\n```\r\n\r\n4. Execute each tool call, collect results, append as a user message. 
Loop back to step 2.\r\n\r\n```python\r\nresults = []\r\nfor block in response.content:\r\n if block.type == \"tool_use\":\r\n output = run_bash(block.input[\"command\"])\r\n results.append({\r\n \"type\": \"tool_result\",\r\n \"tool_use_id\": block.id,\r\n \"content\": output,\r\n })\r\nmessages.append({\"role\": \"user\", \"content\": results})\r\n```\r\n\r\nAssembled into one function:\r\n\r\n```python\r\ndef agent_loop(query):\r\n messages = [{\"role\": \"user\", \"content\": query}]\r\n while True:\r\n response = client.messages.create(\r\n model=MODEL, system=SYSTEM, messages=messages,\r\n tools=TOOLS, max_tokens=8000,\r\n )\r\n messages.append({\"role\": \"assistant\", \"content\": response.content})\r\n\r\n if response.stop_reason != \"tool_use\":\r\n return\r\n\r\n results = []\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n output = run_bash(block.input[\"command\"])\r\n results.append({\r\n \"type\": \"tool_result\",\r\n \"tool_use_id\": block.id,\r\n \"content\": output,\r\n })\r\n messages.append({\"role\": \"user\", \"content\": results})\r\n```\r\n\r\nThat's the entire agent in under 30 lines. Everything else in this course layers on top -- without changing the loop.\r\n\r\n## What Changed\r\n\r\n| Component | Before | After |\r\n|---------------|------------|--------------------------------|\r\n| Agent loop | (none) | `while True` + stop_reason |\r\n| Tools | (none) | `bash` (one tool) |\r\n| Messages | (none) | Accumulating list |\r\n| Control flow | (none) | `stop_reason != \"tool_use\"` |\r\n\r\n## Try It\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s01_agent_loop.py\r\n```\r\n\r\n1. `Create a file called hello.py that prints \"Hello, World!\"`\r\n2. `List all Python files in this directory`\r\n3. `What is the current git branch?`\r\n4. 
`Create a directory called test_output and write 3 files in it`\r\n" }, { "version": "s02", "locale": "en", "title": "s02: Tool Use", - "content": "# s02: Tool Use\n\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Adding a tool means adding one handler\"* -- the loop stays the same; new tools register into the dispatch map.\n\n## Problem\n\nWith only `bash`, the agent shells out for everything. `cat` truncates unpredictably, `sed` fails on special characters, and every bash call is an unconstrained security surface. Dedicated tools like `read_file` and `write_file` let you enforce path sandboxing at the tool level.\n\nThe key insight: adding tools does not require changing the loop.\n\n## Solution\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}.\nOne lookup replaces any if/elif chain.\n```\n\n## How It Works\n\n1. Each tool gets a handler function. Path sandboxing prevents workspace escape.\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. 
The dispatch map links tool names to handlers.\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. In the loop, look up the handler by name. The loop body itself is unchanged from s01.\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\nAdd a tool = add a handler + add a schema entry. The loop never changes.\n\n## What Changed From s01\n\n| Component | Before (s01) | After (s02) |\n|----------------|--------------------|----------------------------|\n| Tools | 1 (bash only) | 4 (bash, read, write, edit)|\n| Dispatch | Hardcoded bash call | `TOOL_HANDLERS` dict |\n| Path safety | None | `safe_path()` sandbox |\n| Agent loop | Unchanged | Unchanged |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n" + "content": "# s02: Tool Use\r\n\r\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\r\n\r\n> *\"Adding a tool means adding one handler\"* -- the loop stays the same; new tools register into the dispatch map.\r\n\r\n## Problem\r\n\r\nWith only `bash`, the agent shells out for everything. `cat` truncates unpredictably, `sed` fails on special characters, and every bash call is an unconstrained security surface. 
Dedicated tools like `read_file` and `write_file` let you enforce path sandboxing at the tool level.\r\n\r\nThe key insight: adding tools does not require changing the loop.\r\n\r\n## Solution\r\n\r\n```\r\n+--------+ +-------+ +------------------+\r\n| User | ---> | LLM | ---> | Tool Dispatch |\r\n| prompt | | | | { |\r\n+--------+ +---+---+ | bash: run_bash |\r\n ^ | read: run_read |\r\n | | write: run_wr |\r\n +-----------+ edit: run_edit |\r\n tool_result | } |\r\n +------------------+\r\n\r\nThe dispatch map is a dict: {tool_name: handler_function}.\r\nOne lookup replaces any if/elif chain.\r\n```\r\n\r\n## How It Works\r\n\r\n1. Each tool gets a handler function. Path sandboxing prevents workspace escape.\r\n\r\n```python\r\ndef safe_path(p: str) -> Path:\r\n path = (WORKDIR / p).resolve()\r\n if not path.is_relative_to(WORKDIR):\r\n raise ValueError(f\"Path escapes workspace: {p}\")\r\n return path\r\n\r\ndef run_read(path: str, limit: int = None) -> str:\r\n text = safe_path(path).read_text()\r\n lines = text.splitlines()\r\n if limit and limit < len(lines):\r\n lines = lines[:limit]\r\n return \"\\n\".join(lines)[:50000]\r\n```\r\n\r\n2. The dispatch map links tool names to handlers.\r\n\r\n```python\r\nTOOL_HANDLERS = {\r\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\r\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\r\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\r\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\r\n kw[\"new_text\"]),\r\n}\r\n```\r\n\r\n3. In the loop, look up the handler by name. 
The loop body itself is unchanged from s01.\r\n\r\n```python\r\nfor block in response.content:\r\n if block.type == \"tool_use\":\r\n handler = TOOL_HANDLERS.get(block.name)\r\n output = handler(**block.input) if handler \\\r\n else f\"Unknown tool: {block.name}\"\r\n results.append({\r\n \"type\": \"tool_result\",\r\n \"tool_use_id\": block.id,\r\n \"content\": output,\r\n })\r\n```\r\n\r\nAdd a tool = add a handler + add a schema entry. The loop never changes.\r\n\r\n## What Changed From s01\r\n\r\n| Component | Before (s01) | After (s02) |\r\n|----------------|--------------------|----------------------------|\r\n| Tools | 1 (bash only) | 4 (bash, read, write, edit)|\r\n| Dispatch | Hardcoded bash call | `TOOL_HANDLERS` dict |\r\n| Path safety | None | `safe_path()` sandbox |\r\n| Agent loop | Unchanged | Unchanged |\r\n\r\n## Try It\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s02_tool_use.py\r\n```\r\n\r\n1. `Read the file requirements.txt`\r\n2. `Create a file called greet.py with a greet(name) function`\r\n3. `Edit greet.py to add a docstring to the function`\r\n4. `Read greet.py to verify the edit worked`\r\n" }, { "version": "s03", "locale": "en", "title": "s03: TodoWrite", - "content": "# s03: TodoWrite\n\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"An agent without a plan drifts\"* -- list the steps first, then execute.\n\n## Problem\n\nOn multi-step tasks, the model loses track. It repeats work, skips steps, or wanders off. Long conversations make this worse -- the system prompt fades as tool results fill the context. 
A 10-step refactoring might complete steps 1-3, then the model starts improvising because it forgot steps 4-10.\n\n## Solution\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## How It Works\n\n1. TodoManager stores items with statuses. Only one item can be `in_progress` at a time.\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated, in_progress_count = [], 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\n \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. The `todo` tool goes into the dispatch map like any other tool.\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. A nag reminder injects a nudge if the model goes 3+ rounds without calling `todo`.\n\n```python\nif rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n```\n\nThe \"one in_progress at a time\" constraint forces sequential focus. 
The nag reminder creates accountability.\n\n## What Changed From s02\n\n| Component | Before (s02) | After (s03) |\n|----------------|------------------|----------------------------|\n| Tools | 4 | 5 (+todo) |\n| Planning | None | TodoManager with statuses |\n| Nag injection | None | `` after 3 rounds|\n| Agent loop | Simple dispatch | + rounds_since_todo counter|\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n" + "content": "# s03: TodoWrite\r\n\r\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\r\n\r\n> *\"An agent without a plan drifts\"* -- list the steps first, then execute.\r\n\r\n## Problem\r\n\r\nOn multi-step tasks, the model loses track. It repeats work, skips steps, or wanders off. Long conversations make this worse -- the system prompt fades as tool results fill the context. A 10-step refactoring might complete steps 1-3, then the model starts improvising because it forgot steps 4-10.\r\n\r\n## Solution\r\n\r\n```\r\n+--------+ +-------+ +---------+\r\n| User | ---> | LLM | ---> | Tools |\r\n| prompt | | | | + todo |\r\n+--------+ +---+---+ +----+----+\r\n ^ |\r\n | tool_result |\r\n +----------------+\r\n |\r\n +-----------+-----------+\r\n | TodoManager state |\r\n | [ ] task A |\r\n | [>] task B <- doing |\r\n | [x] task C |\r\n +-----------------------+\r\n |\r\n if rounds_since_todo >= 3:\r\n inject into tool_result\r\n```\r\n\r\n## How It Works\r\n\r\n1. TodoManager stores items with statuses. 
Only one item can be `in_progress` at a time.\r\n\r\n```python\r\nclass TodoManager:\r\n def update(self, items: list) -> str:\r\n validated, in_progress_count = [], 0\r\n for item in items:\r\n status = item.get(\"status\", \"pending\")\r\n if status == \"in_progress\":\r\n in_progress_count += 1\r\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\r\n \"status\": status})\r\n if in_progress_count > 1:\r\n raise ValueError(\"Only one task can be in_progress\")\r\n self.items = validated\r\n return self.render()\r\n```\r\n\r\n2. The `todo` tool goes into the dispatch map like any other tool.\r\n\r\n```python\r\nTOOL_HANDLERS = {\r\n # ...base tools...\r\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\r\n}\r\n```\r\n\r\n3. A nag reminder injects a nudge if the model goes 3+ rounds without calling `todo`.\r\n\r\n```python\r\nif rounds_since_todo >= 3 and messages:\r\n last = messages[-1]\r\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\r\n last[\"content\"].insert(0, {\r\n \"type\": \"text\",\r\n \"text\": \"Update your todos.\",\r\n })\r\n```\r\n\r\nThe \"one in_progress at a time\" constraint forces sequential focus. The nag reminder creates accountability.\r\n\r\n## What Changed From s02\r\n\r\n| Component | Before (s02) | After (s03) |\r\n|----------------|------------------|----------------------------|\r\n| Tools | 4 | 5 (+todo) |\r\n| Planning | None | TodoManager with statuses |\r\n| Nag injection | None | `` after 3 rounds|\r\n| Agent loop | Simple dispatch | + rounds_since_todo counter|\r\n\r\n## Try It\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s03_todo_write.py\r\n```\r\n\r\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\r\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\r\n3. 
`Review all Python files and fix any style issues`\r\n" }, { "version": "s04", "locale": "en", "title": "s04: Subagents", - "content": "# s04: Subagents\n\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Break big tasks down; each subtask gets a clean context\"* -- subagents use independent messages[], keeping the main conversation clean.\n\n## Problem\n\nAs the agent works, its messages array grows. Every file read, every bash output stays in context permanently. \"What testing framework does this project use?\" might require reading 5 files, but the parent only needs the answer: \"pytest.\"\n\n## Solution\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ----------> | while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <---------- | return last text |\n+------------------+ +------------------+\n\nParent context stays clean. Subagent context is discarded.\n```\n\n## How It Works\n\n1. The parent gets a `task` tool. The child gets all base tools except `task` (no recursive spawning).\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. The subagent starts with `messages=[]` and runs its own loop. 
Only the final text returns to the parent.\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\nThe child's entire message history (possibly 30+ tool calls) is discarded. The parent receives a one-paragraph summary as a normal `tool_result`.\n\n## What Changed From s03\n\n| Component | Before (s03) | After (s04) |\n|----------------|------------------|---------------------------|\n| Tools | 5 | 5 (base) + task (parent) |\n| Context | Single shared | Parent + child isolation |\n| Subagent | None | `run_subagent()` function |\n| Return value | N/A | Summary text only |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. 
`Use a task to create a new module, then verify it from here`\n" + "content": "# s04: Subagents\r\n\r\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\r\n\r\n> *\"Break big tasks down; each subtask gets a clean context\"* -- subagents use independent messages[], keeping the main conversation clean.\r\n\r\n## Problem\r\n\r\nAs the agent works, its messages array grows. Every file read, every bash output stays in context permanently. \"What testing framework does this project use?\" might require reading 5 files, but the parent only needs the answer: \"pytest.\"\r\n\r\n## Solution\r\n\r\n```\r\nParent agent Subagent\r\n+------------------+ +------------------+\r\n| messages=[...] | | messages=[] | <-- fresh\r\n| | dispatch | |\r\n| tool: task | ----------> | while tool_use: |\r\n| prompt=\"...\" | | call tools |\r\n| | summary | append results |\r\n| result = \"...\" | <---------- | return last text |\r\n+------------------+ +------------------+\r\n\r\nParent context stays clean. Subagent context is discarded.\r\n```\r\n\r\n## How It Works\r\n\r\n1. The parent gets a `task` tool. The child gets all base tools except `task` (no recursive spawning).\r\n\r\n```python\r\nPARENT_TOOLS = CHILD_TOOLS + [\r\n {\"name\": \"task\",\r\n \"description\": \"Spawn a subagent with fresh context.\",\r\n \"input_schema\": {\r\n \"type\": \"object\",\r\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\r\n \"required\": [\"prompt\"],\r\n }},\r\n]\r\n```\r\n\r\n2. The subagent starts with `messages=[]` and runs its own loop. 
Only the final text returns to the parent.\r\n\r\n```python\r\ndef run_subagent(prompt: str) -> str:\r\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\r\n for _ in range(30): # safety limit\r\n response = client.messages.create(\r\n model=MODEL, system=SUBAGENT_SYSTEM,\r\n messages=sub_messages,\r\n tools=CHILD_TOOLS, max_tokens=8000,\r\n )\r\n sub_messages.append({\"role\": \"assistant\",\r\n \"content\": response.content})\r\n if response.stop_reason != \"tool_use\":\r\n break\r\n results = []\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n handler = TOOL_HANDLERS.get(block.name)\r\n output = handler(**block.input)\r\n results.append({\"type\": \"tool_result\",\r\n \"tool_use_id\": block.id,\r\n \"content\": str(output)[:50000]})\r\n sub_messages.append({\"role\": \"user\", \"content\": results})\r\n return \"\".join(\r\n b.text for b in response.content if hasattr(b, \"text\")\r\n ) or \"(no summary)\"\r\n```\r\n\r\nThe child's entire message history (possibly 30+ tool calls) is discarded. The parent receives a one-paragraph summary as a normal `tool_result`.\r\n\r\n## What Changed From s03\r\n\r\n| Component | Before (s03) | After (s04) |\r\n|----------------|------------------|---------------------------|\r\n| Tools | 5 | 5 (base) + task (parent) |\r\n| Context | Single shared | Parent + child isolation |\r\n| Subagent | None | `run_subagent()` function |\r\n| Return value | N/A | Summary text only |\r\n\r\n## Try It\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s04_subagent.py\r\n```\r\n\r\n1. `Use a subtask to find what testing framework this project uses`\r\n2. `Delegate: read all .py files and summarize what each one does`\r\n3. 
`Use a task to create a new module, then verify it from here`\r\n" }, { "version": "s05", "locale": "en", "title": "s05: Skills", - "content": "# s05: Skills\n\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Load knowledge when you need it, not upfront\"* -- inject via tool_result, not the system prompt.\n\n## Problem\n\nYou want the agent to follow domain-specific workflows: git conventions, testing patterns, code review checklists. Putting everything in the system prompt wastes tokens on unused skills. 10 skills at 2000 tokens each = 20,000 tokens, most of which are irrelevant to any given task.\n\n## Solution\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| |\n+--------------------------------------+\n```\n\nLayer 1: skill *names* in system prompt (cheap). Layer 2: full *body* via tool_result (on demand).\n\n## How It Works\n\n1. Each skill is a directory containing a `SKILL.md` with YAML frontmatter.\n\n```\nskills/\n pdf/\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\n code-review/\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\n```\n\n2. 
SkillLoader scans for `SKILL.md` files, uses the directory name as the skill identifier.\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body}\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n\"\n```\n\n3. Layer 1 goes into the system prompt. Layer 2 is just another tool handler.\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\nThe model learns what skills exist (cheap) and loads them when relevant (expensive).\n\n## What Changed From s04\n\n| Component | Before (s04) | After (s05) |\n|----------------|------------------|----------------------------|\n| Tools | 5 (base + task) | 5 (base + load_skill) |\n| System prompt | Static string | + skill descriptions |\n| Knowledge | None | skills/\\*/SKILL.md files |\n| Injection | None | Two-layer (system + result)|\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. 
`Build an MCP server using the mcp-builder skill`\n" + "content": "# s05: Skills\r\n\r\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\r\n\r\n> *\"Load knowledge when you need it, not upfront\"* -- inject via tool_result, not the system prompt.\r\n\r\n## Problem\r\n\r\nYou want the agent to follow domain-specific workflows: git conventions, testing patterns, code review checklists. Putting everything in the system prompt wastes tokens on unused skills. 10 skills at 2000 tokens each = 20,000 tokens, most of which are irrelevant to any given task.\r\n\r\n## Solution\r\n\r\n```\r\nSystem prompt (Layer 1 -- always present):\r\n+--------------------------------------+\r\n| You are a coding agent. |\r\n| Skills available: |\r\n| - git: Git workflow helpers | ~100 tokens/skill\r\n| - test: Testing best practices |\r\n+--------------------------------------+\r\n\r\nWhen model calls load_skill(\"git\"):\r\n+--------------------------------------+\r\n| tool_result (Layer 2 -- on demand): |\r\n| |\r\n| Full git workflow instructions... | ~2000 tokens\r\n| Step 1: ... |\r\n| |\r\n+--------------------------------------+\r\n```\r\n\r\nLayer 1: skill *names* in system prompt (cheap). Layer 2: full *body* via tool_result (on demand).\r\n\r\n## How It Works\r\n\r\n1. Each skill is a directory containing a `SKILL.md` with YAML frontmatter.\r\n\r\n```\r\nskills/\r\n pdf/\r\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\r\n code-review/\r\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\r\n```\r\n\r\n2. 
SkillLoader scans for `SKILL.md` files, uses the directory name as the skill identifier.\r\n\r\n```python\r\nclass SkillLoader:\r\n def __init__(self, skills_dir: Path):\r\n self.skills = {}\r\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\r\n text = f.read_text()\r\n meta, body = self._parse_frontmatter(text)\r\n name = meta.get(\"name\", f.parent.name)\r\n self.skills[name] = {\"meta\": meta, \"body\": body}\r\n\r\n def get_descriptions(self) -> str:\r\n lines = []\r\n for name, skill in self.skills.items():\r\n desc = skill[\"meta\"].get(\"description\", \"\")\r\n lines.append(f\" - {name}: {desc}\")\r\n return \"\\n\".join(lines)\r\n\r\n def get_content(self, name: str) -> str:\r\n skill = self.skills.get(name)\r\n if not skill:\r\n return f\"Error: Unknown skill '{name}'.\"\r\n return f\"\\n{skill['body']}\\n\"\r\n```\r\n\r\n3. Layer 1 goes into the system prompt. Layer 2 is just another tool handler.\r\n\r\n```python\r\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\r\nSkills available:\r\n{SKILL_LOADER.get_descriptions()}\"\"\"\r\n\r\nTOOL_HANDLERS = {\r\n # ...base tools...\r\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\r\n}\r\n```\r\n\r\nThe model learns what skills exist (cheap) and loads them when relevant (expensive).\r\n\r\n## What Changed From s04\r\n\r\n| Component | Before (s04) | After (s05) |\r\n|----------------|------------------|----------------------------|\r\n| Tools | 5 (base + task) | 5 (base + load_skill) |\r\n| System prompt | Static string | + skill descriptions |\r\n| Knowledge | None | skills/\\*/SKILL.md files |\r\n| Injection | None | Two-layer (system + result)|\r\n\r\n## Try It\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s05_skill_loading.py\r\n```\r\n\r\n1. `What skills are available?`\r\n2. `Load the agent-builder skill and follow its instructions`\r\n3. `I need to do a code review -- load the relevant skill first`\r\n4. 
`Build an MCP server using the mcp-builder skill`\r\n" }, { "version": "s06", "locale": "en", "title": "s06: Context Compact", - "content": "# s06: Context Compact\n\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Context will fill up; you need a way to make room\"* -- three-layer compression strategy for infinite sessions.\n\n## Problem\n\nThe context window is finite. A single `read_file` on a 1000-line file costs ~4000 tokens. After reading 30 files and running 20 bash commands, you hit 100,000+ tokens. The agent cannot work on large codebases without compression.\n\n## Solution\n\nThree layers, increasing in aggressiveness:\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## How It Works\n\n1. **Layer 1 -- micro_compact**: Before each LLM call, replace old tool results with placeholders.\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n for _, _, part in tool_results[:-KEEP_RECENT]:\n if len(part.get(\"content\", \"\")) > 100:\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. 
**Layer 2 -- auto_compact**: When tokens exceed threshold, save full transcript to disk, then ask the LLM to summarize.\n\n```python\ndef auto_compact(messages: list) -> list:\n # Save transcript for recovery\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n # LLM summarizes\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **Layer 3 -- manual compact**: The `compact` tool triggers the same summarization on demand.\n\n4. The loop integrates all three:\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ... tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\nTranscripts preserve full history on disk. Nothing is truly lost -- just moved out of active context.\n\n## What Changed From s05\n\n| Component | Before (s05) | After (s06) |\n|----------------|------------------|----------------------------|\n| Tools | 5 | 5 (base + compact) |\n| Context mgmt | None | Three-layer compression |\n| Micro-compact | None | Old results -> placeholders|\n| Auto-compact | None | Token threshold trigger |\n| Transcripts | None | Saved to .transcripts/ |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n1. `Read every Python file in the agents/ directory one by one` (watch micro-compact replace old results)\n2. 
`Keep reading files until compression triggers automatically`\n3. `Use the compact tool to manually compress the conversation`\n" + "content": "# s06: Context Compact\r\n\r\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\r\n\r\n> *\"Context will fill up; you need a way to make room\"* -- three-layer compression strategy for infinite sessions.\r\n\r\n## Problem\r\n\r\nThe context window is finite. A single `read_file` on a 1000-line file costs ~4000 tokens. After reading 30 files and running 20 bash commands, you hit 100,000+ tokens. The agent cannot work on large codebases without compression.\r\n\r\n## Solution\r\n\r\nThree layers, increasing in aggressiveness:\r\n\r\n```\r\nEvery turn:\r\n+------------------+\r\n| Tool call result |\r\n+------------------+\r\n |\r\n v\r\n[Layer 1: micro_compact] (silent, every turn)\r\n Replace tool_result > 3 turns old\r\n with \"[Previous: used {tool_name}]\"\r\n |\r\n v\r\n[Check: tokens > 50000?]\r\n | |\r\n no yes\r\n | |\r\n v v\r\ncontinue [Layer 2: auto_compact]\r\n Save transcript to .transcripts/\r\n LLM summarizes conversation.\r\n Replace all messages with [summary].\r\n |\r\n v\r\n [Layer 3: compact tool]\r\n Model calls compact explicitly.\r\n Same summarization as auto_compact.\r\n```\r\n\r\n## How It Works\r\n\r\n1. 
**Layer 1 -- micro_compact**: Before each LLM call, replace old tool results with placeholders.\r\n\r\n```python\r\ndef micro_compact(messages: list) -> list:\r\n tool_results = []\r\n for i, msg in enumerate(messages):\r\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\r\n for j, part in enumerate(msg[\"content\"]):\r\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\r\n tool_results.append((i, j, part))\r\n if len(tool_results) <= KEEP_RECENT:\r\n return messages\r\n for _, _, part in tool_results[:-KEEP_RECENT]:\r\n if len(part.get(\"content\", \"\")) > 100:\r\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\r\n return messages\r\n```\r\n\r\n2. **Layer 2 -- auto_compact**: When tokens exceed threshold, save full transcript to disk, then ask the LLM to summarize.\r\n\r\n```python\r\ndef auto_compact(messages: list) -> list:\r\n # Save transcript for recovery\r\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\r\n with open(transcript_path, \"w\") as f:\r\n for msg in messages:\r\n f.write(json.dumps(msg, default=str) + \"\\n\")\r\n # LLM summarizes\r\n response = client.messages.create(\r\n model=MODEL,\r\n messages=[{\"role\": \"user\", \"content\":\r\n \"Summarize this conversation for continuity...\"\r\n + json.dumps(messages, default=str)[:80000]}],\r\n max_tokens=2000,\r\n )\r\n return [\r\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\r\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\r\n ]\r\n```\r\n\r\n3. **Layer 3 -- manual compact**: The `compact` tool triggers the same summarization on demand.\r\n\r\n4. The loop integrates all three:\r\n\r\n```python\r\ndef agent_loop(messages: list):\r\n while True:\r\n micro_compact(messages) # Layer 1\r\n if estimate_tokens(messages) > THRESHOLD:\r\n messages[:] = auto_compact(messages) # Layer 2\r\n response = client.messages.create(...)\r\n # ... 
tool execution ...\r\n if manual_compact:\r\n messages[:] = auto_compact(messages) # Layer 3\r\n```\r\n\r\nTranscripts preserve full history on disk. Nothing is truly lost -- just moved out of active context.\r\n\r\n## What Changed From s05\r\n\r\n| Component | Before (s05) | After (s06) |\r\n|----------------|------------------|----------------------------|\r\n| Tools | 5 | 5 (base + compact) |\r\n| Context mgmt | None | Three-layer compression |\r\n| Micro-compact | None | Old results -> placeholders|\r\n| Auto-compact | None | Token threshold trigger |\r\n| Transcripts | None | Saved to .transcripts/ |\r\n\r\n## Try It\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s06_context_compact.py\r\n```\r\n\r\n1. `Read every Python file in the agents/ directory one by one` (watch micro-compact replace old results)\r\n2. `Keep reading files until compression triggers automatically`\r\n3. `Use the compact tool to manually compress the conversation`\r\n" }, { "version": "s07", "locale": "en", "title": "s07: Task System", - "content": "# s07: Task System\n\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\n\n> *\"Break big goals into small tasks, order them, persist to disk\"* -- a file-based task graph with dependencies, laying the foundation for multi-agent collaboration.\n\n## Problem\n\ns03's TodoManager is a flat checklist in memory: no ordering, no dependencies, no status beyond done-or-not. Real goals have structure -- task B depends on task A, tasks C and D can run in parallel, task E waits for both C and D.\n\nWithout explicit relationships, the agent can't tell what's ready, what's blocked, or what can run concurrently. And because the list lives only in memory, context compression (s06) wipes it clean.\n\n## Solution\n\nPromote the checklist into a **task graph** persisted to disk. Each task is a JSON file with status, dependencies (`blockedBy`), and dependents (`blocks`). 
The graph answers three questions at any moment:\n\n- **What's ready?** -- tasks with `pending` status and empty `blockedBy`.\n- **What's blocked?** -- tasks waiting on unfinished dependencies.\n- **What's done?** -- `completed` tasks, whose completion automatically unblocks dependents.\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\"}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\n\nTask graph (DAG):\n +----------+\n +--> | task 2 | --+\n | | pending | |\n+----------+ +----------+ +--> +----------+\n| task 1 | | task 4 |\n| completed| --> +----------+ +--> | blocked |\n+----------+ | task 3 | --+ +----------+\n | pending |\n +----------+\n\nOrdering: task 1 must finish before 2 and 3\nParallelism: tasks 2 and 3 can run at the same time\nDependencies: task 4 waits for both 2 and 3\nStatus: pending -> in_progress -> completed\n```\n\nThis task graph becomes the coordination backbone for everything after s07: background execution (s08), multi-agent teams (s09+), and worktree isolation (s12) all read from and write to this same structure.\n\n## How It Works\n\n1. **TaskManager**: one JSON file per task, CRUD with dependency graph.\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. 
**Dependency resolution**: completing a task clears its ID from every other task's `blockedBy` list, automatically unblocking dependents.\n\n```python\ndef _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. **Status + dependency wiring**: `update` handles transitions and dependency edges.\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n self._save(task)\n```\n\n4. Four task tools go into the dispatch map.\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\nFrom s07 onward, the task graph is the default for multi-step work. s03's Todo remains for quick single-session checklists.\n\n## What Changed From s06\n\n| Component | Before (s06) | After (s07) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| Planning model | Flat checklist (in-memory) | Task graph with dependencies (on disk) |\n| Relationships | None | `blockedBy` + `blocks` edges |\n| Status tracking | Done or not | `pending` -> `in_progress` -> `completed` |\n| Persistence | Lost on compression | Survives compression and restarts |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. 
`Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\n" + "content": "# s07: Task System\r\n\r\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\r\n\r\n> *\"Break big goals into small tasks, order them, persist to disk\"* -- a file-based task graph with dependencies, laying the foundation for multi-agent collaboration.\r\n\r\n## Problem\r\n\r\ns03's TodoManager is a flat checklist in memory: no ordering, no dependencies, no status beyond done-or-not. Real goals have structure -- task B depends on task A, tasks C and D can run in parallel, task E waits for both C and D.\r\n\r\nWithout explicit relationships, the agent can't tell what's ready, what's blocked, or what can run concurrently. And because the list lives only in memory, context compression (s06) wipes it clean.\r\n\r\n## Solution\r\n\r\nPromote the checklist into a **task graph** persisted to disk. Each task is a JSON file with status, dependencies (`blockedBy`), and dependents (`blocks`). 
The graph answers three questions at any moment:\r\n\r\n- **What's ready?** -- tasks with `pending` status and empty `blockedBy`.\r\n- **What's blocked?** -- tasks waiting on unfinished dependencies.\r\n- **What's done?** -- `completed` tasks, whose completion automatically unblocks dependents.\r\n\r\n```\r\n.tasks/\r\n task_1.json {\"id\":1, \"status\":\"completed\"}\r\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\r\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\r\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\r\n\r\nTask graph (DAG):\r\n +----------+\r\n +--> | task 2 | --+\r\n | | pending | |\r\n+----------+ +----------+ +--> +----------+\r\n| task 1 | | task 4 |\r\n| completed| --> +----------+ +--> | blocked |\r\n+----------+ | task 3 | --+ +----------+\r\n | pending |\r\n +----------+\r\n\r\nOrdering: task 1 must finish before 2 and 3\r\nParallelism: tasks 2 and 3 can run at the same time\r\nDependencies: task 4 waits for both 2 and 3\r\nStatus: pending -> in_progress -> completed\r\n```\r\n\r\nThis task graph becomes the coordination backbone for everything after s07: background execution (s08), multi-agent teams (s09+), and worktree isolation (s12) all read from and write to this same structure.\r\n\r\n## How It Works\r\n\r\n1. **TaskManager**: one JSON file per task, CRUD with dependency graph.\r\n\r\n```python\r\nclass TaskManager:\r\n def __init__(self, tasks_dir: Path):\r\n self.dir = tasks_dir\r\n self.dir.mkdir(exist_ok=True)\r\n self._next_id = self._max_id() + 1\r\n\r\n def create(self, subject, description=\"\"):\r\n task = {\"id\": self._next_id, \"subject\": subject,\r\n \"status\": \"pending\", \"blockedBy\": [],\r\n \"blocks\": [], \"owner\": \"\"}\r\n self._save(task)\r\n self._next_id += 1\r\n return json.dumps(task, indent=2)\r\n```\r\n\r\n2. 
**Dependency resolution**: completing a task clears its ID from every other task's `blockedBy` list, automatically unblocking dependents.\r\n\r\n```python\r\ndef _clear_dependency(self, completed_id):\r\n for f in self.dir.glob(\"task_*.json\"):\r\n task = json.loads(f.read_text())\r\n if completed_id in task.get(\"blockedBy\", []):\r\n task[\"blockedBy\"].remove(completed_id)\r\n self._save(task)\r\n```\r\n\r\n3. **Status + dependency wiring**: `update` handles transitions and dependency edges.\r\n\r\n```python\r\ndef update(self, task_id, status=None,\r\n add_blocked_by=None, add_blocks=None):\r\n task = self._load(task_id)\r\n if status:\r\n task[\"status\"] = status\r\n if status == \"completed\":\r\n self._clear_dependency(task_id)\r\n self._save(task)\r\n```\r\n\r\n4. Four task tools go into the dispatch map.\r\n\r\n```python\r\nTOOL_HANDLERS = {\r\n # ...base tools...\r\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\r\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\r\n \"task_list\": lambda **kw: TASKS.list_all(),\r\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\r\n}\r\n```\r\n\r\nFrom s07 onward, the task graph is the default for multi-step work. s03's Todo remains for quick single-session checklists.\r\n\r\n## What Changed From s06\r\n\r\n| Component | Before (s06) | After (s07) |\r\n|---|---|---|\r\n| Tools | 5 | 8 (`task_create/update/list/get`) |\r\n| Planning model | Flat checklist (in-memory) | Task graph with dependencies (on disk) |\r\n| Relationships | None | `blockedBy` + `blocks` edges |\r\n| Status tracking | Done or not | `pending` -> `in_progress` -> `completed` |\r\n| Persistence | Lost on compression | Survives compression and restarts |\r\n\r\n## Try It\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s07_task_system.py\r\n```\r\n\r\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\r\n2. 
`List all tasks and show the dependency graph`\r\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\r\n4. `Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\r\n" }, { "version": "s08", "locale": "en", "title": "s08: Background Tasks", - "content": "# s08: Background Tasks\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\n\n> *\"Run slow operations in the background; the agent keeps thinking\"* -- daemon threads run commands, inject notifications on completion.\n\n## Problem\n\nSome commands take minutes: `npm install`, `pytest`, `docker build`. With a blocking loop, the model sits idle waiting. If the user asks \"install dependencies and while that runs, create the config file,\" the agent does them sequentially, not in parallel.\n\n## Solution\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | subprocess runs |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- results injected before next LLM call --+\n```\n\n## How It Works\n\n1. BackgroundManager tracks tasks with a thread-safe notification queue.\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()` starts a daemon thread and returns immediately.\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. 
When the subprocess finishes, its result goes into the notification queue.\n\n```python\ndef _execute(self, task_id, command):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id, \"result\": output[:500]})\n```\n\n4. The agent loop drains notifications before each LLM call.\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\n messages.append({\"role\": \"user\",\n \"content\": f\"\\n{notif_text}\\n\"\n f\"\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\nThe loop stays single-threaded. Only subprocess I/O is parallelized.\n\n## What Changed From s07\n\n| Component | Before (s07) | After (s08) |\n|----------------|------------------|----------------------------|\n| Tools | 8 | 6 (base + background_run + check)|\n| Execution | Blocking only | Blocking + background threads|\n| Notification | None | Queue drained per loop |\n| Concurrency | None | Daemon threads |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. 
`Run pytest in the background and keep working on other things`\n" + "content": "# s08: Background Tasks\r\n\r\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\r\n\r\n> *\"Run slow operations in the background; the agent keeps thinking\"* -- daemon threads run commands, inject notifications on completion.\r\n\r\n## Problem\r\n\r\nSome commands take minutes: `npm install`, `pytest`, `docker build`. With a blocking loop, the model sits idle waiting. If the user asks \"install dependencies and while that runs, create the config file,\" the agent does them sequentially, not in parallel.\r\n\r\n## Solution\r\n\r\n```\r\nMain thread Background thread\r\n+-----------------+ +-----------------+\r\n| agent loop | | subprocess runs |\r\n| ... | | ... |\r\n| [LLM call] <---+------- | enqueue(result) |\r\n| ^drain queue | +-----------------+\r\n+-----------------+\r\n\r\nTimeline:\r\nAgent --[spawn A]--[spawn B]--[other work]----\r\n | |\r\n v v\r\n [A runs] [B runs] (parallel)\r\n | |\r\n +-- results injected before next LLM call --+\r\n```\r\n\r\n## How It Works\r\n\r\n1. BackgroundManager tracks tasks with a thread-safe notification queue.\r\n\r\n```python\r\nclass BackgroundManager:\r\n def __init__(self):\r\n self.tasks = {}\r\n self._notification_queue = []\r\n self._lock = threading.Lock()\r\n```\r\n\r\n2. `run()` starts a daemon thread and returns immediately.\r\n\r\n```python\r\ndef run(self, command: str) -> str:\r\n task_id = str(uuid.uuid4())[:8]\r\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\r\n thread = threading.Thread(\r\n target=self._execute, args=(task_id, command), daemon=True)\r\n thread.start()\r\n return f\"Background task {task_id} started\"\r\n```\r\n\r\n3. 
When the subprocess finishes, its result goes into the notification queue.\r\n\r\n```python\r\ndef _execute(self, task_id, command):\r\n try:\r\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\r\n capture_output=True, text=True, timeout=300)\r\n output = (r.stdout + r.stderr).strip()[:50000]\r\n except subprocess.TimeoutExpired:\r\n output = \"Error: Timeout (300s)\"\r\n with self._lock:\r\n self._notification_queue.append({\r\n \"task_id\": task_id, \"result\": output[:500]})\r\n```\r\n\r\n4. The agent loop drains notifications before each LLM call.\r\n\r\n```python\r\ndef agent_loop(messages: list):\r\n while True:\r\n notifs = BG.drain_notifications()\r\n if notifs:\r\n notif_text = \"\\n\".join(\r\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\r\n messages.append({\"role\": \"user\",\r\n \"content\": f\"\\n{notif_text}\\n\"\r\n f\"\"})\r\n messages.append({\"role\": \"assistant\",\r\n \"content\": \"Noted background results.\"})\r\n response = client.messages.create(...)\r\n```\r\n\r\nThe loop stays single-threaded. Only subprocess I/O is parallelized.\r\n\r\n## What Changed From s07\r\n\r\n| Component | Before (s07) | After (s08) |\r\n|----------------|------------------|----------------------------|\r\n| Tools | 8 | 6 (base + background_run + check)|\r\n| Execution | Blocking only | Blocking + background threads|\r\n| Notification | None | Queue drained per loop |\r\n| Concurrency | None | Daemon threads |\r\n\r\n## Try It\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s08_background_tasks.py\r\n```\r\n\r\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\r\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\r\n3. 
`Run pytest in the background and keep working on other things`\r\n" }, { "version": "s09", "locale": "en", "title": "s09: Agent Teams", - "content": "# s09: Agent Teams\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\n\n> *\"When the task is too big for one, delegate to teammates\"* -- persistent teammates + async mailboxes.\n\n## Problem\n\nSubagents (s04) are disposable: spawn, work, return summary, die. No identity, no memory between invocations. Background tasks (s08) run shell commands but can't make LLM-guided decisions.\n\nReal teamwork needs: (1) persistent agents that outlive a single prompt, (2) identity and lifecycle management, (3) a communication channel between agents.\n\n## Solution\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n```\n\n## How It Works\n\n1. TeammateManager maintains config.json with the team roster.\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. 
`spawn()` creates a teammate and starts its agent loop in a thread.\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. MessageBus: append-only JSONL inboxes. `send()` appends a JSON line; `read_inbox()` reads all and drains.\n\n```python\nclass MessageBus:\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. 
Each teammate checks its inbox before every LLM call, injecting received messages into context.\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n```\n\n## What Changed From s08\n\n| Component | Before (s08) | After (s09) |\n|----------------|------------------|----------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| Agents | Single | Lead + N teammates |\n| Persistence | None | config.json + JSONL inboxes|\n| Threads | Background cmds | Full agent loops per thread|\n| Lifecycle | Fire-and-forget | idle -> working -> idle |\n| Communication | None | message + broadcast |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. Type `/team` to see the team roster with statuses\n5. Type `/inbox` to manually check the lead's inbox\n" + "content": "# s09: Agent Teams\r\n\r\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\r\n\r\n> *\"When the task is too big for one, delegate to teammates\"* -- persistent teammates + async mailboxes.\r\n\r\n## Problem\r\n\r\nSubagents (s04) are disposable: spawn, work, return summary, die. No identity, no memory between invocations. 
Background tasks (s08) run shell commands but can't make LLM-guided decisions.\r\n\r\nReal teamwork needs: (1) persistent agents that outlive a single prompt, (2) identity and lifecycle management, (3) a communication channel between agents.\r\n\r\n## Solution\r\n\r\n```\r\nTeammate lifecycle:\r\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\r\n\r\nCommunication:\r\n .team/\r\n config.json <- team roster + statuses\r\n inbox/\r\n alice.jsonl <- append-only, drain-on-read\r\n bob.jsonl\r\n lead.jsonl\r\n\r\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\r\n | alice | -----------------------------> | bob |\r\n | loop | bob.jsonl << {json_line} | loop |\r\n +--------+ +--------+\r\n ^ |\r\n | BUS.read_inbox(\"alice\") |\r\n +---- alice.jsonl -> read + drain ---------+\r\n```\r\n\r\n## How It Works\r\n\r\n1. TeammateManager maintains config.json with the team roster.\r\n\r\n```python\r\nclass TeammateManager:\r\n def __init__(self, team_dir: Path):\r\n self.dir = team_dir\r\n self.dir.mkdir(exist_ok=True)\r\n self.config_path = self.dir / \"config.json\"\r\n self.config = self._load_config()\r\n self.threads = {}\r\n```\r\n\r\n2. `spawn()` creates a teammate and starts its agent loop in a thread.\r\n\r\n```python\r\ndef spawn(self, name: str, role: str, prompt: str) -> str:\r\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\r\n self.config[\"members\"].append(member)\r\n self._save_config()\r\n thread = threading.Thread(\r\n target=self._teammate_loop,\r\n args=(name, role, prompt), daemon=True)\r\n thread.start()\r\n return f\"Spawned teammate '{name}' (role: {role})\"\r\n```\r\n\r\n3. MessageBus: append-only JSONL inboxes. 
`send()` appends a JSON line; `read_inbox()` reads all and drains.\r\n\r\n```python\r\nclass MessageBus:\r\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\r\n msg = {\"type\": msg_type, \"from\": sender,\r\n \"content\": content, \"timestamp\": time.time()}\r\n if extra:\r\n msg.update(extra)\r\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\r\n f.write(json.dumps(msg) + \"\\n\")\r\n\r\n def read_inbox(self, name):\r\n path = self.dir / f\"{name}.jsonl\"\r\n if not path.exists(): return \"[]\"\r\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\r\n path.write_text(\"\") # drain\r\n return json.dumps(msgs, indent=2)\r\n```\r\n\r\n4. Each teammate checks its inbox before every LLM call, injecting received messages into context.\r\n\r\n```python\r\ndef _teammate_loop(self, name, role, prompt):\r\n messages = [{\"role\": \"user\", \"content\": prompt}]\r\n for _ in range(50):\r\n inbox = BUS.read_inbox(name)\r\n if inbox != \"[]\":\r\n messages.append({\"role\": \"user\",\r\n \"content\": f\"{inbox}\"})\r\n messages.append({\"role\": \"assistant\",\r\n \"content\": \"Noted inbox messages.\"})\r\n response = client.messages.create(...)\r\n if response.stop_reason != \"tool_use\":\r\n break\r\n # execute tools, append results...\r\n self._find_member(name)[\"status\"] = \"idle\"\r\n```\r\n\r\n## What Changed From s08\r\n\r\n| Component | Before (s08) | After (s09) |\r\n|----------------|------------------|----------------------------|\r\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\r\n| Agents | Single | Lead + N teammates |\r\n| Persistence | None | config.json + JSONL inboxes|\r\n| Threads | Background cmds | Full agent loops per thread|\r\n| Lifecycle | Fire-and-forget | idle -> working -> idle |\r\n| Communication | None | message + broadcast |\r\n\r\n## Try It\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s09_agent_teams.py\r\n```\r\n\r\n1. `Spawn alice (coder) and bob (tester). 
Have alice send bob a message.`\r\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\r\n3. `Check the lead inbox for any messages`\r\n4. Type `/team` to see the team roster with statuses\r\n5. Type `/inbox` to manually check the lead's inbox\r\n" }, { "version": "s10", "locale": "en", "title": "s10: Team Protocols", - "content": "# s10: Team Protocols\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\n\n> *\"Teammates need shared communication rules\"* -- one request-response pattern drives all negotiation.\n\n## Problem\n\nIn s09, teammates work and communicate but lack structured coordination:\n\n**Shutdown**: Killing a thread leaves files half-written and config.json stale. You need a handshake: the lead requests, the teammate approves (finish and exit) or rejects (keep working).\n\n**Plan approval**: When the lead says \"refactor the auth module,\" the teammate starts immediately. For high-risk changes, the lead should review the plan first.\n\nBoth share the same structure: one side sends a request with a unique ID, the other responds referencing that ID.\n\n## Solution\n\n```\nShutdown Protocol Plan Approval Protocol\n================== ======================\n\nLead Teammate Teammate Lead\n | | | |\n |--shutdown_req-->| |--plan_req------>|\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\n | | | |\n |<--shutdown_resp-| |<--plan_resp-----|\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\n | approve:true} | | approve:true} |\n\nShared FSM:\n [pending] --approve--> [approved]\n [pending] --reject---> [rejected]\n\nTrackers:\n shutdown_requests = {req_id: {target, status}}\n plan_requests = {req_id: {from, plan, status}}\n```\n\n## How It Works\n\n1. 
The lead initiates shutdown by generating a request_id and sending through the inbox.\n\n```python\nshutdown_requests = {}\n\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id})\n return f\"Shutdown request {req_id} sent (status: pending)\"\n```\n\n2. The teammate receives the request and responds with approve/reject.\n\n```python\nif tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": approve})\n```\n\n3. Plan approval follows the identical pattern. The teammate submits a plan (generating a request_id), the lead reviews (referencing the same request_id).\n\n```python\nplan_requests = {}\n\ndef handle_plan_review(request_id, approve, feedback=\"\"):\n req = plan_requests[request_id]\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", req[\"from\"], feedback,\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n```\n\nOne FSM, two applications. The same `pending -> approved | rejected` state machine handles any request-response protocol.\n\n## What Changed From s09\n\n| Component | Before (s09) | After (s10) |\n|----------------|------------------|------------------------------|\n| Tools | 9 | 12 (+shutdown_req/resp +plan)|\n| Shutdown | Natural exit only| Request-response handshake |\n| Plan gating | None | Submit/review with approval |\n| Correlation | None | request_id per request |\n| FSM | None | pending -> approved/rejected |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s10_team_protocols.py\n```\n\n1. 
`Spawn alice as a coder. Then request her shutdown.`\n2. `List teammates to see alice's status after shutdown approval`\n3. `Spawn bob with a risky refactoring task. Review and reject his plan.`\n4. `Spawn charlie, have him submit a plan, then approve it.`\n5. Type `/team` to monitor statuses\n" + "content": "# s10: Team Protocols\r\n\r\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\r\n\r\n> *\"Teammates need shared communication rules\"* -- one request-response pattern drives all negotiation.\r\n\r\n## Problem\r\n\r\nIn s09, teammates work and communicate but lack structured coordination:\r\n\r\n**Shutdown**: Killing a thread leaves files half-written and config.json stale. You need a handshake: the lead requests, the teammate approves (finish and exit) or rejects (keep working).\r\n\r\n**Plan approval**: When the lead says \"refactor the auth module,\" the teammate starts immediately. For high-risk changes, the lead should review the plan first.\r\n\r\nBoth share the same structure: one side sends a request with a unique ID, the other responds referencing that ID.\r\n\r\n## Solution\r\n\r\n```\r\nShutdown Protocol Plan Approval Protocol\r\n================== ======================\r\n\r\nLead Teammate Teammate Lead\r\n | | | |\r\n |--shutdown_req-->| |--plan_req------>|\r\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\r\n | | | |\r\n |<--shutdown_resp-| |<--plan_resp-----|\r\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\r\n | approve:true} | | approve:true} |\r\n\r\nShared FSM:\r\n [pending] --approve--> [approved]\r\n [pending] --reject---> [rejected]\r\n\r\nTrackers:\r\n shutdown_requests = {req_id: {target, status}}\r\n plan_requests = {req_id: {from, plan, status}}\r\n```\r\n\r\n## How It Works\r\n\r\n1. 
The lead initiates shutdown by generating a request_id and sending through the inbox.\r\n\r\n```python\r\nshutdown_requests = {}\r\n\r\ndef handle_shutdown_request(teammate: str) -> str:\r\n req_id = str(uuid.uuid4())[:8]\r\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\r\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\r\n \"shutdown_request\", {\"request_id\": req_id})\r\n return f\"Shutdown request {req_id} sent (status: pending)\"\r\n```\r\n\r\n2. The teammate receives the request and responds with approve/reject.\r\n\r\n```python\r\nif tool_name == \"shutdown_response\":\r\n req_id = args[\"request_id\"]\r\n approve = args[\"approve\"]\r\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\r\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\r\n \"shutdown_response\",\r\n {\"request_id\": req_id, \"approve\": approve})\r\n```\r\n\r\n3. Plan approval follows the identical pattern. The teammate submits a plan (generating a request_id), the lead reviews (referencing the same request_id).\r\n\r\n```python\r\nplan_requests = {}\r\n\r\ndef handle_plan_review(request_id, approve, feedback=\"\"):\r\n req = plan_requests[request_id]\r\n req[\"status\"] = \"approved\" if approve else \"rejected\"\r\n BUS.send(\"lead\", req[\"from\"], feedback,\r\n \"plan_approval_response\",\r\n {\"request_id\": request_id, \"approve\": approve})\r\n```\r\n\r\nOne FSM, two applications. 
The same `pending -> approved | rejected` state machine handles any request-response protocol.\r\n\r\n## What Changed From s09\r\n\r\n| Component | Before (s09) | After (s10) |\r\n|----------------|------------------|------------------------------|\r\n| Tools | 9 | 12 (+shutdown_req/resp +plan)|\r\n| Shutdown | Natural exit only| Request-response handshake |\r\n| Plan gating | None | Submit/review with approval |\r\n| Correlation | None | request_id per request |\r\n| FSM | None | pending -> approved/rejected |\r\n\r\n## Try It\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s10_team_protocols.py\r\n```\r\n\r\n1. `Spawn alice as a coder. Then request her shutdown.`\r\n2. `List teammates to see alice's status after shutdown approval`\r\n3. `Spawn bob with a risky refactoring task. Review and reject his plan.`\r\n4. `Spawn charlie, have him submit a plan, then approve it.`\r\n5. Type `/team` to monitor statuses\r\n" }, { "version": "s11", "locale": "en", "title": "s11: Autonomous Agents", - "content": "# s11: Autonomous Agents\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\n\n> *\"Teammates scan the board and claim tasks themselves\"* -- no need for the lead to assign each one.\n\n## Problem\n\nIn s09-s10, teammates only work when explicitly told to. The lead must spawn each one with a specific prompt. 10 unclaimed tasks on the board? The lead assigns each one manually. Doesn't scale.\n\nTrue autonomy: teammates scan the task board themselves, claim unclaimed tasks, work on them, then look for more.\n\nOne subtlety: after context compression (s06), the agent might forget who it is. 
Identity re-injection fixes this.\n\n## Solution\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n```\n\n## How It Works\n\n1. The teammate loop has two phases: WORK and IDLE. When the LLM stops calling tools (or calls `idle`), the teammate enters IDLE.\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. The idle phase polls inbox and task board in a loop.\n\n```python\ndef _idle_poll(self, name, messages):\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n return True\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{unclaimed[0]['id']}: \"\n f\"{unclaimed[0]['subject']}\"})\n return True\n return False # timeout -> shutdown\n```\n\n3. 
Task board scanning: find pending, unowned, unblocked tasks.\n\n```python\ndef scan_unclaimed_tasks() -> list:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n```\n\n4. Identity re-injection: when context is too short (compression happened), insert an identity block.\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, \"\n f\"team: {team_name}. Continue your work.\"})\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n## What Changed From s10\n\n| Component | Before (s10) | After (s11) |\n|----------------|------------------|----------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| Autonomy | Lead-directed | Self-organizing |\n| Idle phase | None | Poll inbox + task board |\n| Task claiming | Manual only | Auto-claim unclaimed tasks |\n| Identity | System prompt | + re-injection after compress|\n| Timeout | None | 60s idle -> auto shutdown |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. Type `/tasks` to see the task board with owners\n5. Type `/team` to monitor who is working vs idle\n" + "content": "# s11: Autonomous Agents\r\n\r\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\r\n\r\n> *\"Teammates scan the board and claim tasks themselves\"* -- no need for the lead to assign each one.\r\n\r\n## Problem\r\n\r\nIn s09-s10, teammates only work when explicitly told to. 
The lead must spawn each one with a specific prompt. 10 unclaimed tasks on the board? The lead assigns each one manually. Doesn't scale.\r\n\r\nTrue autonomy: teammates scan the task board themselves, claim unclaimed tasks, work on them, then look for more.\r\n\r\nOne subtlety: after context compression (s06), the agent might forget who it is. Identity re-injection fixes this.\r\n\r\n## Solution\r\n\r\n```\r\nTeammate lifecycle with idle cycle:\r\n\r\n+-------+\r\n| spawn |\r\n+---+---+\r\n |\r\n v\r\n+-------+ tool_use +-------+\r\n| WORK | <------------- | LLM |\r\n+---+---+ +-------+\r\n |\r\n | stop_reason != tool_use (or idle tool called)\r\n v\r\n+--------+\r\n| IDLE | poll every 5s for up to 60s\r\n+---+----+\r\n |\r\n +---> check inbox --> message? ----------> WORK\r\n |\r\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\r\n |\r\n +---> 60s timeout ----------------------> SHUTDOWN\r\n\r\nIdentity re-injection after compression:\r\n if len(messages) <= 3:\r\n messages.insert(0, identity_block)\r\n```\r\n\r\n## How It Works\r\n\r\n1. The teammate loop has two phases: WORK and IDLE. When the LLM stops calling tools (or calls `idle`), the teammate enters IDLE.\r\n\r\n```python\r\ndef _loop(self, name, role, prompt):\r\n while True:\r\n # -- WORK PHASE --\r\n messages = [{\"role\": \"user\", \"content\": prompt}]\r\n for _ in range(50):\r\n response = client.messages.create(...)\r\n if response.stop_reason != \"tool_use\":\r\n break\r\n # execute tools...\r\n if idle_requested:\r\n break\r\n\r\n # -- IDLE PHASE --\r\n self._set_status(name, \"idle\")\r\n resume = self._idle_poll(name, messages)\r\n if not resume:\r\n self._set_status(name, \"shutdown\")\r\n return\r\n self._set_status(name, \"working\")\r\n```\r\n\r\n2. 
The idle phase polls inbox and task board in a loop.\r\n\r\n```python\r\ndef _idle_poll(self, name, messages):\r\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12\r\n time.sleep(POLL_INTERVAL)\r\n inbox = BUS.read_inbox(name)\r\n if inbox:\r\n messages.append({\"role\": \"user\",\r\n \"content\": f\"{inbox}\"})\r\n return True\r\n unclaimed = scan_unclaimed_tasks()\r\n if unclaimed:\r\n claim_task(unclaimed[0][\"id\"], name)\r\n messages.append({\"role\": \"user\",\r\n \"content\": f\"Task #{unclaimed[0]['id']}: \"\r\n f\"{unclaimed[0]['subject']}\"})\r\n return True\r\n return False # timeout -> shutdown\r\n```\r\n\r\n3. Task board scanning: find pending, unowned, unblocked tasks.\r\n\r\n```python\r\ndef scan_unclaimed_tasks() -> list:\r\n unclaimed = []\r\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\r\n task = json.loads(f.read_text())\r\n if (task.get(\"status\") == \"pending\"\r\n and not task.get(\"owner\")\r\n and not task.get(\"blockedBy\")):\r\n unclaimed.append(task)\r\n return unclaimed\r\n```\r\n\r\n4. Identity re-injection: when context is too short (compression happened), insert an identity block.\r\n\r\n```python\r\nif len(messages) <= 3:\r\n messages.insert(0, {\"role\": \"user\",\r\n \"content\": f\"You are '{name}', role: {role}, \"\r\n f\"team: {team_name}. Continue your work.\"})\r\n messages.insert(1, {\"role\": \"assistant\",\r\n \"content\": f\"I am {name}. 
Continuing.\"})\r\n```\r\n\r\n## What Changed From s10\r\n\r\n| Component | Before (s10) | After (s11) |\r\n|----------------|------------------|----------------------------|\r\n| Tools | 12 | 14 (+idle, +claim_task) |\r\n| Autonomy | Lead-directed | Self-organizing |\r\n| Idle phase | None | Poll inbox + task board |\r\n| Task claiming | Manual only | Auto-claim unclaimed tasks |\r\n| Identity | System prompt | + re-injection after compress|\r\n| Timeout | None | 60s idle -> auto shutdown |\r\n\r\n## Try It\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s11_autonomous_agents.py\r\n```\r\n\r\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\r\n2. `Spawn a coder teammate and let it find work from the task board itself`\r\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\r\n4. Type `/tasks` to see the task board with owners\r\n5. Type `/team` to monitor who is working vs idle\r\n" }, { "version": "s12", "locale": "en", "title": "s12: Worktree + Task Isolation", - "content": "# s12: Worktree + Task Isolation\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\n\n> *\"Each works in its own directory, no interference\"* -- tasks manage goals, worktrees manage directories, bound by ID.\n\n## Problem\n\nBy s11, agents can claim and complete tasks autonomously. But every task runs in one shared directory. Two agents refactoring different modules at the same time will collide: agent A edits `config.py`, agent B edits `config.py`, unstaged changes mix, and neither can roll back cleanly.\n\nThe task board tracks *what to do* but has no opinion about *where to do it*. The fix: give each task its own git worktree directory. Tasks manage goals, worktrees manage execution context. 
Bind them by task ID.\n\n## Solution\n\n```\nControl plane (.tasks/) Execution plane (.worktrees/)\n+------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress <------> branch: wt/auth-refactor\n| worktree: \"auth-refactor\" | task_id: 1 |\n+------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending <------> branch: wt/ui-login\n| worktree: \"ui-login\" | task_id: 2 |\n+------------------+ +------------------------+\n |\n index.json (worktree registry)\n events.jsonl (lifecycle log)\n\nState machines:\n Task: pending -> in_progress -> completed\n Worktree: absent -> active -> removed | kept\n```\n\n## How It Works\n\n1. **Create a task.** Persist the goal first.\n\n```python\nTASKS.create(\"Implement auth refactor\")\n# -> .tasks/task_1.json status=pending worktree=\"\"\n```\n\n2. **Create a worktree and bind to the task.** Passing `task_id` auto-advances the task to `in_progress`.\n\n```python\nWORKTREES.create(\"auth-refactor\", task_id=1)\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\n```\n\nThe binding writes state to both sides:\n\n```python\ndef bind_worktree(self, task_id, worktree):\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n self._save(task)\n```\n\n3. **Run commands in the worktree.** `cwd` points to the isolated directory.\n\n```python\nsubprocess.run(command, shell=True, cwd=worktree_path,\n capture_output=True, text=True, timeout=300)\n```\n\n4. **Close out.** Two choices:\n - `worktree_keep(name)` -- preserve the directory for later.\n - `worktree_remove(name, complete_task=True)` -- remove directory, complete the bound task, emit event. 
One call handles teardown + completion.\n\n```python\ndef remove(self, name, force=False, complete_task=False):\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n5. **Event stream.** Every lifecycle step emits to `.worktrees/events.jsonl`:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 1, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\nEvents emitted: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`.\n\nAfter a crash, state reconstructs from `.tasks/` + `.worktrees/index.json` on disk. Conversation memory is volatile; file state is durable.\n\n## What Changed From s11\n\n| Component | Before (s11) | After (s12) |\n|--------------------|----------------------------|----------------------------------------------|\n| Coordination | Task board (owner/status) | Task board + explicit worktree binding |\n| Execution scope | Shared directory | Task-scoped isolated directory |\n| Recoverability | Task status only | Task status + worktree index |\n| Teardown | Task completion | Task completion + explicit keep/remove |\n| Lifecycle visibility | Implicit in logs | Explicit events in `.worktrees/events.jsonl` |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. `Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect events.`\n5. 
`Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n" + "content": "# s12: Worktree + Task Isolation\r\n\r\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\r\n\r\n> *\"Each works in its own directory, no interference\"* -- tasks manage goals, worktrees manage directories, bound by ID.\r\n\r\n## Problem\r\n\r\nBy s11, agents can claim and complete tasks autonomously. But every task runs in one shared directory. Two agents refactoring different modules at the same time will collide: agent A edits `config.py`, agent B edits `config.py`, unstaged changes mix, and neither can roll back cleanly.\r\n\r\nThe task board tracks *what to do* but has no opinion about *where to do it*. The fix: give each task its own git worktree directory. Tasks manage goals, worktrees manage execution context. Bind them by task ID.\r\n\r\n## Solution\r\n\r\n```\r\nControl plane (.tasks/) Execution plane (.worktrees/)\r\n+------------------+ +------------------------+\r\n| task_1.json | | auth-refactor/ |\r\n| status: in_progress <------> branch: wt/auth-refactor\r\n| worktree: \"auth-refactor\" | task_id: 1 |\r\n+------------------+ +------------------------+\r\n| task_2.json | | ui-login/ |\r\n| status: pending <------> branch: wt/ui-login\r\n| worktree: \"ui-login\" | task_id: 2 |\r\n+------------------+ +------------------------+\r\n |\r\n index.json (worktree registry)\r\n events.jsonl (lifecycle log)\r\n\r\nState machines:\r\n Task: pending -> in_progress -> completed\r\n Worktree: absent -> active -> removed | kept\r\n```\r\n\r\n## How It Works\r\n\r\n1. **Create a task.** Persist the goal first.\r\n\r\n```python\r\nTASKS.create(\"Implement auth refactor\")\r\n# -> .tasks/task_1.json status=pending worktree=\"\"\r\n```\r\n\r\n2. 
**Create a worktree and bind to the task.** Passing `task_id` auto-advances the task to `in_progress`.\r\n\r\n```python\r\nWORKTREES.create(\"auth-refactor\", task_id=1)\r\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\r\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\r\n```\r\n\r\nThe binding writes state to both sides:\r\n\r\n```python\r\ndef bind_worktree(self, task_id, worktree):\r\n task = self._load(task_id)\r\n task[\"worktree\"] = worktree\r\n if task[\"status\"] == \"pending\":\r\n task[\"status\"] = \"in_progress\"\r\n self._save(task)\r\n```\r\n\r\n3. **Run commands in the worktree.** `cwd` points to the isolated directory.\r\n\r\n```python\r\nsubprocess.run(command, shell=True, cwd=worktree_path,\r\n capture_output=True, text=True, timeout=300)\r\n```\r\n\r\n4. **Close out.** Two choices:\r\n - `worktree_keep(name)` -- preserve the directory for later.\r\n - `worktree_remove(name, complete_task=True)` -- remove directory, complete the bound task, emit event. One call handles teardown + completion.\r\n\r\n```python\r\ndef remove(self, name, force=False, complete_task=False):\r\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\r\n if complete_task and wt.get(\"task_id\") is not None:\r\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\r\n self.tasks.unbind_worktree(wt[\"task_id\"])\r\n self.events.emit(\"task.completed\", ...)\r\n```\r\n\r\n5. **Event stream.** Every lifecycle step emits to `.worktrees/events.jsonl`:\r\n\r\n```json\r\n{\r\n \"event\": \"worktree.remove.after\",\r\n \"task\": {\"id\": 1, \"status\": \"completed\"},\r\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\r\n \"ts\": 1730000000\r\n}\r\n```\r\n\r\nEvents emitted: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`.\r\n\r\nAfter a crash, state reconstructs from `.tasks/` + `.worktrees/index.json` on disk. 
Conversation memory is volatile; file state is durable.\r\n\r\n## What Changed From s11\r\n\r\n| Component | Before (s11) | After (s12) |\r\n|--------------------|----------------------------|----------------------------------------------|\r\n| Coordination | Task board (owner/status) | Task board + explicit worktree binding |\r\n| Execution scope | Shared directory | Task-scoped isolated directory |\r\n| Recoverability | Task status only | Task status + worktree index |\r\n| Teardown | Task completion | Task completion + explicit keep/remove |\r\n| Lifecycle visibility | Implicit in logs | Explicit events in `.worktrees/events.jsonl` |\r\n\r\n## Try It\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s12_worktree_task_isolation.py\r\n```\r\n\r\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\r\n2. `Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\r\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\r\n4. `Keep worktree \"ui-login\", then list worktrees and inspect events.`\r\n5. `Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\r\n" }, { "version": "s01", "locale": "zh", "title": "s01: The Agent Loop (智能体循环)", - "content": "# s01: The Agent Loop (智能体循环)\n\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"One loop & Bash is all you need\"* -- 一个工具 + 一个循环 = 一个智能体。\n\n## 问题\n\n语言模型能推理代码, 但碰不到真实世界 -- 不能读文件、跑测试、看报错。没有循环, 每次工具调用你都得手动把结果粘回去。你自己就是那个循环。\n\n## 解决方案\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n (loop until stop_reason != \"tool_use\")\n```\n\n一个退出条件控制整个流程。循环持续运行, 直到模型不再调用工具。\n\n## 工作原理\n\n1. 用户 prompt 作为第一条消息。\n\n```python\nmessages.append({\"role\": \"user\", \"content\": query})\n```\n\n2. 
将消息和工具定义一起发给 LLM。\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. 追加助手响应。检查 `stop_reason` -- 如果模型没有调用工具, 结束。\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n4. 执行每个工具调用, 收集结果, 作为 user 消息追加。回到第 2 步。\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\n组装为一个完整函数:\n\n```python\ndef agent_loop(query):\n messages = [{\"role\": \"user\", \"content\": query}]\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\n不到 30 行, 这就是整个智能体。后面 11 个章节都在这个循环上叠加机制 -- 循环本身始终不变。\n\n## 变更内容\n\n| 组件 | 之前 | 之后 |\n|---------------|------------|--------------------------------|\n| Agent loop | (无) | `while True` + stop_reason |\n| Tools | (无) | `bash` (单一工具) |\n| Messages | (无) | 累积式消息列表 |\n| Control flow | (无) | `stop_reason != \"tool_use\"` |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. 
`Create a directory called test_output and write 3 files in it`\n" + "content": "# s01: The Agent Loop (智能体循环)\r\n\r\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\r\n\r\n> *\"One loop & Bash is all you need\"* -- 一个工具 + 一个循环 = 一个智能体。\r\n\r\n## 问题\r\n\r\n语言模型能推理代码, 但碰不到真实世界 -- 不能读文件、跑测试、看报错。没有循环, 每次工具调用你都得手动把结果粘回去。你自己就是那个循环。\r\n\r\n## 解决方案\r\n\r\n```\r\n+--------+ +-------+ +---------+\r\n| User | ---> | LLM | ---> | Tool |\r\n| prompt | | | | execute |\r\n+--------+ +---+---+ +----+----+\r\n ^ |\r\n | tool_result |\r\n +----------------+\r\n (loop until stop_reason != \"tool_use\")\r\n```\r\n\r\n一个退出条件控制整个流程。循环持续运行, 直到模型不再调用工具。\r\n\r\n## 工作原理\r\n\r\n1. 用户 prompt 作为第一条消息。\r\n\r\n```python\r\nmessages.append({\"role\": \"user\", \"content\": query})\r\n```\r\n\r\n2. 将消息和工具定义一起发给 LLM。\r\n\r\n```python\r\nresponse = client.messages.create(\r\n model=MODEL, system=SYSTEM, messages=messages,\r\n tools=TOOLS, max_tokens=8000,\r\n)\r\n```\r\n\r\n3. 追加助手响应。检查 `stop_reason` -- 如果模型没有调用工具, 结束。\r\n\r\n```python\r\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\r\nif response.stop_reason != \"tool_use\":\r\n return\r\n```\r\n\r\n4. 
执行每个工具调用, 收集结果, 作为 user 消息追加。回到第 2 步。\r\n\r\n```python\r\nresults = []\r\nfor block in response.content:\r\n if block.type == \"tool_use\":\r\n output = run_bash(block.input[\"command\"])\r\n results.append({\r\n \"type\": \"tool_result\",\r\n \"tool_use_id\": block.id,\r\n \"content\": output,\r\n })\r\nmessages.append({\"role\": \"user\", \"content\": results})\r\n```\r\n\r\n组装为一个完整函数:\r\n\r\n```python\r\ndef agent_loop(query):\r\n messages = [{\"role\": \"user\", \"content\": query}]\r\n while True:\r\n response = client.messages.create(\r\n model=MODEL, system=SYSTEM, messages=messages,\r\n tools=TOOLS, max_tokens=8000,\r\n )\r\n messages.append({\"role\": \"assistant\", \"content\": response.content})\r\n\r\n if response.stop_reason != \"tool_use\":\r\n return\r\n\r\n results = []\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n output = run_bash(block.input[\"command\"])\r\n results.append({\r\n \"type\": \"tool_result\",\r\n \"tool_use_id\": block.id,\r\n \"content\": output,\r\n })\r\n messages.append({\"role\": \"user\", \"content\": results})\r\n```\r\n\r\n不到 30 行, 这就是整个智能体。后面 11 个章节都在这个循环上叠加机制 -- 循环本身始终不变。\r\n\r\n## 变更内容\r\n\r\n| 组件 | 之前 | 之后 |\r\n|---------------|------------|--------------------------------|\r\n| Agent loop | (无) | `while True` + stop_reason |\r\n| Tools | (无) | `bash` (单一工具) |\r\n| Messages | (无) | 累积式消息列表 |\r\n| Control flow | (无) | `stop_reason != \"tool_use\"` |\r\n\r\n## 试一试\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s01_agent_loop.py\r\n```\r\n\r\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\r\n\r\n1. `Create a file called hello.py that prints \"Hello, World!\"`\r\n2. `List all Python files in this directory`\r\n3. `What is the current git branch?`\r\n4. 
`Create a directory called test_output and write 3 files in it`\r\n" }, { "version": "s02", "locale": "zh", "title": "s02: Tool Use (工具使用)", - "content": "# s02: Tool Use (工具使用)\n\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"加一个工具, 只加一个 handler\"* -- 循环不用动, 新工具注册进 dispatch map 就行。\n\n## 问题\n\n只有 `bash` 时, 所有操作都走 shell。`cat` 截断不可预测, `sed` 遇到特殊字符就崩, 每次 bash 调用都是不受约束的安全面。专用工具 (`read_file`, `write_file`) 可以在工具层面做路径沙箱。\n\n关键洞察: 加工具不需要改循环。\n\n## 解决方案\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}.\nOne lookup replaces any if/elif chain.\n```\n\n## 工作原理\n\n1. 每个工具有一个处理函数。路径沙箱防止逃逸工作区。\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. dispatch map 将工具名映射到处理函数。\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. 
循环中按名称查找处理函数。循环体本身与 s01 完全一致。\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\n加工具 = 加 handler + 加 schema。循环永远不变。\n\n## 相对 s01 的变更\n\n| 组件 | 之前 (s01) | 之后 (s02) |\n|----------------|--------------------|--------------------------------|\n| Tools | 1 (仅 bash) | 4 (bash, read, write, edit) |\n| Dispatch | 硬编码 bash 调用 | `TOOL_HANDLERS` 字典 |\n| 路径安全 | 无 | `safe_path()` 沙箱 |\n| Agent loop | 不变 | 不变 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n" + "content": "# s02: Tool Use (工具使用)\r\n\r\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\r\n\r\n> *\"加一个工具, 只加一个 handler\"* -- 循环不用动, 新工具注册进 dispatch map 就行。\r\n\r\n## 问题\r\n\r\n只有 `bash` 时, 所有操作都走 shell。`cat` 截断不可预测, `sed` 遇到特殊字符就崩, 每次 bash 调用都是不受约束的安全面。专用工具 (`read_file`, `write_file`) 可以在工具层面做路径沙箱。\r\n\r\n关键洞察: 加工具不需要改循环。\r\n\r\n## 解决方案\r\n\r\n```\r\n+--------+ +-------+ +------------------+\r\n| User | ---> | LLM | ---> | Tool Dispatch |\r\n| prompt | | | | { |\r\n+--------+ +---+---+ | bash: run_bash |\r\n ^ | read: run_read |\r\n | | write: run_wr |\r\n +-----------+ edit: run_edit |\r\n tool_result | } |\r\n +------------------+\r\n\r\nThe dispatch map is a dict: {tool_name: handler_function}.\r\nOne lookup replaces any if/elif chain.\r\n```\r\n\r\n## 工作原理\r\n\r\n1. 
每个工具有一个处理函数。路径沙箱防止逃逸工作区。\r\n\r\n```python\r\ndef safe_path(p: str) -> Path:\r\n path = (WORKDIR / p).resolve()\r\n if not path.is_relative_to(WORKDIR):\r\n raise ValueError(f\"Path escapes workspace: {p}\")\r\n return path\r\n\r\ndef run_read(path: str, limit: int = None) -> str:\r\n text = safe_path(path).read_text()\r\n lines = text.splitlines()\r\n if limit and limit < len(lines):\r\n lines = lines[:limit]\r\n return \"\\n\".join(lines)[:50000]\r\n```\r\n\r\n2. dispatch map 将工具名映射到处理函数。\r\n\r\n```python\r\nTOOL_HANDLERS = {\r\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\r\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\r\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\r\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\r\n kw[\"new_text\"]),\r\n}\r\n```\r\n\r\n3. 循环中按名称查找处理函数。循环体本身与 s01 完全一致。\r\n\r\n```python\r\nfor block in response.content:\r\n if block.type == \"tool_use\":\r\n handler = TOOL_HANDLERS.get(block.name)\r\n output = handler(**block.input) if handler \\\r\n else f\"Unknown tool: {block.name}\"\r\n results.append({\r\n \"type\": \"tool_result\",\r\n \"tool_use_id\": block.id,\r\n \"content\": output,\r\n })\r\n```\r\n\r\n加工具 = 加 handler + 加 schema。循环永远不变。\r\n\r\n## 相对 s01 的变更\r\n\r\n| 组件 | 之前 (s01) | 之后 (s02) |\r\n|----------------|--------------------|--------------------------------|\r\n| Tools | 1 (仅 bash) | 4 (bash, read, write, edit) |\r\n| Dispatch | 硬编码 bash 调用 | `TOOL_HANDLERS` 字典 |\r\n| 路径安全 | 无 | `safe_path()` 沙箱 |\r\n| Agent loop | 不变 | 不变 |\r\n\r\n## 试一试\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s02_tool_use.py\r\n```\r\n\r\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\r\n\r\n1. `Read the file requirements.txt`\r\n2. `Create a file called greet.py with a greet(name) function`\r\n3. `Edit greet.py to add a docstring to the function`\r\n4. 
`Read greet.py to verify the edit worked`\r\n" }, { "version": "s03", "locale": "zh", "title": "s03: TodoWrite (待办写入)", - "content": "# s03: TodoWrite (待办写入)\n\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"没有计划的 agent 走哪算哪\"* -- 先列步骤再动手, 完成率翻倍。\n\n## 问题\n\n多步任务中, 模型会丢失进度 -- 重复做过的事、跳步、跑偏。对话越长越严重: 工具结果不断填满上下文, 系统提示的影响力逐渐被稀释。一个 10 步重构可能做完 1-3 步就开始即兴发挥, 因为 4-10 步已经被挤出注意力了。\n\n## 解决方案\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## 工作原理\n\n1. TodoManager 存储带状态的项目。同一时间只允许一个 `in_progress`。\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated, in_progress_count = [], 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\n \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. `todo` 工具和其他工具一样加入 dispatch map。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. 
nag reminder: 模型连续 3 轮以上不调用 `todo` 时注入提醒。\n\n```python\nif rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n```\n\n\"同时只能有一个 in_progress\" 强制顺序聚焦。nag reminder 制造问责压力 -- 你不更新计划, 系统就追着你问。\n\n## 相对 s02 的变更\n\n| 组件 | 之前 (s02) | 之后 (s03) |\n|----------------|------------------|--------------------------------|\n| Tools | 4 | 5 (+todo) |\n| 规划 | 无 | 带状态的 TodoManager |\n| Nag 注入 | 无 | 3 轮后注入 `` |\n| Agent loop | 简单分发 | + rounds_since_todo 计数器 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n" + "content": "# s03: TodoWrite (待办写入)\r\n\r\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\r\n\r\n> *\"没有计划的 agent 走哪算哪\"* -- 先列步骤再动手, 完成率翻倍。\r\n\r\n## 问题\r\n\r\n多步任务中, 模型会丢失进度 -- 重复做过的事、跳步、跑偏。对话越长越严重: 工具结果不断填满上下文, 系统提示的影响力逐渐被稀释。一个 10 步重构可能做完 1-3 步就开始即兴发挥, 因为 4-10 步已经被挤出注意力了。\r\n\r\n## 解决方案\r\n\r\n```\r\n+--------+ +-------+ +---------+\r\n| User | ---> | LLM | ---> | Tools |\r\n| prompt | | | | + todo |\r\n+--------+ +---+---+ +----+----+\r\n ^ |\r\n | tool_result |\r\n +----------------+\r\n |\r\n +-----------+-----------+\r\n | TodoManager state |\r\n | [ ] task A |\r\n | [>] task B <- doing |\r\n | [x] task C |\r\n +-----------------------+\r\n |\r\n if rounds_since_todo >= 3:\r\n inject into tool_result\r\n```\r\n\r\n## 工作原理\r\n\r\n1. 
TodoManager 存储带状态的项目。同一时间只允许一个 `in_progress`。\r\n\r\n```python\r\nclass TodoManager:\r\n def update(self, items: list) -> str:\r\n validated, in_progress_count = [], 0\r\n for item in items:\r\n status = item.get(\"status\", \"pending\")\r\n if status == \"in_progress\":\r\n in_progress_count += 1\r\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\r\n \"status\": status})\r\n if in_progress_count > 1:\r\n raise ValueError(\"Only one task can be in_progress\")\r\n self.items = validated\r\n return self.render()\r\n```\r\n\r\n2. `todo` 工具和其他工具一样加入 dispatch map。\r\n\r\n```python\r\nTOOL_HANDLERS = {\r\n # ...base tools...\r\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\r\n}\r\n```\r\n\r\n3. nag reminder: 模型连续 3 轮以上不调用 `todo` 时注入提醒。\r\n\r\n```python\r\nif rounds_since_todo >= 3 and messages:\r\n last = messages[-1]\r\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\r\n last[\"content\"].insert(0, {\r\n \"type\": \"text\",\r\n \"text\": \"Update your todos.\",\r\n })\r\n```\r\n\r\n\"同时只能有一个 in_progress\" 强制顺序聚焦。nag reminder 制造问责压力 -- 你不更新计划, 系统就追着你问。\r\n\r\n## 相对 s02 的变更\r\n\r\n| 组件 | 之前 (s02) | 之后 (s03) |\r\n|----------------|------------------|--------------------------------|\r\n| Tools | 4 | 5 (+todo) |\r\n| 规划 | 无 | 带状态的 TodoManager |\r\n| Nag 注入 | 无 | 3 轮后注入 `` |\r\n| Agent loop | 简单分发 | + rounds_since_todo 计数器 |\r\n\r\n## 试一试\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s03_todo_write.py\r\n```\r\n\r\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\r\n\r\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\r\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\r\n3. 
`Review all Python files and fix any style issues`\r\n" }, { "version": "s04", "locale": "zh", "title": "s04: Subagents (子智能体)", - "content": "# s04: Subagents (子智能体)\n\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"大任务拆小, 每个小任务干净的上下文\"* -- 子智能体用独立 messages[], 不污染主对话。\n\n## 问题\n\n智能体工作越久, messages 数组越胖。每次读文件、跑命令的输出都永久留在上下文里。\"这个项目用什么测试框架?\" 可能要读 5 个文件, 但父智能体只需要一个词: \"pytest。\"\n\n## 解决方案\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ----------> | while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <---------- | return last text |\n+------------------+ +------------------+\n\nParent context stays clean. Subagent context is discarded.\n```\n\n## 工作原理\n\n1. 父智能体有一个 `task` 工具。子智能体拥有除 `task` 外的所有基础工具 (禁止递归生成)。\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. 
子智能体以 `messages=[]` 启动, 运行自己的循环。只有最终文本返回给父智能体。\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\n子智能体可能跑了 30+ 次工具调用, 但整个消息历史直接丢弃。父智能体收到的只是一段摘要文本, 作为普通 `tool_result` 返回。\n\n## 相对 s03 的变更\n\n| 组件 | 之前 (s03) | 之后 (s04) |\n|----------------|------------------|-------------------------------|\n| Tools | 5 | 5 (基础) + task (仅父端) |\n| 上下文 | 单一共享 | 父 + 子隔离 |\n| Subagent | 无 | `run_subagent()` 函数 |\n| 返回值 | 不适用 | 仅摘要文本 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. `Use a task to create a new module, then verify it from here`\n" + "content": "# s04: Subagents (子智能体)\r\n\r\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\r\n\r\n> *\"大任务拆小, 每个小任务干净的上下文\"* -- 子智能体用独立 messages[], 不污染主对话。\r\n\r\n## 问题\r\n\r\n智能体工作越久, messages 数组越胖。每次读文件、跑命令的输出都永久留在上下文里。\"这个项目用什么测试框架?\" 可能要读 5 个文件, 但父智能体只需要一个词: \"pytest。\"\r\n\r\n## 解决方案\r\n\r\n```\r\nParent agent Subagent\r\n+------------------+ +------------------+\r\n| messages=[...] 
| | messages=[] | <-- fresh\r\n| | dispatch | |\r\n| tool: task | ----------> | while tool_use: |\r\n| prompt=\"...\" | | call tools |\r\n| | summary | append results |\r\n| result = \"...\" | <---------- | return last text |\r\n+------------------+ +------------------+\r\n\r\nParent context stays clean. Subagent context is discarded.\r\n```\r\n\r\n## 工作原理\r\n\r\n1. 父智能体有一个 `task` 工具。子智能体拥有除 `task` 外的所有基础工具 (禁止递归生成)。\r\n\r\n```python\r\nPARENT_TOOLS = CHILD_TOOLS + [\r\n {\"name\": \"task\",\r\n \"description\": \"Spawn a subagent with fresh context.\",\r\n \"input_schema\": {\r\n \"type\": \"object\",\r\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\r\n \"required\": [\"prompt\"],\r\n }},\r\n]\r\n```\r\n\r\n2. 子智能体以 `messages=[]` 启动, 运行自己的循环。只有最终文本返回给父智能体。\r\n\r\n```python\r\ndef run_subagent(prompt: str) -> str:\r\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\r\n for _ in range(30): # safety limit\r\n response = client.messages.create(\r\n model=MODEL, system=SUBAGENT_SYSTEM,\r\n messages=sub_messages,\r\n tools=CHILD_TOOLS, max_tokens=8000,\r\n )\r\n sub_messages.append({\"role\": \"assistant\",\r\n \"content\": response.content})\r\n if response.stop_reason != \"tool_use\":\r\n break\r\n results = []\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n handler = TOOL_HANDLERS.get(block.name)\r\n output = handler(**block.input)\r\n results.append({\"type\": \"tool_result\",\r\n \"tool_use_id\": block.id,\r\n \"content\": str(output)[:50000]})\r\n sub_messages.append({\"role\": \"user\", \"content\": results})\r\n return \"\".join(\r\n b.text for b in response.content if hasattr(b, \"text\")\r\n ) or \"(no summary)\"\r\n```\r\n\r\n子智能体可能跑了 30+ 次工具调用, 但整个消息历史直接丢弃。父智能体收到的只是一段摘要文本, 作为普通 `tool_result` 返回。\r\n\r\n## 相对 s03 的变更\r\n\r\n| 组件 | 之前 (s03) | 之后 (s04) |\r\n|----------------|------------------|-------------------------------|\r\n| Tools | 5 | 5 (基础) + task (仅父端) |\r\n| 上下文 | 单一共享 | 父 + 子隔离 |\r\n| Subagent | 无 | 
`run_subagent()` 函数 |\r\n| 返回值 | 不适用 | 仅摘要文本 |\r\n\r\n## 试一试\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s04_subagent.py\r\n```\r\n\r\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\r\n\r\n1. `Use a subtask to find what testing framework this project uses`\r\n2. `Delegate: read all .py files and summarize what each one does`\r\n3. `Use a task to create a new module, then verify it from here`\r\n" }, { "version": "s05", "locale": "zh", "title": "s05: Skills (技能加载)", - "content": "# s05: Skills (技能加载)\n\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"用到什么知识, 临时加载什么知识\"* -- 通过 tool_result 注入, 不塞 system prompt。\n\n## 问题\n\n你希望智能体遵循特定领域的工作流: git 约定、测试模式、代码审查清单。全塞进系统提示太浪费 -- 10 个技能, 每个 2000 token, 就是 20,000 token, 大部分跟当前任务毫无关系。\n\n## 解决方案\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| |\n+--------------------------------------+\n```\n\n第一层: 系统提示中放技能名称 (低成本)。第二层: tool_result 中按需放完整内容。\n\n## 工作原理\n\n1. 每个技能是一个目录, 包含 `SKILL.md` 文件和 YAML frontmatter。\n\n```\nskills/\n pdf/\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\n code-review/\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\n```\n\n2. 
SkillLoader 递归扫描 `SKILL.md` 文件, 用目录名作为技能标识。\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body}\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n\"\n```\n\n3. 第一层写入系统提示。第二层不过是 dispatch map 中的又一个工具。\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\n模型知道有哪些技能 (便宜), 需要时再加载完整内容 (贵)。\n\n## 相对 s04 的变更\n\n| 组件 | 之前 (s04) | 之后 (s05) |\n|----------------|------------------|--------------------------------|\n| Tools | 5 (基础 + task) | 5 (基础 + load_skill) |\n| 系统提示 | 静态字符串 | + 技能描述列表 |\n| 知识库 | 无 | skills/\\*/SKILL.md 文件 |\n| 注入方式 | 无 | 两层 (系统提示 + result) |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. 
`Build an MCP server using the mcp-builder skill`\n" + "content": "# s05: Skills (技能加载)\r\n\r\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\r\n\r\n> *\"用到什么知识, 临时加载什么知识\"* -- 通过 tool_result 注入, 不塞 system prompt。\r\n\r\n## 问题\r\n\r\n你希望智能体遵循特定领域的工作流: git 约定、测试模式、代码审查清单。全塞进系统提示太浪费 -- 10 个技能, 每个 2000 token, 就是 20,000 token, 大部分跟当前任务毫无关系。\r\n\r\n## 解决方案\r\n\r\n```\r\nSystem prompt (Layer 1 -- always present):\r\n+--------------------------------------+\r\n| You are a coding agent. |\r\n| Skills available: |\r\n| - git: Git workflow helpers | ~100 tokens/skill\r\n| - test: Testing best practices |\r\n+--------------------------------------+\r\n\r\nWhen model calls load_skill(\"git\"):\r\n+--------------------------------------+\r\n| tool_result (Layer 2 -- on demand): |\r\n| |\r\n| Full git workflow instructions... | ~2000 tokens\r\n| Step 1: ... |\r\n| |\r\n+--------------------------------------+\r\n```\r\n\r\n第一层: 系统提示中放技能名称 (低成本)。第二层: tool_result 中按需放完整内容。\r\n\r\n## 工作原理\r\n\r\n1. 每个技能是一个目录, 包含 `SKILL.md` 文件和 YAML frontmatter。\r\n\r\n```\r\nskills/\r\n pdf/\r\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\r\n code-review/\r\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\r\n```\r\n\r\n2. 
SkillLoader 递归扫描 `SKILL.md` 文件, 用目录名作为技能标识。\r\n\r\n```python\r\nclass SkillLoader:\r\n def __init__(self, skills_dir: Path):\r\n self.skills = {}\r\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\r\n text = f.read_text()\r\n meta, body = self._parse_frontmatter(text)\r\n name = meta.get(\"name\", f.parent.name)\r\n self.skills[name] = {\"meta\": meta, \"body\": body}\r\n\r\n def get_descriptions(self) -> str:\r\n lines = []\r\n for name, skill in self.skills.items():\r\n desc = skill[\"meta\"].get(\"description\", \"\")\r\n lines.append(f\" - {name}: {desc}\")\r\n return \"\\n\".join(lines)\r\n\r\n def get_content(self, name: str) -> str:\r\n skill = self.skills.get(name)\r\n if not skill:\r\n return f\"Error: Unknown skill '{name}'.\"\r\n return f\"\\n{skill['body']}\\n\"\r\n```\r\n\r\n3. 第一层写入系统提示。第二层不过是 dispatch map 中的又一个工具。\r\n\r\n```python\r\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\r\nSkills available:\r\n{SKILL_LOADER.get_descriptions()}\"\"\"\r\n\r\nTOOL_HANDLERS = {\r\n # ...base tools...\r\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\r\n}\r\n```\r\n\r\n模型知道有哪些技能 (便宜), 需要时再加载完整内容 (贵)。\r\n\r\n## 相对 s04 的变更\r\n\r\n| 组件 | 之前 (s04) | 之后 (s05) |\r\n|----------------|------------------|--------------------------------|\r\n| Tools | 5 (基础 + task) | 5 (基础 + load_skill) |\r\n| 系统提示 | 静态字符串 | + 技能描述列表 |\r\n| 知识库 | 无 | skills/\\*/SKILL.md 文件 |\r\n| 注入方式 | 无 | 两层 (系统提示 + result) |\r\n\r\n## 试一试\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s05_skill_loading.py\r\n```\r\n\r\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\r\n\r\n1. `What skills are available?`\r\n2. `Load the agent-builder skill and follow its instructions`\r\n3. `I need to do a code review -- load the relevant skill first`\r\n4. 
`Build an MCP server using the mcp-builder skill`\r\n" }, { "version": "s06", "locale": "zh", "title": "s06: Context Compact (上下文压缩)", - "content": "# s06: Context Compact (上下文压缩)\n\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"上下文总会满, 要有办法腾地方\"* -- 三层压缩策略, 换来无限会话。\n\n## 问题\n\n上下文窗口是有限的。读一个 1000 行的文件就吃掉 ~4000 token; 读 30 个文件、跑 20 条命令, 轻松突破 100k token。不压缩, 智能体根本没法在大项目里干活。\n\n## 解决方案\n\n三层压缩, 激进程度递增:\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## 工作原理\n\n1. **第一层 -- micro_compact**: 每次 LLM 调用前, 将旧的 tool result 替换为占位符。\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n for _, _, part in tool_results[:-KEEP_RECENT]:\n if len(part.get(\"content\", \"\")) > 100:\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. 
**第二层 -- auto_compact**: token 超过阈值时, 保存完整对话到磁盘, 让 LLM 做摘要。\n\n```python\ndef auto_compact(messages: list) -> list:\n # Save transcript for recovery\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n # LLM summarizes\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **第三层 -- manual compact**: `compact` 工具按需触发同样的摘要机制。\n\n4. 循环整合三层:\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ... tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\n完整历史通过 transcript 保存在磁盘上。信息没有真正丢失, 只是移出了活跃上下文。\n\n## 相对 s05 的变更\n\n| 组件 | 之前 (s05) | 之后 (s06) |\n|----------------|------------------|--------------------------------|\n| Tools | 5 | 5 (基础 + compact) |\n| 上下文管理 | 无 | 三层压缩 |\n| Micro-compact | 无 | 旧结果 -> 占位符 |\n| Auto-compact | 无 | token 阈值触发 |\n| Transcripts | 无 | 保存到 .transcripts/ |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Read every Python file in the agents/ directory one by one` (观察 micro-compact 替换旧结果)\n2. `Keep reading files until compression triggers automatically`\n3. 
`Use the compact tool to manually compress the conversation`\n" + "content": "# s06: Context Compact (上下文压缩)\r\n\r\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\r\n\r\n> *\"上下文总会满, 要有办法腾地方\"* -- 三层压缩策略, 换来无限会话。\r\n\r\n## 问题\r\n\r\n上下文窗口是有限的。读一个 1000 行的文件就吃掉 ~4000 token; 读 30 个文件、跑 20 条命令, 轻松突破 100k token。不压缩, 智能体根本没法在大项目里干活。\r\n\r\n## 解决方案\r\n\r\n三层压缩, 激进程度递增:\r\n\r\n```\r\nEvery turn:\r\n+------------------+\r\n| Tool call result |\r\n+------------------+\r\n |\r\n v\r\n[Layer 1: micro_compact] (silent, every turn)\r\n Replace tool_result > 3 turns old\r\n with \"[Previous: used {tool_name}]\"\r\n |\r\n v\r\n[Check: tokens > 50000?]\r\n | |\r\n no yes\r\n | |\r\n v v\r\ncontinue [Layer 2: auto_compact]\r\n Save transcript to .transcripts/\r\n LLM summarizes conversation.\r\n Replace all messages with [summary].\r\n |\r\n v\r\n [Layer 3: compact tool]\r\n Model calls compact explicitly.\r\n Same summarization as auto_compact.\r\n```\r\n\r\n## 工作原理\r\n\r\n1. **第一层 -- micro_compact**: 每次 LLM 调用前, 将旧的 tool result 替换为占位符。\r\n\r\n```python\r\ndef micro_compact(messages: list) -> list:\r\n tool_results = []\r\n for i, msg in enumerate(messages):\r\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\r\n for j, part in enumerate(msg[\"content\"]):\r\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\r\n tool_results.append((i, j, part))\r\n if len(tool_results) <= KEEP_RECENT:\r\n return messages\r\n for _, _, part in tool_results[:-KEEP_RECENT]:\r\n if len(part.get(\"content\", \"\")) > 100:\r\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\r\n return messages\r\n```\r\n\r\n2. 
**第二层 -- auto_compact**: token 超过阈值时, 保存完整对话到磁盘, 让 LLM 做摘要。\r\n\r\n```python\r\ndef auto_compact(messages: list) -> list:\r\n # Save transcript for recovery\r\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\r\n with open(transcript_path, \"w\") as f:\r\n for msg in messages:\r\n f.write(json.dumps(msg, default=str) + \"\\n\")\r\n # LLM summarizes\r\n response = client.messages.create(\r\n model=MODEL,\r\n messages=[{\"role\": \"user\", \"content\":\r\n \"Summarize this conversation for continuity...\"\r\n + json.dumps(messages, default=str)[:80000]}],\r\n max_tokens=2000,\r\n )\r\n return [\r\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\r\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\r\n ]\r\n```\r\n\r\n3. **第三层 -- manual compact**: `compact` 工具按需触发同样的摘要机制。\r\n\r\n4. 循环整合三层:\r\n\r\n```python\r\ndef agent_loop(messages: list):\r\n while True:\r\n micro_compact(messages) # Layer 1\r\n if estimate_tokens(messages) > THRESHOLD:\r\n messages[:] = auto_compact(messages) # Layer 2\r\n response = client.messages.create(...)\r\n # ... tool execution ...\r\n if manual_compact:\r\n messages[:] = auto_compact(messages) # Layer 3\r\n```\r\n\r\n完整历史通过 transcript 保存在磁盘上。信息没有真正丢失, 只是移出了活跃上下文。\r\n\r\n## 相对 s05 的变更\r\n\r\n| 组件 | 之前 (s05) | 之后 (s06) |\r\n|----------------|------------------|--------------------------------|\r\n| Tools | 5 | 5 (基础 + compact) |\r\n| 上下文管理 | 无 | 三层压缩 |\r\n| Micro-compact | 无 | 旧结果 -> 占位符 |\r\n| Auto-compact | 无 | token 阈值触发 |\r\n| Transcripts | 无 | 保存到 .transcripts/ |\r\n\r\n## 试一试\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s06_context_compact.py\r\n```\r\n\r\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\r\n\r\n1. `Read every Python file in the agents/ directory one by one` (观察 micro-compact 替换旧结果)\r\n2. `Keep reading files until compression triggers automatically`\r\n3. 
`Use the compact tool to manually compress the conversation`\r\n" }, { "version": "s07", "locale": "zh", "title": "s07: Task System (任务系统)", - "content": "# s07: Task System (任务系统)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\n\n> *\"大目标要拆成小任务, 排好序, 记在磁盘上\"* -- 文件持久化的任务图, 为多 agent 协作打基础。\n\n## 问题\n\ns03 的 TodoManager 只是内存中的扁平清单: 没有顺序、没有依赖、状态只有做完没做完。真实目标是有结构的 -- 任务 B 依赖任务 A, 任务 C 和 D 可以并行, 任务 E 要等 C 和 D 都完成。\n\n没有显式的关系, 智能体分不清什么能做、什么被卡住、什么能同时跑。而且清单只活在内存里, 上下文压缩 (s06) 一跑就没了。\n\n## 解决方案\n\n把扁平清单升级为持久化到磁盘的**任务图**。每个任务是一个 JSON 文件, 有状态、前置依赖 (`blockedBy`) 和后置依赖 (`blocks`)。任务图随时回答三个问题:\n\n- **什么可以做?** -- 状态为 `pending` 且 `blockedBy` 为空的任务。\n- **什么被卡住?** -- 等待前置任务完成的任务。\n- **什么做完了?** -- 状态为 `completed` 的任务, 完成时自动解锁后续任务。\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\"}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\n\n任务图 (DAG):\n +----------+\n +--> | task 2 | --+\n | | pending | |\n+----------+ +----------+ +--> +----------+\n| task 1 | | task 4 |\n| completed| --> +----------+ +--> | blocked |\n+----------+ | task 3 | --+ +----------+\n | pending |\n +----------+\n\n顺序: task 1 必须先完成, 才能开始 2 和 3\n并行: task 2 和 3 可以同时执行\n依赖: task 4 要等 2 和 3 都完成\n状态: pending -> in_progress -> completed\n```\n\n这个任务图是 s07 之后所有机制的协调骨架: 后台执行 (s08)、多 agent 团队 (s09+)、worktree 隔离 (s12) 都读写这同一个结构。\n\n## 工作原理\n\n1. **TaskManager**: 每个任务一个 JSON 文件, CRUD + 依赖图。\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. 
**依赖解除**: 完成任务时, 自动将其 ID 从其他任务的 `blockedBy` 中移除, 解锁后续任务。\n\n```python\ndef _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. **状态变更 + 依赖关联**: `update` 处理状态转换和依赖边。\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n self._save(task)\n```\n\n4. 四个任务工具加入 dispatch map。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\n从 s07 起, 任务图是多步工作的默认选择。s03 的 Todo 仍可用于单次会话内的快速清单。\n\n## 相对 s06 的变更\n\n| 组件 | 之前 (s06) | 之后 (s07) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| 规划模型 | 扁平清单 (仅内存) | 带依赖关系的任务图 (磁盘) |\n| 关系 | 无 | `blockedBy` + `blocks` 边 |\n| 状态追踪 | 做完没做完 | `pending` -> `in_progress` -> `completed` |\n| 持久化 | 压缩后丢失 | 压缩和重启后存活 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. 
`Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\n" + "content": "# s07: Task System (任务系统)\r\n\r\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\r\n\r\n> *\"大目标要拆成小任务, 排好序, 记在磁盘上\"* -- 文件持久化的任务图, 为多 agent 协作打基础。\r\n\r\n## 问题\r\n\r\ns03 的 TodoManager 只是内存中的扁平清单: 没有顺序、没有依赖、状态只有做完没做完。真实目标是有结构的 -- 任务 B 依赖任务 A, 任务 C 和 D 可以并行, 任务 E 要等 C 和 D 都完成。\r\n\r\n没有显式的关系, 智能体分不清什么能做、什么被卡住、什么能同时跑。而且清单只活在内存里, 上下文压缩 (s06) 一跑就没了。\r\n\r\n## 解决方案\r\n\r\n把扁平清单升级为持久化到磁盘的**任务图**。每个任务是一个 JSON 文件, 有状态、前置依赖 (`blockedBy`) 和后置依赖 (`blocks`)。任务图随时回答三个问题:\r\n\r\n- **什么可以做?** -- 状态为 `pending` 且 `blockedBy` 为空的任务。\r\n- **什么被卡住?** -- 等待前置任务完成的任务。\r\n- **什么做完了?** -- 状态为 `completed` 的任务, 完成时自动解锁后续任务。\r\n\r\n```\r\n.tasks/\r\n task_1.json {\"id\":1, \"status\":\"completed\"}\r\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\r\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\r\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\r\n\r\n任务图 (DAG):\r\n +----------+\r\n +--> | task 2 | --+\r\n | | pending | |\r\n+----------+ +----------+ +--> +----------+\r\n| task 1 | | task 4 |\r\n| completed| --> +----------+ +--> | blocked |\r\n+----------+ | task 3 | --+ +----------+\r\n | pending |\r\n +----------+\r\n\r\n顺序: task 1 必须先完成, 才能开始 2 和 3\r\n并行: task 2 和 3 可以同时执行\r\n依赖: task 4 要等 2 和 3 都完成\r\n状态: pending -> in_progress -> completed\r\n```\r\n\r\n这个任务图是 s07 之后所有机制的协调骨架: 后台执行 (s08)、多 agent 团队 (s09+)、worktree 隔离 (s12) 都读写这同一个结构。\r\n\r\n## 工作原理\r\n\r\n1. 
**TaskManager**: 每个任务一个 JSON 文件, CRUD + 依赖图。\r\n\r\n```python\r\nclass TaskManager:\r\n def __init__(self, tasks_dir: Path):\r\n self.dir = tasks_dir\r\n self.dir.mkdir(exist_ok=True)\r\n self._next_id = self._max_id() + 1\r\n\r\n def create(self, subject, description=\"\"):\r\n task = {\"id\": self._next_id, \"subject\": subject,\r\n \"status\": \"pending\", \"blockedBy\": [],\r\n \"blocks\": [], \"owner\": \"\"}\r\n self._save(task)\r\n self._next_id += 1\r\n return json.dumps(task, indent=2)\r\n```\r\n\r\n2. **依赖解除**: 完成任务时, 自动将其 ID 从其他任务的 `blockedBy` 中移除, 解锁后续任务。\r\n\r\n```python\r\ndef _clear_dependency(self, completed_id):\r\n for f in self.dir.glob(\"task_*.json\"):\r\n task = json.loads(f.read_text())\r\n if completed_id in task.get(\"blockedBy\", []):\r\n task[\"blockedBy\"].remove(completed_id)\r\n self._save(task)\r\n```\r\n\r\n3. **状态变更 + 依赖关联**: `update` 处理状态转换和依赖边。\r\n\r\n```python\r\ndef update(self, task_id, status=None,\r\n add_blocked_by=None, add_blocks=None):\r\n task = self._load(task_id)\r\n if status:\r\n task[\"status\"] = status\r\n if status == \"completed\":\r\n self._clear_dependency(task_id)\r\n self._save(task)\r\n```\r\n\r\n4. 
四个任务工具加入 dispatch map。\r\n\r\n```python\r\nTOOL_HANDLERS = {\r\n # ...base tools...\r\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\r\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\r\n \"task_list\": lambda **kw: TASKS.list_all(),\r\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\r\n}\r\n```\r\n\r\n从 s07 起, 任务图是多步工作的默认选择。s03 的 Todo 仍可用于单次会话内的快速清单。\r\n\r\n## 相对 s06 的变更\r\n\r\n| 组件 | 之前 (s06) | 之后 (s07) |\r\n|---|---|---|\r\n| Tools | 5 | 8 (`task_create/update/list/get`) |\r\n| 规划模型 | 扁平清单 (仅内存) | 带依赖关系的任务图 (磁盘) |\r\n| 关系 | 无 | `blockedBy` + `blocks` 边 |\r\n| 状态追踪 | 做完没做完 | `pending` -> `in_progress` -> `completed` |\r\n| 持久化 | 压缩后丢失 | 压缩和重启后存活 |\r\n\r\n## 试一试\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s07_task_system.py\r\n```\r\n\r\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\r\n\r\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\r\n2. `List all tasks and show the dependency graph`\r\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\r\n4. `Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\r\n" }, { "version": "s08", "locale": "zh", "title": "s08: Background Tasks (后台任务)", - "content": "# s08: Background Tasks (后台任务)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\n\n> *\"慢操作丢后台, agent 继续想下一步\"* -- 后台线程跑命令, 完成后注入通知。\n\n## 问题\n\n有些命令要跑好几分钟: `npm install`、`pytest`、`docker build`。阻塞式循环下模型只能干等。用户说 \"装依赖, 顺便建个配置文件\", 智能体却只能一个一个来。\n\n## 解决方案\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | subprocess runs |\n| ... | | ... 
|\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- results injected before next LLM call --+\n```\n\n## 工作原理\n\n1. BackgroundManager 用线程安全的通知队列追踪任务。\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()` 启动守护线程, 立即返回。\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. 子进程完成后, 结果进入通知队列。\n\n```python\ndef _execute(self, task_id, command):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id, \"result\": output[:500]})\n```\n\n4. 
每次 LLM 调用前排空通知队列。\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\n messages.append({\"role\": \"user\",\n \"content\": f\"\\n{notif_text}\\n\"\n f\"\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\n循环保持单线程。只有子进程 I/O 被并行化。\n\n## 相对 s07 的变更\n\n| 组件 | 之前 (s07) | 之后 (s08) |\n|----------------|------------------|------------------------------------|\n| Tools | 8 | 6 (基础 + background_run + check) |\n| 执行方式 | 仅阻塞 | 阻塞 + 后台线程 |\n| 通知机制 | 无 | 每轮排空的队列 |\n| 并发 | 无 | 守护线程 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. `Run pytest in the background and keep working on other things`\n" + "content": "# s08: Background Tasks (后台任务)\r\n\r\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\r\n\r\n> *\"慢操作丢后台, agent 继续想下一步\"* -- 后台线程跑命令, 完成后注入通知。\r\n\r\n## 问题\r\n\r\n有些命令要跑好几分钟: `npm install`、`pytest`、`docker build`。阻塞式循环下模型只能干等。用户说 \"装依赖, 顺便建个配置文件\", 智能体却只能一个一个来。\r\n\r\n## 解决方案\r\n\r\n```\r\nMain thread Background thread\r\n+-----------------+ +-----------------+\r\n| agent loop | | subprocess runs |\r\n| ... | | ... |\r\n| [LLM call] <---+------- | enqueue(result) |\r\n| ^drain queue | +-----------------+\r\n+-----------------+\r\n\r\nTimeline:\r\nAgent --[spawn A]--[spawn B]--[other work]----\r\n | |\r\n v v\r\n [A runs] [B runs] (parallel)\r\n | |\r\n +-- results injected before next LLM call --+\r\n```\r\n\r\n## 工作原理\r\n\r\n1. 
BackgroundManager 用线程安全的通知队列追踪任务。\r\n\r\n```python\r\nclass BackgroundManager:\r\n def __init__(self):\r\n self.tasks = {}\r\n self._notification_queue = []\r\n self._lock = threading.Lock()\r\n```\r\n\r\n2. `run()` 启动守护线程, 立即返回。\r\n\r\n```python\r\ndef run(self, command: str) -> str:\r\n task_id = str(uuid.uuid4())[:8]\r\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\r\n thread = threading.Thread(\r\n target=self._execute, args=(task_id, command), daemon=True)\r\n thread.start()\r\n return f\"Background task {task_id} started\"\r\n```\r\n\r\n3. 子进程完成后, 结果进入通知队列。\r\n\r\n```python\r\ndef _execute(self, task_id, command):\r\n try:\r\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\r\n capture_output=True, text=True, timeout=300)\r\n output = (r.stdout + r.stderr).strip()[:50000]\r\n except subprocess.TimeoutExpired:\r\n output = \"Error: Timeout (300s)\"\r\n with self._lock:\r\n self._notification_queue.append({\r\n \"task_id\": task_id, \"result\": output[:500]})\r\n```\r\n\r\n4. 每次 LLM 调用前排空通知队列。\r\n\r\n```python\r\ndef agent_loop(messages: list):\r\n while True:\r\n notifs = BG.drain_notifications()\r\n if notifs:\r\n notif_text = \"\\n\".join(\r\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\r\n messages.append({\"role\": \"user\",\r\n \"content\": f\"\\n{notif_text}\\n\"\r\n f\"\"})\r\n messages.append({\"role\": \"assistant\",\r\n \"content\": \"Noted background results.\"})\r\n response = client.messages.create(...)\r\n```\r\n\r\n循环保持单线程。只有子进程 I/O 被并行化。\r\n\r\n## 相对 s07 的变更\r\n\r\n| 组件 | 之前 (s07) | 之后 (s08) |\r\n|----------------|------------------|------------------------------------|\r\n| Tools | 8 | 6 (基础 + background_run + check) |\r\n| 执行方式 | 仅阻塞 | 阻塞 + 后台线程 |\r\n| 通知机制 | 无 | 每轮排空的队列 |\r\n| 并发 | 无 | 守护线程 |\r\n\r\n## 试一试\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s08_background_tasks.py\r\n```\r\n\r\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\r\n\r\n1. 
`Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\r\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\r\n3. `Run pytest in the background and keep working on other things`\r\n" }, { "version": "s09", "locale": "zh", "title": "s09: Agent Teams (智能体团队)", - "content": "# s09: Agent Teams (智能体团队)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\n\n> *\"任务太大一个人干不完, 要能分给队友\"* -- 持久化队友 + JSONL 邮箱。\n\n## 问题\n\n子智能体 (s04) 是一次性的: 生成、干活、返回摘要、消亡。没有身份, 没有跨调用的记忆。后台任务 (s08) 能跑 shell 命令, 但做不了 LLM 引导的决策。\n\n真正的团队协作需要三样东西: (1) 能跨多轮对话存活的持久智能体, (2) 身份和生命周期管理, (3) 智能体之间的通信通道。\n\n## 解决方案\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n```\n\n## 工作原理\n\n1. TeammateManager 通过 config.json 维护团队名册。\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. `spawn()` 创建队友并在线程中启动 agent loop。\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. 
MessageBus: append-only 的 JSONL 收件箱。`send()` 追加一行; `read_inbox()` 读取全部并清空。\n\n```python\nclass MessageBus:\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. 每个队友在每次 LLM 调用前检查收件箱, 将消息注入上下文。\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n```\n\n## 相对 s08 的变更\n\n| 组件 | 之前 (s08) | 之后 (s09) |\n|----------------|------------------|------------------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| 智能体数量 | 单一 | 领导 + N 个队友 |\n| 持久化 | 无 | config.json + JSONL 收件箱 |\n| 线程 | 后台命令 | 每线程完整 agent loop |\n| 生命周期 | 一次性 | idle -> working -> idle |\n| 通信 | 无 | message + broadcast |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. 输入 `/team` 查看团队名册和状态\n5. 
输入 `/inbox` 手动检查领导的收件箱\n" + "content": "# s09: Agent Teams (智能体团队)\r\n\r\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\r\n\r\n> *\"任务太大一个人干不完, 要能分给队友\"* -- 持久化队友 + JSONL 邮箱。\r\n\r\n## 问题\r\n\r\n子智能体 (s04) 是一次性的: 生成、干活、返回摘要、消亡。没有身份, 没有跨调用的记忆。后台任务 (s08) 能跑 shell 命令, 但做不了 LLM 引导的决策。\r\n\r\n真正的团队协作需要三样东西: (1) 能跨多轮对话存活的持久智能体, (2) 身份和生命周期管理, (3) 智能体之间的通信通道。\r\n\r\n## 解决方案\r\n\r\n```\r\nTeammate lifecycle:\r\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\r\n\r\nCommunication:\r\n .team/\r\n config.json <- team roster + statuses\r\n inbox/\r\n alice.jsonl <- append-only, drain-on-read\r\n bob.jsonl\r\n lead.jsonl\r\n\r\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\r\n | alice | -----------------------------> | bob |\r\n | loop | bob.jsonl << {json_line} | loop |\r\n +--------+ +--------+\r\n ^ |\r\n | BUS.read_inbox(\"alice\") |\r\n +---- alice.jsonl -> read + drain ---------+\r\n```\r\n\r\n## 工作原理\r\n\r\n1. TeammateManager 通过 config.json 维护团队名册。\r\n\r\n```python\r\nclass TeammateManager:\r\n def __init__(self, team_dir: Path):\r\n self.dir = team_dir\r\n self.dir.mkdir(exist_ok=True)\r\n self.config_path = self.dir / \"config.json\"\r\n self.config = self._load_config()\r\n self.threads = {}\r\n```\r\n\r\n2. `spawn()` 创建队友并在线程中启动 agent loop。\r\n\r\n```python\r\ndef spawn(self, name: str, role: str, prompt: str) -> str:\r\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\r\n self.config[\"members\"].append(member)\r\n self._save_config()\r\n thread = threading.Thread(\r\n target=self._teammate_loop,\r\n args=(name, role, prompt), daemon=True)\r\n thread.start()\r\n return f\"Spawned teammate '{name}' (role: {role})\"\r\n```\r\n\r\n3. 
MessageBus: append-only 的 JSONL 收件箱。`send()` 追加一行; `read_inbox()` 读取全部并清空。\r\n\r\n```python\r\nclass MessageBus:\r\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\r\n msg = {\"type\": msg_type, \"from\": sender,\r\n \"content\": content, \"timestamp\": time.time()}\r\n if extra:\r\n msg.update(extra)\r\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\r\n f.write(json.dumps(msg) + \"\\n\")\r\n\r\n def read_inbox(self, name):\r\n path = self.dir / f\"{name}.jsonl\"\r\n if not path.exists(): return \"[]\"\r\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\r\n path.write_text(\"\") # drain\r\n return json.dumps(msgs, indent=2)\r\n```\r\n\r\n4. 每个队友在每次 LLM 调用前检查收件箱, 将消息注入上下文。\r\n\r\n```python\r\ndef _teammate_loop(self, name, role, prompt):\r\n messages = [{\"role\": \"user\", \"content\": prompt}]\r\n for _ in range(50):\r\n inbox = BUS.read_inbox(name)\r\n if inbox != \"[]\":\r\n messages.append({\"role\": \"user\",\r\n \"content\": f\"{inbox}\"})\r\n messages.append({\"role\": \"assistant\",\r\n \"content\": \"Noted inbox messages.\"})\r\n response = client.messages.create(...)\r\n if response.stop_reason != \"tool_use\":\r\n break\r\n # execute tools, append results...\r\n self._find_member(name)[\"status\"] = \"idle\"\r\n```\r\n\r\n## 相对 s08 的变更\r\n\r\n| 组件 | 之前 (s08) | 之后 (s09) |\r\n|----------------|------------------|------------------------------------|\r\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\r\n| 智能体数量 | 单一 | 领导 + N 个队友 |\r\n| 持久化 | 无 | config.json + JSONL 收件箱 |\r\n| 线程 | 后台命令 | 每线程完整 agent loop |\r\n| 生命周期 | 一次性 | idle -> working -> idle |\r\n| 通信 | 无 | message + broadcast |\r\n\r\n## 试一试\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s09_agent_teams.py\r\n```\r\n\r\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\r\n\r\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\r\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\r\n3. 
`Check the lead inbox for any messages`\r\n4. 输入 `/team` 查看团队名册和状态\r\n5. 输入 `/inbox` 手动检查领导的收件箱\r\n" }, { "version": "s10", "locale": "zh", "title": "s10: Team Protocols (团队协议)", - "content": "# s10: Team Protocols (团队协议)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\n\n> *\"队友之间要有统一的沟通规矩\"* -- 一个 request-response 模式驱动所有协商。\n\n## 问题\n\ns09 中队友能干活能通信, 但缺少结构化协调:\n\n**关机**: 直接杀线程会留下写了一半的文件和过期的 config.json。需要握手 -- 领导请求, 队友批准 (收尾退出) 或拒绝 (继续干)。\n\n**计划审批**: 领导说 \"重构认证模块\", 队友立刻开干。高风险变更应该先过审。\n\n两者结构一样: 一方发带唯一 ID 的请求, 另一方引用同一 ID 响应。\n\n## 解决方案\n\n```\nShutdown Protocol Plan Approval Protocol\n================== ======================\n\nLead Teammate Teammate Lead\n | | | |\n |--shutdown_req-->| |--plan_req------>|\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\n | | | |\n |<--shutdown_resp-| |<--plan_resp-----|\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\n | approve:true} | | approve:true} |\n\nShared FSM:\n [pending] --approve--> [approved]\n [pending] --reject---> [rejected]\n\nTrackers:\n shutdown_requests = {req_id: {target, status}}\n plan_requests = {req_id: {from, plan, status}}\n```\n\n## 工作原理\n\n1. 领导生成 request_id, 通过收件箱发起关机请求。\n\n```python\nshutdown_requests = {}\n\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id})\n return f\"Shutdown request {req_id} sent (status: pending)\"\n```\n\n2. 队友收到请求后, 用 approve/reject 响应。\n\n```python\nif tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": approve})\n```\n\n3. 
计划审批遵循完全相同的模式。队友提交计划 (生成 request_id), 领导审查 (引用同一个 request_id)。\n\n```python\nplan_requests = {}\n\ndef handle_plan_review(request_id, approve, feedback=\"\"):\n req = plan_requests[request_id]\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", req[\"from\"], feedback,\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n```\n\n一个 FSM, 两种用途。同样的 `pending -> approved | rejected` 状态机可以套用到任何请求-响应协议上。\n\n## 相对 s09 的变更\n\n| 组件 | 之前 (s09) | 之后 (s10) |\n|----------------|------------------|--------------------------------------|\n| Tools | 9 | 12 (+shutdown_req/resp +plan) |\n| 关机 | 仅自然退出 | 请求-响应握手 |\n| 计划门控 | 无 | 提交/审查与审批 |\n| 关联 | 无 | 每个请求一个 request_id |\n| FSM | 无 | pending -> approved/rejected |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s10_team_protocols.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Spawn alice as a coder. Then request her shutdown.`\n2. `List teammates to see alice's status after shutdown approval`\n3. `Spawn bob with a risky refactoring task. Review and reject his plan.`\n4. `Spawn charlie, have him submit a plan, then approve it.`\n5. 
输入 `/team` 监控状态\n" + "content": "# s10: Team Protocols (团队协议)\r\n\r\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\r\n\r\n> *\"队友之间要有统一的沟通规矩\"* -- 一个 request-response 模式驱动所有协商。\r\n\r\n## 问题\r\n\r\ns09 中队友能干活能通信, 但缺少结构化协调:\r\n\r\n**关机**: 直接杀线程会留下写了一半的文件和过期的 config.json。需要握手 -- 领导请求, 队友批准 (收尾退出) 或拒绝 (继续干)。\r\n\r\n**计划审批**: 领导说 \"重构认证模块\", 队友立刻开干。高风险变更应该先过审。\r\n\r\n两者结构一样: 一方发带唯一 ID 的请求, 另一方引用同一 ID 响应。\r\n\r\n## 解决方案\r\n\r\n```\r\nShutdown Protocol Plan Approval Protocol\r\n================== ======================\r\n\r\nLead Teammate Teammate Lead\r\n | | | |\r\n |--shutdown_req-->| |--plan_req------>|\r\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\r\n | | | |\r\n |<--shutdown_resp-| |<--plan_resp-----|\r\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\r\n | approve:true} | | approve:true} |\r\n\r\nShared FSM:\r\n [pending] --approve--> [approved]\r\n [pending] --reject---> [rejected]\r\n\r\nTrackers:\r\n shutdown_requests = {req_id: {target, status}}\r\n plan_requests = {req_id: {from, plan, status}}\r\n```\r\n\r\n## 工作原理\r\n\r\n1. 领导生成 request_id, 通过收件箱发起关机请求。\r\n\r\n```python\r\nshutdown_requests = {}\r\n\r\ndef handle_shutdown_request(teammate: str) -> str:\r\n req_id = str(uuid.uuid4())[:8]\r\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\r\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\r\n \"shutdown_request\", {\"request_id\": req_id})\r\n return f\"Shutdown request {req_id} sent (status: pending)\"\r\n```\r\n\r\n2. 队友收到请求后, 用 approve/reject 响应。\r\n\r\n```python\r\nif tool_name == \"shutdown_response\":\r\n req_id = args[\"request_id\"]\r\n approve = args[\"approve\"]\r\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\r\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\r\n \"shutdown_response\",\r\n {\"request_id\": req_id, \"approve\": approve})\r\n```\r\n\r\n3. 
计划审批遵循完全相同的模式。队友提交计划 (生成 request_id), 领导审查 (引用同一个 request_id)。\r\n\r\n```python\r\nplan_requests = {}\r\n\r\ndef handle_plan_review(request_id, approve, feedback=\"\"):\r\n req = plan_requests[request_id]\r\n req[\"status\"] = \"approved\" if approve else \"rejected\"\r\n BUS.send(\"lead\", req[\"from\"], feedback,\r\n \"plan_approval_response\",\r\n {\"request_id\": request_id, \"approve\": approve})\r\n```\r\n\r\n一个 FSM, 两种用途。同样的 `pending -> approved | rejected` 状态机可以套用到任何请求-响应协议上。\r\n\r\n## 相对 s09 的变更\r\n\r\n| 组件 | 之前 (s09) | 之后 (s10) |\r\n|----------------|------------------|--------------------------------------|\r\n| Tools | 9 | 12 (+shutdown_req/resp +plan) |\r\n| 关机 | 仅自然退出 | 请求-响应握手 |\r\n| 计划门控 | 无 | 提交/审查与审批 |\r\n| 关联 | 无 | 每个请求一个 request_id |\r\n| FSM | 无 | pending -> approved/rejected |\r\n\r\n## 试一试\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s10_team_protocols.py\r\n```\r\n\r\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\r\n\r\n1. `Spawn alice as a coder. Then request her shutdown.`\r\n2. `List teammates to see alice's status after shutdown approval`\r\n3. `Spawn bob with a risky refactoring task. Review and reject his plan.`\r\n4. `Spawn charlie, have him submit a plan, then approve it.`\r\n5. 
输入 `/team` 监控状态\r\n" }, { "version": "s11", "locale": "zh", "title": "s11: Autonomous Agents (自治智能体)", - "content": "# s11: Autonomous Agents (自治智能体)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\n\n> *\"队友自己看看板, 有活就认领\"* -- 不需要领导逐个分配, 自组织。\n\n## 问题\n\ns09-s10 中, 队友只在被明确指派时才动。领导得给每个队友写 prompt, 任务看板上 10 个未认领的任务得手动分配。这扩展不了。\n\n真正的自治: 队友自己扫描任务看板, 认领没人做的任务, 做完再找下一个。\n\n一个细节: 上下文压缩 (s06) 后智能体可能忘了自己是谁。身份重注入解决这个问题。\n\n## 解决方案\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n```\n\n## 工作原理\n\n1. 队友循环分两个阶段: WORK 和 IDLE。LLM 停止调用工具 (或调用了 `idle`) 时, 进入 IDLE。\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. 
空闲阶段循环轮询收件箱和任务看板。\n\n```python\ndef _idle_poll(self, name, messages):\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n return True\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{unclaimed[0]['id']}: \"\n f\"{unclaimed[0]['subject']}\"})\n return True\n return False # timeout -> shutdown\n```\n\n3. 任务看板扫描: 找 pending 状态、无 owner、未被阻塞的任务。\n\n```python\ndef scan_unclaimed_tasks() -> list:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n```\n\n4. 身份重注入: 上下文过短 (说明发生了压缩) 时, 在开头插入身份块。\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, \"\n f\"team: {team_name}. Continue your work.\"})\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n## 相对 s10 的变更\n\n| 组件 | 之前 (s10) | 之后 (s11) |\n|----------------|------------------|----------------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| 自治性 | 领导指派 | 自组织 |\n| 空闲阶段 | 无 | 轮询收件箱 + 任务看板 |\n| 任务认领 | 仅手动 | 自动认领未分配任务 |\n| 身份 | 系统提示 | + 压缩后重注入 |\n| 超时 | 无 | 60 秒空闲 -> 自动关机 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. 输入 `/tasks` 查看带 owner 的任务看板\n5. 
输入 `/team` 监控谁在工作、谁在空闲\n" + "content": "# s11: Autonomous Agents (自治智能体)\r\n\r\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\r\n\r\n> *\"队友自己看看板, 有活就认领\"* -- 不需要领导逐个分配, 自组织。\r\n\r\n## 问题\r\n\r\ns09-s10 中, 队友只在被明确指派时才动。领导得给每个队友写 prompt, 任务看板上 10 个未认领的任务得手动分配。这扩展不了。\r\n\r\n真正的自治: 队友自己扫描任务看板, 认领没人做的任务, 做完再找下一个。\r\n\r\n一个细节: 上下文压缩 (s06) 后智能体可能忘了自己是谁。身份重注入解决这个问题。\r\n\r\n## 解决方案\r\n\r\n```\r\nTeammate lifecycle with idle cycle:\r\n\r\n+-------+\r\n| spawn |\r\n+---+---+\r\n |\r\n v\r\n+-------+ tool_use +-------+\r\n| WORK | <------------- | LLM |\r\n+---+---+ +-------+\r\n |\r\n | stop_reason != tool_use (or idle tool called)\r\n v\r\n+--------+\r\n| IDLE | poll every 5s for up to 60s\r\n+---+----+\r\n |\r\n +---> check inbox --> message? ----------> WORK\r\n |\r\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\r\n |\r\n +---> 60s timeout ----------------------> SHUTDOWN\r\n\r\nIdentity re-injection after compression:\r\n if len(messages) <= 3:\r\n messages.insert(0, identity_block)\r\n```\r\n\r\n## 工作原理\r\n\r\n1. 队友循环分两个阶段: WORK 和 IDLE。LLM 停止调用工具 (或调用了 `idle`) 时, 进入 IDLE。\r\n\r\n```python\r\ndef _loop(self, name, role, prompt):\r\n while True:\r\n # -- WORK PHASE --\r\n messages = [{\"role\": \"user\", \"content\": prompt}]\r\n for _ in range(50):\r\n response = client.messages.create(...)\r\n if response.stop_reason != \"tool_use\":\r\n break\r\n # execute tools...\r\n if idle_requested:\r\n break\r\n\r\n # -- IDLE PHASE --\r\n self._set_status(name, \"idle\")\r\n resume = self._idle_poll(name, messages)\r\n if not resume:\r\n self._set_status(name, \"shutdown\")\r\n return\r\n self._set_status(name, \"working\")\r\n```\r\n\r\n2. 
空闲阶段循环轮询收件箱和任务看板。\r\n\r\n```python\r\ndef _idle_poll(self, name, messages):\r\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12\r\n time.sleep(POLL_INTERVAL)\r\n inbox = BUS.read_inbox(name)\r\n if inbox:\r\n messages.append({\"role\": \"user\",\r\n \"content\": f\"{inbox}\"})\r\n return True\r\n unclaimed = scan_unclaimed_tasks()\r\n if unclaimed:\r\n claim_task(unclaimed[0][\"id\"], name)\r\n messages.append({\"role\": \"user\",\r\n \"content\": f\"Task #{unclaimed[0]['id']}: \"\r\n f\"{unclaimed[0]['subject']}\"})\r\n return True\r\n return False # timeout -> shutdown\r\n```\r\n\r\n3. 任务看板扫描: 找 pending 状态、无 owner、未被阻塞的任务。\r\n\r\n```python\r\ndef scan_unclaimed_tasks() -> list:\r\n unclaimed = []\r\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\r\n task = json.loads(f.read_text())\r\n if (task.get(\"status\") == \"pending\"\r\n and not task.get(\"owner\")\r\n and not task.get(\"blockedBy\")):\r\n unclaimed.append(task)\r\n return unclaimed\r\n```\r\n\r\n4. 身份重注入: 上下文过短 (说明发生了压缩) 时, 在开头插入身份块。\r\n\r\n```python\r\nif len(messages) <= 3:\r\n messages.insert(0, {\"role\": \"user\",\r\n \"content\": f\"You are '{name}', role: {role}, \"\r\n f\"team: {team_name}. Continue your work.\"})\r\n messages.insert(1, {\"role\": \"assistant\",\r\n \"content\": f\"I am {name}. Continuing.\"})\r\n```\r\n\r\n## 相对 s10 的变更\r\n\r\n| 组件 | 之前 (s10) | 之后 (s11) |\r\n|----------------|------------------|----------------------------------|\r\n| Tools | 12 | 14 (+idle, +claim_task) |\r\n| 自治性 | 领导指派 | 自组织 |\r\n| 空闲阶段 | 无 | 轮询收件箱 + 任务看板 |\r\n| 任务认领 | 仅手动 | 自动认领未分配任务 |\r\n| 身份 | 系统提示 | + 压缩后重注入 |\r\n| 超时 | 无 | 60 秒空闲 -> 自动关机 |\r\n\r\n## 试一试\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s11_autonomous_agents.py\r\n```\r\n\r\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\r\n\r\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\r\n2. `Spawn a coder teammate and let it find work from the task board itself`\r\n3. 
`Create tasks with dependencies. Watch teammates respect the blocked order.`\r\n4. 输入 `/tasks` 查看带 owner 的任务看板\r\n5. 输入 `/team` 监控谁在工作、谁在空闲\r\n" }, { "version": "s12", "locale": "zh", "title": "s12: Worktree + Task Isolation (Worktree 任务隔离)", - "content": "# s12: Worktree + Task Isolation (Worktree 任务隔离)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\n\n> *\"各干各的目录, 互不干扰\"* -- 任务管目标, worktree 管目录, 按 ID 绑定。\n\n## 问题\n\n到 s11, 智能体已经能自主认领和完成任务。但所有任务共享一个目录。两个智能体同时重构不同模块 -- A 改 `config.py`, B 也改 `config.py`, 未提交的改动互相污染, 谁也没法干净回滚。\n\n任务板管 \"做什么\" 但不管 \"在哪做\"。解法: 给每个任务一个独立的 git worktree 目录, 用任务 ID 把两边关联起来。\n\n## 解决方案\n\n```\nControl plane (.tasks/) Execution plane (.worktrees/)\n+------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress <------> branch: wt/auth-refactor\n| worktree: \"auth-refactor\" | task_id: 1 |\n+------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending <------> branch: wt/ui-login\n| worktree: \"ui-login\" | task_id: 2 |\n+------------------+ +------------------------+\n |\n index.json (worktree registry)\n events.jsonl (lifecycle log)\n\nState machines:\n Task: pending -> in_progress -> completed\n Worktree: absent -> active -> removed | kept\n```\n\n## 工作原理\n\n1. **创建任务。** 先把目标持久化。\n\n```python\nTASKS.create(\"Implement auth refactor\")\n# -> .tasks/task_1.json status=pending worktree=\"\"\n```\n\n2. **创建 worktree 并绑定任务。** 传入 `task_id` 自动将任务推进到 `in_progress`。\n\n```python\nWORKTREES.create(\"auth-refactor\", task_id=1)\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\n```\n\n绑定同时写入两侧状态:\n\n```python\ndef bind_worktree(self, task_id, worktree):\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n self._save(task)\n```\n\n3. 
**在 worktree 中执行命令。** `cwd` 指向隔离目录。\n\n```python\nsubprocess.run(command, shell=True, cwd=worktree_path,\n capture_output=True, text=True, timeout=300)\n```\n\n4. **收尾。** 两种选择:\n - `worktree_keep(name)` -- 保留目录供后续使用。\n - `worktree_remove(name, complete_task=True)` -- 删除目录, 完成绑定任务, 发出事件。一个调用搞定拆除 + 完成。\n\n```python\ndef remove(self, name, force=False, complete_task=False):\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n5. **事件流。** 每个生命周期步骤写入 `.worktrees/events.jsonl`:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 1, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\n事件类型: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`。\n\n崩溃后从 `.tasks/` + `.worktrees/index.json` 重建现场。会话记忆是易失的; 磁盘状态是持久的。\n\n## 相对 s11 的变更\n\n| 组件 | 之前 (s11) | 之后 (s12) |\n|--------------------|----------------------------|----------------------------------------------|\n| 协调 | 任务板 (owner/status) | 任务板 + worktree 显式绑定 |\n| 执行范围 | 共享目录 | 每个任务独立目录 |\n| 可恢复性 | 仅任务状态 | 任务状态 + worktree 索引 |\n| 收尾 | 任务完成 | 任务完成 + 显式 keep/remove |\n| 生命周期可见性 | 隐式日志 | `.worktrees/events.jsonl` 显式事件流 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. `Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect events.`\n5. 
`Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n" + "content": "# s12: Worktree + Task Isolation (Worktree 任务隔离)\r\n\r\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\r\n\r\n> *\"各干各的目录, 互不干扰\"* -- 任务管目标, worktree 管目录, 按 ID 绑定。\r\n\r\n## 问题\r\n\r\n到 s11, 智能体已经能自主认领和完成任务。但所有任务共享一个目录。两个智能体同时重构不同模块 -- A 改 `config.py`, B 也改 `config.py`, 未提交的改动互相污染, 谁也没法干净回滚。\r\n\r\n任务板管 \"做什么\" 但不管 \"在哪做\"。解法: 给每个任务一个独立的 git worktree 目录, 用任务 ID 把两边关联起来。\r\n\r\n## 解决方案\r\n\r\n```\r\nControl plane (.tasks/) Execution plane (.worktrees/)\r\n+------------------+ +------------------------+\r\n| task_1.json | | auth-refactor/ |\r\n| status: in_progress <------> branch: wt/auth-refactor\r\n| worktree: \"auth-refactor\" | task_id: 1 |\r\n+------------------+ +------------------------+\r\n| task_2.json | | ui-login/ |\r\n| status: pending <------> branch: wt/ui-login\r\n| worktree: \"ui-login\" | task_id: 2 |\r\n+------------------+ +------------------------+\r\n |\r\n index.json (worktree registry)\r\n events.jsonl (lifecycle log)\r\n\r\nState machines:\r\n Task: pending -> in_progress -> completed\r\n Worktree: absent -> active -> removed | kept\r\n```\r\n\r\n## 工作原理\r\n\r\n1. **创建任务。** 先把目标持久化。\r\n\r\n```python\r\nTASKS.create(\"Implement auth refactor\")\r\n# -> .tasks/task_1.json status=pending worktree=\"\"\r\n```\r\n\r\n2. **创建 worktree 并绑定任务。** 传入 `task_id` 自动将任务推进到 `in_progress`。\r\n\r\n```python\r\nWORKTREES.create(\"auth-refactor\", task_id=1)\r\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\r\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\r\n```\r\n\r\n绑定同时写入两侧状态:\r\n\r\n```python\r\ndef bind_worktree(self, task_id, worktree):\r\n task = self._load(task_id)\r\n task[\"worktree\"] = worktree\r\n if task[\"status\"] == \"pending\":\r\n task[\"status\"] = \"in_progress\"\r\n self._save(task)\r\n```\r\n\r\n3. 
**在 worktree 中执行命令。** `cwd` 指向隔离目录。\r\n\r\n```python\r\nsubprocess.run(command, shell=True, cwd=worktree_path,\r\n capture_output=True, text=True, timeout=300)\r\n```\r\n\r\n4. **收尾。** 两种选择:\r\n - `worktree_keep(name)` -- 保留目录供后续使用。\r\n - `worktree_remove(name, complete_task=True)` -- 删除目录, 完成绑定任务, 发出事件。一个调用搞定拆除 + 完成。\r\n\r\n```python\r\ndef remove(self, name, force=False, complete_task=False):\r\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\r\n if complete_task and wt.get(\"task_id\") is not None:\r\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\r\n self.tasks.unbind_worktree(wt[\"task_id\"])\r\n self.events.emit(\"task.completed\", ...)\r\n```\r\n\r\n5. **事件流。** 每个生命周期步骤写入 `.worktrees/events.jsonl`:\r\n\r\n```json\r\n{\r\n \"event\": \"worktree.remove.after\",\r\n \"task\": {\"id\": 1, \"status\": \"completed\"},\r\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\r\n \"ts\": 1730000000\r\n}\r\n```\r\n\r\n事件类型: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`。\r\n\r\n崩溃后从 `.tasks/` + `.worktrees/index.json` 重建现场。会话记忆是易失的; 磁盘状态是持久的。\r\n\r\n## 相对 s11 的变更\r\n\r\n| 组件 | 之前 (s11) | 之后 (s12) |\r\n|--------------------|----------------------------|----------------------------------------------|\r\n| 协调 | 任务板 (owner/status) | 任务板 + worktree 显式绑定 |\r\n| 执行范围 | 共享目录 | 每个任务独立目录 |\r\n| 可恢复性 | 仅任务状态 | 任务状态 + worktree 索引 |\r\n| 收尾 | 任务完成 | 任务完成 + 显式 keep/remove |\r\n| 生命周期可见性 | 隐式日志 | `.worktrees/events.jsonl` 显式事件流 |\r\n\r\n## 试一试\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s12_worktree_task_isolation.py\r\n```\r\n\r\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\r\n\r\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\r\n2. `Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\r\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\r\n4. 
`Keep worktree \"ui-login\", then list worktrees and inspect events.`\r\n5. `Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\r\n" }, { "version": "s01", "locale": "ja", "title": "s01: The Agent Loop", - "content": "# s01: The Agent Loop\n\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"One loop & Bash is all you need\"* -- 1つのツール + 1つのループ = エージェント。\n\n## 問題\n\n言語モデルはコードについて推論できるが、現実世界に触れられない。ファイルを読めず、テストを実行できず、エラーを確認できない。ループがなければ、ツール呼び出しのたびにユーザーが手動で結果をコピーペーストする必要がある。つまりユーザー自身がループになる。\n\n## 解決策\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n (loop until stop_reason != \"tool_use\")\n```\n\n1つの終了条件がフロー全体を制御する。モデルがツール呼び出しを止めるまでループが回り続ける。\n\n## 仕組み\n\n1. ユーザーのプロンプトが最初のメッセージになる。\n\n```python\nmessages.append({\"role\": \"user\", \"content\": query})\n```\n\n2. メッセージとツール定義をLLMに送信する。\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. アシスタントのレスポンスを追加し、`stop_reason`を確認する。ツールが呼ばれなければ終了。\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n4. 
各ツール呼び出しを実行し、結果を収集してuserメッセージとして追加。ステップ2に戻る。\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\n1つの関数にまとめると:\n\n```python\ndef agent_loop(query):\n messages = [{\"role\": \"user\", \"content\": query}]\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\nこれでエージェント全体が30行未満に収まる。本コースの残りはすべてこのループの上に積み重なる -- ループ自体は変わらない。\n\n## 変更点\n\n| Component | Before | After |\n|---------------|------------|--------------------------------|\n| Agent loop | (none) | `while True` + stop_reason |\n| Tools | (none) | `bash` (one tool) |\n| Messages | (none) | Accumulating list |\n| Control flow | (none) | `stop_reason != \"tool_use\"` |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. 
`Create a directory called test_output and write 3 files in it`\n" + "content": "# s01: The Agent Loop\r\n\r\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\r\n\r\n> *\"One loop & Bash is all you need\"* -- 1つのツール + 1つのループ = エージェント。\r\n\r\n## 問題\r\n\r\n言語モデルはコードについて推論できるが、現実世界に触れられない。ファイルを読めず、テストを実行できず、エラーを確認できない。ループがなければ、ツール呼び出しのたびにユーザーが手動で結果をコピーペーストする必要がある。つまりユーザー自身がループになる。\r\n\r\n## 解決策\r\n\r\n```\r\n+--------+ +-------+ +---------+\r\n| User | ---> | LLM | ---> | Tool |\r\n| prompt | | | | execute |\r\n+--------+ +---+---+ +----+----+\r\n ^ |\r\n | tool_result |\r\n +----------------+\r\n (loop until stop_reason != \"tool_use\")\r\n```\r\n\r\n1つの終了条件がフロー全体を制御する。モデルがツール呼び出しを止めるまでループが回り続ける。\r\n\r\n## 仕組み\r\n\r\n1. ユーザーのプロンプトが最初のメッセージになる。\r\n\r\n```python\r\nmessages.append({\"role\": \"user\", \"content\": query})\r\n```\r\n\r\n2. メッセージとツール定義をLLMに送信する。\r\n\r\n```python\r\nresponse = client.messages.create(\r\n model=MODEL, system=SYSTEM, messages=messages,\r\n tools=TOOLS, max_tokens=8000,\r\n)\r\n```\r\n\r\n3. アシスタントのレスポンスを追加し、`stop_reason`を確認する。ツールが呼ばれなければ終了。\r\n\r\n```python\r\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\r\nif response.stop_reason != \"tool_use\":\r\n return\r\n```\r\n\r\n4. 
各ツール呼び出しを実行し、結果を収集してuserメッセージとして追加。ステップ2に戻る。\r\n\r\n```python\r\nresults = []\r\nfor block in response.content:\r\n if block.type == \"tool_use\":\r\n output = run_bash(block.input[\"command\"])\r\n results.append({\r\n \"type\": \"tool_result\",\r\n \"tool_use_id\": block.id,\r\n \"content\": output,\r\n })\r\nmessages.append({\"role\": \"user\", \"content\": results})\r\n```\r\n\r\n1つの関数にまとめると:\r\n\r\n```python\r\ndef agent_loop(query):\r\n messages = [{\"role\": \"user\", \"content\": query}]\r\n while True:\r\n response = client.messages.create(\r\n model=MODEL, system=SYSTEM, messages=messages,\r\n tools=TOOLS, max_tokens=8000,\r\n )\r\n messages.append({\"role\": \"assistant\", \"content\": response.content})\r\n\r\n if response.stop_reason != \"tool_use\":\r\n return\r\n\r\n results = []\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n output = run_bash(block.input[\"command\"])\r\n results.append({\r\n \"type\": \"tool_result\",\r\n \"tool_use_id\": block.id,\r\n \"content\": output,\r\n })\r\n messages.append({\"role\": \"user\", \"content\": results})\r\n```\r\n\r\nこれでエージェント全体が30行未満に収まる。本コースの残りはすべてこのループの上に積み重なる -- ループ自体は変わらない。\r\n\r\n## 変更点\r\n\r\n| Component | Before | After |\r\n|---------------|------------|--------------------------------|\r\n| Agent loop | (none) | `while True` + stop_reason |\r\n| Tools | (none) | `bash` (one tool) |\r\n| Messages | (none) | Accumulating list |\r\n| Control flow | (none) | `stop_reason != \"tool_use\"` |\r\n\r\n## 試してみる\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s01_agent_loop.py\r\n```\r\n\r\n1. `Create a file called hello.py that prints \"Hello, World!\"`\r\n2. `List all Python files in this directory`\r\n3. `What is the current git branch?`\r\n4. 
`Create a directory called test_output and write 3 files in it`\r\n" }, { "version": "s02", "locale": "ja", "title": "s02: Tool Use", - "content": "# s02: Tool Use\n\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"ツールを足すなら、ハンドラーを1つ足すだけ\"* -- ループは変わらない。新ツールは dispatch map に登録するだけ。\n\n## 問題\n\n`bash`だけでは、エージェントは何でもシェル経由で行う。`cat`は予測不能に切り詰め、`sed`は特殊文字で壊れ、すべてのbash呼び出しが制約のないセキュリティ面になる。`read_file`や`write_file`のような専用ツールなら、ツールレベルでパスのサンドボックス化を強制できる。\n\n重要な点: ツールを追加してもループの変更は不要。\n\n## 解決策\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}.\nOne lookup replaces any if/elif chain.\n```\n\n## 仕組み\n\n1. 各ツールにハンドラ関数を定義する。パスのサンドボックス化でワークスペース外への脱出を防ぐ。\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. ディスパッチマップがツール名とハンドラを結びつける。\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. 
ループ内で名前によりハンドラをルックアップする。ループ本体はs01から不変。\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\nツール追加 = ハンドラ追加 + スキーマ追加。ループは決して変わらない。\n\n## s01からの変更点\n\n| Component | Before (s01) | After (s02) |\n|----------------|--------------------|----------------------------|\n| Tools | 1 (bash only) | 4 (bash, read, write, edit)|\n| Dispatch | Hardcoded bash call | `TOOL_HANDLERS` dict |\n| Path safety | None | `safe_path()` sandbox |\n| Agent loop | Unchanged | Unchanged |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n" + "content": "# s02: Tool Use\r\n\r\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\r\n\r\n> *\"ツールを足すなら、ハンドラーを1つ足すだけ\"* -- ループは変わらない。新ツールは dispatch map に登録するだけ。\r\n\r\n## 問題\r\n\r\n`bash`だけでは、エージェントは何でもシェル経由で行う。`cat`は予測不能に切り詰め、`sed`は特殊文字で壊れ、すべてのbash呼び出しが制約のないセキュリティ面になる。`read_file`や`write_file`のような専用ツールなら、ツールレベルでパスのサンドボックス化を強制できる。\r\n\r\n重要な点: ツールを追加してもループの変更は不要。\r\n\r\n## 解決策\r\n\r\n```\r\n+--------+ +-------+ +------------------+\r\n| User | ---> | LLM | ---> | Tool Dispatch |\r\n| prompt | | | | { |\r\n+--------+ +---+---+ | bash: run_bash |\r\n ^ | read: run_read |\r\n | | write: run_wr |\r\n +-----------+ edit: run_edit |\r\n tool_result | } |\r\n +------------------+\r\n\r\nThe dispatch map is a dict: {tool_name: handler_function}.\r\nOne lookup replaces any if/elif chain.\r\n```\r\n\r\n## 仕組み\r\n\r\n1. 
各ツールにハンドラ関数を定義する。パスのサンドボックス化でワークスペース外への脱出を防ぐ。\r\n\r\n```python\r\ndef safe_path(p: str) -> Path:\r\n path = (WORKDIR / p).resolve()\r\n if not path.is_relative_to(WORKDIR):\r\n raise ValueError(f\"Path escapes workspace: {p}\")\r\n return path\r\n\r\ndef run_read(path: str, limit: int = None) -> str:\r\n text = safe_path(path).read_text()\r\n lines = text.splitlines()\r\n if limit and limit < len(lines):\r\n lines = lines[:limit]\r\n return \"\\n\".join(lines)[:50000]\r\n```\r\n\r\n2. ディスパッチマップがツール名とハンドラを結びつける。\r\n\r\n```python\r\nTOOL_HANDLERS = {\r\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\r\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\r\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\r\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\r\n kw[\"new_text\"]),\r\n}\r\n```\r\n\r\n3. ループ内で名前によりハンドラをルックアップする。ループ本体はs01から不変。\r\n\r\n```python\r\nfor block in response.content:\r\n if block.type == \"tool_use\":\r\n handler = TOOL_HANDLERS.get(block.name)\r\n output = handler(**block.input) if handler \\\r\n else f\"Unknown tool: {block.name}\"\r\n results.append({\r\n \"type\": \"tool_result\",\r\n \"tool_use_id\": block.id,\r\n \"content\": output,\r\n })\r\n```\r\n\r\nツール追加 = ハンドラ追加 + スキーマ追加。ループは決して変わらない。\r\n\r\n## s01からの変更点\r\n\r\n| Component | Before (s01) | After (s02) |\r\n|----------------|--------------------|----------------------------|\r\n| Tools | 1 (bash only) | 4 (bash, read, write, edit)|\r\n| Dispatch | Hardcoded bash call | `TOOL_HANDLERS` dict |\r\n| Path safety | None | `safe_path()` sandbox |\r\n| Agent loop | Unchanged | Unchanged |\r\n\r\n## 試してみる\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s02_tool_use.py\r\n```\r\n\r\n1. `Read the file requirements.txt`\r\n2. `Create a file called greet.py with a greet(name) function`\r\n3. `Edit greet.py to add a docstring to the function`\r\n4. 
`Read greet.py to verify the edit worked`\r\n" }, { "version": "s03", "locale": "ja", "title": "s03: TodoWrite", - "content": "# s03: TodoWrite\n\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"計画のないエージェントは行き当たりばったり\"* -- まずステップを書き出し、それから実行。\n\n## 問題\n\nマルチステップのタスクで、モデルは途中で迷子になる。作業を繰り返したり、ステップを飛ばしたり、脱線したりする。長い会話になるほど悪化する -- ツール結果がコンテキストを埋めるにつれ、システムプロンプトの影響力が薄れる。10ステップのリファクタリングでステップ1-3を完了した後、残りを忘れて即興を始めてしまう。\n\n## 解決策\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## 仕組み\n\n1. TodoManagerはアイテムのリストをステータス付きで保持する。`in_progress`にできるのは同時に1つだけ。\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated, in_progress_count = [], 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\n \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. `todo`ツールは他のツールと同様にディスパッチマップに追加される。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. 
nagリマインダーが、モデルが3ラウンド以上`todo`を呼ばなかった場合にナッジを注入する。\n\n```python\nif rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n```\n\n「一度にin_progressは1つだけ」の制約が逐次的な集中を強制し、nagリマインダーが説明責任を生む。\n\n## s02からの変更点\n\n| Component | Before (s02) | After (s03) |\n|----------------|------------------|----------------------------|\n| Tools | 4 | 5 (+todo) |\n| Planning | None | TodoManager with statuses |\n| Nag injection | None | `` after 3 rounds|\n| Agent loop | Simple dispatch | + rounds_since_todo counter|\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n" + "content": "# s03: TodoWrite\r\n\r\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\r\n\r\n> *\"計画のないエージェントは行き当たりばったり\"* -- まずステップを書き出し、それから実行。\r\n\r\n## 問題\r\n\r\nマルチステップのタスクで、モデルは途中で迷子になる。作業を繰り返したり、ステップを飛ばしたり、脱線したりする。長い会話になるほど悪化する -- ツール結果がコンテキストを埋めるにつれ、システムプロンプトの影響力が薄れる。10ステップのリファクタリングでステップ1-3を完了した後、残りを忘れて即興を始めてしまう。\r\n\r\n## 解決策\r\n\r\n```\r\n+--------+ +-------+ +---------+\r\n| User | ---> | LLM | ---> | Tools |\r\n| prompt | | | | + todo |\r\n+--------+ +---+---+ +----+----+\r\n ^ |\r\n | tool_result |\r\n +----------------+\r\n |\r\n +-----------+-----------+\r\n | TodoManager state |\r\n | [ ] task A |\r\n | [>] task B <- doing |\r\n | [x] task C |\r\n +-----------------------+\r\n |\r\n if rounds_since_todo >= 3:\r\n inject into tool_result\r\n```\r\n\r\n## 仕組み\r\n\r\n1. 
TodoManagerはアイテムのリストをステータス付きで保持する。`in_progress`にできるのは同時に1つだけ。\r\n\r\n```python\r\nclass TodoManager:\r\n def update(self, items: list) -> str:\r\n validated, in_progress_count = [], 0\r\n for item in items:\r\n status = item.get(\"status\", \"pending\")\r\n if status == \"in_progress\":\r\n in_progress_count += 1\r\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\r\n \"status\": status})\r\n if in_progress_count > 1:\r\n raise ValueError(\"Only one task can be in_progress\")\r\n self.items = validated\r\n return self.render()\r\n```\r\n\r\n2. `todo`ツールは他のツールと同様にディスパッチマップに追加される。\r\n\r\n```python\r\nTOOL_HANDLERS = {\r\n # ...base tools...\r\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\r\n}\r\n```\r\n\r\n3. nagリマインダーが、モデルが3ラウンド以上`todo`を呼ばなかった場合にナッジを注入する。\r\n\r\n```python\r\nif rounds_since_todo >= 3 and messages:\r\n last = messages[-1]\r\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\r\n last[\"content\"].insert(0, {\r\n \"type\": \"text\",\r\n \"text\": \"Update your todos.\",\r\n })\r\n```\r\n\r\n「一度にin_progressは1つだけ」の制約が逐次的な集中を強制し、nagリマインダーが説明責任を生む。\r\n\r\n## s02からの変更点\r\n\r\n| Component | Before (s02) | After (s03) |\r\n|----------------|------------------|----------------------------|\r\n| Tools | 4 | 5 (+todo) |\r\n| Planning | None | TodoManager with statuses |\r\n| Nag injection | None | `` after 3 rounds|\r\n| Agent loop | Simple dispatch | + rounds_since_todo counter|\r\n\r\n## 試してみる\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s03_todo_write.py\r\n```\r\n\r\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\r\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\r\n3. 
`Review all Python files and fix any style issues`\r\n" }, { "version": "s04", "locale": "ja", "title": "s04: Subagents", - "content": "# s04: Subagents\n\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"大きなタスクを分割し、各サブタスクにクリーンなコンテキストを\"* -- サブエージェントは独立した messages[] を使い、メイン会話を汚さない。\n\n## 問題\n\nエージェントが作業するにつれ、messages配列は膨張し続ける。すべてのファイル読み取り、すべてのbash出力がコンテキストに永久に残る。「このプロジェクトはどのテストフレームワークを使っているか」という質問は5つのファイルを読む必要があるかもしれないが、親に必要なのは「pytest」という答えだけだ。\n\n## 解決策\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ----------> | while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <---------- | return last text |\n+------------------+ +------------------+\n\nParent context stays clean. Subagent context is discarded.\n```\n\n## 仕組み\n\n1. 親に`task`ツールを追加する。子は`task`を除くすべての基本ツールを取得する(再帰的な生成は不可)。\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. 
サブエージェントは`messages=[]`で開始し、自身のループを実行する。最終テキストだけが親に返る。\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\n子のメッセージ履歴全体(30回以上のツール呼び出し)は破棄される。親は1段落の要約を通常の`tool_result`として受け取る。\n\n## s03からの変更点\n\n| Component | Before (s03) | After (s04) |\n|----------------|------------------|---------------------------|\n| Tools | 5 | 5 (base) + task (parent) |\n| Context | Single shared | Parent + child isolation |\n| Subagent | None | `run_subagent()` function |\n| Return value | N/A | Summary text only |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. 
`Use a task to create a new module, then verify it from here`\n" + "content": "# s04: Subagents\r\n\r\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\r\n\r\n> *\"大きなタスクを分割し、各サブタスクにクリーンなコンテキストを\"* -- サブエージェントは独立した messages[] を使い、メイン会話を汚さない。\r\n\r\n## 問題\r\n\r\nエージェントが作業するにつれ、messages配列は膨張し続ける。すべてのファイル読み取り、すべてのbash出力がコンテキストに永久に残る。「このプロジェクトはどのテストフレームワークを使っているか」という質問は5つのファイルを読む必要があるかもしれないが、親に必要なのは「pytest」という答えだけだ。\r\n\r\n## 解決策\r\n\r\n```\r\nParent agent Subagent\r\n+------------------+ +------------------+\r\n| messages=[...] | | messages=[] | <-- fresh\r\n| | dispatch | |\r\n| tool: task | ----------> | while tool_use: |\r\n| prompt=\"...\" | | call tools |\r\n| | summary | append results |\r\n| result = \"...\" | <---------- | return last text |\r\n+------------------+ +------------------+\r\n\r\nParent context stays clean. Subagent context is discarded.\r\n```\r\n\r\n## 仕組み\r\n\r\n1. 親に`task`ツールを追加する。子は`task`を除くすべての基本ツールを取得する(再帰的な生成は不可)。\r\n\r\n```python\r\nPARENT_TOOLS = CHILD_TOOLS + [\r\n {\"name\": \"task\",\r\n \"description\": \"Spawn a subagent with fresh context.\",\r\n \"input_schema\": {\r\n \"type\": \"object\",\r\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\r\n \"required\": [\"prompt\"],\r\n }},\r\n]\r\n```\r\n\r\n2. 
サブエージェントは`messages=[]`で開始し、自身のループを実行する。最終テキストだけが親に返る。\r\n\r\n```python\r\ndef run_subagent(prompt: str) -> str:\r\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\r\n for _ in range(30): # safety limit\r\n response = client.messages.create(\r\n model=MODEL, system=SUBAGENT_SYSTEM,\r\n messages=sub_messages,\r\n tools=CHILD_TOOLS, max_tokens=8000,\r\n )\r\n sub_messages.append({\"role\": \"assistant\",\r\n \"content\": response.content})\r\n if response.stop_reason != \"tool_use\":\r\n break\r\n results = []\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n handler = TOOL_HANDLERS.get(block.name)\r\n output = handler(**block.input)\r\n results.append({\"type\": \"tool_result\",\r\n \"tool_use_id\": block.id,\r\n \"content\": str(output)[:50000]})\r\n sub_messages.append({\"role\": \"user\", \"content\": results})\r\n return \"\".join(\r\n b.text for b in response.content if hasattr(b, \"text\")\r\n ) or \"(no summary)\"\r\n```\r\n\r\n子のメッセージ履歴全体(30回以上のツール呼び出し)は破棄される。親は1段落の要約を通常の`tool_result`として受け取る。\r\n\r\n## s03からの変更点\r\n\r\n| Component | Before (s03) | After (s04) |\r\n|----------------|------------------|---------------------------|\r\n| Tools | 5 | 5 (base) + task (parent) |\r\n| Context | Single shared | Parent + child isolation |\r\n| Subagent | None | `run_subagent()` function |\r\n| Return value | N/A | Summary text only |\r\n\r\n## 試してみる\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s04_subagent.py\r\n```\r\n\r\n1. `Use a subtask to find what testing framework this project uses`\r\n2. `Delegate: read all .py files and summarize what each one does`\r\n3. 
`Use a task to create a new module, then verify it from here`\r\n" }, { "version": "s05", "locale": "ja", "title": "s05: Skills", - "content": "# s05: Skills\n\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"必要な知識を、必要な時に読み込む\"* -- system prompt ではなく tool_result で注入。\n\n## 問題\n\nエージェントにドメイン固有のワークフローを遵守させたい: gitの規約、テストパターン、コードレビューチェックリスト。すべてをシステムプロンプトに入れると、使われないスキルにトークンを浪費する。10スキル x 2000トークン = 20,000トークン、ほとんどが任意のタスクに無関係だ。\n\n## 解決策\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| |\n+--------------------------------------+\n```\n\n第1層: スキル*名*をシステムプロンプトに(低コスト)。第2層: スキル*本体*をtool_resultに(オンデマンド)。\n\n## 仕組み\n\n1. 各スキルは `SKILL.md` ファイルを含むディレクトリとして配置される。\n\n```\nskills/\n pdf/\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\n code-review/\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\n```\n\n2. 
SkillLoaderが `SKILL.md` を再帰的に探索し、ディレクトリ名をスキル識別子として使用する。\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body}\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n\"\n```\n\n3. 第1層はシステムプロンプトに配置。第2層は通常のツールハンドラ。\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\nモデルはどのスキルが存在するかを知り(低コスト)、関連する時にだけ読み込む(高コスト)。\n\n## s04からの変更点\n\n| Component | Before (s04) | After (s05) |\n|----------------|------------------|----------------------------|\n| Tools | 5 (base + task) | 5 (base + load_skill) |\n| System prompt | Static string | + skill descriptions |\n| Knowledge | None | skills/\\*/SKILL.md files |\n| Injection | None | Two-layer (system + result)|\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. 
`Build an MCP server using the mcp-builder skill`\n" + "content": "# s05: Skills\r\n\r\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\r\n\r\n> *\"必要な知識を、必要な時に読み込む\"* -- system prompt ではなく tool_result で注入。\r\n\r\n## 問題\r\n\r\nエージェントにドメイン固有のワークフローを遵守させたい: gitの規約、テストパターン、コードレビューチェックリスト。すべてをシステムプロンプトに入れると、使われないスキルにトークンを浪費する。10スキル x 2000トークン = 20,000トークン、ほとんどが任意のタスクに無関係だ。\r\n\r\n## 解決策\r\n\r\n```\r\nSystem prompt (Layer 1 -- always present):\r\n+--------------------------------------+\r\n| You are a coding agent. |\r\n| Skills available: |\r\n| - git: Git workflow helpers | ~100 tokens/skill\r\n| - test: Testing best practices |\r\n+--------------------------------------+\r\n\r\nWhen model calls load_skill(\"git\"):\r\n+--------------------------------------+\r\n| tool_result (Layer 2 -- on demand): |\r\n| |\r\n| Full git workflow instructions... | ~2000 tokens\r\n| Step 1: ... |\r\n| |\r\n+--------------------------------------+\r\n```\r\n\r\n第1層: スキル*名*をシステムプロンプトに(低コスト)。第2層: スキル*本体*をtool_resultに(オンデマンド)。\r\n\r\n## 仕組み\r\n\r\n1. 各スキルは `SKILL.md` ファイルを含むディレクトリとして配置される。\r\n\r\n```\r\nskills/\r\n pdf/\r\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\r\n code-review/\r\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\r\n```\r\n\r\n2. 
SkillLoaderが `SKILL.md` を再帰的に探索し、ディレクトリ名をスキル識別子として使用する。\r\n\r\n```python\r\nclass SkillLoader:\r\n def __init__(self, skills_dir: Path):\r\n self.skills = {}\r\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\r\n text = f.read_text()\r\n meta, body = self._parse_frontmatter(text)\r\n name = meta.get(\"name\", f.parent.name)\r\n self.skills[name] = {\"meta\": meta, \"body\": body}\r\n\r\n def get_descriptions(self) -> str:\r\n lines = []\r\n for name, skill in self.skills.items():\r\n desc = skill[\"meta\"].get(\"description\", \"\")\r\n lines.append(f\" - {name}: {desc}\")\r\n return \"\\n\".join(lines)\r\n\r\n def get_content(self, name: str) -> str:\r\n skill = self.skills.get(name)\r\n if not skill:\r\n return f\"Error: Unknown skill '{name}'.\"\r\n return f\"\\n{skill['body']}\\n\"\r\n```\r\n\r\n3. 第1層はシステムプロンプトに配置。第2層は通常のツールハンドラ。\r\n\r\n```python\r\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\r\nSkills available:\r\n{SKILL_LOADER.get_descriptions()}\"\"\"\r\n\r\nTOOL_HANDLERS = {\r\n # ...base tools...\r\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\r\n}\r\n```\r\n\r\nモデルはどのスキルが存在するかを知り(低コスト)、関連する時にだけ読み込む(高コスト)。\r\n\r\n## s04からの変更点\r\n\r\n| Component | Before (s04) | After (s05) |\r\n|----------------|------------------|----------------------------|\r\n| Tools | 5 (base + task) | 5 (base + load_skill) |\r\n| System prompt | Static string | + skill descriptions |\r\n| Knowledge | None | skills/\\*/SKILL.md files |\r\n| Injection | None | Two-layer (system + result)|\r\n\r\n## 試してみる\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s05_skill_loading.py\r\n```\r\n\r\n1. `What skills are available?`\r\n2. `Load the agent-builder skill and follow its instructions`\r\n3. `I need to do a code review -- load the relevant skill first`\r\n4. 
`Build an MCP server using the mcp-builder skill`\r\n" }, { "version": "s06", "locale": "ja", "title": "s06: Context Compact", - "content": "# s06: Context Compact\n\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"コンテキストはいつか溢れる、空ける手段が要る\"* -- 3層圧縮で無限セッションを実現。\n\n## 問題\n\nコンテキストウィンドウは有限だ。1000行のファイルに対する`read_file`1回で約4000トークンを消費する。30ファイルを読み20回のbashコマンドを実行すると、100,000トークン超。圧縮なしでは、エージェントは大規模コードベースで作業できない。\n\n## 解決策\n\n積極性を段階的に上げる3層構成:\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## 仕組み\n\n1. **第1層 -- micro_compact**: 各LLM呼び出しの前に、古いツール結果をプレースホルダーに置換する。\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n for _, _, part in tool_results[:-KEEP_RECENT]:\n if len(part.get(\"content\", \"\")) > 100:\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. 
**第2層 -- auto_compact**: トークンが閾値を超えたら、完全なトランスクリプトをディスクに保存し、LLMに要約を依頼する。\n\n```python\ndef auto_compact(messages: list) -> list:\n # Save transcript for recovery\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n # LLM summarizes\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **第3層 -- manual compact**: `compact`ツールが同じ要約処理をオンデマンドでトリガーする。\n\n4. ループが3層すべてを統合する:\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ... tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\nトランスクリプトがディスク上に完全な履歴を保持する。何も真に失われず、アクティブなコンテキストの外に移動されるだけ。\n\n## s05からの変更点\n\n| Component | Before (s05) | After (s06) |\n|----------------|------------------|----------------------------|\n| Tools | 5 | 5 (base + compact) |\n| Context mgmt | None | Three-layer compression |\n| Micro-compact | None | Old results -> placeholders|\n| Auto-compact | None | Token threshold trigger |\n| Transcripts | None | Saved to .transcripts/ |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n1. `Read every Python file in the agents/ directory one by one` (micro-compactが古い結果を置換するのを観察する)\n2. `Keep reading files until compression triggers automatically`\n3. 
`Use the compact tool to manually compress the conversation`\n" + "content": "# s06: Context Compact\r\n\r\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\r\n\r\n> *\"コンテキストはいつか溢れる、空ける手段が要る\"* -- 3層圧縮で無限セッションを実現。\r\n\r\n## 問題\r\n\r\nコンテキストウィンドウは有限だ。1000行のファイルに対する`read_file`1回で約4000トークンを消費する。30ファイルを読み20回のbashコマンドを実行すると、100,000トークン超。圧縮なしでは、エージェントは大規模コードベースで作業できない。\r\n\r\n## 解決策\r\n\r\n積極性を段階的に上げる3層構成:\r\n\r\n```\r\nEvery turn:\r\n+------------------+\r\n| Tool call result |\r\n+------------------+\r\n |\r\n v\r\n[Layer 1: micro_compact] (silent, every turn)\r\n Replace tool_result > 3 turns old\r\n with \"[Previous: used {tool_name}]\"\r\n |\r\n v\r\n[Check: tokens > 50000?]\r\n | |\r\n no yes\r\n | |\r\n v v\r\ncontinue [Layer 2: auto_compact]\r\n Save transcript to .transcripts/\r\n LLM summarizes conversation.\r\n Replace all messages with [summary].\r\n |\r\n v\r\n [Layer 3: compact tool]\r\n Model calls compact explicitly.\r\n Same summarization as auto_compact.\r\n```\r\n\r\n## 仕組み\r\n\r\n1. **第1層 -- micro_compact**: 各LLM呼び出しの前に、古いツール結果をプレースホルダーに置換する。\r\n\r\n```python\r\ndef micro_compact(messages: list) -> list:\r\n tool_results = []\r\n for i, msg in enumerate(messages):\r\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\r\n for j, part in enumerate(msg[\"content\"]):\r\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\r\n tool_results.append((i, j, part))\r\n if len(tool_results) <= KEEP_RECENT:\r\n return messages\r\n for _, _, part in tool_results[:-KEEP_RECENT]:\r\n if len(part.get(\"content\", \"\")) > 100:\r\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\r\n return messages\r\n```\r\n\r\n2. 
**第2層 -- auto_compact**: トークンが閾値を超えたら、完全なトランスクリプトをディスクに保存し、LLMに要約を依頼する。\r\n\r\n```python\r\ndef auto_compact(messages: list) -> list:\r\n # Save transcript for recovery\r\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\r\n with open(transcript_path, \"w\") as f:\r\n for msg in messages:\r\n f.write(json.dumps(msg, default=str) + \"\\n\")\r\n # LLM summarizes\r\n response = client.messages.create(\r\n model=MODEL,\r\n messages=[{\"role\": \"user\", \"content\":\r\n \"Summarize this conversation for continuity...\"\r\n + json.dumps(messages, default=str)[:80000]}],\r\n max_tokens=2000,\r\n )\r\n return [\r\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\r\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\r\n ]\r\n```\r\n\r\n3. **第3層 -- manual compact**: `compact`ツールが同じ要約処理をオンデマンドでトリガーする。\r\n\r\n4. ループが3層すべてを統合する:\r\n\r\n```python\r\ndef agent_loop(messages: list):\r\n while True:\r\n micro_compact(messages) # Layer 1\r\n if estimate_tokens(messages) > THRESHOLD:\r\n messages[:] = auto_compact(messages) # Layer 2\r\n response = client.messages.create(...)\r\n # ... tool execution ...\r\n if manual_compact:\r\n messages[:] = auto_compact(messages) # Layer 3\r\n```\r\n\r\nトランスクリプトがディスク上に完全な履歴を保持する。何も真に失われず、アクティブなコンテキストの外に移動されるだけ。\r\n\r\n## s05からの変更点\r\n\r\n| Component | Before (s05) | After (s06) |\r\n|----------------|------------------|----------------------------|\r\n| Tools | 5 | 5 (base + compact) |\r\n| Context mgmt | None | Three-layer compression |\r\n| Micro-compact | None | Old results -> placeholders|\r\n| Auto-compact | None | Token threshold trigger |\r\n| Transcripts | None | Saved to .transcripts/ |\r\n\r\n## 試してみる\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s06_context_compact.py\r\n```\r\n\r\n1. `Read every Python file in the agents/ directory one by one` (micro-compactが古い結果を置換するのを観察する)\r\n2. 
`Keep reading files until compression triggers automatically`\r\n3. `Use the compact tool to manually compress the conversation`\r\n" }, { "version": "s07", "locale": "ja", "title": "s07: Task System", - "content": "# s07: Task System\n\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\n\n> *\"大きな目標を小タスクに分解し、順序付けし、ディスクに記録する\"* -- ファイルベースのタスクグラフ、マルチエージェント協調の基盤。\n\n## 問題\n\ns03のTodoManagerはメモリ上のフラットなチェックリストに過ぎない: 順序なし、依存関係なし、ステータスは完了か未完了のみ。実際の目標には構造がある -- タスクBはタスクAに依存し、タスクCとDは並行実行でき、タスクEはCとDの両方を待つ。\n\n明示的な関係がなければ、エージェントは何が実行可能で、何がブロックされ、何が同時に走れるかを判断できない。しかもリストはメモリ上にしかないため、コンテキスト圧縮(s06)で消える。\n\n## 解決策\n\nフラットなチェックリストをディスクに永続化する**タスクグラフ**に昇格させる。各タスクは1つのJSONファイルで、ステータス・前方依存(`blockedBy`)・後方依存(`blocks`)を持つ。タスクグラフは常に3つの問いに答える:\n\n- **何が実行可能か?** -- `pending`ステータスで`blockedBy`が空のタスク。\n- **何がブロックされているか?** -- 未完了の依存を待つタスク。\n- **何が完了したか?** -- `completed`のタスク。完了時に後続タスクを自動的にアンブロックする。\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\"}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\n\nタスクグラフ (DAG):\n +----------+\n +--> | task 2 | --+\n | | pending | |\n+----------+ +----------+ +--> +----------+\n| task 1 | | task 4 |\n| completed| --> +----------+ +--> | blocked |\n+----------+ | task 3 | --+ +----------+\n | pending |\n +----------+\n\n順序: task 1 は 2 と 3 より先に完了する必要がある\n並行: task 2 と 3 は同時に実行できる\n依存: task 4 は 2 と 3 の両方を待つ\nステータス: pending -> in_progress -> completed\n```\n\nこのタスクグラフは s07 以降の全メカニズムの協調バックボーンとなる: バックグラウンド実行(s08)、マルチエージェントチーム(s09+)、worktree分離(s12)はすべてこの同じ構造を読み書きする。\n\n## 仕組み\n\n1. 
**TaskManager**: タスクごとに1つのJSONファイル、依存グラフ付きCRUD。\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. **依存解除**: タスク完了時に、他タスクの`blockedBy`リストから完了IDを除去し、後続タスクをアンブロックする。\n\n```python\ndef _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. **ステータス遷移 + 依存配線**: `update`がステータス変更と依存エッジを担う。\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n self._save(task)\n```\n\n4. 4つのタスクツールをディスパッチマップに追加する。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\ns07以降、タスクグラフがマルチステップ作業のデフォルト。s03のTodoは軽量な単一セッション用チェックリストとして残る。\n\n## s06からの変更点\n\n| コンポーネント | Before (s06) | After (s07) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| 計画モデル | フラットチェックリスト (メモリ) | 依存関係付きタスクグラフ (ディスク) |\n| 関係 | なし | `blockedBy` + `blocks` エッジ |\n| ステータス追跡 | 完了か未完了 | `pending` -> `in_progress` -> `completed` |\n| 永続性 | 圧縮で消失 | 圧縮・再起動後も存続 |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". 
Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\n" + "content": "# s07: Task System\r\n\r\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\r\n\r\n> *\"大きな目標を小タスクに分解し、順序付けし、ディスクに記録する\"* -- ファイルベースのタスクグラフ、マルチエージェント協調の基盤。\r\n\r\n## 問題\r\n\r\ns03のTodoManagerはメモリ上のフラットなチェックリストに過ぎない: 順序なし、依存関係なし、ステータスは完了か未完了のみ。実際の目標には構造がある -- タスクBはタスクAに依存し、タスクCとDは並行実行でき、タスクEはCとDの両方を待つ。\r\n\r\n明示的な関係がなければ、エージェントは何が実行可能で、何がブロックされ、何が同時に走れるかを判断できない。しかもリストはメモリ上にしかないため、コンテキスト圧縮(s06)で消える。\r\n\r\n## 解決策\r\n\r\nフラットなチェックリストをディスクに永続化する**タスクグラフ**に昇格させる。各タスクは1つのJSONファイルで、ステータス・前方依存(`blockedBy`)・後方依存(`blocks`)を持つ。タスクグラフは常に3つの問いに答える:\r\n\r\n- **何が実行可能か?** -- `pending`ステータスで`blockedBy`が空のタスク。\r\n- **何がブロックされているか?** -- 未完了の依存を待つタスク。\r\n- **何が完了したか?** -- `completed`のタスク。完了時に後続タスクを自動的にアンブロックする。\r\n\r\n```\r\n.tasks/\r\n task_1.json {\"id\":1, \"status\":\"completed\"}\r\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\r\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\r\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\r\n\r\nタスクグラフ (DAG):\r\n +----------+\r\n +--> | task 2 | --+\r\n | | pending | |\r\n+----------+ +----------+ +--> +----------+\r\n| task 1 | | task 4 |\r\n| completed| --> +----------+ +--> | blocked |\r\n+----------+ | task 3 | --+ +----------+\r\n | pending |\r\n +----------+\r\n\r\n順序: task 1 は 2 と 3 より先に完了する必要がある\r\n並行: task 2 と 3 は同時に実行できる\r\n依存: task 4 は 2 と 3 の両方を待つ\r\nステータス: pending -> in_progress -> completed\r\n```\r\n\r\nこのタスクグラフは s07 以降の全メカニズムの協調バックボーンとなる: バックグラウンド実行(s08)、マルチエージェントチーム(s09+)、worktree分離(s12)はすべてこの同じ構造を読み書きする。\r\n\r\n## 仕組み\r\n\r\n1. 
**TaskManager**: タスクごとに1つのJSONファイル、依存グラフ付きCRUD。\r\n\r\n```python\r\nclass TaskManager:\r\n def __init__(self, tasks_dir: Path):\r\n self.dir = tasks_dir\r\n self.dir.mkdir(exist_ok=True)\r\n self._next_id = self._max_id() + 1\r\n\r\n def create(self, subject, description=\"\"):\r\n task = {\"id\": self._next_id, \"subject\": subject,\r\n \"status\": \"pending\", \"blockedBy\": [],\r\n \"blocks\": [], \"owner\": \"\"}\r\n self._save(task)\r\n self._next_id += 1\r\n return json.dumps(task, indent=2)\r\n```\r\n\r\n2. **依存解除**: タスク完了時に、他タスクの`blockedBy`リストから完了IDを除去し、後続タスクをアンブロックする。\r\n\r\n```python\r\ndef _clear_dependency(self, completed_id):\r\n for f in self.dir.glob(\"task_*.json\"):\r\n task = json.loads(f.read_text())\r\n if completed_id in task.get(\"blockedBy\", []):\r\n task[\"blockedBy\"].remove(completed_id)\r\n self._save(task)\r\n```\r\n\r\n3. **ステータス遷移 + 依存配線**: `update`がステータス変更と依存エッジを担う。\r\n\r\n```python\r\ndef update(self, task_id, status=None,\r\n add_blocked_by=None, add_blocks=None):\r\n task = self._load(task_id)\r\n if status:\r\n task[\"status\"] = status\r\n if status == \"completed\":\r\n self._clear_dependency(task_id)\r\n self._save(task)\r\n```\r\n\r\n4. 
4つのタスクツールをディスパッチマップに追加する。\r\n\r\n```python\r\nTOOL_HANDLERS = {\r\n # ...base tools...\r\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\r\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\r\n \"task_list\": lambda **kw: TASKS.list_all(),\r\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\r\n}\r\n```\r\n\r\ns07以降、タスクグラフがマルチステップ作業のデフォルト。s03のTodoは軽量な単一セッション用チェックリストとして残る。\r\n\r\n## s06からの変更点\r\n\r\n| コンポーネント | Before (s06) | After (s07) |\r\n|---|---|---|\r\n| Tools | 5 | 8 (`task_create/update/list/get`) |\r\n| 計画モデル | フラットチェックリスト (メモリ) | 依存関係付きタスクグラフ (ディスク) |\r\n| 関係 | なし | `blockedBy` + `blocks` エッジ |\r\n| ステータス追跡 | 完了か未完了 | `pending` -> `in_progress` -> `completed` |\r\n| 永続性 | 圧縮で消失 | 圧縮・再起動後も存続 |\r\n\r\n## 試してみる\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s07_task_system.py\r\n```\r\n\r\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\r\n2. `List all tasks and show the dependency graph`\r\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\r\n4. `Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\r\n" }, { "version": "s08", "locale": "ja", "title": "s08: Background Tasks", - "content": "# s08: Background Tasks\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\n\n> *\"遅い操作はバックグラウンドへ、エージェントは次を考え続ける\"* -- デーモンスレッドがコマンド実行、完了後に通知を注入。\n\n## 問題\n\n一部のコマンドは数分かかる: `npm install`、`pytest`、`docker build`。ブロッキングループでは、モデルはサブプロセスの完了を待って座っている。ユーザーが「依存関係をインストールして、その間にconfigファイルを作って」と言っても、エージェントは並列ではなく逐次的に処理する。\n\n## 解決策\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | subprocess runs |\n| ... | | ... 
|\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- results injected before next LLM call --+\n```\n\n## 仕組み\n\n1. BackgroundManagerがスレッドセーフな通知キューでタスクを追跡する。\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()`がデーモンスレッドを開始し、即座にリターンする。\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. サブプロセス完了時に、結果を通知キューへ。\n\n```python\ndef _execute(self, task_id, command):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id, \"result\": output[:500]})\n```\n\n4. 
エージェントループが各LLM呼び出しの前に通知をドレインする。\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\n messages.append({\"role\": \"user\",\n \"content\": f\"\\n{notif_text}\\n\"\n f\"\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\nループはシングルスレッドのまま。サブプロセスI/Oだけが並列化される。\n\n## s07からの変更点\n\n| Component | Before (s07) | After (s08) |\n|----------------|------------------|----------------------------|\n| Tools | 8 | 6 (base + background_run + check)|\n| Execution | Blocking only | Blocking + background threads|\n| Notification | None | Queue drained per loop |\n| Concurrency | None | Daemon threads |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. `Run pytest in the background and keep working on other things`\n" + "content": "# s08: Background Tasks\r\n\r\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\r\n\r\n> *\"遅い操作はバックグラウンドへ、エージェントは次を考え続ける\"* -- デーモンスレッドがコマンド実行、完了後に通知を注入。\r\n\r\n## 問題\r\n\r\n一部のコマンドは数分かかる: `npm install`、`pytest`、`docker build`。ブロッキングループでは、モデルはサブプロセスの完了を待って座っている。ユーザーが「依存関係をインストールして、その間にconfigファイルを作って」と言っても、エージェントは並列ではなく逐次的に処理する。\r\n\r\n## 解決策\r\n\r\n```\r\nMain thread Background thread\r\n+-----------------+ +-----------------+\r\n| agent loop | | subprocess runs |\r\n| ... | | ... 
|\r\n| [LLM call] <---+------- | enqueue(result) |\r\n| ^drain queue | +-----------------+\r\n+-----------------+\r\n\r\nTimeline:\r\nAgent --[spawn A]--[spawn B]--[other work]----\r\n | |\r\n v v\r\n [A runs] [B runs] (parallel)\r\n | |\r\n +-- results injected before next LLM call --+\r\n```\r\n\r\n## 仕組み\r\n\r\n1. BackgroundManagerがスレッドセーフな通知キューでタスクを追跡する。\r\n\r\n```python\r\nclass BackgroundManager:\r\n def __init__(self):\r\n self.tasks = {}\r\n self._notification_queue = []\r\n self._lock = threading.Lock()\r\n```\r\n\r\n2. `run()`がデーモンスレッドを開始し、即座にリターンする。\r\n\r\n```python\r\ndef run(self, command: str) -> str:\r\n task_id = str(uuid.uuid4())[:8]\r\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\r\n thread = threading.Thread(\r\n target=self._execute, args=(task_id, command), daemon=True)\r\n thread.start()\r\n return f\"Background task {task_id} started\"\r\n```\r\n\r\n3. サブプロセス完了時に、結果を通知キューへ。\r\n\r\n```python\r\ndef _execute(self, task_id, command):\r\n try:\r\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\r\n capture_output=True, text=True, timeout=300)\r\n output = (r.stdout + r.stderr).strip()[:50000]\r\n except subprocess.TimeoutExpired:\r\n output = \"Error: Timeout (300s)\"\r\n with self._lock:\r\n self._notification_queue.append({\r\n \"task_id\": task_id, \"result\": output[:500]})\r\n```\r\n\r\n4. 
エージェントループが各LLM呼び出しの前に通知をドレインする。\r\n\r\n```python\r\ndef agent_loop(messages: list):\r\n while True:\r\n notifs = BG.drain_notifications()\r\n if notifs:\r\n notif_text = \"\\n\".join(\r\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\r\n messages.append({\"role\": \"user\",\r\n \"content\": f\"\\n{notif_text}\\n\"\r\n f\"\"})\r\n messages.append({\"role\": \"assistant\",\r\n \"content\": \"Noted background results.\"})\r\n response = client.messages.create(...)\r\n```\r\n\r\nループはシングルスレッドのまま。サブプロセスI/Oだけが並列化される。\r\n\r\n## s07からの変更点\r\n\r\n| Component | Before (s07) | After (s08) |\r\n|----------------|------------------|----------------------------|\r\n| Tools | 8 | 6 (base + background_run + check)|\r\n| Execution | Blocking only | Blocking + background threads|\r\n| Notification | None | Queue drained per loop |\r\n| Concurrency | None | Daemon threads |\r\n\r\n## 試してみる\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s08_background_tasks.py\r\n```\r\n\r\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\r\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\r\n3. `Run pytest in the background and keep working on other things`\r\n" }, { "version": "s09", "locale": "ja", "title": "s09: Agent Teams", - "content": "# s09: Agent Teams\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\n\n> *\"一人で終わらないなら、チームメイトに任せる\"* -- 永続チームメイト + 非同期メールボックス。\n\n## 問題\n\nサブエージェント(s04)は使い捨てだ: 生成し、作業し、要約を返し、消滅する。アイデンティティもなく、呼び出し間の記憶もない。バックグラウンドタスク(s08)はシェルコマンドを実行するが、LLM誘導の意思決定はできない。\n\n本物のチームワークには: (1)単一プロンプトを超えて存続する永続エージェント、(2)アイデンティティとライフサイクル管理、(3)エージェント間の通信チャネルが必要だ。\n\n## 解決策\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... 
-> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n```\n\n## 仕組み\n\n1. TeammateManagerがconfig.jsonでチーム名簿を管理する。\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. `spawn()`がチームメイトを作成し、そのエージェントループをスレッドで開始する。\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. MessageBus: 追記専用のJSONLインボックス。`send()`がJSON行を追記し、`read_inbox()`がすべて読み取ってドレインする。\n\n```python\nclass MessageBus:\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. 
各チームメイトは各LLM呼び出しの前にインボックスを確認し、受信メッセージをコンテキストに注入する。\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n```\n\n## s08からの変更点\n\n| Component | Before (s08) | After (s09) |\n|----------------|------------------|----------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| Agents | Single | Lead + N teammates |\n| Persistence | None | config.json + JSONL inboxes|\n| Threads | Background cmds | Full agent loops per thread|\n| Lifecycle | Fire-and-forget | idle -> working -> idle |\n| Communication | None | message + broadcast |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. `/team`と入力してステータス付きのチーム名簿を確認する\n5. `/inbox`と入力してリーダーのインボックスを手動確認する\n" + "content": "# s09: Agent Teams\r\n\r\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\r\n\r\n> *\"一人で終わらないなら、チームメイトに任せる\"* -- 永続チームメイト + 非同期メールボックス。\r\n\r\n## 問題\r\n\r\nサブエージェント(s04)は使い捨てだ: 生成し、作業し、要約を返し、消滅する。アイデンティティもなく、呼び出し間の記憶もない。バックグラウンドタスク(s08)はシェルコマンドを実行するが、LLM誘導の意思決定はできない。\r\n\r\n本物のチームワークには: (1)単一プロンプトを超えて存続する永続エージェント、(2)アイデンティティとライフサイクル管理、(3)エージェント間の通信チャネルが必要だ。\r\n\r\n## 解決策\r\n\r\n```\r\nTeammate lifecycle:\r\n spawn -> WORKING -> IDLE -> WORKING -> ... 
-> SHUTDOWN\r\n\r\nCommunication:\r\n .team/\r\n config.json <- team roster + statuses\r\n inbox/\r\n alice.jsonl <- append-only, drain-on-read\r\n bob.jsonl\r\n lead.jsonl\r\n\r\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\r\n | alice | -----------------------------> | bob |\r\n | loop | bob.jsonl << {json_line} | loop |\r\n +--------+ +--------+\r\n ^ |\r\n | BUS.read_inbox(\"alice\") |\r\n +---- alice.jsonl -> read + drain ---------+\r\n```\r\n\r\n## 仕組み\r\n\r\n1. TeammateManagerがconfig.jsonでチーム名簿を管理する。\r\n\r\n```python\r\nclass TeammateManager:\r\n def __init__(self, team_dir: Path):\r\n self.dir = team_dir\r\n self.dir.mkdir(exist_ok=True)\r\n self.config_path = self.dir / \"config.json\"\r\n self.config = self._load_config()\r\n self.threads = {}\r\n```\r\n\r\n2. `spawn()`がチームメイトを作成し、そのエージェントループをスレッドで開始する。\r\n\r\n```python\r\ndef spawn(self, name: str, role: str, prompt: str) -> str:\r\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\r\n self.config[\"members\"].append(member)\r\n self._save_config()\r\n thread = threading.Thread(\r\n target=self._teammate_loop,\r\n args=(name, role, prompt), daemon=True)\r\n thread.start()\r\n return f\"Spawned teammate '{name}' (role: {role})\"\r\n```\r\n\r\n3. MessageBus: 追記専用のJSONLインボックス。`send()`がJSON行を追記し、`read_inbox()`がすべて読み取ってドレインする。\r\n\r\n```python\r\nclass MessageBus:\r\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\r\n msg = {\"type\": msg_type, \"from\": sender,\r\n \"content\": content, \"timestamp\": time.time()}\r\n if extra:\r\n msg.update(extra)\r\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\r\n f.write(json.dumps(msg) + \"\\n\")\r\n\r\n def read_inbox(self, name):\r\n path = self.dir / f\"{name}.jsonl\"\r\n if not path.exists(): return \"[]\"\r\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\r\n path.write_text(\"\") # drain\r\n return json.dumps(msgs, indent=2)\r\n```\r\n\r\n4. 
各チームメイトは各LLM呼び出しの前にインボックスを確認し、受信メッセージをコンテキストに注入する。\r\n\r\n```python\r\ndef _teammate_loop(self, name, role, prompt):\r\n messages = [{\"role\": \"user\", \"content\": prompt}]\r\n for _ in range(50):\r\n inbox = BUS.read_inbox(name)\r\n if inbox != \"[]\":\r\n messages.append({\"role\": \"user\",\r\n \"content\": f\"{inbox}\"})\r\n messages.append({\"role\": \"assistant\",\r\n \"content\": \"Noted inbox messages.\"})\r\n response = client.messages.create(...)\r\n if response.stop_reason != \"tool_use\":\r\n break\r\n # execute tools, append results...\r\n self._find_member(name)[\"status\"] = \"idle\"\r\n```\r\n\r\n## s08からの変更点\r\n\r\n| Component | Before (s08) | After (s09) |\r\n|----------------|------------------|----------------------------|\r\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\r\n| Agents | Single | Lead + N teammates |\r\n| Persistence | None | config.json + JSONL inboxes|\r\n| Threads | Background cmds | Full agent loops per thread|\r\n| Lifecycle | Fire-and-forget | idle -> working -> idle |\r\n| Communication | None | message + broadcast |\r\n\r\n## 試してみる\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s09_agent_teams.py\r\n```\r\n\r\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\r\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\r\n3. `Check the lead inbox for any messages`\r\n4. `/team`と入力してステータス付きのチーム名簿を確認する\r\n5. 
`/inbox`と入力してリーダーのインボックスを手動確認する\r\n" }, { "version": "s10", "locale": "ja", "title": "s10: Team Protocols", - "content": "# s10: Team Protocols\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\n\n> *\"チームメイト間には統一の通信ルールが必要\"* -- 1つの request-response パターンが全交渉を駆動。\n\n## 問題\n\ns09ではチームメイトが作業し通信するが、構造化された協調がない:\n\n**シャットダウン**: スレッドを強制終了するとファイルが中途半端に書かれ、config.jsonが不正な状態になる。ハンドシェイクが必要 -- リーダーが要求し、チームメイトが承認(完了して退出)か拒否(作業継続)する。\n\n**プラン承認**: リーダーが「認証モジュールをリファクタリングして」と言うと、チームメイトは即座に開始する。リスクの高い変更では、実行前にリーダーが計画をレビューすべきだ。\n\n両方とも同じ構造: 一方がユニークIDを持つリクエストを送り、他方がそのIDで応答する。\n\n## 解決策\n\n```\nShutdown Protocol Plan Approval Protocol\n================== ======================\n\nLead Teammate Teammate Lead\n | | | |\n |--shutdown_req-->| |--plan_req------>|\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\n | | | |\n |<--shutdown_resp-| |<--plan_resp-----|\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\n | approve:true} | | approve:true} |\n\nShared FSM:\n [pending] --approve--> [approved]\n [pending] --reject---> [rejected]\n\nTrackers:\n shutdown_requests = {req_id: {target, status}}\n plan_requests = {req_id: {from, plan, status}}\n```\n\n## 仕組み\n\n1. リーダーがrequest_idを生成し、インボックス経由でシャットダウンを開始する。\n\n```python\nshutdown_requests = {}\n\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id})\n return f\"Shutdown request {req_id} sent (status: pending)\"\n```\n\n2. チームメイトがリクエストを受信し、承認または拒否で応答する。\n\n```python\nif tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": approve})\n```\n\n3. 
プラン承認も同一パターン。チームメイトがプランを提出(request_idを生成)、リーダーがレビュー(同じrequest_idを参照)。\n\n```python\nplan_requests = {}\n\ndef handle_plan_review(request_id, approve, feedback=\"\"):\n req = plan_requests[request_id]\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", req[\"from\"], feedback,\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n```\n\n1つのFSM、2つの応用。同じ`pending -> approved | rejected`状態機械が、あらゆるリクエスト-レスポンスプロトコルに適用できる。\n\n## s09からの変更点\n\n| Component | Before (s09) | After (s10) |\n|----------------|------------------|------------------------------|\n| Tools | 9 | 12 (+shutdown_req/resp +plan)|\n| Shutdown | Natural exit only| Request-response handshake |\n| Plan gating | None | Submit/review with approval |\n| Correlation | None | request_id per request |\n| FSM | None | pending -> approved/rejected |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s10_team_protocols.py\n```\n\n1. `Spawn alice as a coder. Then request her shutdown.`\n2. `List teammates to see alice's status after shutdown approval`\n3. `Spawn bob with a risky refactoring task. Review and reject his plan.`\n4. `Spawn charlie, have him submit a plan, then approve it.`\n5. 
`/team`と入力してステータスを監視する\n" + "content": "# s10: Team Protocols\r\n\r\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\r\n\r\n> *\"チームメイト間には統一の通信ルールが必要\"* -- 1つの request-response パターンが全交渉を駆動。\r\n\r\n## 問題\r\n\r\ns09ではチームメイトが作業し通信するが、構造化された協調がない:\r\n\r\n**シャットダウン**: スレッドを強制終了するとファイルが中途半端に書かれ、config.jsonが不正な状態になる。ハンドシェイクが必要 -- リーダーが要求し、チームメイトが承認(完了して退出)か拒否(作業継続)する。\r\n\r\n**プラン承認**: リーダーが「認証モジュールをリファクタリングして」と言うと、チームメイトは即座に開始する。リスクの高い変更では、実行前にリーダーが計画をレビューすべきだ。\r\n\r\n両方とも同じ構造: 一方がユニークIDを持つリクエストを送り、他方がそのIDで応答する。\r\n\r\n## 解決策\r\n\r\n```\r\nShutdown Protocol Plan Approval Protocol\r\n================== ======================\r\n\r\nLead Teammate Teammate Lead\r\n | | | |\r\n |--shutdown_req-->| |--plan_req------>|\r\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\r\n | | | |\r\n |<--shutdown_resp-| |<--plan_resp-----|\r\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\r\n | approve:true} | | approve:true} |\r\n\r\nShared FSM:\r\n [pending] --approve--> [approved]\r\n [pending] --reject---> [rejected]\r\n\r\nTrackers:\r\n shutdown_requests = {req_id: {target, status}}\r\n plan_requests = {req_id: {from, plan, status}}\r\n```\r\n\r\n## 仕組み\r\n\r\n1. リーダーがrequest_idを生成し、インボックス経由でシャットダウンを開始する。\r\n\r\n```python\r\nshutdown_requests = {}\r\n\r\ndef handle_shutdown_request(teammate: str) -> str:\r\n req_id = str(uuid.uuid4())[:8]\r\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\r\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\r\n \"shutdown_request\", {\"request_id\": req_id})\r\n return f\"Shutdown request {req_id} sent (status: pending)\"\r\n```\r\n\r\n2. 
チームメイトがリクエストを受信し、承認または拒否で応答する。\r\n\r\n```python\r\nif tool_name == \"shutdown_response\":\r\n req_id = args[\"request_id\"]\r\n approve = args[\"approve\"]\r\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\r\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\r\n \"shutdown_response\",\r\n {\"request_id\": req_id, \"approve\": approve})\r\n```\r\n\r\n3. プラン承認も同一パターン。チームメイトがプランを提出(request_idを生成)、リーダーがレビュー(同じrequest_idを参照)。\r\n\r\n```python\r\nplan_requests = {}\r\n\r\ndef handle_plan_review(request_id, approve, feedback=\"\"):\r\n req = plan_requests[request_id]\r\n req[\"status\"] = \"approved\" if approve else \"rejected\"\r\n BUS.send(\"lead\", req[\"from\"], feedback,\r\n \"plan_approval_response\",\r\n {\"request_id\": request_id, \"approve\": approve})\r\n```\r\n\r\n1つのFSM、2つの応用。同じ`pending -> approved | rejected`状態機械が、あらゆるリクエスト-レスポンスプロトコルに適用できる。\r\n\r\n## s09からの変更点\r\n\r\n| Component | Before (s09) | After (s10) |\r\n|----------------|------------------|------------------------------|\r\n| Tools | 9 | 12 (+shutdown_req/resp +plan)|\r\n| Shutdown | Natural exit only| Request-response handshake |\r\n| Plan gating | None | Submit/review with approval |\r\n| Correlation | None | request_id per request |\r\n| FSM | None | pending -> approved/rejected |\r\n\r\n## 試してみる\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s10_team_protocols.py\r\n```\r\n\r\n1. `Spawn alice as a coder. Then request her shutdown.`\r\n2. `List teammates to see alice's status after shutdown approval`\r\n3. `Spawn bob with a risky refactoring task. Review and reject his plan.`\r\n4. `Spawn charlie, have him submit a plan, then approve it.`\r\n5. 
`/team`と入力してステータスを監視する\r\n" }, { "version": "s11", "locale": "ja", "title": "s11: Autonomous Agents", - "content": "# s11: Autonomous Agents\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\n\n> *\"チームメイトが自らボードを見て、仕事を取る\"* -- リーダーが逐一割り振る必要はない。\n\n## 問題\n\ns09-s10では、チームメイトは明示的に指示された時のみ作業する。リーダーは各チームメイトを特定のプロンプトでspawnしなければならない。タスクボードに未割り当てのタスクが10個あっても、リーダーが手動で各タスクを割り当てる。これはスケールしない。\n\n真の自律性とは、チームメイトが自分で作業を見つけること: タスクボードをスキャンし、未確保のタスクを確保し、作業し、完了したら次を探す。\n\nもう1つの問題: コンテキスト圧縮(s06)後にエージェントが自分の正体を忘れる可能性がある。アイデンティティ再注入がこれを解決する。\n\n## 解決策\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n```\n\n## 仕組み\n\n1. チームメイトのループはWORKとIDLEの2フェーズ。LLMがツール呼び出しを止めた時(または`idle`ツールを呼んだ時)、IDLEフェーズに入る。\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. 
IDLEフェーズがインボックスとタスクボードをポーリングする。\n\n```python\ndef _idle_poll(self, name, messages):\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n return True\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{unclaimed[0]['id']}: \"\n f\"{unclaimed[0]['subject']}\"})\n return True\n return False # timeout -> shutdown\n```\n\n3. タスクボードスキャン: pendingかつ未割り当てかつブロックされていないタスクを探す。\n\n```python\ndef scan_unclaimed_tasks() -> list:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n```\n\n4. アイデンティティ再注入: コンテキストが短すぎる(圧縮が起きた)場合にアイデンティティブロックを挿入する。\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, \"\n f\"team: {team_name}. Continue your work.\"})\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n## s10からの変更点\n\n| Component | Before (s10) | After (s11) |\n|----------------|------------------|----------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| Autonomy | Lead-directed | Self-organizing |\n| Idle phase | None | Poll inbox + task board |\n| Task claiming | Manual only | Auto-claim unclaimed tasks |\n| Identity | System prompt | + re-injection after compress|\n| Timeout | None | 60s idle -> auto shutdown |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. 
`Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. `/tasks`と入力してオーナー付きのタスクボードを確認する\n5. `/team`と入力して誰が作業中でアイドルかを監視する\n" + "content": "# s11: Autonomous Agents\r\n\r\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\r\n\r\n> *\"チームメイトが自らボードを見て、仕事を取る\"* -- リーダーが逐一割り振る必要はない。\r\n\r\n## 問題\r\n\r\ns09-s10では、チームメイトは明示的に指示された時のみ作業する。リーダーは各チームメイトを特定のプロンプトでspawnしなければならない。タスクボードに未割り当てのタスクが10個あっても、リーダーが手動で各タスクを割り当てる。これはスケールしない。\r\n\r\n真の自律性とは、チームメイトが自分で作業を見つけること: タスクボードをスキャンし、未確保のタスクを確保し、作業し、完了したら次を探す。\r\n\r\nもう1つの問題: コンテキスト圧縮(s06)後にエージェントが自分の正体を忘れる可能性がある。アイデンティティ再注入がこれを解決する。\r\n\r\n## 解決策\r\n\r\n```\r\nTeammate lifecycle with idle cycle:\r\n\r\n+-------+\r\n| spawn |\r\n+---+---+\r\n |\r\n v\r\n+-------+ tool_use +-------+\r\n| WORK | <------------- | LLM |\r\n+---+---+ +-------+\r\n |\r\n | stop_reason != tool_use (or idle tool called)\r\n v\r\n+--------+\r\n| IDLE | poll every 5s for up to 60s\r\n+---+----+\r\n |\r\n +---> check inbox --> message? ----------> WORK\r\n |\r\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\r\n |\r\n +---> 60s timeout ----------------------> SHUTDOWN\r\n\r\nIdentity re-injection after compression:\r\n if len(messages) <= 3:\r\n messages.insert(0, identity_block)\r\n```\r\n\r\n## 仕組み\r\n\r\n1. チームメイトのループはWORKとIDLEの2フェーズ。LLMがツール呼び出しを止めた時(または`idle`ツールを呼んだ時)、IDLEフェーズに入る。\r\n\r\n```python\r\ndef _loop(self, name, role, prompt):\r\n while True:\r\n # -- WORK PHASE --\r\n messages = [{\"role\": \"user\", \"content\": prompt}]\r\n for _ in range(50):\r\n response = client.messages.create(...)\r\n if response.stop_reason != \"tool_use\":\r\n break\r\n # execute tools...\r\n if idle_requested:\r\n break\r\n\r\n # -- IDLE PHASE --\r\n self._set_status(name, \"idle\")\r\n resume = self._idle_poll(name, messages)\r\n if not resume:\r\n self._set_status(name, \"shutdown\")\r\n return\r\n self._set_status(name, \"working\")\r\n```\r\n\r\n2. 
IDLEフェーズがインボックスとタスクボードをポーリングする。\r\n\r\n```python\r\ndef _idle_poll(self, name, messages):\r\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12\r\n time.sleep(POLL_INTERVAL)\r\n inbox = BUS.read_inbox(name)\r\n if inbox:\r\n messages.append({\"role\": \"user\",\r\n \"content\": f\"{inbox}\"})\r\n return True\r\n unclaimed = scan_unclaimed_tasks()\r\n if unclaimed:\r\n claim_task(unclaimed[0][\"id\"], name)\r\n messages.append({\"role\": \"user\",\r\n \"content\": f\"Task #{unclaimed[0]['id']}: \"\r\n f\"{unclaimed[0]['subject']}\"})\r\n return True\r\n return False # timeout -> shutdown\r\n```\r\n\r\n3. タスクボードスキャン: pendingかつ未割り当てかつブロックされていないタスクを探す。\r\n\r\n```python\r\ndef scan_unclaimed_tasks() -> list:\r\n unclaimed = []\r\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\r\n task = json.loads(f.read_text())\r\n if (task.get(\"status\") == \"pending\"\r\n and not task.get(\"owner\")\r\n and not task.get(\"blockedBy\")):\r\n unclaimed.append(task)\r\n return unclaimed\r\n```\r\n\r\n4. アイデンティティ再注入: コンテキストが短すぎる(圧縮が起きた)場合にアイデンティティブロックを挿入する。\r\n\r\n```python\r\nif len(messages) <= 3:\r\n messages.insert(0, {\"role\": \"user\",\r\n \"content\": f\"You are '{name}', role: {role}, \"\r\n f\"team: {team_name}. Continue your work.\"})\r\n messages.insert(1, {\"role\": \"assistant\",\r\n \"content\": f\"I am {name}. Continuing.\"})\r\n```\r\n\r\n## s10からの変更点\r\n\r\n| Component | Before (s10) | After (s11) |\r\n|----------------|------------------|----------------------------|\r\n| Tools | 12 | 14 (+idle, +claim_task) |\r\n| Autonomy | Lead-directed | Self-organizing |\r\n| Idle phase | None | Poll inbox + task board |\r\n| Task claiming | Manual only | Auto-claim unclaimed tasks |\r\n| Identity | System prompt | + re-injection after compress|\r\n| Timeout | None | 60s idle -> auto shutdown |\r\n\r\n## 試してみる\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s11_autonomous_agents.py\r\n```\r\n\r\n1. `Create 3 tasks on the board, then spawn alice and bob. 
Watch them auto-claim.`\r\n2. `Spawn a coder teammate and let it find work from the task board itself`\r\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\r\n4. `/tasks`と入力してオーナー付きのタスクボードを確認する\r\n5. `/team`と入力して誰が作業中でアイドルかを監視する\r\n" }, { "version": "s12", "locale": "ja", "title": "s12: Worktree + Task Isolation", - "content": "# s12: Worktree + Task Isolation\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\n\n> *\"各自のディレクトリで作業し、互いに干渉しない\"* -- タスクは目標を管理、worktree はディレクトリを管理、IDで紐付け。\n\n## 問題\n\ns11までにエージェントはタスクを自律的に確保して完了できるようになった。しかし全タスクが1つの共有ディレクトリで走る。2つのエージェントが同時に異なるモジュールをリファクタリングすると衝突する: 片方が`config.py`を編集し、もう片方も`config.py`を編集し、未コミットの変更が混ざり合い、どちらもクリーンにロールバックできない。\n\nタスクボードは*何をやるか*を追跡するが、*どこでやるか*には関知しない。解決策: 各タスクに専用のgit worktreeディレクトリを与える。タスクが目標を管理し、worktreeが実行コンテキストを管理する。タスクIDで紐付ける。\n\n## 解決策\n\n```\nControl plane (.tasks/) Execution plane (.worktrees/)\n+------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress <------> branch: wt/auth-refactor\n| worktree: \"auth-refactor\" | task_id: 1 |\n+------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending <------> branch: wt/ui-login\n| worktree: \"ui-login\" | task_id: 2 |\n+------------------+ +------------------------+\n |\n index.json (worktree registry)\n events.jsonl (lifecycle log)\n\nState machines:\n Task: pending -> in_progress -> completed\n Worktree: absent -> active -> removed | kept\n```\n\n## 仕組み\n\n1. **タスクを作成する。** まず目標を永続化する。\n\n```python\nTASKS.create(\"Implement auth refactor\")\n# -> .tasks/task_1.json status=pending worktree=\"\"\n```\n\n2. 
**worktreeを作成してタスクに紐付ける。** `task_id`を渡すと、タスクが自動的に`in_progress`に遷移する。\n\n```python\nWORKTREES.create(\"auth-refactor\", task_id=1)\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\n```\n\n紐付けは両側に状態を書き込む:\n\n```python\ndef bind_worktree(self, task_id, worktree):\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n self._save(task)\n```\n\n3. **worktree内でコマンドを実行する。** `cwd`が分離ディレクトリを指す。\n\n```python\nsubprocess.run(command, shell=True, cwd=worktree_path,\n capture_output=True, text=True, timeout=300)\n```\n\n4. **終了処理。** 2つの選択肢:\n - `worktree_keep(name)` -- ディレクトリを保持する。\n - `worktree_remove(name, complete_task=True)` -- ディレクトリを削除し、紐付けられたタスクを完了し、イベントを発行する。1回の呼び出しで後片付けと完了を処理する。\n\n```python\ndef remove(self, name, force=False, complete_task=False):\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n5. 
**イベントストリーム。** ライフサイクルの各ステップが`.worktrees/events.jsonl`に記録される:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 1, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\n発行されるイベント: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`。\n\nクラッシュ後も`.tasks/` + `.worktrees/index.json`から状態を再構築できる。会話メモリは揮発性だが、ファイル状態は永続的だ。\n\n## s11からの変更点\n\n| Component | Before (s11) | After (s12) |\n|--------------------|----------------------------|----------------------------------------------|\n| Coordination | Task board (owner/status) | Task board + explicit worktree binding |\n| Execution scope | Shared directory | Task-scoped isolated directory |\n| Recoverability | Task status only | Task status + worktree index |\n| Teardown | Task completion | Task completion + explicit keep/remove |\n| Lifecycle visibility | Implicit in logs | Explicit events in `.worktrees/events.jsonl` |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. `Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect events.`\n5. 
`Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n" + "content": "# s12: Worktree + Task Isolation\r\n\r\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\r\n\r\n> *\"各自のディレクトリで作業し、互いに干渉しない\"* -- タスクは目標を管理、worktree はディレクトリを管理、IDで紐付け。\r\n\r\n## 問題\r\n\r\ns11までにエージェントはタスクを自律的に確保して完了できるようになった。しかし全タスクが1つの共有ディレクトリで走る。2つのエージェントが同時に異なるモジュールをリファクタリングすると衝突する: 片方が`config.py`を編集し、もう片方も`config.py`を編集し、未コミットの変更が混ざり合い、どちらもクリーンにロールバックできない。\r\n\r\nタスクボードは*何をやるか*を追跡するが、*どこでやるか*には関知しない。解決策: 各タスクに専用のgit worktreeディレクトリを与える。タスクが目標を管理し、worktreeが実行コンテキストを管理する。タスクIDで紐付ける。\r\n\r\n## 解決策\r\n\r\n```\r\nControl plane (.tasks/) Execution plane (.worktrees/)\r\n+------------------+ +------------------------+\r\n| task_1.json | | auth-refactor/ |\r\n| status: in_progress <------> branch: wt/auth-refactor\r\n| worktree: \"auth-refactor\" | task_id: 1 |\r\n+------------------+ +------------------------+\r\n| task_2.json | | ui-login/ |\r\n| status: pending <------> branch: wt/ui-login\r\n| worktree: \"ui-login\" | task_id: 2 |\r\n+------------------+ +------------------------+\r\n |\r\n index.json (worktree registry)\r\n events.jsonl (lifecycle log)\r\n\r\nState machines:\r\n Task: pending -> in_progress -> completed\r\n Worktree: absent -> active -> removed | kept\r\n```\r\n\r\n## 仕組み\r\n\r\n1. **タスクを作成する。** まず目標を永続化する。\r\n\r\n```python\r\nTASKS.create(\"Implement auth refactor\")\r\n# -> .tasks/task_1.json status=pending worktree=\"\"\r\n```\r\n\r\n2. 
**worktreeを作成してタスクに紐付ける。** `task_id`を渡すと、タスクが自動的に`in_progress`に遷移する。\r\n\r\n```python\r\nWORKTREES.create(\"auth-refactor\", task_id=1)\r\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\r\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\r\n```\r\n\r\n紐付けは両側に状態を書き込む:\r\n\r\n```python\r\ndef bind_worktree(self, task_id, worktree):\r\n task = self._load(task_id)\r\n task[\"worktree\"] = worktree\r\n if task[\"status\"] == \"pending\":\r\n task[\"status\"] = \"in_progress\"\r\n self._save(task)\r\n```\r\n\r\n3. **worktree内でコマンドを実行する。** `cwd`が分離ディレクトリを指す。\r\n\r\n```python\r\nsubprocess.run(command, shell=True, cwd=worktree_path,\r\n capture_output=True, text=True, timeout=300)\r\n```\r\n\r\n4. **終了処理。** 2つの選択肢:\r\n - `worktree_keep(name)` -- ディレクトリを保持する。\r\n - `worktree_remove(name, complete_task=True)` -- ディレクトリを削除し、紐付けられたタスクを完了し、イベントを発行する。1回の呼び出しで後片付けと完了を処理する。\r\n\r\n```python\r\ndef remove(self, name, force=False, complete_task=False):\r\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\r\n if complete_task and wt.get(\"task_id\") is not None:\r\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\r\n self.tasks.unbind_worktree(wt[\"task_id\"])\r\n self.events.emit(\"task.completed\", ...)\r\n```\r\n\r\n5. 
**イベントストリーム。** ライフサイクルの各ステップが`.worktrees/events.jsonl`に記録される:\r\n\r\n```json\r\n{\r\n \"event\": \"worktree.remove.after\",\r\n \"task\": {\"id\": 1, \"status\": \"completed\"},\r\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\r\n \"ts\": 1730000000\r\n}\r\n```\r\n\r\n発行されるイベント: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`。\r\n\r\nクラッシュ後も`.tasks/` + `.worktrees/index.json`から状態を再構築できる。会話メモリは揮発性だが、ファイル状態は永続的だ。\r\n\r\n## s11からの変更点\r\n\r\n| Component | Before (s11) | After (s12) |\r\n|--------------------|----------------------------|----------------------------------------------|\r\n| Coordination | Task board (owner/status) | Task board + explicit worktree binding |\r\n| Execution scope | Shared directory | Task-scoped isolated directory |\r\n| Recoverability | Task status only | Task status + worktree index |\r\n| Teardown | Task completion | Task completion + explicit keep/remove |\r\n| Lifecycle visibility | Implicit in logs | Explicit events in `.worktrees/events.jsonl` |\r\n\r\n## 試してみる\r\n\r\n```sh\r\ncd learn-claude-code\r\npython agents/s12_worktree_task_isolation.py\r\n```\r\n\r\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\r\n2. `Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\r\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\r\n4. `Keep worktree \"ui-login\", then list worktrees and inspect events.`\r\n5. 
`Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\r\n" + }, + { + "version": "s01", + "locale": "ru", + "title": "s01: Цикл агента", + "content": "# s01: Цикл агента\n\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Одного цикла и Bash уже достаточно\"* — один инструмент + один цикл = агент.\n\n## Проблема\n\nЯзыковая модель умеет рассуждать о коде, но не может *потрогать* реальный мир: читать файлы, запускать тесты, проверять ошибки. Без цикла каждый вызов инструмента требует вручную копировать результат обратно в диалог. Циклом становитесь вы.\n\n## Решение\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n (цикл, пока stop_reason == \"tool_use\")\n```\n\nВесь поток управления держится на одном условии выхода. Цикл работает, пока модель продолжает вызывать инструменты.\n\n## Как это работает\n\n1. Запрос пользователя становится первым сообщением.\n\n```python\nmessages.append({\"role\": \"user\", \"content\": query})\n```\n\n2. Отправляем `messages` и описания инструментов в LLM.\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. Добавляем ответ ассистента. Проверяем `stop_reason`: если модель не вызвала инструмент, работа закончена.\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n4. Выполняем каждый вызов инструмента, собираем результаты и добавляем их как сообщение пользователя. 
Затем возвращаемся к шагу 2.\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\nЕсли собрать всё в одну функцию:\n\n```python\ndef agent_loop(query):\n messages = [{\"role\": \"user\", \"content\": query}]\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\nВот и весь агент меньше чем в 30 строк. Всё остальное в этом курсе наслоится сверху, не меняя сам цикл.\n\n## Что изменилось\n\n| Компонент | До | После |\n|-----------|----|--------|\n| Цикл агента | отсутствует | `while True` + `stop_reason` |\n| Инструменты | отсутствуют | `bash` (один инструмент) |\n| Сообщения | отсутствуют | накапливаемый список |\n| Управление потоком | отсутствует | `stop_reason != \"tool_use\"` |\n\n## Попробуйте\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n1. `Создай файл hello.py, который печатает \"Hello, World!\"`\n2. `Покажи все Python-файлы в этом каталоге`\n3. `Какая сейчас активная git-ветка?`\n4. 
`Создай каталог test_output и запиши в него 3 файла`\n" + }, + { + "version": "s02", + "locale": "ru", + "title": "s02: Инструменты", + "content": "# s02: Инструменты\n\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Добавить инструмент — значит добавить один обработчик\"* — цикл остаётся тем же; новый инструмент означает новый обработчик в карте диспетчеризации.\n\n## Проблема\n\nКогда у агента есть только `bash`, он запускает shell для всего подряд. `cat` непредсказуемо обрезает вывод, `sed` ломается на спецсимволах, а каждый вызов bash становится широкой и слабо ограниченной поверхностью атаки. Специализированные инструменты вроде `read_file` и `write_file` позволяют навязать песочницу по путям на уровне самого инструмента.\n\nГлавная мысль: чтобы добавить инструмент, не нужно менять сам цикл.\n\n## Решение\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nКарта диспетчеризации — это словарь: {tool_name: handler_function}.\nОдин lookup заменяет любую if/elif-цепочку.\n```\n\n## Как это работает\n\n1. У каждого инструмента есть свой обработчик. Песочница по путям не даёт выйти за пределы рабочей директории.\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. 
Карта диспетчеризации связывает имена инструментов с обработчиками.\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. Внутри цикла мы просто находим обработчик по имени. Тело цикла не меняется со времён s01.\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\nДобавить инструмент = добавить обработчик + схему инструмента. Цикл остаётся нетронутым.\n\n## Что изменилось относительно s01\n\n| Компонент | Было (s01) | Стало (s02) |\n|-----------|------------|-------------|\n| Инструменты | 1 (`bash`) | 4 (`bash`, `read`, `write`, `edit`) |\n| Диспетчеризация | жёсткий вызов `bash` | словарь `TOOL_HANDLERS` |\n| Безопасность путей | нет | песочница через `safe_path()` |\n| Цикл агента | без изменений | без изменений |\n\n## Попробуйте\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n1. `Прочитай файл requirements.txt`\n2. `Создай файл greet.py с функцией greet(name)`\n3. `Отредактируй greet.py и добавь docstring к функции`\n4. `Снова прочитай greet.py и проверь, что правка применилась`\n" + }, + { + "version": "s03", + "locale": "ru", + "title": "s03: План задач", + "content": "# s03: План задач\n\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Агент без плана быстро начинает блуждать\"* — сначала перечисли шаги, потом выполняй.\n\n## Проблема\n\nНа многошаговых задачах модель теряет нить. Она повторяет уже сделанное, пропускает шаги или уходит в сторону. 
В длинных диалогах это усиливается: системный промпт постепенно тонет в результатах инструментов. Рефакторинг из 10 шагов может пройти шаги 1-3, а потом модель начнёт импровизировать, потому что забыла пункты 4-10.\n\n## Решение\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## Как это работает\n\n1. `TodoManager` хранит элементы со статусами. Только один элемент одновременно может быть `in_progress`.\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated, in_progress_count = [], 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\n \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. Инструмент `todo` подключается в карту диспетчеризации так же, как и любой другой.\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. Если модель 3 и более раунда не вызывает `todo`, в контекст подмешивается напоминание.\n\n```python\nif rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos.\",\n })\n```\n\nОграничение «только один `in_progress`» заставляет модель фокусироваться последовательно. 
Напоминание создаёт ощущение внешней подотчётности.\n\n## Что изменилось относительно s02\n\n| Компонент | Было (s02) | Стало (s03) |\n|-----------|------------|-------------|\n| Инструменты | 4 | 5 (`+todo`) |\n| Планирование | нет | `TodoManager` со статусами |\n| Напоминание | нет | `` после 3 раундов |\n| Цикл агента | простой dispatch | `+ rounds_since_todo` counter |\n\n## Попробуйте\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n1. `Отрефактори файл hello.py: добавь type hints, docstring и main guard`\n2. `Создай Python-пакет с __init__.py, utils.py и tests/test_utils.py`\n3. `Проверь все Python-файлы и исправь стилевые проблемы`\n" + }, + { + "version": "s04", + "locale": "ru", + "title": "s04: Субагенты", + "content": "# s04: Субагенты\n\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Разбивай крупную работу на части; каждой подзадаче — чистый контекст\"* — у каждой подзадачи свой независимый `messages[]`, поэтому основной диалог не захламляется.\n\n## Проблема\n\nПока агент работает, массив `messages` растёт. Каждое чтение файла, каждый вывод bash остаётся в контексте навсегда. На вопрос «Какой тестовый фреймворк использует этот проект?» может понадобиться прочитать 5 файлов, но родительскому агенту нужен только ответ: `pytest`.\n\n## Решение\n\n```\nРодительский агент Субагент\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- чистый старт\n| | dispatch | |\n| tool: task | -----------> | while tool_use: |\n| prompt=\"...\" | | call tools |\n| | сводка | append results |\n| result = \"...\" | <----------- | return last text |\n+------------------+ +------------------+\n\nКонтекст родителя остаётся чистым. Контекст субагента выбрасывается.\n```\n\n## Как это работает\n\n1. Родитель получает инструмент `task`. 
Дочерний агент получает все базовые инструменты, кроме `task`, чтобы не было рекурсивного бесконтрольного спавна.\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. Субагент стартует с `messages=[]` и крутит свой отдельный цикл. Родителю возвращается только финальный текст.\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(нет сводки)\"\n```\n\nВся история дочернего агента, включая десятки tool call, выбрасывается. Родитель получает один абзац сводки как обычный `tool_result`.\n\n## Что изменилось относительно s03\n\n| Компонент | Было (s03) | Стало (s04) |\n|-----------|------------|-------------|\n| Инструменты | 5 | 5 базовых + `task` у родителя |\n| Контекст | один общий | изоляция родителя и ребёнка |\n| Субагент | отсутствует | функция `run_subagent()` |\n| Возвращаемое значение | N/A | только текст сводки |\n\n## Попробуйте\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n1. 
`Используй подзадачу, чтобы выяснить, какой тестовый фреймворк использует этот проект`\n2. `Делегируй чтение всех .py-файлов и верни краткое описание каждого`\n3. `Через task создай новый модуль, а затем проверь его отсюда`\n" + }, + { + "version": "s05", + "locale": "ru", + "title": "s05: Навыки", + "content": "# s05: Навыки\n\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Подгружай знания по мере необходимости, а не заранее\"* — знания подмешиваются через `tool_result`, а не через системный промпт заранее.\n\n## Проблема\n\nВы хотите, чтобы агент соблюдал доменные процессы: git-конвенции, паттерны тестирования, чеклисты код-ревью. Если сложить всё это в системный промпт, вы сожжёте кучу токенов на навыки, которые в конкретной задаче вообще не пригодятся. 10 навыков по 2000 токенов каждый — это 20 000 токенов, большая часть которых в текущем запросе просто лишняя.\n\n## Решение\n\n```\nСистемный промпт (Слой 1 — всегда присутствует):\n+--------------------------------------+\n| Ты агент по работе с кодом. |\n| Доступные навыки: |\n| - git: Помощники по git-процессу | ~100 токенов/навык\n| - test: Практики тестирования |\n+--------------------------------------+\n\nКогда модель вызывает load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Слой 2 — по запросу): |\n| |\n| Полные инструкции по git... | ~2000 токенов\n| Шаг 1: ... |\n| |\n+--------------------------------------+\n```\n\nСлой 1: в системном промпте лежат только *имена* навыков, это дёшево. \nСлой 2: *полное содержимое* навыка приходит через `tool_result`, только когда оно реально нужно.\n\n## Как это работает\n\n1. Каждый навык — это директория с файлом `SKILL.md` и YAML frontmatter.\n\n```\nskills/\n pdf/\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\n code-review/\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\n```\n\n2. 
`SkillLoader` сканирует `SKILL.md` и использует имя директории как идентификатор навыка.\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body}\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n\"\n```\n\n3. Слой 1 идёт в системный промпт. Слой 2 — это ещё один обычный обработчик инструмента.\n\n```python\nSYSTEM = f\"\"\"Ты агент по работе с кодом в {WORKDIR}.\nДоступные навыки:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\nМодель знает, какие навыки вообще доступны, но тянет полный текст только по необходимости.\n\n## Что изменилось относительно s04\n\n| Компонент | Было (s04) | Стало (s05) |\n|-----------|------------|-------------|\n| Инструменты | 5 (base + task) | 5 (base + `load_skill`) |\n| Системный промпт | статическая строка | + описания навыков |\n| Знания | отсутствуют | `skills/*/SKILL.md` |\n| Инъекция | отсутствует | двухслойная: system + tool_result |\n\n## Попробуйте\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n1. `Какие навыки доступны?`\n2. `Загрузи навык agent-builder и следуй его инструкциям`\n3. `Мне нужен code review — сначала загрузи подходящий навык`\n4. 
`Собери MCP-сервер с помощью навыка mcp-builder`\n" + }, + { + "version": "s06", + "locale": "ru", + "title": "s06: Сжатие контекста", + "content": "# s06: Сжатие контекста\n\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Контекст всё равно заполнится; нужно уметь освобождать место\"* — трёхслойная стратегия сжатия делает бесконечные сессии возможными.\n\n## Проблема\n\nОкно контекста конечно. Один `read_file` на файле в 1000 строк легко стоит ~4000 токенов. Прочитайте 30 файлов и выполните 20 bash-команд — и вы уже упрётесь в 100 000+ токенов. Без сжатия агент не сможет работать на больших кодовых базах.\n\n## Решение\n\nТри слоя, от самых мягких к самым агрессивным:\n\n```\nКаждый ход:\n+------------------+\n| Результат инструмента |\n+------------------+\n |\n v\n[Слой 1: микросжатие] (тихо, каждый ход)\n Заменяет tool_result старше 3 ходов\n на \"[Previous: used {tool_name}]\"\n |\n v\n[Проверка: токенов > 50000?]\n | |\n no yes\n | |\n v v\nпродолжить [Слой 2: автосжатие]\n Сохраняет transcript в .transcripts/\n LLM делает сводку разговора.\n Все сообщения заменяются на [сводку].\n |\n v\n [Слой 3: инструмент сжатия]\n Модель сама вызывает compact.\n Используется та же сводка, что и в auto_compact.\n```\n\n## Как это работает\n\n1. **Слой 1 — микросжатие (`micro_compact`)**: перед каждым вызовом LLM старые результаты инструментов заменяются заглушками.\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n for _, _, part in tool_results[:-KEEP_RECENT]:\n if len(part.get(\"content\", \"\")) > 100:\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. 
**Слой 2 — автосжатие (`auto_compact`)**: когда оценка токенов превышает порог, полный transcript сохраняется на диск, после чего LLM просится сделать сводку.\n\n```python\ndef auto_compact(messages: list) -> list:\n # Save transcript for recovery\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n # LLM summarizes\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **Слой 3 — ручной `compact`**: инструмент `compact` запускает то же сжатие по требованию.\n\n4. Цикл объединяет все три слоя:\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ... tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\nПолная история не исчезает безвозвратно: она просто уходит из активного контекста в transcripts на диске.\n\n## Что изменилось относительно s05\n\n| Компонент | Было (s05) | Стало (s06) |\n|-----------|------------|-------------|\n| Инструменты | 5 | 5 (base + `compact`) |\n| Управление контекстом | отсутствует | трёхслойное сжатие |\n| Микросжатие | отсутствует | старые результаты → placeholders |\n| Автосжатие | отсутствует | срабатывает по порогу токенов |\n| Transcripts | отсутствуют | сохраняются в `.transcripts/` |\n\n## Попробуйте\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n1. 
`Прочитай по очереди каждый Python-файл в каталоге agents/`\n2. `Продолжай читать файлы, пока сжатие не сработает автоматически`\n3. `Используй инструмент compact, чтобы вручную сжать разговор`\n" + }, + { + "version": "s07", + "locale": "ru", + "title": "s07: Система задач", + "content": "# s07: Система задач\n\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\n\n> *\"Разбей большую цель на маленькие задачи, упорядочь их и сохрани на диск\"* — файловый граф задач с зависимостями становится фундаментом для мультиагентной координации.\n\n## Проблема\n\n`TodoManager` из s03 — это плоский чеклист в памяти: без порядка, без зависимостей и почти без состояний. Но реальные цели имеют структуру: задача B зависит от A, задачи C и D можно делать параллельно, а задача E ждёт завершения и C, и D.\n\nЕсли связи между задачами не выражены явно, агент не понимает:\n\n- что уже готово к выполнению;\n- что заблокировано;\n- что можно делать параллельно.\n\nА поскольку список живёт только в памяти, сжатие контекста из s06 просто его сотрёт.\n\n## Решение\n\nПоднимаем чеклист до **графа задач**, который хранится на диске. Каждая задача — отдельный JSON-файл со статусом, зависимостями (`blockedBy`) и обратными связями (`blocks`). 
Такой граф в любой момент отвечает на три вопроса:\n\n- **Что готово?** — задачи со статусом `pending` и пустым `blockedBy`.\n- **Что заблокировано?** — задачи, ожидающие незавершённых зависимостей.\n- **Что уже завершено?** — `completed` задачи, которые автоматически разблокируют зависящие от них.\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\"}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\n\nГраф задач (DAG):\n +----------+\n +--> | task 2 | --+\n | | pending | |\n+----------+ +----------+ +--> +----------+\n| task 1 | | task 4 |\n| completed| --> +----------+ +--> | blocked |\n+----------+ | task 3 | --+ +----------+\n | pending |\n +----------+\n\nПорядок: task 1 должна завершиться раньше 2 и 3\nПараллелизм: tasks 2 и 3 могут идти одновременно\nЗависимости: task 4 ждёт и 2, и 3\nСтатусы: pending -> in_progress -> completed\n```\n\nЭтот граф задач становится опорной структурой для всего, что идёт после s07: фоновые задачи (s08), команды агентов (s09+) и изоляция через рабочие деревья (s12) читают и пишут в одну и ту же систему.\n\n## Как это работает\n\n1. **TaskManager**: один JSON-файл на задачу, CRUD и граф зависимостей.\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. 
**Разрешение зависимостей**: завершение задачи убирает её ID из `blockedBy` у всех остальных, автоматически разблокируя зависимые задачи.\n\n```python\ndef _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. **Переходы статусов и wiring зависимостей**: `update` обрабатывает переходы и связи.\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n self._save(task)\n```\n\n4. В карте диспетчеризации появляется четыре инструмента для задач.\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\nНачиная с s07, для многошаговой работы граф задач становится стандартом. План задач из s03 остаётся удобным вариантом для коротких односеансовых чеклистов.\n\n## Что изменилось относительно s06\n\n| Компонент | Было (s06) | Стало (s07) |\n|-----------|------------|-------------|\n| Инструменты | 5 | 8 (`task_create/update/list/get`) |\n| Модель планирования | плоский чеклист в памяти | граф задач с зависимостями на диске |\n| Связи | отсутствуют | рёбра `blockedBy` и `blocks` |\n| Статусы | сделано / не сделано | `pending` -> `in_progress` -> `completed` |\n| Постоянство | теряется при compact | переживает compact и рестарты |\n\n## Попробуйте\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n1. `Создай 3 задачи: \"Подготовить проект\", \"Написать код\", \"Написать тесты\". Свяжи их зависимостями по порядку.`\n2. 
`Покажи все задачи и граф их зависимостей`\n3. `Заверши задачу 1, а затем снова покажи список задач, чтобы увидеть разблокировку задачи 2`\n4. `Создай доску задач для рефакторинга: parse -> transform -> emit -> test, где transform и emit могут идти параллельно после parse`\n" + }, + { + "version": "s08", + "locale": "ru", + "title": "s08: Фоновые задачи", + "content": "# s08: Фоновые задачи\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\n\n> *\"Медленные операции можно увести в фон, пока агент думает дальше\"* — демон-потоки запускают команды в фоне и подмешивают уведомления по завершении.\n\n## Проблема\n\nНекоторые команды работают минутами: `npm install`, `pytest`, `docker build`. В блокирующем цикле модель просто сидит и ждёт. Если пользователь говорит «установи зависимости и пока это идёт, создай config-файл», агент выполняет это последовательно, а не параллельно.\n\n## Решение\n\n```\nОсновной поток Фоновый поток\n+-----------------+ +-----------------+\n| цикл агента | | идёт subprocess |\n| ... | | ... |\n| [вызов LLM] <---+------ | enqueue(result) |\n| ^слив очереди | +-----------------+\n+-----------------+\n\nЛиния времени:\nАгент --[спавн A]--[спавн B]--[другая работа]----\n | |\n v v\n [A идёт] [B идёт] (параллельно)\n | |\n +-- результаты подмешиваются перед следующим вызовом LLM --+\n```\n\n## Как это работает\n\n1. `BackgroundManager` хранит задачи и потокобезопасную очередь уведомлений.\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()` запускает демон-поток и сразу возвращает управление.\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. 
Когда subprocess завершается, результат попадает в очередь уведомлений.\n\n```python\ndef _execute(self, task_id, command):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id, \"result\": output[:500]})\n```\n\n4. Перед каждым вызовом LLM агент опустошает очередь уведомлений.\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\n messages.append({\"role\": \"user\",\n \"content\": f\"\\n{notif_text}\\n\"\n f\"\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\nСам агентный цикл остаётся однопоточным. В параллель уходит только I/O подзадач и subprocess.\n\n## Что изменилось относительно s07\n\n| Компонент | Было (s07) | Стало (s08) |\n|-----------|------------|-------------|\n| Инструменты | 8 | 6 (base + `background_run` + `check`) |\n| Выполнение | только блокирующее | блокирующее + фоновые потоки |\n| Уведомления | отсутствуют | очередь очищается на каждом цикле |\n| Параллельность | отсутствует | демон-потоки |\n\n## Попробуйте\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n1. `Запусти \"sleep 5 && echo done\" в фоне, а пока он работает, создай файл`\n2. `Стартуй 3 фоновые задачи: \"sleep 2\", \"sleep 4\", \"sleep 6\". Затем проверь их статус.`\n3. 
`Запусти pytest в фоне и продолжай параллельно делать другие вещи`\n" + }, + { + "version": "s09", + "locale": "ru", + "title": "s09: Команды агентов", + "content": "# s09: Команды агентов\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\n\n> *\"Когда задача слишком велика для одного, делегируй её напарникам\"* — постоянные напарники и асинхронные почтовые ящики.\n\n## Проблема\n\nСубагенты из s04 — одноразовые: создали, поработали, вернули сводку, умерли. У них нет личности, нет памяти между вызовами. Фоновые задачи из s08 умеют запускать shell-команды, но не принимают LLM-управляемые решения.\n\nДля настоящей командной работы нужны:\n\n1. постоянные агенты, живущие дольше одного запроса;\n2. идентичность и управление жизненным циклом;\n3. канал связи между агентами.\n\n## Решение\n\n```\nЖизненный цикл напарника:\n запуск -> РАБОТАЕТ -> ОЖИДАНИЕ -> РАБОТАЕТ -> ... -> ЗАВЕРШЕНИЕ\n\nКоммуникация:\n .team/\n config.json <- состав команды + статусы\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n```\n\n## Как это работает\n\n1. `TeammateManager` поддерживает `config.json` со списком участников команды.\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. 
`spawn()` создаёт нового участника и запускает его агентный цикл в отдельном потоке.\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. `MessageBus` использует append-only JSONL-входящий ящик. `send()` дописывает строку, `read_inbox()` читает всё и очищает файл.\n\n```python\nclass MessageBus:\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. 
Каждый напарник перед каждым LLM-вызовом проверяет входящий ящик и добавляет новые сообщения в контекст.\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n```\n\n## Что изменилось относительно s08\n\n| Компонент | Было (s08) | Стало (s09) |\n|-----------|------------|-------------|\n| Инструменты | 6 | 9 (`+spawn/send/read_inbox`) |\n| Агенты | один агент | lead + N напарников |\n| Постоянство | отсутствует | `config.json` + JSONL-входящий ящик |\n| Потоки | только фоновые команды | полноценный агентный цикл на поток |\n| Жизненный цикл | fire-and-forget | `idle -> working -> idle` |\n| Коммуникация | отсутствует | личные сообщения + broadcast |\n\n## Попробуйте\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n1. `Заспавнь alice как coder и bob как tester. Пусть alice отправит bob сообщение.`\n2. `Разошли всем напарникам сообщение \"status update: phase 1 complete\"`\n3. `Проверь, есть ли сообщения во входящем ящике лида`\n4. `Введи /team, чтобы посмотреть состав команды и статусы`\n5. 
`Введи /inbox, чтобы вручную проверить входящий ящик лидера`\n" + }, + { + "version": "s10", + "locale": "ru", + "title": "s10: Командные протоколы", + "content": "# s10: Командные протоколы\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\n\n> *\"Напарникам нужны общие правила общения\"* — один паттерн «запрос-ответ» управляет всеми переговорами.\n\n## Проблема\n\nВ s09 напарники умеют работать и общаться, но у них нет структурированной координации.\n\n**Shutdown**: если просто убить поток, файлы могут остаться наполовину записанными, а `config.json` — устаревшим. Нужен handshake: лидер просит завершиться, напарник либо соглашается и аккуратно выходит, либо отклоняет запрос и продолжает работу.\n\n**Plan approval**: если лидер говорит «отрефактори auth-модуль», напарник начинает сразу. Но для рискованных изменений лидер должен сначала посмотреть план.\n\nОбе ситуации имеют одну и ту же форму: одна сторона отправляет запрос с уникальным ID, другая отвечает, ссылаясь на тот же ID.\n\n## Решение\n\n```\nПротокол завершения Протокол утверждения плана\n=================== =========================\n\nЛидер Напарник Напарник Лидер\n | | | |\n |--shutdown_req-->| |--plan_req------>|\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\n | | | |\n |<--shutdown_resp-| |<--plan_resp-----|\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\n | approve:true} | | approve:true} |\n\nОбщий FSM:\n [pending] --approve--> [approved]\n [pending] --reject---> [rejected]\n\nТрекеры:\n shutdown_requests = {req_id: {target, status}}\n plan_requests = {req_id: {from, plan, status}}\n```\n\n## Как это работает\n\n1. 
Лидер инициирует shutdown: генерирует `request_id` и отправляет запрос через входящий ящик.\n\n```python\nshutdown_requests = {}\n\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id})\n return f\"Shutdown request {req_id} sent (status: pending)\"\n```\n\n2. Напарник получает запрос и отвечает approve/reject.\n\n```python\nif tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": approve})\n```\n\n3. Approval плана устроен так же. Напарник отправляет план, создавая `request_id`, а лидер отвечает на тот же ID.\n\n```python\nplan_requests = {}\n\ndef handle_plan_review(request_id, approve, feedback=\"\"):\n req = plan_requests[request_id]\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", req[\"from\"], feedback,\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n```\n\nОдин FSM, два применения. Машина состояний `pending -> approved | rejected` подходит для любого протокола «запрос-ответ».\n\n## Что изменилось относительно s09\n\n| Компонент | Было (s09) | Стало (s10) |\n|-----------|------------|-------------|\n| Инструменты | 9 | 12 (`+shutdown_req/resp +plan`) |\n| Shutdown | только естественный выход | согласованный протокол «запрос-ответ» |\n| Gating плана | отсутствует | submit/review с подтверждением |\n| Корреляция | отсутствует | `request_id` на каждый запрос |\n| FSM | отсутствует | `pending -> approved/rejected` |\n\n## Попробуйте\n\n```sh\ncd learn-claude-code\npython agents/s10_team_protocols.py\n```\n\n1. 
`Заспавнь alice как coder, а затем запроси её завершение`\n2. `Покажи список напарников и проверь статус alice после одобрения shutdown`\n3. `Заспавнь bob с рискованной задачей на рефакторинг. Посмотри его план и отклони его.`\n4. `Заспавнь charlie, попроси его отправить план, а затем одобри его`\n5. `Введи /team, чтобы следить за статусами`\n" + }, + { + "version": "s11", + "locale": "ru", + "title": "s11: Автономные агенты", + "content": "# s11: Автономные агенты\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\n\n> *\"Напарники сами смотрят на доску и забирают задачи\"* — лидеру больше не нужно вручную раздавать каждую задачу.\n\n## Проблема\n\nВ s09-s10 напарники работают только по прямому указанию. Лидеру приходится спавнить каждого с конкретным запросом. Есть 10 ничьих задач на доске? Значит, лидер назначит все 10 вручную. Такой подход не масштабируется.\n\nНастоящая автономность выглядит иначе: напарники сами смотрят на доску задач, сами забирают ничьи задачи, выполняют их и затем ищут следующую работу.\n\nЕсть ещё один тонкий момент: после сжатия контекста из s06 агент может забыть, кто он такой. Это исправляется повторным внедрением его идентичности.\n\n## Решение\n\n```\nЖизненный цикл напарника с idle-циклом:\n\n+-------+\n| запуск |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| РАБОТА| <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use (или вызван idle tool)\n v\n+--------+\n| ОЖИД. | опрос каждые 5 с, максимум 60 с\n+---+----+\n |\n +---> проверить inbox --> есть письмо? ---> РАБОТА\n |\n +---> проверить .tasks/ -> есть свободная? -> забрать -> РАБОТА\n |\n +---> тайм-аут 60 c --------------------> ЗАВЕРШЕНИЕ\n\nПовторная инъекция идентичности после compress:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n```\n\n## Как это работает\n\n1. Цикл напарника делится на две фазы: `WORK` и `IDLE`. 
Когда LLM перестаёт вызывать инструменты или сама вызывает `idle`, агент переходит в ожидание.\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. В idle-фазе агент циклически опрашивает входящий ящик и доску задач.\n\n```python\ndef _idle_poll(self, name, messages):\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox}\"})\n return True\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{unclaimed[0]['id']}: \"\n f\"{unclaimed[0]['subject']}\"})\n return True\n return False # timeout -> shutdown\n```\n\n3. Сканирование доски задач: ищем `pending`, без владельца и без блокировок.\n\n```python\ndef scan_unclaimed_tasks() -> list:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n```\n\n4. Повторная инъекция идентичности: если сообщений слишком мало, значит, скорее всего, сработал compact, и агенту нужно напомнить, кто он.\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, \"\n f\"team: {team_name}. 
Continue your work.\"})\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n## Что изменилось относительно s10\n\n| Компонент | Было (s10) | Стало (s11) |\n|-----------|------------|-------------|\n| Инструменты | 12 | 14 (`+idle`, `+claim_task`) |\n| Автономность | лидер раздаёт работу | самоорганизация |\n| Фаза ожидания | отсутствует | опрос входящего ящика и доски задач |\n| Захват задач | только вручную | автозахват свободных задач |\n| Идентичность | только системный промпт | + повторное внедрение после compact |\n| Тайм-аут | отсутствует | 60 секунд простоя -> автозавершение |\n\n## Попробуйте\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n1. `Создай на доске 3 задачи, затем заспавнь alice и bob. Посмотри, как они сами их заберут.`\n2. `Заспавнь напарника-coder и позволь ему самому найти работу на доске задач`\n3. `Создай задачи с зависимостями и посмотри, как напарники соблюдают порядок блокировок`\n4. `Введи /tasks, чтобы посмотреть доску задач с владельцами`\n5. `Введи /team, чтобы увидеть, кто сейчас работает, а кто простаивает`\n" + }, + { + "version": "s12", + "locale": "ru", + "title": "s12: Изоляция задач и рабочих деревьев", + "content": "# s12: Изоляция задач и рабочих деревьев\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\n\n> *\"Каждый работает в своей директории и никому не мешает\"* — задачи управляют целями, рабочие деревья управляют каталогами, а связываются через ID.\n\n## Проблема\n\nК s11 агенты уже умеют автоматически забирать и завершать задачи. Но все задачи всё ещё выполняются в одной общей директории. Если два агента одновременно рефакторят разные модули, они начнут сталкиваться: агент A редактирует `config.py`, агент B тоже редактирует `config.py`, unstaged changes смешиваются, а откатить это чисто уже нельзя.\n\nДоска задач знает *что* нужно сделать, но ничего не говорит о том, *где* это делать. 
Исправление — дать каждой задаче собственную директорию `git worktree`. Задачи управляют целями, рабочие деревья — контекстом выполнения. Связка идёт по ID задачи.\n\n## Решение\n\n```\nКонтур управления (.tasks/) Контур выполнения (.worktrees/)\n+------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress <------> branch: wt/auth-refactor\n| worktree: \"auth-refactor\" | task_id: 1 |\n+------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending <------> branch: wt/ui-login\n| worktree: \"ui-login\" | task_id: 2 |\n+------------------+ +------------------------+\n |\n index.json (реестр worktree)\n events.jsonl (журнал жизненного цикла)\n\nМашины состояний:\n Задача: pending -> in_progress -> completed\n Worktree: absent -> active -> removed | kept\n```\n\n## Как это работает\n\n1. **Создаём задачу.** Сначала сохраняем цель.\n\n```python\nTASKS.create(\"Implement auth refactor\")\n# -> .tasks/task_1.json status=pending worktree=\"\"\n```\n\n2. **Создаём worktree и привязываем к задаче.** Если передать `task_id`, задача автоматически перейдёт в `in_progress`.\n\n```python\nWORKTREES.create(\"auth-refactor\", task_id=1)\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\n# -> новая запись в index.json, а task_1.json получает worktree=\"auth-refactor\"\n```\n\nПривязка обновляет состояние с обеих сторон:\n\n```python\ndef bind_worktree(self, task_id, worktree):\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n self._save(task)\n```\n\n3. **Выполняем команды внутри worktree.** `cwd` указывает на изолированную директорию.\n\n```python\nsubprocess.run(command, shell=True, cwd=worktree_path,\n capture_output=True, text=True, timeout=300)\n```\n\n4. 
**Закрытие работы.** Есть два варианта:\n\n- `worktree_keep(name)` — сохранить директорию на потом.\n- `worktree_remove(name, complete_task=True)` — удалить директорию, завершить связанную задачу и сгенерировать событие. Один вызов закрывает и окружение, и задачу.\n\n```python\ndef remove(self, name, force=False, complete_task=False):\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n5. **Поток событий.** Каждый шаг жизненного цикла дописывается в `.worktrees/events.jsonl`:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 1, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\nГенерируемые события: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`.\n\nПосле падения процесса состояние восстанавливается по `.tasks/` и `.worktrees/index.json` на диске. Память разговора эфемерна, файловое состояние — долговечно.\n\n## Что изменилось относительно s11\n\n| Компонент | Было (s11) | Стало (s12) |\n|-----------|------------|-------------|\n| Координация | доска задач (`owner/status`) | доска задач + явная привязка к worktree |\n| Область выполнения | общая директория | изолированная директория на задачу |\n| Восстановление | только статус задач | статус задач + индекс рабочих деревьев |\n| Закрытие | завершение задачи | завершение задачи + явный `keep/remove` |\n| Наблюдаемость жизненного цикла | неявно через логи | явные события в `.worktrees/events.jsonl` |\n\n## Попробуйте\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\n1. `Создай задачи для backend auth и frontend login page, затем покажи список задач`\n2. 
`Создай worktree \"auth-refactor\" для задачи 1, а затем привяжи задачу 2 к новому worktree \"ui-login\"`\n3. `Запусти \"git status --short\" внутри worktree \"auth-refactor\"`\n4. `Сохрани worktree \"ui-login\", затем покажи список worktree и проверь события`\n5. `Удали worktree \"auth-refactor\" с complete_task=true, затем покажи задачи, worktree и события`\n" } ] \ No newline at end of file diff --git a/web/src/data/generated/versions.json b/web/src/data/generated/versions.json index 40b6afeaa..0e16cb0d6 100644 --- a/web/src/data/generated/versions.json +++ b/web/src/data/generated/versions.json @@ -28,14 +28,14 @@ } ], "layer": "tools", - "source": "#!/usr/bin/env python3\n\"\"\"\ns01_agent_loop.py - The Agent Loop\n\nThe entire secret of an AI coding agent in one pattern:\n\n while stop_reason == \"tool_use\":\n response = LLM(messages, tools)\n execute tools\n append results\n\n +----------+ +-------+ +---------+\n | User | ---> | LLM | ---> | Tool |\n | prompt | | | | execute |\n +----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n (loop continues)\n\nThis is the core loop: feed tool results back to the model\nuntil the model decides to stop. Production agents layer\npolicy, hooks, and lifecycle controls on top.\n\"\"\"\n\nimport os\nimport subprocess\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {os.getcwd()}. Use bash to solve tasks. 
Act, don't explain.\"\n\nTOOLS = [{\n \"name\": \"bash\",\n \"description\": \"Run a shell command.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n}]\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=os.getcwd(),\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\n# -- The core pattern: a while loop that calls tools until the model stops --\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n # Append assistant turn\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n # If the model didn't call a tool, we're done\n if response.stop_reason != \"tool_use\":\n return\n # Execute each tool call, collect results\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n print(f\"\\033[33m$ {block.input['command']}\\033[0m\")\n output = run_bash(block.input[\"command\"])\n print(output[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms01 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in 
response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n" + "source": "#!/usr/bin/env python3\r\n\"\"\"\r\ns01_agent_loop.py - The Agent Loop\r\n\r\nThe entire secret of an AI coding agent in one pattern:\r\n\r\n while stop_reason == \"tool_use\":\r\n response = LLM(messages, tools)\r\n execute tools\r\n append results\r\n\r\n +----------+ +-------+ +---------+\r\n | User | ---> | LLM | ---> | Tool |\r\n | prompt | | | | execute |\r\n +----------+ +---+---+ +----+----+\r\n ^ |\r\n | tool_result |\r\n +---------------+\r\n (loop continues)\r\n\r\nThis is the core loop: feed tool results back to the model\r\nuntil the model decides to stop. Production agents layer\r\npolicy, hooks, and lifecycle controls on top.\r\n\"\"\"\r\n\r\nimport os\r\nimport subprocess\r\n\r\nfrom anthropic import Anthropic\r\nfrom dotenv import load_dotenv\r\n\r\nload_dotenv(override=True)\r\n\r\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\r\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\r\n\r\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\r\nMODEL = os.environ[\"MODEL_ID\"]\r\n\r\nSYSTEM = f\"You are a coding agent at {os.getcwd()}. Use bash to solve tasks. 
Act, don't explain.\"\r\n\r\nTOOLS = [{\r\n \"name\": \"bash\",\r\n \"description\": \"Run a shell command.\",\r\n \"input_schema\": {\r\n \"type\": \"object\",\r\n \"properties\": {\"command\": {\"type\": \"string\"}},\r\n \"required\": [\"command\"],\r\n },\r\n}]\r\n\r\n\r\ndef run_bash(command: str) -> str:\r\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\r\n if any(d in command for d in dangerous):\r\n return \"Error: Dangerous command blocked\"\r\n try:\r\n r = subprocess.run(command, shell=True, cwd=os.getcwd(),\r\n capture_output=True, text=True, timeout=120)\r\n out = (r.stdout + r.stderr).strip()\r\n return out[:50000] if out else \"(no output)\"\r\n except subprocess.TimeoutExpired:\r\n return \"Error: Timeout (120s)\"\r\n\r\n\r\n# -- The core pattern: a while loop that calls tools until the model stops --\r\ndef agent_loop(messages: list):\r\n while True:\r\n response = client.messages.create(\r\n model=MODEL, system=SYSTEM, messages=messages,\r\n tools=TOOLS, max_tokens=8000,\r\n )\r\n # Append assistant turn\r\n messages.append({\"role\": \"assistant\", \"content\": response.content})\r\n # If the model didn't call a tool, we're done\r\n if response.stop_reason != \"tool_use\":\r\n return\r\n # Execute each tool call, collect results\r\n results = []\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n print(f\"\\033[33m$ {block.input['command']}\\033[0m\")\r\n output = run_bash(block.input[\"command\"])\r\n print(output[:200])\r\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\r\n \"content\": output})\r\n messages.append({\"role\": \"user\", \"content\": results})\r\n\r\n\r\nif __name__ == \"__main__\":\r\n history = []\r\n while True:\r\n try:\r\n query = input(\"\\033[36ms01 >> \\033[0m\")\r\n except (EOFError, KeyboardInterrupt):\r\n break\r\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\r\n break\r\n history.append({\"role\": \"user\", \"content\": query})\r\n 
agent_loop(history)\r\n response_content = history[-1][\"content\"]\r\n if isinstance(response_content, list):\r\n for block in response_content:\r\n if hasattr(block, \"text\"):\r\n print(block.text)\r\n print()\r\n" }, { "id": "s02", "filename": "s02_tool_use.py", "title": "Tools", "subtitle": "One Handler Per Tool", - "loc": 115, + "loc": 120, "tools": [ "bash", "read_file", @@ -83,14 +83,14 @@ } ], "layer": "tools", - "source": "#!/usr/bin/env python3\n\"\"\"\ns02_tool_use.py - Tools\n\nThe agent loop from s01 didn't change. We just added tools to the array\nand a dispatch map to route calls.\n\n +----------+ +-------+ +------------------+\n | User | ---> | LLM | ---> | Tool Dispatch |\n | prompt | | | | { |\n +----------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +----------+ edit: run_edit |\n tool_result| } |\n +------------------+\n\nKey insight: \"The loop didn't change at all. I just added tools.\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks. 
Act, don't explain.\"\n\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- The dispatch map: {tool_name: handler} --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": 
\"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n print(f\"> {block.name}: {output[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms02 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n" + "source": "#!/usr/bin/env 
python3\r\n\"\"\"\r\ns02_tool_use.py - Tools\r\n\r\nThe agent loop from s01 didn't change. We just added tools to the array\r\nand a dispatch map to route calls.\r\n\r\n +----------+ +-------+ +------------------+\r\n | User | ---> | LLM | ---> | Tool Dispatch |\r\n | prompt | | | | { |\r\n +----------+ +---+---+ | bash: run_bash |\r\n ^ | read: run_read |\r\n | | write: run_wr |\r\n +----------+ edit: run_edit |\r\n tool_result| } |\r\n +------------------+\r\n\r\nKey insight: \"The loop didn't change at all. I just added tools.\"\r\n\"\"\"\r\n\r\nimport os\r\nimport subprocess\r\nfrom pathlib import Path\r\n\r\nfrom anthropic import Anthropic\r\nfrom dotenv import load_dotenv\r\n\r\nload_dotenv(override=True)\r\n\r\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\r\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\r\n\r\nWORKDIR = Path.cwd()\r\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\r\nMODEL = os.environ[\"MODEL_ID\"]\r\n\r\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks. Act, don't explain.\"\r\n\r\n\r\ndef safe_path(p: str) -> Path:\r\n path = (WORKDIR / p).resolve()\r\n if not path.is_relative_to(WORKDIR):\r\n raise ValueError(f\"Path escapes workspace: {p}\")\r\n return path\r\n\r\n\r\ndef run_bash(command: str) -> str:\r\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\r\n if any(d in command for d in dangerous):\r\n return \"Error: Dangerous command blocked\"\r\n try:\r\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\r\n capture_output=True, text=True, timeout=120)\r\n out = (r.stdout + r.stderr).strip()\r\n return out[:50000] if out else \"(no output)\"\r\n except subprocess.TimeoutExpired:\r\n return \"Error: Timeout (120s)\"\r\n\r\n\r\ndef run_read(path: str, limit: int = None) -> str:\r\n try:\r\n text = safe_path(path).read_text()\r\n lines = text.splitlines()\r\n if limit and limit < len(lines):\r\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\r\n return \"\\n\".join(lines)[:50000]\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\n\r\ndef run_write(path: str, content: str) -> str:\r\n try:\r\n fp = safe_path(path)\r\n fp.parent.mkdir(parents=True, exist_ok=True)\r\n fp.write_text(content)\r\n return f\"Wrote {len(content)} bytes to {path}\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\n\r\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\r\n try:\r\n fp = safe_path(path)\r\n content = fp.read_text()\r\n if old_text not in content:\r\n return f\"Error: Text not found in {path}\"\r\n fp.write_text(content.replace(old_text, new_text, 1))\r\n return f\"Edited {path}\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\n\r\n# -- The dispatch map: {tool_name: handler} --\r\nTOOL_HANDLERS = {\r\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\r\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\r\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\r\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\r\n}\r\n\r\nTOOLS = [\r\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\r\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\r\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\r\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": 
{\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\r\n]\r\n\r\n\r\ndef agent_loop(messages: list):\r\n while True:\r\n response = client.messages.create(\r\n model=MODEL, system=SYSTEM, messages=messages,\r\n tools=TOOLS, max_tokens=8000,\r\n )\r\n messages.append({\"role\": \"assistant\", \"content\": response.content})\r\n if response.stop_reason != \"tool_use\":\r\n return\r\n results = []\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n handler = TOOL_HANDLERS.get(block.name)\r\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\r\n print(f\"> {block.name}: {output[:200]}\")\r\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\r\n messages.append({\"role\": \"user\", \"content\": results})\r\n\r\n\r\nif __name__ == \"__main__\":\r\n history = []\r\n while True:\r\n try:\r\n query = input(\"\\033[36ms02 >> \\033[0m\")\r\n except (EOFError, KeyboardInterrupt):\r\n break\r\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\r\n break\r\n history.append({\"role\": \"user\", \"content\": query})\r\n agent_loop(history)\r\n response_content = history[-1][\"content\"]\r\n if isinstance(response_content, list):\r\n for block in response_content:\r\n if hasattr(block, \"text\"):\r\n print(block.text)\r\n print()\r\n" }, { "id": "s03", "filename": "s03_todo_write.py", "title": "TodoWrite", "subtitle": "Plan Before You Act", - "loc": 171, + "loc": 176, "tools": [ "bash", "read_file", @@ -143,14 +143,14 @@ } ], "layer": "planning", - "source": "#!/usr/bin/env python3\n\"\"\"\ns03_todo_write.py - TodoWrite\n\nThe model tracks its own progress via a TodoManager. 
A nag reminder\nforces it to keep updating when it forgets.\n\n +----------+ +-------+ +---------+\n | User | ---> | LLM | ---> | Tools |\n | prompt | | | | + todo |\n +----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject \n\nKey insight: \"The agent can track its own progress -- and I can see it.\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse the todo tool to plan multi-step tasks. Mark in_progress before starting, completed when done.\nPrefer tools over prose.\"\"\"\n\n\n# -- TodoManager: structured state the LLM writes to --\nclass TodoManager:\n def __init__(self):\n self.items = []\n\n def update(self, items: list) -> str:\n if len(items) > 20:\n raise ValueError(\"Max 20 todos allowed\")\n validated = []\n in_progress_count = 0\n for i, item in enumerate(items):\n text = str(item.get(\"text\", \"\")).strip()\n status = str(item.get(\"status\", \"pending\")).lower()\n item_id = str(item.get(\"id\", str(i + 1)))\n if not text:\n raise ValueError(f\"Item {item_id}: text required\")\n if status not in (\"pending\", \"in_progress\", \"completed\"):\n raise ValueError(f\"Item {item_id}: invalid status '{status}'\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item_id, \"text\": text, \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress at a time\")\n self.items = validated\n return 
self.render()\n\n def render(self) -> str:\n if not self.items:\n return \"No todos.\"\n lines = []\n for item in self.items:\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}[item[\"status\"]]\n lines.append(f\"{marker} #{item['id']}: {item['text']}\")\n done = sum(1 for t in self.items if t[\"status\"] == \"completed\")\n lines.append(f\"\\n({done}/{len(self.items)} completed)\")\n return \"\\n\".join(lines)\n\n\nTODO = TodoManager()\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, 
\"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"todo\", \"description\": \"Update task list. Track progress on multi-step tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"items\": {\"type\": \"array\", \"items\": {\"type\": \"object\", \"properties\": {\"id\": {\"type\": \"string\"}, \"text\": {\"type\": \"string\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}}, \"required\": [\"id\", \"text\", \"status\"]}}}, \"required\": [\"items\"]}},\n]\n\n\n# -- Agent loop with nag reminder injection --\ndef agent_loop(messages: list):\n rounds_since_todo = 0\n while True:\n # Nag reminder is injected below, alongside tool results\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n used_todo = False\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n if block.name == \"todo\":\n used_todo = True\n rounds_since_todo = 0 if used_todo else rounds_since_todo + 1\n if rounds_since_todo >= 3:\n results.insert(0, {\"type\": \"text\", \"text\": \"Update your todos.\"})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms03 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n 
print()\n" + "source": "#!/usr/bin/env python3\r\n\"\"\"\r\ns03_todo_write.py - TodoWrite\r\n\r\nThe model tracks its own progress via a TodoManager. A nag reminder\r\nforces it to keep updating when it forgets.\r\n\r\n +----------+ +-------+ +---------+\r\n | User | ---> | LLM | ---> | Tools |\r\n | prompt | | | | + todo |\r\n +----------+ +---+---+ +----+----+\r\n ^ |\r\n | tool_result |\r\n +---------------+\r\n |\r\n +-----------+-----------+\r\n | TodoManager state |\r\n | [ ] task A |\r\n | [>] task B <- doing |\r\n | [x] task C |\r\n +-----------------------+\r\n |\r\n if rounds_since_todo >= 3:\r\n inject \r\n\r\nKey insight: \"The agent can track its own progress -- and I can see it.\"\r\n\"\"\"\r\n\r\nimport os\r\nimport subprocess\r\nfrom pathlib import Path\r\n\r\nfrom anthropic import Anthropic\r\nfrom dotenv import load_dotenv\r\n\r\nload_dotenv(override=True)\r\n\r\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\r\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\r\n\r\nWORKDIR = Path.cwd()\r\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\r\nMODEL = os.environ[\"MODEL_ID\"]\r\n\r\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\r\nUse the todo tool to plan multi-step tasks. 
Mark in_progress before starting, completed when done.\r\nPrefer tools over prose.\"\"\"\r\n\r\n\r\n# -- TodoManager: structured state the LLM writes to --\r\nclass TodoManager:\r\n def __init__(self):\r\n self.items = []\r\n\r\n def update(self, items: list) -> str:\r\n if len(items) > 20:\r\n raise ValueError(\"Max 20 todos allowed\")\r\n validated = []\r\n in_progress_count = 0\r\n for i, item in enumerate(items):\r\n text = str(item.get(\"text\", \"\")).strip()\r\n status = str(item.get(\"status\", \"pending\")).lower()\r\n item_id = str(item.get(\"id\", str(i + 1)))\r\n if not text:\r\n raise ValueError(f\"Item {item_id}: text required\")\r\n if status not in (\"pending\", \"in_progress\", \"completed\"):\r\n raise ValueError(f\"Item {item_id}: invalid status '{status}'\")\r\n if status == \"in_progress\":\r\n in_progress_count += 1\r\n validated.append({\"id\": item_id, \"text\": text, \"status\": status})\r\n if in_progress_count > 1:\r\n raise ValueError(\"Only one task can be in_progress at a time\")\r\n self.items = validated\r\n return self.render()\r\n\r\n def render(self) -> str:\r\n if not self.items:\r\n return \"No todos.\"\r\n lines = []\r\n for item in self.items:\r\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}[item[\"status\"]]\r\n lines.append(f\"{marker} #{item['id']}: {item['text']}\")\r\n done = sum(1 for t in self.items if t[\"status\"] == \"completed\")\r\n lines.append(f\"\\n({done}/{len(self.items)} completed)\")\r\n return \"\\n\".join(lines)\r\n\r\n\r\nTODO = TodoManager()\r\n\r\n\r\n# -- Tool implementations --\r\ndef safe_path(p: str) -> Path:\r\n path = (WORKDIR / p).resolve()\r\n if not path.is_relative_to(WORKDIR):\r\n raise ValueError(f\"Path escapes workspace: {p}\")\r\n return path\r\n\r\ndef run_bash(command: str) -> str:\r\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\r\n if any(d in command for d in dangerous):\r\n return \"Error: Dangerous command 
blocked\"\r\n try:\r\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\r\n capture_output=True, text=True, timeout=120)\r\n out = (r.stdout + r.stderr).strip()\r\n return out[:50000] if out else \"(no output)\"\r\n except subprocess.TimeoutExpired:\r\n return \"Error: Timeout (120s)\"\r\n\r\ndef run_read(path: str, limit: int = None) -> str:\r\n try:\r\n lines = safe_path(path).read_text().splitlines()\r\n if limit and limit < len(lines):\r\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\r\n return \"\\n\".join(lines)[:50000]\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\ndef run_write(path: str, content: str) -> str:\r\n try:\r\n fp = safe_path(path)\r\n fp.parent.mkdir(parents=True, exist_ok=True)\r\n fp.write_text(content)\r\n return f\"Wrote {len(content)} bytes\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\r\n try:\r\n fp = safe_path(path)\r\n content = fp.read_text()\r\n if old_text not in content:\r\n return f\"Error: Text not found in {path}\"\r\n fp.write_text(content.replace(old_text, new_text, 1))\r\n return f\"Edited {path}\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\n\r\nTOOL_HANDLERS = {\r\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\r\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\r\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\r\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\r\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\r\n}\r\n\r\nTOOLS = [\r\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\r\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, 
\"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\r\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\r\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\r\n {\"name\": \"todo\", \"description\": \"Update task list. Track progress on multi-step tasks.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"items\": {\"type\": \"array\", \"items\": {\"type\": \"object\", \"properties\": {\"id\": {\"type\": \"string\"}, \"text\": {\"type\": \"string\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}}, \"required\": [\"id\", \"text\", \"status\"]}}}, \"required\": [\"items\"]}},\r\n]\r\n\r\n\r\n# -- Agent loop with nag reminder injection --\r\ndef agent_loop(messages: list):\r\n rounds_since_todo = 0\r\n while True:\r\n # Nag reminder is injected below, alongside tool results\r\n response = client.messages.create(\r\n model=MODEL, system=SYSTEM, messages=messages,\r\n tools=TOOLS, max_tokens=8000,\r\n )\r\n messages.append({\"role\": \"assistant\", \"content\": response.content})\r\n if response.stop_reason != \"tool_use\":\r\n return\r\n results = []\r\n used_todo = False\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n handler = TOOL_HANDLERS.get(block.name)\r\n try:\r\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\r\n except Exception as e:\r\n output = f\"Error: {e}\"\r\n print(f\"> {block.name}: {str(output)[:200]}\")\r\n results.append({\"type\": \"tool_result\", \"tool_use_id\": 
block.id, \"content\": str(output)})\r\n if block.name == \"todo\":\r\n used_todo = True\r\n rounds_since_todo = 0 if used_todo else rounds_since_todo + 1\r\n if rounds_since_todo >= 3:\r\n results.insert(0, {\"type\": \"text\", \"text\": \"Update your todos.\"})\r\n messages.append({\"role\": \"user\", \"content\": results})\r\n\r\n\r\nif __name__ == \"__main__\":\r\n history = []\r\n while True:\r\n try:\r\n query = input(\"\\033[36ms03 >> \\033[0m\")\r\n except (EOFError, KeyboardInterrupt):\r\n break\r\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\r\n break\r\n history.append({\"role\": \"user\", \"content\": query})\r\n agent_loop(history)\r\n response_content = history[-1][\"content\"]\r\n if isinstance(response_content, list):\r\n for block in response_content:\r\n if hasattr(block, \"text\"):\r\n print(block.text)\r\n print()\r\n" }, { "id": "s04", "filename": "s04_subagent.py", "title": "Subagents", "subtitle": "Clean Context Per Subtask", - "loc": 146, + "loc": 151, "tools": [ "bash", "read_file", @@ -202,14 +202,14 @@ } ], "layer": "planning", - "source": "#!/usr/bin/env python3\n\"\"\"\ns04_subagent.py - Subagents\n\nSpawn a child agent with fresh messages=[]. The child works in its own\ncontext, sharing the filesystem, then returns only a summary to the parent.\n\n Parent agent Subagent\n +------------------+ +------------------+\n | messages=[...] 
| | messages=[] | <-- fresh\n | | dispatch | |\n | tool: task | ---------->| while tool_use: |\n | prompt=\"...\" | | call tools |\n | description=\"\" | | append results |\n | | summary | |\n | result = \"...\" | <--------- | return last text |\n +------------------+ +------------------+\n |\n Parent context stays clean.\n Subagent context is discarded.\n\nKey insight: \"Process isolation gives context isolation for free.\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use the task tool to delegate exploration or subtasks.\"\nSUBAGENT_SYSTEM = f\"You are a coding subagent at {WORKDIR}. Complete the given task, then summarize your findings.\"\n\n\n# -- Tool implementations shared by parent and child --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\n# Child gets all base tools except task (no recursive spawning)\nCHILD_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": 
\"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\n# -- Subagent: fresh context, filtered tools, summary-only return --\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}] # fresh context\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM, messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n # Only the final text returns to the parent -- child context is discarded\n return \"\".join(b.text for b in response.content if hasattr(b, \"text\")) or \"(no summary)\"\n\n\n# -- Parent tools: base tools + task dispatcher --\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\", \"description\": \"Spawn a subagent with fresh context. 
It shares the filesystem but not conversation history.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"prompt\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\", \"description\": \"Short description of the task\"}}, \"required\": [\"prompt\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=PARENT_TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"task\":\n desc = block.input.get(\"description\", \"subtask\")\n print(f\"> task ({desc}): {block.input['prompt'][:80]}\")\n output = run_subagent(block.input[\"prompt\"])\n else:\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n print(f\" {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms04 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n" + "source": "#!/usr/bin/env python3\r\n\"\"\"\r\ns04_subagent.py - Subagents\r\n\r\nSpawn a child agent with fresh messages=[]. The child works in its own\r\ncontext, sharing the filesystem, then returns only a summary to the parent.\r\n\r\n Parent agent Subagent\r\n +------------------+ +------------------+\r\n | messages=[...] 
| | messages=[] | <-- fresh\r\n | | dispatch | |\r\n | tool: task | ---------->| while tool_use: |\r\n | prompt=\"...\" | | call tools |\r\n | description=\"\" | | append results |\r\n | | summary | |\r\n | result = \"...\" | <--------- | return last text |\r\n +------------------+ +------------------+\r\n |\r\n Parent context stays clean.\r\n Subagent context is discarded.\r\n\r\nKey insight: \"Process isolation gives context isolation for free.\"\r\n\"\"\"\r\n\r\nimport os\r\nimport subprocess\r\nfrom pathlib import Path\r\n\r\nfrom anthropic import Anthropic\r\nfrom dotenv import load_dotenv\r\n\r\nload_dotenv(override=True)\r\n\r\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\r\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\r\n\r\nWORKDIR = Path.cwd()\r\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\r\nMODEL = os.environ[\"MODEL_ID\"]\r\n\r\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use the task tool to delegate exploration or subtasks.\"\r\nSUBAGENT_SYSTEM = f\"You are a coding subagent at {WORKDIR}. 
Complete the given task, then summarize your findings.\"\r\n\r\n\r\n# -- Tool implementations shared by parent and child --\r\ndef safe_path(p: str) -> Path:\r\n path = (WORKDIR / p).resolve()\r\n if not path.is_relative_to(WORKDIR):\r\n raise ValueError(f\"Path escapes workspace: {p}\")\r\n return path\r\n\r\ndef run_bash(command: str) -> str:\r\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\r\n if any(d in command for d in dangerous):\r\n return \"Error: Dangerous command blocked\"\r\n try:\r\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\r\n capture_output=True, text=True, timeout=120)\r\n out = (r.stdout + r.stderr).strip()\r\n return out[:50000] if out else \"(no output)\"\r\n except subprocess.TimeoutExpired:\r\n return \"Error: Timeout (120s)\"\r\n\r\ndef run_read(path: str, limit: int = None) -> str:\r\n try:\r\n lines = safe_path(path).read_text().splitlines()\r\n if limit and limit < len(lines):\r\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\r\n return \"\\n\".join(lines)[:50000]\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\ndef run_write(path: str, content: str) -> str:\r\n try:\r\n fp = safe_path(path)\r\n fp.parent.mkdir(parents=True, exist_ok=True)\r\n fp.write_text(content)\r\n return f\"Wrote {len(content)} bytes\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\r\n try:\r\n fp = safe_path(path)\r\n content = fp.read_text()\r\n if old_text not in content:\r\n return f\"Error: Text not found in {path}\"\r\n fp.write_text(content.replace(old_text, new_text, 1))\r\n return f\"Edited {path}\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\n\r\nTOOL_HANDLERS = {\r\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\r\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\r\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\r\n \"edit_file\": lambda **kw: 
run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\r\n}\r\n\r\n# Child gets all base tools except task (no recursive spawning)\r\nCHILD_TOOLS = [\r\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\r\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\r\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\r\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\r\n]\r\n\r\n\r\n# -- Subagent: fresh context, filtered tools, summary-only return --\r\ndef run_subagent(prompt: str) -> str:\r\n sub_messages = [{\"role\": \"user\", \"content\": prompt}] # fresh context\r\n for _ in range(30): # safety limit\r\n response = client.messages.create(\r\n model=MODEL, system=SUBAGENT_SYSTEM, messages=sub_messages,\r\n tools=CHILD_TOOLS, max_tokens=8000,\r\n )\r\n sub_messages.append({\"role\": \"assistant\", \"content\": response.content})\r\n if response.stop_reason != \"tool_use\":\r\n break\r\n results = []\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n handler = TOOL_HANDLERS.get(block.name)\r\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\r\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": 
str(output)[:50000]})\r\n sub_messages.append({\"role\": \"user\", \"content\": results})\r\n # Only the final text returns to the parent -- child context is discarded\r\n return \"\".join(b.text for b in response.content if hasattr(b, \"text\")) or \"(no summary)\"\r\n\r\n\r\n# -- Parent tools: base tools + task dispatcher --\r\nPARENT_TOOLS = CHILD_TOOLS + [\r\n {\"name\": \"task\", \"description\": \"Spawn a subagent with fresh context. It shares the filesystem but not conversation history.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"prompt\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\", \"description\": \"Short description of the task\"}}, \"required\": [\"prompt\"]}},\r\n]\r\n\r\n\r\ndef agent_loop(messages: list):\r\n while True:\r\n response = client.messages.create(\r\n model=MODEL, system=SYSTEM, messages=messages,\r\n tools=PARENT_TOOLS, max_tokens=8000,\r\n )\r\n messages.append({\"role\": \"assistant\", \"content\": response.content})\r\n if response.stop_reason != \"tool_use\":\r\n return\r\n results = []\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n if block.name == \"task\":\r\n desc = block.input.get(\"description\", \"subtask\")\r\n print(f\"> task ({desc}): {block.input['prompt'][:80]}\")\r\n output = run_subagent(block.input[\"prompt\"])\r\n else:\r\n handler = TOOL_HANDLERS.get(block.name)\r\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\r\n print(f\" {str(output)[:200]}\")\r\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\r\n messages.append({\"role\": \"user\", \"content\": results})\r\n\r\n\r\nif __name__ == \"__main__\":\r\n history = []\r\n while True:\r\n try:\r\n query = input(\"\\033[36ms04 >> \\033[0m\")\r\n except (EOFError, KeyboardInterrupt):\r\n break\r\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\r\n break\r\n history.append({\"role\": \"user\", \"content\": 
query})\r\n agent_loop(history)\r\n response_content = history[-1][\"content\"]\r\n if isinstance(response_content, list):\r\n for block in response_content:\r\n if hasattr(block, \"text\"):\r\n print(block.text)\r\n print()\r\n" }, { "id": "s05", "filename": "s05_skill_loading.py", "title": "Skills", "subtitle": "Load on Demand", - "loc": 182, + "loc": 187, "tools": [ "bash", "read_file", @@ -262,14 +262,14 @@ } ], "layer": "planning", - "source": "#!/usr/bin/env python3\n\"\"\"\ns05_skill_loading.py - Skills\n\nTwo-layer skill injection that avoids bloating the system prompt:\n\n Layer 1 (cheap): skill names in system prompt (~100 tokens/skill)\n Layer 2 (on demand): full skill body in tool_result\n\n skills/\n pdf/\n SKILL.md <-- frontmatter (name, description) + body\n code-review/\n SKILL.md\n\n System prompt:\n +--------------------------------------+\n | You are a coding agent. |\n | Skills available: |\n | - pdf: Process PDF files... | <-- Layer 1: metadata only\n | - code-review: Review code... |\n +--------------------------------------+\n\n When model calls load_skill(\"pdf\"):\n +--------------------------------------+\n | tool_result: |\n | |\n | Full PDF processing instructions | <-- Layer 2: full body\n | Step 1: ... |\n | Step 2: ... |\n | |\n +--------------------------------------+\n\nKey insight: \"Don't put everything in the system prompt. 
Load on demand.\"\n\"\"\"\n\nimport os\nimport re\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nSKILLS_DIR = WORKDIR / \"skills\"\n\n\n# -- SkillLoader: scan skills//SKILL.md with YAML frontmatter --\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills_dir = skills_dir\n self.skills = {}\n self._load_all()\n\n def _load_all(self):\n if not self.skills_dir.exists():\n return\n for f in sorted(self.skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body, \"path\": str(f)}\n\n def _parse_frontmatter(self, text: str) -> tuple:\n \"\"\"Parse YAML frontmatter between --- delimiters.\"\"\"\n match = re.match(r\"^---\\n(.*?)\\n---\\n(.*)\", text, re.DOTALL)\n if not match:\n return {}, text\n meta = {}\n for line in match.group(1).strip().splitlines():\n if \":\" in line:\n key, val = line.split(\":\", 1)\n meta[key.strip()] = val.strip()\n return meta, match.group(2).strip()\n\n def get_descriptions(self) -> str:\n \"\"\"Layer 1: short descriptions for the system prompt.\"\"\"\n if not self.skills:\n return \"(no skills available)\"\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"No description\")\n tags = skill[\"meta\"].get(\"tags\", \"\")\n line = f\" - {name}: {desc}\"\n if tags:\n line += f\" [{tags}]\"\n lines.append(line)\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n \"\"\"Layer 2: full skill body returned in tool_result.\"\"\"\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'. 
Available: {', '.join(self.skills.keys())}\"\n return f\"\\n{skill['body']}\\n\"\n\n\nSKILL_LOADER = SkillLoader(SKILLS_DIR)\n\n# Layer 1: skill metadata injected into system prompt\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse load_skill to access specialized knowledge before tackling unfamiliar topics.\n\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": 
\"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"load_skill\", \"description\": \"Load specialized knowledge by name.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\", \"description\": \"Skill name to load\"}}, \"required\": [\"name\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms05 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n" + "source": "#!/usr/bin/env python3\r\n\"\"\"\r\ns05_skill_loading.py - Skills\r\n\r\nTwo-layer skill injection that avoids bloating the system prompt:\r\n\r\n Layer 1 (cheap): skill names in system prompt (~100 tokens/skill)\r\n Layer 2 (on demand): full skill body in tool_result\r\n\r\n skills/\r\n pdf/\r\n SKILL.md <-- frontmatter (name, description) + body\r\n code-review/\r\n SKILL.md\r\n\r\n System prompt:\r\n +--------------------------------------+\r\n | You are a coding agent. |\r\n | Skills available: |\r\n | - pdf: Process PDF files... 
| <-- Layer 1: metadata only\r\n | - code-review: Review code... |\r\n +--------------------------------------+\r\n\r\n When model calls load_skill(\"pdf\"):\r\n +--------------------------------------+\r\n | tool_result: |\r\n | |\r\n | Full PDF processing instructions | <-- Layer 2: full body\r\n | Step 1: ... |\r\n | Step 2: ... |\r\n | |\r\n +--------------------------------------+\r\n\r\nKey insight: \"Don't put everything in the system prompt. Load on demand.\"\r\n\"\"\"\r\n\r\nimport os\r\nimport re\r\nimport subprocess\r\nfrom pathlib import Path\r\n\r\nfrom anthropic import Anthropic\r\nfrom dotenv import load_dotenv\r\n\r\nload_dotenv(override=True)\r\n\r\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\r\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\r\n\r\nWORKDIR = Path.cwd()\r\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\r\nMODEL = os.environ[\"MODEL_ID\"]\r\nSKILLS_DIR = WORKDIR / \"skills\"\r\n\r\n\r\n# -- SkillLoader: scan skills//SKILL.md with YAML frontmatter --\r\nclass SkillLoader:\r\n def __init__(self, skills_dir: Path):\r\n self.skills_dir = skills_dir\r\n self.skills = {}\r\n self._load_all()\r\n\r\n def _load_all(self):\r\n if not self.skills_dir.exists():\r\n return\r\n for f in sorted(self.skills_dir.rglob(\"SKILL.md\")):\r\n text = f.read_text()\r\n meta, body = self._parse_frontmatter(text)\r\n name = meta.get(\"name\", f.parent.name)\r\n self.skills[name] = {\"meta\": meta, \"body\": body, \"path\": str(f)}\r\n\r\n def _parse_frontmatter(self, text: str) -> tuple:\r\n \"\"\"Parse YAML frontmatter between --- delimiters.\"\"\"\r\n match = re.match(r\"^---\\n(.*?)\\n---\\n(.*)\", text, re.DOTALL)\r\n if not match:\r\n return {}, text\r\n meta = {}\r\n for line in match.group(1).strip().splitlines():\r\n if \":\" in line:\r\n key, val = line.split(\":\", 1)\r\n meta[key.strip()] = val.strip()\r\n return meta, match.group(2).strip()\r\n\r\n def get_descriptions(self) -> str:\r\n \"\"\"Layer 1: short descriptions for the system 
prompt.\"\"\"\r\n if not self.skills:\r\n return \"(no skills available)\"\r\n lines = []\r\n for name, skill in self.skills.items():\r\n desc = skill[\"meta\"].get(\"description\", \"No description\")\r\n tags = skill[\"meta\"].get(\"tags\", \"\")\r\n line = f\" - {name}: {desc}\"\r\n if tags:\r\n line += f\" [{tags}]\"\r\n lines.append(line)\r\n return \"\\n\".join(lines)\r\n\r\n def get_content(self, name: str) -> str:\r\n \"\"\"Layer 2: full skill body returned in tool_result.\"\"\"\r\n skill = self.skills.get(name)\r\n if not skill:\r\n return f\"Error: Unknown skill '{name}'. Available: {', '.join(self.skills.keys())}\"\r\n return f\"\\n{skill['body']}\\n\"\r\n\r\n\r\nSKILL_LOADER = SkillLoader(SKILLS_DIR)\r\n\r\n# Layer 1: skill metadata injected into system prompt\r\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\r\nUse load_skill to access specialized knowledge before tackling unfamiliar topics.\r\n\r\nSkills available:\r\n{SKILL_LOADER.get_descriptions()}\"\"\"\r\n\r\n\r\n# -- Tool implementations --\r\ndef safe_path(p: str) -> Path:\r\n path = (WORKDIR / p).resolve()\r\n if not path.is_relative_to(WORKDIR):\r\n raise ValueError(f\"Path escapes workspace: {p}\")\r\n return path\r\n\r\ndef run_bash(command: str) -> str:\r\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\r\n if any(d in command for d in dangerous):\r\n return \"Error: Dangerous command blocked\"\r\n try:\r\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\r\n capture_output=True, text=True, timeout=120)\r\n out = (r.stdout + r.stderr).strip()\r\n return out[:50000] if out else \"(no output)\"\r\n except subprocess.TimeoutExpired:\r\n return \"Error: Timeout (120s)\"\r\n\r\ndef run_read(path: str, limit: int = None) -> str:\r\n try:\r\n lines = safe_path(path).read_text().splitlines()\r\n if limit and limit < len(lines):\r\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\r\n return \"\\n\".join(lines)[:50000]\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\ndef run_write(path: str, content: str) -> str:\r\n try:\r\n fp = safe_path(path)\r\n fp.parent.mkdir(parents=True, exist_ok=True)\r\n fp.write_text(content)\r\n return f\"Wrote {len(content)} bytes\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\r\n try:\r\n fp = safe_path(path)\r\n content = fp.read_text()\r\n if old_text not in content:\r\n return f\"Error: Text not found in {path}\"\r\n fp.write_text(content.replace(old_text, new_text, 1))\r\n return f\"Edited {path}\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\n\r\nTOOL_HANDLERS = {\r\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\r\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\r\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\r\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\r\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\r\n}\r\n\r\nTOOLS = [\r\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\r\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\r\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\r\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": 
{\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\r\n {\"name\": \"load_skill\", \"description\": \"Load specialized knowledge by name.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\", \"description\": \"Skill name to load\"}}, \"required\": [\"name\"]}},\r\n]\r\n\r\n\r\ndef agent_loop(messages: list):\r\n while True:\r\n response = client.messages.create(\r\n model=MODEL, system=SYSTEM, messages=messages,\r\n tools=TOOLS, max_tokens=8000,\r\n )\r\n messages.append({\"role\": \"assistant\", \"content\": response.content})\r\n if response.stop_reason != \"tool_use\":\r\n return\r\n results = []\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n handler = TOOL_HANDLERS.get(block.name)\r\n try:\r\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\r\n except Exception as e:\r\n output = f\"Error: {e}\"\r\n print(f\"> {block.name}: {str(output)[:200]}\")\r\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\r\n messages.append({\"role\": \"user\", \"content\": results})\r\n\r\n\r\nif __name__ == \"__main__\":\r\n history = []\r\n while True:\r\n try:\r\n query = input(\"\\033[36ms05 >> \\033[0m\")\r\n except (EOFError, KeyboardInterrupt):\r\n break\r\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\r\n break\r\n history.append({\"role\": \"user\", \"content\": query})\r\n agent_loop(history)\r\n response_content = history[-1][\"content\"]\r\n if isinstance(response_content, list):\r\n for block in response_content:\r\n if hasattr(block, \"text\"):\r\n print(block.text)\r\n print()\r\n" }, { "id": "s06", "filename": "s06_context_compact.py", "title": "Compact", "subtitle": "Three-Layer Compression", - "loc": 200, + "loc": 205, "tools": [ "bash", "read_file", @@ -331,14 +331,14 @@ } ], "layer": "memory", - 
"source": "#!/usr/bin/env python3\n\"\"\"\ns06_context_compact.py - Compact\n\nThree-layer compression pipeline so the agent can work forever:\n\n Every turn:\n +------------------+\n | Tool call result |\n +------------------+\n |\n v\n [Layer 1: micro_compact] (silent, every turn)\n Replace tool_result content older than last 3\n with \"[Previous: used {tool_name}]\"\n |\n v\n [Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\n continue [Layer 2: auto_compact]\n Save full transcript to .transcripts/\n Ask LLM to summarize conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact -> immediate summarization.\n Same as auto, triggered manually.\n\nKey insight: \"The agent can forget strategically and keep working forever.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. 
Use tools to solve tasks.\"\n\nTHRESHOLD = 50000\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nKEEP_RECENT = 3\n\n\ndef estimate_tokens(messages: list) -> int:\n \"\"\"Rough token count: ~4 chars per token.\"\"\"\n return len(str(messages)) // 4\n\n\n# -- Layer 1: micro_compact - replace old tool results with placeholders --\ndef micro_compact(messages: list) -> list:\n # Collect (msg_index, part_index, tool_result_dict) for all tool_result entries\n tool_results = []\n for msg_idx, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for part_idx, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((msg_idx, part_idx, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n # Find tool_name for each result by matching tool_use_id in prior assistant messages\n tool_name_map = {}\n for msg in messages:\n if msg[\"role\"] == \"assistant\":\n content = msg.get(\"content\", [])\n if isinstance(content, list):\n for block in content:\n if hasattr(block, \"type\") and block.type == \"tool_use\":\n tool_name_map[block.id] = block.name\n # Clear old results (keep last KEEP_RECENT)\n to_clear = tool_results[:-KEEP_RECENT]\n for _, _, result in to_clear:\n if isinstance(result.get(\"content\"), str) and len(result[\"content\"]) > 100:\n tool_id = result.get(\"tool_use_id\", \"\")\n tool_name = tool_name_map.get(tool_id, \"unknown\")\n result[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n\n\n# -- Layer 2: auto_compact - save transcript, summarize, replace messages --\ndef auto_compact(messages: list) -> list:\n # Save full transcript to disk\n TRANSCRIPT_DIR.mkdir(exist_ok=True)\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n print(f\"[transcript saved: 
{transcript_path}]\")\n # Ask LLM to summarize\n conversation_text = json.dumps(messages, default=str)[:80000]\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity. Include: \"\n \"1) What was accomplished, 2) Current state, 3) Key decisions made. \"\n \"Be concise but preserve critical details.\\n\\n\" + conversation_text}],\n max_tokens=2000,\n )\n summary = response.content[0].text\n # Replace all messages with compressed summary\n return [\n {\"role\": \"user\", \"content\": f\"[Conversation compressed. Transcript: {transcript_path}]\\n\\n{summary}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. I have the context from the summary. Continuing.\"},\n ]\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"compact\": lambda **kw: \"Manual compression requested.\",\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": 
\"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"compact\", \"description\": \"Trigger manual conversation compression.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"focus\": {\"type\": \"string\", \"description\": \"What to preserve in the summary\"}}}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n # Layer 1: micro_compact before each LLM call\n micro_compact(messages)\n # Layer 2: auto_compact if token estimate exceeds threshold\n if estimate_tokens(messages) > THRESHOLD:\n print(\"[auto_compact triggered]\")\n messages[:] = auto_compact(messages)\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n manual_compact = False\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"compact\":\n manual_compact = True\n output = \"Compressing...\"\n else:\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n # Layer 3: manual compact triggered by the compact tool\n if manual_compact:\n print(\"[manual compact]\")\n messages[:] = auto_compact(messages)\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms06 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n" + "source": "#!/usr/bin/env 
python3\r\n\"\"\"\r\ns06_context_compact.py - Compact\r\n\r\nThree-layer compression pipeline so the agent can work forever:\r\n\r\n Every turn:\r\n +------------------+\r\n | Tool call result |\r\n +------------------+\r\n |\r\n v\r\n [Layer 1: micro_compact] (silent, every turn)\r\n Replace tool_result content older than last 3\r\n with \"[Previous: used {tool_name}]\"\r\n |\r\n v\r\n [Check: tokens > 50000?]\r\n | |\r\n no yes\r\n | |\r\n v v\r\n continue [Layer 2: auto_compact]\r\n Save full transcript to .transcripts/\r\n Ask LLM to summarize conversation.\r\n Replace all messages with [summary].\r\n |\r\n v\r\n [Layer 3: compact tool]\r\n Model calls compact -> immediate summarization.\r\n Same as auto, triggered manually.\r\n\r\nKey insight: \"The agent can forget strategically and keep working forever.\"\r\n\"\"\"\r\n\r\nimport json\r\nimport os\r\nimport subprocess\r\nimport time\r\nfrom pathlib import Path\r\n\r\nfrom anthropic import Anthropic\r\nfrom dotenv import load_dotenv\r\n\r\nload_dotenv(override=True)\r\n\r\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\r\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\r\n\r\nWORKDIR = Path.cwd()\r\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\r\nMODEL = os.environ[\"MODEL_ID\"]\r\n\r\nSYSTEM = f\"You are a coding agent at {WORKDIR}. 
Use tools to solve tasks.\"\r\n\r\nTHRESHOLD = 50000\r\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\r\nKEEP_RECENT = 3\r\n\r\n\r\ndef estimate_tokens(messages: list) -> int:\r\n \"\"\"Rough token count: ~4 chars per token.\"\"\"\r\n return len(str(messages)) // 4\r\n\r\n\r\n# -- Layer 1: micro_compact - replace old tool results with placeholders --\r\ndef micro_compact(messages: list) -> list:\r\n # Collect (msg_index, part_index, tool_result_dict) for all tool_result entries\r\n tool_results = []\r\n for msg_idx, msg in enumerate(messages):\r\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\r\n for part_idx, part in enumerate(msg[\"content\"]):\r\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\r\n tool_results.append((msg_idx, part_idx, part))\r\n if len(tool_results) <= KEEP_RECENT:\r\n return messages\r\n # Find tool_name for each result by matching tool_use_id in prior assistant messages\r\n tool_name_map = {}\r\n for msg in messages:\r\n if msg[\"role\"] == \"assistant\":\r\n content = msg.get(\"content\", [])\r\n if isinstance(content, list):\r\n for block in content:\r\n if hasattr(block, \"type\") and block.type == \"tool_use\":\r\n tool_name_map[block.id] = block.name\r\n # Clear old results (keep last KEEP_RECENT)\r\n to_clear = tool_results[:-KEEP_RECENT]\r\n for _, _, result in to_clear:\r\n if isinstance(result.get(\"content\"), str) and len(result[\"content\"]) > 100:\r\n tool_id = result.get(\"tool_use_id\", \"\")\r\n tool_name = tool_name_map.get(tool_id, \"unknown\")\r\n result[\"content\"] = f\"[Previous: used {tool_name}]\"\r\n return messages\r\n\r\n\r\n# -- Layer 2: auto_compact - save transcript, summarize, replace messages --\r\ndef auto_compact(messages: list) -> list:\r\n # Save full transcript to disk\r\n TRANSCRIPT_DIR.mkdir(exist_ok=True)\r\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\r\n with open(transcript_path, \"w\") as f:\r\n for msg in messages:\r\n 
f.write(json.dumps(msg, default=str) + \"\\n\")\r\n print(f\"[transcript saved: {transcript_path}]\")\r\n # Ask LLM to summarize\r\n conversation_text = json.dumps(messages, default=str)[:80000]\r\n response = client.messages.create(\r\n model=MODEL,\r\n messages=[{\"role\": \"user\", \"content\":\r\n \"Summarize this conversation for continuity. Include: \"\r\n \"1) What was accomplished, 2) Current state, 3) Key decisions made. \"\r\n \"Be concise but preserve critical details.\\n\\n\" + conversation_text}],\r\n max_tokens=2000,\r\n )\r\n summary = response.content[0].text\r\n # Replace all messages with compressed summary\r\n return [\r\n {\"role\": \"user\", \"content\": f\"[Conversation compressed. Transcript: {transcript_path}]\\n\\n{summary}\"},\r\n {\"role\": \"assistant\", \"content\": \"Understood. I have the context from the summary. Continuing.\"},\r\n ]\r\n\r\n\r\n# -- Tool implementations --\r\ndef safe_path(p: str) -> Path:\r\n path = (WORKDIR / p).resolve()\r\n if not path.is_relative_to(WORKDIR):\r\n raise ValueError(f\"Path escapes workspace: {p}\")\r\n return path\r\n\r\ndef run_bash(command: str) -> str:\r\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\r\n if any(d in command for d in dangerous):\r\n return \"Error: Dangerous command blocked\"\r\n try:\r\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\r\n capture_output=True, text=True, timeout=120)\r\n out = (r.stdout + r.stderr).strip()\r\n return out[:50000] if out else \"(no output)\"\r\n except subprocess.TimeoutExpired:\r\n return \"Error: Timeout (120s)\"\r\n\r\ndef run_read(path: str, limit: int = None) -> str:\r\n try:\r\n lines = safe_path(path).read_text().splitlines()\r\n if limit and limit < len(lines):\r\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\r\n return \"\\n\".join(lines)[:50000]\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\ndef run_write(path: str, content: str) -> str:\r\n try:\r\n fp = safe_path(path)\r\n fp.parent.mkdir(parents=True, exist_ok=True)\r\n fp.write_text(content)\r\n return f\"Wrote {len(content)} bytes\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\r\n try:\r\n fp = safe_path(path)\r\n content = fp.read_text()\r\n if old_text not in content:\r\n return f\"Error: Text not found in {path}\"\r\n fp.write_text(content.replace(old_text, new_text, 1))\r\n return f\"Edited {path}\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\n\r\nTOOL_HANDLERS = {\r\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\r\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\r\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\r\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\r\n \"compact\": lambda **kw: \"Manual compression requested.\",\r\n}\r\n\r\nTOOLS = [\r\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\r\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\r\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\r\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": 
\"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\r\n {\"name\": \"compact\", \"description\": \"Trigger manual conversation compression.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"focus\": {\"type\": \"string\", \"description\": \"What to preserve in the summary\"}}}},\r\n]\r\n\r\n\r\ndef agent_loop(messages: list):\r\n while True:\r\n # Layer 1: micro_compact before each LLM call\r\n micro_compact(messages)\r\n # Layer 2: auto_compact if token estimate exceeds threshold\r\n if estimate_tokens(messages) > THRESHOLD:\r\n print(\"[auto_compact triggered]\")\r\n messages[:] = auto_compact(messages)\r\n response = client.messages.create(\r\n model=MODEL, system=SYSTEM, messages=messages,\r\n tools=TOOLS, max_tokens=8000,\r\n )\r\n messages.append({\"role\": \"assistant\", \"content\": response.content})\r\n if response.stop_reason != \"tool_use\":\r\n return\r\n results = []\r\n manual_compact = False\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n if block.name == \"compact\":\r\n manual_compact = True\r\n output = \"Compressing...\"\r\n else:\r\n handler = TOOL_HANDLERS.get(block.name)\r\n try:\r\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\r\n except Exception as e:\r\n output = f\"Error: {e}\"\r\n print(f\"> {block.name}: {str(output)[:200]}\")\r\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\r\n messages.append({\"role\": \"user\", \"content\": results})\r\n # Layer 3: manual compact triggered by the compact tool\r\n if manual_compact:\r\n print(\"[manual compact]\")\r\n messages[:] = auto_compact(messages)\r\n\r\n\r\nif __name__ == \"__main__\":\r\n history = []\r\n while True:\r\n try:\r\n query = input(\"\\033[36ms06 >> \\033[0m\")\r\n except (EOFError, KeyboardInterrupt):\r\n break\r\n if query.strip().lower() in (\"q\", 
\"exit\", \"\"):\r\n break\r\n history.append({\"role\": \"user\", \"content\": query})\r\n agent_loop(history)\r\n response_content = history[-1][\"content\"]\r\n if isinstance(response_content, list):\r\n for block in response_content:\r\n if hasattr(block, \"text\"):\r\n print(block.text)\r\n print()\r\n" }, { "id": "s07", "filename": "s07_task_system.py", "title": "Tasks", "subtitle": "Task Graph + Dependencies", - "loc": 202, + "loc": 207, "tools": [ "bash", "read_file", @@ -397,14 +397,14 @@ } ], "layer": "planning", - "source": "#!/usr/bin/env python3\n\"\"\"\ns07_task_system.py - Tasks\n\nTasks persist as JSON files in .tasks/ so they survive context compression.\nEach task has a dependency graph (blockedBy/blocks).\n\n .tasks/\n task_1.json {\"id\":1, \"subject\":\"...\", \"status\":\"completed\", ...}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\", ...}\n task_3.json {\"id\":3, \"blockedBy\":[2], \"blocks\":[], ...}\n\n Dependency resolution:\n +----------+ +----------+ +----------+\n | task 1 | --> | task 2 | --> | task 3 |\n | complete | | blocked | | blocked |\n +----------+ +----------+ +----------+\n | ^\n +--- completing task 1 removes it from task 2's blockedBy\n\nKey insight: \"State that survives compression -- because it's outside the conversation.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTASKS_DIR = WORKDIR / \".tasks\"\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. 
Use task tools to plan and track work.\"\n\n\n# -- TaskManager: CRUD with dependency graph, persisted as JSON files --\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _max_id(self) -> int:\n ids = [int(f.stem.split(\"_\")[1]) for f in self.dir.glob(\"task_*.json\")]\n return max(ids) if ids else 0\n\n def _load(self, task_id: int) -> dict:\n path = self.dir / f\"task_{task_id}.json\"\n if not path.exists():\n raise ValueError(f\"Task {task_id} not found\")\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n path = self.dir / f\"task_{task['id']}.json\"\n path.write_text(json.dumps(task, indent=2))\n\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id, \"subject\": subject, \"description\": description,\n \"status\": \"pending\", \"blockedBy\": [], \"blocks\": [], \"owner\": \"\",\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def get(self, task_id: int) -> str:\n return json.dumps(self._load(task_id), indent=2)\n\n def update(self, task_id: int, status: str = None,\n add_blocked_by: list = None, add_blocks: list = None) -> str:\n task = self._load(task_id)\n if status:\n if status not in (\"pending\", \"in_progress\", \"completed\"):\n raise ValueError(f\"Invalid status: {status}\")\n task[\"status\"] = status\n # When a task is completed, remove it from all other tasks' blockedBy\n if status == \"completed\":\n self._clear_dependency(task_id)\n if add_blocked_by:\n task[\"blockedBy\"] = list(set(task[\"blockedBy\"] + add_blocked_by))\n if add_blocks:\n task[\"blocks\"] = list(set(task[\"blocks\"] + add_blocks))\n # Bidirectional: also update the blocked tasks' blockedBy lists\n for blocked_id in add_blocks:\n try:\n blocked = self._load(blocked_id)\n if task_id not in blocked[\"blockedBy\"]:\n blocked[\"blockedBy\"].append(task_id)\n 
self._save(blocked)\n except ValueError:\n pass\n self._save(task)\n return json.dumps(task, indent=2)\n\n def _clear_dependency(self, completed_id: int):\n \"\"\"Remove completed_id from all other tasks' blockedBy lists.\"\"\"\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n\n def list_all(self) -> str:\n tasks = []\n for f in sorted(self.dir.glob(\"task_*.json\")):\n tasks.append(json.loads(f.read_text()))\n if not tasks:\n return \"No tasks.\"\n lines = []\n for t in tasks:\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}.get(t[\"status\"], \"[?]\")\n blocked = f\" (blocked by: {t['blockedBy']})\" if t.get(\"blockedBy\") else \"\"\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{blocked}\")\n return \"\\n\".join(lines)\n\n\nTASKS = TaskManager(TASKS_DIR)\n\n\n# -- Base tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"addBlockedBy\"), kw.get(\"addBlocks\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", 
\"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"task_create\", \"description\": \"Create a new task.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"subject\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\"}}, \"required\": [\"subject\"]}},\n {\"name\": \"task_update\", \"description\": \"Update a task's status or dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}, \"addBlockedBy\": {\"type\": \"array\", \"items\": {\"type\": \"integer\"}}, \"addBlocks\": {\"type\": \"array\", \"items\": {\"type\": \"integer\"}}}, \"required\": [\"task_id\"]}},\n {\"name\": \"task_list\", \"description\": \"List all tasks with status summary.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"task_get\", \"description\": \"Get full details of a task by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: 
{str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms07 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n" + "source": "#!/usr/bin/env python3\r\n\"\"\"\r\ns07_task_system.py - Tasks\r\n\r\nTasks persist as JSON files in .tasks/ so they survive context compression.\r\nEach task has a dependency graph (blockedBy/blocks).\r\n\r\n .tasks/\r\n task_1.json {\"id\":1, \"subject\":\"...\", \"status\":\"completed\", ...}\r\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\", ...}\r\n task_3.json {\"id\":3, \"blockedBy\":[2], \"blocks\":[], ...}\r\n\r\n Dependency resolution:\r\n +----------+ +----------+ +----------+\r\n | task 1 | --> | task 2 | --> | task 3 |\r\n | complete | | blocked | | blocked |\r\n +----------+ +----------+ +----------+\r\n | ^\r\n +--- completing task 1 removes it from task 2's blockedBy\r\n\r\nKey insight: \"State that survives compression -- because it's outside the conversation.\"\r\n\"\"\"\r\n\r\nimport json\r\nimport os\r\nimport subprocess\r\nfrom pathlib import Path\r\n\r\nfrom anthropic import Anthropic\r\nfrom dotenv import load_dotenv\r\n\r\nload_dotenv(override=True)\r\n\r\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\r\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\r\n\r\nWORKDIR = Path.cwd()\r\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\r\nMODEL = os.environ[\"MODEL_ID\"]\r\nTASKS_DIR = WORKDIR / \".tasks\"\r\n\r\nSYSTEM = f\"You are a coding agent at {WORKDIR}. 
Use task tools to plan and track work.\"\r\n\r\n\r\n# -- TaskManager: CRUD with dependency graph, persisted as JSON files --\r\nclass TaskManager:\r\n def __init__(self, tasks_dir: Path):\r\n self.dir = tasks_dir\r\n self.dir.mkdir(exist_ok=True)\r\n self._next_id = self._max_id() + 1\r\n\r\n def _max_id(self) -> int:\r\n ids = [int(f.stem.split(\"_\")[1]) for f in self.dir.glob(\"task_*.json\")]\r\n return max(ids) if ids else 0\r\n\r\n def _load(self, task_id: int) -> dict:\r\n path = self.dir / f\"task_{task_id}.json\"\r\n if not path.exists():\r\n raise ValueError(f\"Task {task_id} not found\")\r\n return json.loads(path.read_text())\r\n\r\n def _save(self, task: dict):\r\n path = self.dir / f\"task_{task['id']}.json\"\r\n path.write_text(json.dumps(task, indent=2))\r\n\r\n def create(self, subject: str, description: str = \"\") -> str:\r\n task = {\r\n \"id\": self._next_id, \"subject\": subject, \"description\": description,\r\n \"status\": \"pending\", \"blockedBy\": [], \"blocks\": [], \"owner\": \"\",\r\n }\r\n self._save(task)\r\n self._next_id += 1\r\n return json.dumps(task, indent=2)\r\n\r\n def get(self, task_id: int) -> str:\r\n return json.dumps(self._load(task_id), indent=2)\r\n\r\n def update(self, task_id: int, status: str = None,\r\n add_blocked_by: list = None, add_blocks: list = None) -> str:\r\n task = self._load(task_id)\r\n if status:\r\n if status not in (\"pending\", \"in_progress\", \"completed\"):\r\n raise ValueError(f\"Invalid status: {status}\")\r\n task[\"status\"] = status\r\n # When a task is completed, remove it from all other tasks' blockedBy\r\n if status == \"completed\":\r\n self._clear_dependency(task_id)\r\n if add_blocked_by:\r\n task[\"blockedBy\"] = list(set(task[\"blockedBy\"] + add_blocked_by))\r\n if add_blocks:\r\n task[\"blocks\"] = list(set(task[\"blocks\"] + add_blocks))\r\n # Bidirectional: also update the blocked tasks' blockedBy lists\r\n for blocked_id in add_blocks:\r\n try:\r\n blocked = 
self._load(blocked_id)\r\n if task_id not in blocked[\"blockedBy\"]:\r\n blocked[\"blockedBy\"].append(task_id)\r\n self._save(blocked)\r\n except ValueError:\r\n pass\r\n self._save(task)\r\n return json.dumps(task, indent=2)\r\n\r\n def _clear_dependency(self, completed_id: int):\r\n \"\"\"Remove completed_id from all other tasks' blockedBy lists.\"\"\"\r\n for f in self.dir.glob(\"task_*.json\"):\r\n task = json.loads(f.read_text())\r\n if completed_id in task.get(\"blockedBy\", []):\r\n task[\"blockedBy\"].remove(completed_id)\r\n self._save(task)\r\n\r\n def list_all(self) -> str:\r\n tasks = []\r\n for f in sorted(self.dir.glob(\"task_*.json\")):\r\n tasks.append(json.loads(f.read_text()))\r\n if not tasks:\r\n return \"No tasks.\"\r\n lines = []\r\n for t in tasks:\r\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}.get(t[\"status\"], \"[?]\")\r\n blocked = f\" (blocked by: {t['blockedBy']})\" if t.get(\"blockedBy\") else \"\"\r\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{blocked}\")\r\n return \"\\n\".join(lines)\r\n\r\n\r\nTASKS = TaskManager(TASKS_DIR)\r\n\r\n\r\n# -- Base tool implementations --\r\ndef safe_path(p: str) -> Path:\r\n path = (WORKDIR / p).resolve()\r\n if not path.is_relative_to(WORKDIR):\r\n raise ValueError(f\"Path escapes workspace: {p}\")\r\n return path\r\n\r\ndef run_bash(command: str) -> str:\r\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\r\n if any(d in command for d in dangerous):\r\n return \"Error: Dangerous command blocked\"\r\n try:\r\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\r\n capture_output=True, text=True, timeout=120)\r\n out = (r.stdout + r.stderr).strip()\r\n return out[:50000] if out else \"(no output)\"\r\n except subprocess.TimeoutExpired:\r\n return \"Error: Timeout (120s)\"\r\n\r\ndef run_read(path: str, limit: int = None) -> str:\r\n try:\r\n lines = safe_path(path).read_text().splitlines()\r\n if limit and limit < 
len(lines):\r\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\r\n return \"\\n\".join(lines)[:50000]\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\ndef run_write(path: str, content: str) -> str:\r\n try:\r\n fp = safe_path(path)\r\n fp.parent.mkdir(parents=True, exist_ok=True)\r\n fp.write_text(content)\r\n return f\"Wrote {len(content)} bytes\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\r\n try:\r\n fp = safe_path(path)\r\n c = fp.read_text()\r\n if old_text not in c:\r\n return f\"Error: Text not found in {path}\"\r\n fp.write_text(c.replace(old_text, new_text, 1))\r\n return f\"Edited {path}\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\n\r\nTOOL_HANDLERS = {\r\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\r\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\r\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\r\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\r\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\r\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"addBlockedBy\"), kw.get(\"addBlocks\")),\r\n \"task_list\": lambda **kw: TASKS.list_all(),\r\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\r\n}\r\n\r\nTOOLS = [\r\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\r\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\r\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\r\n \"input_schema\": {\"type\": 
\"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\r\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\r\n {\"name\": \"task_create\", \"description\": \"Create a new task.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"subject\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\"}}, \"required\": [\"subject\"]}},\r\n {\"name\": \"task_update\", \"description\": \"Update a task's status or dependencies.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}, \"addBlockedBy\": {\"type\": \"array\", \"items\": {\"type\": \"integer\"}}, \"addBlocks\": {\"type\": \"array\", \"items\": {\"type\": \"integer\"}}}, \"required\": [\"task_id\"]}},\r\n {\"name\": \"task_list\", \"description\": \"List all tasks with status summary.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\r\n {\"name\": \"task_get\", \"description\": \"Get full details of a task by ID.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\r\n]\r\n\r\n\r\ndef agent_loop(messages: list):\r\n while True:\r\n response = client.messages.create(\r\n model=MODEL, system=SYSTEM, messages=messages,\r\n tools=TOOLS, max_tokens=8000,\r\n )\r\n messages.append({\"role\": \"assistant\", \"content\": response.content})\r\n if response.stop_reason != \"tool_use\":\r\n return\r\n results = []\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n handler = 
TOOL_HANDLERS.get(block.name)\r\n try:\r\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\r\n except Exception as e:\r\n output = f\"Error: {e}\"\r\n print(f\"> {block.name}: {str(output)[:200]}\")\r\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\r\n messages.append({\"role\": \"user\", \"content\": results})\r\n\r\n\r\nif __name__ == \"__main__\":\r\n history = []\r\n while True:\r\n try:\r\n query = input(\"\\033[36ms07 >> \\033[0m\")\r\n except (EOFError, KeyboardInterrupt):\r\n break\r\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\r\n break\r\n history.append({\"role\": \"user\", \"content\": query})\r\n agent_loop(history)\r\n response_content = history[-1][\"content\"]\r\n if isinstance(response_content, list):\r\n for block in response_content:\r\n if hasattr(block, \"text\"):\r\n print(block.text)\r\n print()\r\n" }, { "id": "s08", "filename": "s08_background_tasks.py", "title": "Background Tasks", "subtitle": "Background Threads + Notifications", - "loc": 193, + "loc": 198, "tools": [ "bash", "read_file", @@ -459,14 +459,14 @@ } ], "layer": "concurrency", - "source": "#!/usr/bin/env python3\n\"\"\"\ns08_background_tasks.py - Background Tasks\n\nRun commands in background threads. A notification queue is drained\nbefore each LLM call to deliver results.\n\n Main thread Background thread\n +-----------------+ +-----------------+\n | agent loop | | task executes |\n | ... | | ... 
|\n | [LLM call] <---+------- | enqueue(result) |\n | ^drain queue | +-----------------+\n +-----------------+\n\n Timeline:\n Agent ----[spawn A]----[spawn B]----[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- notification queue --> [results injected]\n\nKey insight: \"Fire and forget -- the agent doesn't block while the command runs.\"\n\"\"\"\n\nimport os\nimport subprocess\nimport threading\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use background_run for long-running commands.\"\n\n\n# -- BackgroundManager: threaded execution + notification queue --\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {} # task_id -> {status, result, command}\n self._notification_queue = [] # completed task results\n self._lock = threading.Lock()\n\n def run(self, command: str) -> str:\n \"\"\"Start a background thread, return task_id immediately.\"\"\"\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"result\": None, \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True\n )\n thread.start()\n return f\"Background task {task_id} started: {command[:80]}\"\n\n def _execute(self, task_id: str, command: str):\n \"\"\"Thread target: run subprocess, capture output, push to queue.\"\"\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300\n )\n output = (r.stdout + r.stderr).strip()[:50000]\n status = \"completed\"\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n status = \"timeout\"\n except Exception as e:\n output = 
f\"Error: {e}\"\n status = \"error\"\n self.tasks[task_id][\"status\"] = status\n self.tasks[task_id][\"result\"] = output or \"(no output)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id,\n \"status\": status,\n \"command\": command[:80],\n \"result\": (output or \"(no output)\")[:500],\n })\n\n def check(self, task_id: str = None) -> str:\n \"\"\"Check status of one task or list all.\"\"\"\n if task_id:\n t = self.tasks.get(task_id)\n if not t:\n return f\"Error: Unknown task {task_id}\"\n return f\"[{t['status']}] {t['command'][:60]}\\n{t.get('result') or '(running)'}\"\n lines = []\n for tid, t in self.tasks.items():\n lines.append(f\"{tid}: [{t['status']}] {t['command'][:60]}\")\n return \"\\n\".join(lines) if lines else \"No background tasks.\"\n\n def drain_notifications(self) -> list:\n \"\"\"Return and clear all pending completion notifications.\"\"\"\n with self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n\n\nBG = BackgroundManager()\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"background_run\": lambda **kw: BG.run(kw[\"command\"]),\n \"check_background\": lambda **kw: BG.check(kw.get(\"task_id\")),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command (blocking).\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": 
{\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"background_run\", \"description\": \"Run command in background thread. Returns task_id immediately.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"check_background\", \"description\": \"Check background task status. Omit task_id to list all.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"string\"}}}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n # Drain background notifications and inject as system message before LLM call\n notifs = BG.drain_notifications()\n if notifs and messages:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['status']}: {n['result']}\" for n in notifs\n )\n messages.append({\"role\": \"user\", \"content\": f\"\\n{notif_text}\\n\"})\n messages.append({\"role\": \"assistant\", \"content\": \"Noted background results.\"})\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms08 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n 
break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n" + "source": "#!/usr/bin/env python3\r\n\"\"\"\r\ns08_background_tasks.py - Background Tasks\r\n\r\nRun commands in background threads. A notification queue is drained\r\nbefore each LLM call to deliver results.\r\n\r\n Main thread Background thread\r\n +-----------------+ +-----------------+\r\n | agent loop | | task executes |\r\n | ... | | ... |\r\n | [LLM call] <---+------- | enqueue(result) |\r\n | ^drain queue | +-----------------+\r\n +-----------------+\r\n\r\n Timeline:\r\n Agent ----[spawn A]----[spawn B]----[other work]----\r\n | |\r\n v v\r\n [A runs] [B runs] (parallel)\r\n | |\r\n +-- notification queue --> [results injected]\r\n\r\nKey insight: \"Fire and forget -- the agent doesn't block while the command runs.\"\r\n\"\"\"\r\n\r\nimport os\r\nimport subprocess\r\nimport threading\r\nimport uuid\r\nfrom pathlib import Path\r\n\r\nfrom anthropic import Anthropic\r\nfrom dotenv import load_dotenv\r\n\r\nload_dotenv(override=True)\r\n\r\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\r\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\r\n\r\nWORKDIR = Path.cwd()\r\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\r\nMODEL = os.environ[\"MODEL_ID\"]\r\n\r\nSYSTEM = f\"You are a coding agent at {WORKDIR}. 
Use background_run for long-running commands.\"\r\n\r\n\r\n# -- BackgroundManager: threaded execution + notification queue --\r\nclass BackgroundManager:\r\n def __init__(self):\r\n self.tasks = {} # task_id -> {status, result, command}\r\n self._notification_queue = [] # completed task results\r\n self._lock = threading.Lock()\r\n\r\n def run(self, command: str) -> str:\r\n \"\"\"Start a background thread, return task_id immediately.\"\"\"\r\n task_id = str(uuid.uuid4())[:8]\r\n self.tasks[task_id] = {\"status\": \"running\", \"result\": None, \"command\": command}\r\n thread = threading.Thread(\r\n target=self._execute, args=(task_id, command), daemon=True\r\n )\r\n thread.start()\r\n return f\"Background task {task_id} started: {command[:80]}\"\r\n\r\n def _execute(self, task_id: str, command: str):\r\n \"\"\"Thread target: run subprocess, capture output, push to queue.\"\"\"\r\n try:\r\n r = subprocess.run(\r\n command, shell=True, cwd=WORKDIR,\r\n capture_output=True, text=True, timeout=300\r\n )\r\n output = (r.stdout + r.stderr).strip()[:50000]\r\n status = \"completed\"\r\n except subprocess.TimeoutExpired:\r\n output = \"Error: Timeout (300s)\"\r\n status = \"timeout\"\r\n except Exception as e:\r\n output = f\"Error: {e}\"\r\n status = \"error\"\r\n self.tasks[task_id][\"status\"] = status\r\n self.tasks[task_id][\"result\"] = output or \"(no output)\"\r\n with self._lock:\r\n self._notification_queue.append({\r\n \"task_id\": task_id,\r\n \"status\": status,\r\n \"command\": command[:80],\r\n \"result\": (output or \"(no output)\")[:500],\r\n })\r\n\r\n def check(self, task_id: str = None) -> str:\r\n \"\"\"Check status of one task or list all.\"\"\"\r\n if task_id:\r\n t = self.tasks.get(task_id)\r\n if not t:\r\n return f\"Error: Unknown task {task_id}\"\r\n return f\"[{t['status']}] {t['command'][:60]}\\n{t.get('result') or '(running)'}\"\r\n lines = []\r\n for tid, t in self.tasks.items():\r\n lines.append(f\"{tid}: [{t['status']}] 
{t['command'][:60]}\")\r\n return \"\\n\".join(lines) if lines else \"No background tasks.\"\r\n\r\n def drain_notifications(self) -> list:\r\n \"\"\"Return and clear all pending completion notifications.\"\"\"\r\n with self._lock:\r\n notifs = list(self._notification_queue)\r\n self._notification_queue.clear()\r\n return notifs\r\n\r\n\r\nBG = BackgroundManager()\r\n\r\n\r\n# -- Tool implementations --\r\ndef safe_path(p: str) -> Path:\r\n path = (WORKDIR / p).resolve()\r\n if not path.is_relative_to(WORKDIR):\r\n raise ValueError(f\"Path escapes workspace: {p}\")\r\n return path\r\n\r\ndef run_bash(command: str) -> str:\r\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\r\n if any(d in command for d in dangerous):\r\n return \"Error: Dangerous command blocked\"\r\n try:\r\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\r\n capture_output=True, text=True, timeout=120)\r\n out = (r.stdout + r.stderr).strip()\r\n return out[:50000] if out else \"(no output)\"\r\n except subprocess.TimeoutExpired:\r\n return \"Error: Timeout (120s)\"\r\n\r\ndef run_read(path: str, limit: int = None) -> str:\r\n try:\r\n lines = safe_path(path).read_text().splitlines()\r\n if limit and limit < len(lines):\r\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\r\n return \"\\n\".join(lines)[:50000]\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\ndef run_write(path: str, content: str) -> str:\r\n try:\r\n fp = safe_path(path)\r\n fp.parent.mkdir(parents=True, exist_ok=True)\r\n fp.write_text(content)\r\n return f\"Wrote {len(content)} bytes\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\r\n try:\r\n fp = safe_path(path)\r\n c = fp.read_text()\r\n if old_text not in c:\r\n return f\"Error: Text not found in {path}\"\r\n fp.write_text(c.replace(old_text, new_text, 1))\r\n return f\"Edited {path}\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\n\r\nTOOL_HANDLERS = {\r\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\r\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\r\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\r\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\r\n \"background_run\": lambda **kw: BG.run(kw[\"command\"]),\r\n \"check_background\": lambda **kw: BG.check(kw.get(\"task_id\")),\r\n}\r\n\r\nTOOLS = [\r\n {\"name\": \"bash\", \"description\": \"Run a shell command (blocking).\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\r\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\r\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\r\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\r\n \"input_schema\": 
{\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\r\n {\"name\": \"background_run\", \"description\": \"Run command in background thread. Returns task_id immediately.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\r\n {\"name\": \"check_background\", \"description\": \"Check background task status. Omit task_id to list all.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"string\"}}}},\r\n]\r\n\r\n\r\ndef agent_loop(messages: list):\r\n while True:\r\n # Drain background notifications and inject as system message before LLM call\r\n notifs = BG.drain_notifications()\r\n if notifs and messages:\r\n notif_text = \"\\n\".join(\r\n f\"[bg:{n['task_id']}] {n['status']}: {n['result']}\" for n in notifs\r\n )\r\n messages.append({\"role\": \"user\", \"content\": f\"\\n{notif_text}\\n\"})\r\n messages.append({\"role\": \"assistant\", \"content\": \"Noted background results.\"})\r\n response = client.messages.create(\r\n model=MODEL, system=SYSTEM, messages=messages,\r\n tools=TOOLS, max_tokens=8000,\r\n )\r\n messages.append({\"role\": \"assistant\", \"content\": response.content})\r\n if response.stop_reason != \"tool_use\":\r\n return\r\n results = []\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n handler = TOOL_HANDLERS.get(block.name)\r\n try:\r\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\r\n except Exception as e:\r\n output = f\"Error: {e}\"\r\n print(f\"> {block.name}: {str(output)[:200]}\")\r\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\r\n messages.append({\"role\": \"user\", \"content\": results})\r\n\r\n\r\nif __name__ == \"__main__\":\r\n history = []\r\n while 
True:\r\n try:\r\n query = input(\"\\033[36ms08 >> \\033[0m\")\r\n except (EOFError, KeyboardInterrupt):\r\n break\r\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\r\n break\r\n history.append({\"role\": \"user\", \"content\": query})\r\n agent_loop(history)\r\n response_content = history[-1][\"content\"]\r\n if isinstance(response_content, list):\r\n for block in response_content:\r\n if hasattr(block, \"text\"):\r\n print(block.text)\r\n print()\r\n" }, { "id": "s09", "filename": "s09_agent_teams.py", "title": "Agent Teams", "subtitle": "Teammates + Mailboxes", - "loc": 343, + "loc": 348, "tools": [ "alice", "bash", @@ -534,14 +534,14 @@ } ], "layer": "collaboration", - "source": "#!/usr/bin/env python3\n\"\"\"\ns09_agent_teams.py - Agent Teams\n\nPersistent named agents with file-based JSONL inboxes. Each teammate runs\nits own agent loop in a separate thread. Communication via append-only inboxes.\n\n Subagent (s04): spawn -> execute -> return summary -> destroyed\n Teammate (s09): spawn -> work -> idle -> work -> ... -> shutdown\n\n .team/config.json .team/inbox/\n +----------------------------+ +------------------+\n | {\"team_name\": \"default\", | | alice.jsonl |\n | \"members\": [ | | bob.jsonl |\n | {\"name\":\"alice\", | | lead.jsonl |\n | \"role\":\"coder\", | +------------------+\n | \"status\":\"idle\"} |\n | ]} | send_message(\"alice\", \"fix bug\"):\n +----------------------------+ open(\"alice.jsonl\", \"a\").write(msg)\n\n read_inbox(\"alice\"):\n spawn_teammate(\"alice\",\"coder\",...) msgs = [json.loads(l) for l in ...]\n | open(\"alice.jsonl\", \"w\").close()\n v return msgs # drain\n Thread: alice Thread: bob\n +------------------+ +------------------+\n | agent_loop | | agent_loop |\n | status: working | | status: idle |\n | ... runs tools | | ... waits ... 
|\n | status -> idle | | |\n +------------------+ +------------------+\n\n 5 message types (all declared, not all handled here):\n +-------------------------+-----------------------------------+\n | message | Normal text message |\n | broadcast | Sent to all teammates |\n | shutdown_request | Request graceful shutdown (s10) |\n | shutdown_response | Approve/reject shutdown (s10) |\n | plan_approval_response | Approve/reject plan (s10) |\n +-------------------------+-----------------------------------+\n\nKey insight: \"Teammates that can talk to each other.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Spawn teammates and communicate via inboxes.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval_response\",\n}\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. 
Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- TeammateManager: persistent named agents with config.json --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n 
self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _teammate_loop(self, name: str, role: str, prompt: str):\n sys_prompt = (\n f\"You are '{name}', role: {role}, at {WORKDIR}. \"\n f\"Use send_message to communicate. Complete your task.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n member = self._find_member(name)\n if member and member[\"status\"] != \"shutdown\":\n member[\"status\"] = \"idle\"\n self._save_config()\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, 
args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) 
-> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead tool dispatch (9 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": 
\"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn a persistent teammate that runs in its own thread.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates with name, role, status.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)}\",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\",\n })\n response = 
client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms09 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n" + "source": "#!/usr/bin/env python3\r\n\"\"\"\r\ns09_agent_teams.py - Agent Teams\r\n\r\nPersistent named agents with file-based JSONL inboxes. Each teammate runs\r\nits own agent loop in a separate thread. Communication via append-only inboxes.\r\n\r\n Subagent (s04): spawn -> execute -> return summary -> destroyed\r\n Teammate (s09): spawn -> work -> idle -> work -> ... 
-> shutdown\r\n\r\n .team/config.json .team/inbox/\r\n +----------------------------+ +------------------+\r\n | {\"team_name\": \"default\", | | alice.jsonl |\r\n | \"members\": [ | | bob.jsonl |\r\n | {\"name\":\"alice\", | | lead.jsonl |\r\n | \"role\":\"coder\", | +------------------+\r\n | \"status\":\"idle\"} |\r\n | ]} | send_message(\"alice\", \"fix bug\"):\r\n +----------------------------+ open(\"alice.jsonl\", \"a\").write(msg)\r\n\r\n read_inbox(\"alice\"):\r\n spawn_teammate(\"alice\",\"coder\",...) msgs = [json.loads(l) for l in ...]\r\n | open(\"alice.jsonl\", \"w\").close()\r\n v return msgs # drain\r\n Thread: alice Thread: bob\r\n +------------------+ +------------------+\r\n | agent_loop | | agent_loop |\r\n | status: working | | status: idle |\r\n | ... runs tools | | ... waits ... |\r\n | status -> idle | | |\r\n +------------------+ +------------------+\r\n\r\n 5 message types (all declared, not all handled here):\r\n +-------------------------+-----------------------------------+\r\n | message | Normal text message |\r\n | broadcast | Sent to all teammates |\r\n | shutdown_request | Request graceful shutdown (s10) |\r\n | shutdown_response | Approve/reject shutdown (s10) |\r\n | plan_approval_response | Approve/reject plan (s10) |\r\n +-------------------------+-----------------------------------+\r\n\r\nKey insight: \"Teammates that can talk to each other.\"\r\n\"\"\"\r\n\r\nimport json\r\nimport os\r\nimport subprocess\r\nimport threading\r\nimport time\r\nfrom pathlib import Path\r\n\r\nfrom anthropic import Anthropic\r\nfrom dotenv import load_dotenv\r\n\r\nload_dotenv(override=True)\r\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\r\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\r\n\r\nWORKDIR = Path.cwd()\r\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\r\nMODEL = os.environ[\"MODEL_ID\"]\r\nTEAM_DIR = WORKDIR / \".team\"\r\nINBOX_DIR = TEAM_DIR / \"inbox\"\r\n\r\nSYSTEM = f\"You are a team lead at {WORKDIR}. 
Spawn teammates and communicate via inboxes.\"\r\n\r\nVALID_MSG_TYPES = {\r\n \"message\",\r\n \"broadcast\",\r\n \"shutdown_request\",\r\n \"shutdown_response\",\r\n \"plan_approval_response\",\r\n}\r\n\r\n\r\n# -- MessageBus: JSONL inbox per teammate --\r\nclass MessageBus:\r\n def __init__(self, inbox_dir: Path):\r\n self.dir = inbox_dir\r\n self.dir.mkdir(parents=True, exist_ok=True)\r\n\r\n def send(self, sender: str, to: str, content: str,\r\n msg_type: str = \"message\", extra: dict = None) -> str:\r\n if msg_type not in VALID_MSG_TYPES:\r\n return f\"Error: Invalid type '{msg_type}'. Valid: {VALID_MSG_TYPES}\"\r\n msg = {\r\n \"type\": msg_type,\r\n \"from\": sender,\r\n \"content\": content,\r\n \"timestamp\": time.time(),\r\n }\r\n if extra:\r\n msg.update(extra)\r\n inbox_path = self.dir / f\"{to}.jsonl\"\r\n with open(inbox_path, \"a\") as f:\r\n f.write(json.dumps(msg) + \"\\n\")\r\n return f\"Sent {msg_type} to {to}\"\r\n\r\n def read_inbox(self, name: str) -> list:\r\n inbox_path = self.dir / f\"{name}.jsonl\"\r\n if not inbox_path.exists():\r\n return []\r\n messages = []\r\n for line in inbox_path.read_text().strip().splitlines():\r\n if line:\r\n messages.append(json.loads(line))\r\n inbox_path.write_text(\"\")\r\n return messages\r\n\r\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\r\n count = 0\r\n for name in teammates:\r\n if name != sender:\r\n self.send(sender, name, content, \"broadcast\")\r\n count += 1\r\n return f\"Broadcast to {count} teammates\"\r\n\r\n\r\nBUS = MessageBus(INBOX_DIR)\r\n\r\n\r\n# -- TeammateManager: persistent named agents with config.json --\r\nclass TeammateManager:\r\n def __init__(self, team_dir: Path):\r\n self.dir = team_dir\r\n self.dir.mkdir(exist_ok=True)\r\n self.config_path = self.dir / \"config.json\"\r\n self.config = self._load_config()\r\n self.threads = {}\r\n\r\n def _load_config(self) -> dict:\r\n if self.config_path.exists():\r\n return 
json.loads(self.config_path.read_text())\r\n return {\"team_name\": \"default\", \"members\": []}\r\n\r\n def _save_config(self):\r\n self.config_path.write_text(json.dumps(self.config, indent=2))\r\n\r\n def _find_member(self, name: str) -> dict:\r\n for m in self.config[\"members\"]:\r\n if m[\"name\"] == name:\r\n return m\r\n return None\r\n\r\n def spawn(self, name: str, role: str, prompt: str) -> str:\r\n member = self._find_member(name)\r\n if member:\r\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\r\n return f\"Error: '{name}' is currently {member['status']}\"\r\n member[\"status\"] = \"working\"\r\n member[\"role\"] = role\r\n else:\r\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\r\n self.config[\"members\"].append(member)\r\n self._save_config()\r\n thread = threading.Thread(\r\n target=self._teammate_loop,\r\n args=(name, role, prompt),\r\n daemon=True,\r\n )\r\n self.threads[name] = thread\r\n thread.start()\r\n return f\"Spawned '{name}' (role: {role})\"\r\n\r\n def _teammate_loop(self, name: str, role: str, prompt: str):\r\n sys_prompt = (\r\n f\"You are '{name}', role: {role}, at {WORKDIR}. \"\r\n f\"Use send_message to communicate. 
Complete your task.\"\r\n )\r\n messages = [{\"role\": \"user\", \"content\": prompt}]\r\n tools = self._teammate_tools()\r\n for _ in range(50):\r\n inbox = BUS.read_inbox(name)\r\n for msg in inbox:\r\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\r\n try:\r\n response = client.messages.create(\r\n model=MODEL,\r\n system=sys_prompt,\r\n messages=messages,\r\n tools=tools,\r\n max_tokens=8000,\r\n )\r\n except Exception:\r\n break\r\n messages.append({\"role\": \"assistant\", \"content\": response.content})\r\n if response.stop_reason != \"tool_use\":\r\n break\r\n results = []\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n output = self._exec(name, block.name, block.input)\r\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\r\n results.append({\r\n \"type\": \"tool_result\",\r\n \"tool_use_id\": block.id,\r\n \"content\": str(output),\r\n })\r\n messages.append({\"role\": \"user\", \"content\": results})\r\n member = self._find_member(name)\r\n if member and member[\"status\"] != \"shutdown\":\r\n member[\"status\"] = \"idle\"\r\n self._save_config()\r\n\r\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\r\n # these base tools are unchanged from s02\r\n if tool_name == \"bash\":\r\n return _run_bash(args[\"command\"])\r\n if tool_name == \"read_file\":\r\n return _run_read(args[\"path\"])\r\n if tool_name == \"write_file\":\r\n return _run_write(args[\"path\"], args[\"content\"])\r\n if tool_name == \"edit_file\":\r\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\r\n if tool_name == \"send_message\":\r\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\r\n if tool_name == \"read_inbox\":\r\n return json.dumps(BUS.read_inbox(sender), indent=2)\r\n return f\"Unknown tool: {tool_name}\"\r\n\r\n def _teammate_tools(self) -> list:\r\n # these base tools are unchanged from s02\r\n return [\r\n {\"name\": \"bash\", 
\"description\": \"Run a shell command.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\r\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\r\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\r\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\r\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\r\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\r\n ]\r\n\r\n def list_all(self) -> str:\r\n if not self.config[\"members\"]:\r\n return \"No teammates.\"\r\n lines = [f\"Team: {self.config['team_name']}\"]\r\n for m in self.config[\"members\"]:\r\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\r\n return \"\\n\".join(lines)\r\n\r\n def member_names(self) -> list:\r\n return [m[\"name\"] for m in self.config[\"members\"]]\r\n\r\n\r\nTEAM = TeammateManager(TEAM_DIR)\r\n\r\n\r\n# -- Base tool implementations (these base tools are unchanged from s02) --\r\ndef _safe_path(p: str) -> Path:\r\n path = (WORKDIR / 
p).resolve()\r\n if not path.is_relative_to(WORKDIR):\r\n raise ValueError(f\"Path escapes workspace: {p}\")\r\n return path\r\n\r\n\r\ndef _run_bash(command: str) -> str:\r\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\r\n if any(d in command for d in dangerous):\r\n return \"Error: Dangerous command blocked\"\r\n try:\r\n r = subprocess.run(\r\n command, shell=True, cwd=WORKDIR,\r\n capture_output=True, text=True, timeout=120,\r\n )\r\n out = (r.stdout + r.stderr).strip()\r\n return out[:50000] if out else \"(no output)\"\r\n except subprocess.TimeoutExpired:\r\n return \"Error: Timeout (120s)\"\r\n\r\n\r\ndef _run_read(path: str, limit: int = None) -> str:\r\n try:\r\n lines = _safe_path(path).read_text().splitlines()\r\n if limit and limit < len(lines):\r\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\r\n return \"\\n\".join(lines)[:50000]\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\n\r\ndef _run_write(path: str, content: str) -> str:\r\n try:\r\n fp = _safe_path(path)\r\n fp.parent.mkdir(parents=True, exist_ok=True)\r\n fp.write_text(content)\r\n return f\"Wrote {len(content)} bytes\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\n\r\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\r\n try:\r\n fp = _safe_path(path)\r\n c = fp.read_text()\r\n if old_text not in c:\r\n return f\"Error: Text not found in {path}\"\r\n fp.write_text(c.replace(old_text, new_text, 1))\r\n return f\"Edited {path}\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\n\r\n# -- Lead tool dispatch (9 tools) --\r\nTOOL_HANDLERS = {\r\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\r\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\r\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\r\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\r\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], 
kw[\"role\"], kw[\"prompt\"]),\r\n \"list_teammates\": lambda **kw: TEAM.list_all(),\r\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\r\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\r\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\r\n}\r\n\r\n# these base tools are unchanged from s02\r\nTOOLS = [\r\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\r\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\r\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\r\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\r\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn a persistent teammate that runs in its own thread.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\r\n {\"name\": \"list_teammates\", \"description\": \"List all teammates with name, role, status.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\r\n {\"name\": \"send_message\", \"description\": \"Send a 
message to a teammate's inbox.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\r\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\r\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\r\n]\r\n\r\n\r\ndef agent_loop(messages: list):\r\n while True:\r\n inbox = BUS.read_inbox(\"lead\")\r\n if inbox:\r\n messages.append({\r\n \"role\": \"user\",\r\n \"content\": f\"{json.dumps(inbox, indent=2)}\",\r\n })\r\n messages.append({\r\n \"role\": \"assistant\",\r\n \"content\": \"Noted inbox messages.\",\r\n })\r\n response = client.messages.create(\r\n model=MODEL,\r\n system=SYSTEM,\r\n messages=messages,\r\n tools=TOOLS,\r\n max_tokens=8000,\r\n )\r\n messages.append({\"role\": \"assistant\", \"content\": response.content})\r\n if response.stop_reason != \"tool_use\":\r\n return\r\n results = []\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n handler = TOOL_HANDLERS.get(block.name)\r\n try:\r\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\r\n except Exception as e:\r\n output = f\"Error: {e}\"\r\n print(f\"> {block.name}: {str(output)[:200]}\")\r\n results.append({\r\n \"type\": \"tool_result\",\r\n \"tool_use_id\": block.id,\r\n \"content\": str(output),\r\n })\r\n messages.append({\"role\": \"user\", \"content\": results})\r\n\r\n\r\nif __name__ == \"__main__\":\r\n history = []\r\n while True:\r\n try:\r\n query = input(\"\\033[36ms09 >> \\033[0m\")\r\n except (EOFError, KeyboardInterrupt):\r\n break\r\n if query.strip().lower() in (\"q\", \"exit\", 
\"\"):\r\n break\r\n if query.strip() == \"/team\":\r\n print(TEAM.list_all())\r\n continue\r\n if query.strip() == \"/inbox\":\r\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\r\n continue\r\n history.append({\"role\": \"user\", \"content\": query})\r\n agent_loop(history)\r\n response_content = history[-1][\"content\"]\r\n if isinstance(response_content, list):\r\n for block in response_content:\r\n if hasattr(block, \"text\"):\r\n print(block.text)\r\n print()\r\n" }, { "id": "s10", "filename": "s10_team_protocols.py", "title": "Team Protocols", "subtitle": "Shared Communication Rules", - "loc": 414, + "loc": 419, "tools": [ "bash", "read_file", @@ -623,14 +623,14 @@ } ], "layer": "collaboration", - "source": "#!/usr/bin/env python3\n\"\"\"\ns10_team_protocols.py - Team Protocols\n\nShutdown protocol and plan approval protocol, both using the same\nrequest_id correlation pattern. Builds on s09's team messaging.\n\n Shutdown FSM: pending -> approved | rejected\n\n Lead Teammate\n +---------------------+ +---------------------+\n | shutdown_request | | |\n | { | -------> | receives request |\n | request_id: abc | | decides: approve? |\n | } | | |\n +---------------------+ +---------------------+\n |\n +---------------------+ +-------v-------------+\n | shutdown_response | <------- | shutdown_response |\n | { | | { |\n | request_id: abc | | request_id: abc |\n | approve: true | | approve: true |\n | } | | } |\n +---------------------+ +---------------------+\n |\n v\n status -> \"shutdown\", thread stops\n\n Plan approval FSM: pending -> approved | rejected\n\n Teammate Lead\n +---------------------+ +---------------------+\n | plan_approval | | |\n | submit: {plan:\"...\"}| -------> | reviews plan text |\n +---------------------+ | approve/reject? 
|\n +---------------------+\n |\n +---------------------+ +-------v-------------+\n | plan_approval_resp | <------- | plan_approval |\n | {approve: true} | | review: {req_id, |\n +---------------------+ | approve: true} |\n +---------------------+\n\n Trackers: {request_id: {\"target|from\": name, \"status\": \"pending|...\"}}\n\nKey insight: \"Same request_id correlation pattern, two domains.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Manage teammates with shutdown and plan approval protocols.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval_response\",\n}\n\n# -- Request trackers: correlate by request_id --\nshutdown_requests = {}\nplan_requests = {}\n_tracker_lock = threading.Lock()\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. 
Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- TeammateManager with shutdown + plan approval --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n 
self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _teammate_loop(self, name: str, role: str, prompt: str):\n sys_prompt = (\n f\"You are '{name}', role: {role}, at {WORKDIR}. \"\n f\"Submit plans via plan_approval before major work. \"\n f\"Respond to shutdown_request with shutdown_response.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n should_exit = False\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n if should_exit:\n break\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n if block.name == \"shutdown_response\" and block.input.get(\"approve\"):\n should_exit = True\n messages.append({\"role\": \"user\", \"content\": results})\n member = self._find_member(name)\n if member:\n member[\"status\"] = \"shutdown\" if should_exit else \"idle\"\n self._save_config()\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name 
== \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n if tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n with _tracker_lock:\n if req_id in shutdown_requests:\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\n sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": approve},\n )\n return f\"Shutdown {'approved' if approve else 'rejected'}\"\n if tool_name == \"plan_approval\":\n plan_text = args.get(\"plan\", \"\")\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n plan_requests[req_id] = {\"from\": sender, \"plan\": plan_text, \"status\": \"pending\"}\n BUS.send(\n sender, \"lead\", plan_text, \"plan_approval_response\",\n {\"request_id\": req_id, \"plan\": plan_text},\n )\n return f\"Plan submitted (request_id={req_id}). 
Waiting for lead approval.\"\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"shutdown_response\", \"description\": \"Respond to a shutdown request. 
Approve to shut down, reject to keep working.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Submit a plan for lead approval. Provide plan text.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead-specific protocol handlers --\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\n \"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id},\n )\n return f\"Shutdown request {req_id} sent to '{teammate}' (status: pending)\"\n\n\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\n with _tracker_lock:\n req = plan_requests.get(request_id)\n if not req:\n return f\"Error: Unknown plan request_id '{request_id}'\"\n with _tracker_lock:\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\n )\n return f\"Plan {req['status']} for '{req['from']}'\"\n\n\ndef _check_shutdown_status(request_id: str) -> str:\n with _tracker_lock:\n return json.dumps(shutdown_requests.get(request_id, {\"error\": \"not found\"}))\n\n\n# -- Lead tool dispatch (12 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n 
\"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn a persistent 
teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n {\"name\": \"shutdown_request\", \"description\": \"Request a teammate to shut down gracefully. Returns a request_id for tracking.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": {\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\n {\"name\": \"shutdown_response\", \"description\": \"Check the status of a shutdown request by request_id.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Approve or reject a teammate's plan. 
Provide request_id + approve + optional feedback.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)}\",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms10 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n" + "source": "#!/usr/bin/env python3\r\n\"\"\"\r\ns10_team_protocols.py - Team Protocols\r\n\r\nShutdown protocol and plan approval protocol, both using the same\r\nrequest_id correlation pattern. 
Builds on s09's team messaging.\r\n\r\n Shutdown FSM: pending -> approved | rejected\r\n\r\n Lead Teammate\r\n +---------------------+ +---------------------+\r\n | shutdown_request | | |\r\n | { | -------> | receives request |\r\n | request_id: abc | | decides: approve? |\r\n | } | | |\r\n +---------------------+ +---------------------+\r\n |\r\n +---------------------+ +-------v-------------+\r\n | shutdown_response | <------- | shutdown_response |\r\n | { | | { |\r\n | request_id: abc | | request_id: abc |\r\n | approve: true | | approve: true |\r\n | } | | } |\r\n +---------------------+ +---------------------+\r\n |\r\n v\r\n status -> \"shutdown\", thread stops\r\n\r\n Plan approval FSM: pending -> approved | rejected\r\n\r\n Teammate Lead\r\n +---------------------+ +---------------------+\r\n | plan_approval | | |\r\n | submit: {plan:\"...\"}| -------> | reviews plan text |\r\n +---------------------+ | approve/reject? |\r\n +---------------------+\r\n |\r\n +---------------------+ +-------v-------------+\r\n | plan_approval_resp | <------- | plan_approval |\r\n | {approve: true} | | review: {req_id, |\r\n +---------------------+ | approve: true} |\r\n +---------------------+\r\n\r\n Trackers: {request_id: {\"target|from\": name, \"status\": \"pending|...\"}}\r\n\r\nKey insight: \"Same request_id correlation pattern, two domains.\"\r\n\"\"\"\r\n\r\nimport json\r\nimport os\r\nimport subprocess\r\nimport threading\r\nimport time\r\nimport uuid\r\nfrom pathlib import Path\r\n\r\nfrom anthropic import Anthropic\r\nfrom dotenv import load_dotenv\r\n\r\nload_dotenv(override=True)\r\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\r\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\r\n\r\nWORKDIR = Path.cwd()\r\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\r\nMODEL = os.environ[\"MODEL_ID\"]\r\nTEAM_DIR = WORKDIR / \".team\"\r\nINBOX_DIR = TEAM_DIR / \"inbox\"\r\n\r\nSYSTEM = f\"You are a team lead at {WORKDIR}. 
Manage teammates with shutdown and plan approval protocols.\"\r\n\r\nVALID_MSG_TYPES = {\r\n \"message\",\r\n \"broadcast\",\r\n \"shutdown_request\",\r\n \"shutdown_response\",\r\n \"plan_approval_response\",\r\n}\r\n\r\n# -- Request trackers: correlate by request_id --\r\nshutdown_requests = {}\r\nplan_requests = {}\r\n_tracker_lock = threading.Lock()\r\n\r\n\r\n# -- MessageBus: JSONL inbox per teammate --\r\nclass MessageBus:\r\n def __init__(self, inbox_dir: Path):\r\n self.dir = inbox_dir\r\n self.dir.mkdir(parents=True, exist_ok=True)\r\n\r\n def send(self, sender: str, to: str, content: str,\r\n msg_type: str = \"message\", extra: dict = None) -> str:\r\n if msg_type not in VALID_MSG_TYPES:\r\n return f\"Error: Invalid type '{msg_type}'. Valid: {VALID_MSG_TYPES}\"\r\n msg = {\r\n \"type\": msg_type,\r\n \"from\": sender,\r\n \"content\": content,\r\n \"timestamp\": time.time(),\r\n }\r\n if extra:\r\n msg.update(extra)\r\n inbox_path = self.dir / f\"{to}.jsonl\"\r\n with open(inbox_path, \"a\") as f:\r\n f.write(json.dumps(msg) + \"\\n\")\r\n return f\"Sent {msg_type} to {to}\"\r\n\r\n def read_inbox(self, name: str) -> list:\r\n inbox_path = self.dir / f\"{name}.jsonl\"\r\n if not inbox_path.exists():\r\n return []\r\n messages = []\r\n for line in inbox_path.read_text().strip().splitlines():\r\n if line:\r\n messages.append(json.loads(line))\r\n inbox_path.write_text(\"\")\r\n return messages\r\n\r\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\r\n count = 0\r\n for name in teammates:\r\n if name != sender:\r\n self.send(sender, name, content, \"broadcast\")\r\n count += 1\r\n return f\"Broadcast to {count} teammates\"\r\n\r\n\r\nBUS = MessageBus(INBOX_DIR)\r\n\r\n\r\n# -- TeammateManager with shutdown + plan approval --\r\nclass TeammateManager:\r\n def __init__(self, team_dir: Path):\r\n self.dir = team_dir\r\n self.dir.mkdir(exist_ok=True)\r\n self.config_path = self.dir / \"config.json\"\r\n self.config = 
self._load_config()\r\n self.threads = {}\r\n\r\n def _load_config(self) -> dict:\r\n if self.config_path.exists():\r\n return json.loads(self.config_path.read_text())\r\n return {\"team_name\": \"default\", \"members\": []}\r\n\r\n def _save_config(self):\r\n self.config_path.write_text(json.dumps(self.config, indent=2))\r\n\r\n def _find_member(self, name: str) -> dict:\r\n for m in self.config[\"members\"]:\r\n if m[\"name\"] == name:\r\n return m\r\n return None\r\n\r\n def spawn(self, name: str, role: str, prompt: str) -> str:\r\n member = self._find_member(name)\r\n if member:\r\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\r\n return f\"Error: '{name}' is currently {member['status']}\"\r\n member[\"status\"] = \"working\"\r\n member[\"role\"] = role\r\n else:\r\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\r\n self.config[\"members\"].append(member)\r\n self._save_config()\r\n thread = threading.Thread(\r\n target=self._teammate_loop,\r\n args=(name, role, prompt),\r\n daemon=True,\r\n )\r\n self.threads[name] = thread\r\n thread.start()\r\n return f\"Spawned '{name}' (role: {role})\"\r\n\r\n def _teammate_loop(self, name: str, role: str, prompt: str):\r\n sys_prompt = (\r\n f\"You are '{name}', role: {role}, at {WORKDIR}. \"\r\n f\"Submit plans via plan_approval before major work. 
\"\r\n f\"Respond to shutdown_request with shutdown_response.\"\r\n )\r\n messages = [{\"role\": \"user\", \"content\": prompt}]\r\n tools = self._teammate_tools()\r\n should_exit = False\r\n for _ in range(50):\r\n inbox = BUS.read_inbox(name)\r\n for msg in inbox:\r\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\r\n if should_exit:\r\n break\r\n try:\r\n response = client.messages.create(\r\n model=MODEL,\r\n system=sys_prompt,\r\n messages=messages,\r\n tools=tools,\r\n max_tokens=8000,\r\n )\r\n except Exception:\r\n break\r\n messages.append({\"role\": \"assistant\", \"content\": response.content})\r\n if response.stop_reason != \"tool_use\":\r\n break\r\n results = []\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n output = self._exec(name, block.name, block.input)\r\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\r\n results.append({\r\n \"type\": \"tool_result\",\r\n \"tool_use_id\": block.id,\r\n \"content\": str(output),\r\n })\r\n if block.name == \"shutdown_response\" and block.input.get(\"approve\"):\r\n should_exit = True\r\n messages.append({\"role\": \"user\", \"content\": results})\r\n member = self._find_member(name)\r\n if member:\r\n member[\"status\"] = \"shutdown\" if should_exit else \"idle\"\r\n self._save_config()\r\n\r\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\r\n # these base tools are unchanged from s02\r\n if tool_name == \"bash\":\r\n return _run_bash(args[\"command\"])\r\n if tool_name == \"read_file\":\r\n return _run_read(args[\"path\"])\r\n if tool_name == \"write_file\":\r\n return _run_write(args[\"path\"], args[\"content\"])\r\n if tool_name == \"edit_file\":\r\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\r\n if tool_name == \"send_message\":\r\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\r\n if tool_name == \"read_inbox\":\r\n return 
json.dumps(BUS.read_inbox(sender), indent=2)\r\n if tool_name == \"shutdown_response\":\r\n req_id = args[\"request_id\"]\r\n approve = args[\"approve\"]\r\n with _tracker_lock:\r\n if req_id in shutdown_requests:\r\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\r\n BUS.send(\r\n sender, \"lead\", args.get(\"reason\", \"\"),\r\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": approve},\r\n )\r\n return f\"Shutdown {'approved' if approve else 'rejected'}\"\r\n if tool_name == \"plan_approval\":\r\n plan_text = args.get(\"plan\", \"\")\r\n req_id = str(uuid.uuid4())[:8]\r\n with _tracker_lock:\r\n plan_requests[req_id] = {\"from\": sender, \"plan\": plan_text, \"status\": \"pending\"}\r\n BUS.send(\r\n sender, \"lead\", plan_text, \"plan_approval_response\",\r\n {\"request_id\": req_id, \"plan\": plan_text},\r\n )\r\n return f\"Plan submitted (request_id={req_id}). Waiting for lead approval.\"\r\n return f\"Unknown tool: {tool_name}\"\r\n\r\n def _teammate_tools(self) -> list:\r\n # these base tools are unchanged from s02\r\n return [\r\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\r\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\r\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\r\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, 
\"required\": [\"path\", \"old_text\", \"new_text\"]}},\r\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\r\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\r\n {\"name\": \"shutdown_response\", \"description\": \"Respond to a shutdown request. Approve to shut down, reject to keep working.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\r\n {\"name\": \"plan_approval\", \"description\": \"Submit a plan for lead approval. Provide plan text.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\r\n ]\r\n\r\n def list_all(self) -> str:\r\n if not self.config[\"members\"]:\r\n return \"No teammates.\"\r\n lines = [f\"Team: {self.config['team_name']}\"]\r\n for m in self.config[\"members\"]:\r\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\r\n return \"\\n\".join(lines)\r\n\r\n def member_names(self) -> list:\r\n return [m[\"name\"] for m in self.config[\"members\"]]\r\n\r\n\r\nTEAM = TeammateManager(TEAM_DIR)\r\n\r\n\r\n# -- Base tool implementations (these base tools are unchanged from s02) --\r\ndef _safe_path(p: str) -> Path:\r\n path = (WORKDIR / p).resolve()\r\n if not path.is_relative_to(WORKDIR):\r\n raise ValueError(f\"Path escapes workspace: {p}\")\r\n return path\r\n\r\n\r\ndef _run_bash(command: str) -> str:\r\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\r\n if any(d in command for d in dangerous):\r\n return 
\"Error: Dangerous command blocked\"\r\n try:\r\n r = subprocess.run(\r\n command, shell=True, cwd=WORKDIR,\r\n capture_output=True, text=True, timeout=120,\r\n )\r\n out = (r.stdout + r.stderr).strip()\r\n return out[:50000] if out else \"(no output)\"\r\n except subprocess.TimeoutExpired:\r\n return \"Error: Timeout (120s)\"\r\n\r\n\r\ndef _run_read(path: str, limit: int = None) -> str:\r\n try:\r\n lines = _safe_path(path).read_text().splitlines()\r\n if limit and limit < len(lines):\r\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\r\n return \"\\n\".join(lines)[:50000]\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\n\r\ndef _run_write(path: str, content: str) -> str:\r\n try:\r\n fp = _safe_path(path)\r\n fp.parent.mkdir(parents=True, exist_ok=True)\r\n fp.write_text(content)\r\n return f\"Wrote {len(content)} bytes\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\n\r\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\r\n try:\r\n fp = _safe_path(path)\r\n c = fp.read_text()\r\n if old_text not in c:\r\n return f\"Error: Text not found in {path}\"\r\n fp.write_text(c.replace(old_text, new_text, 1))\r\n return f\"Edited {path}\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\n\r\n# -- Lead-specific protocol handlers --\r\ndef handle_shutdown_request(teammate: str) -> str:\r\n req_id = str(uuid.uuid4())[:8]\r\n with _tracker_lock:\r\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\r\n BUS.send(\r\n \"lead\", teammate, \"Please shut down gracefully.\",\r\n \"shutdown_request\", {\"request_id\": req_id},\r\n )\r\n return f\"Shutdown request {req_id} sent to '{teammate}' (status: pending)\"\r\n\r\n\r\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\r\n with _tracker_lock:\r\n req = plan_requests.get(request_id)\r\n if not req:\r\n return f\"Error: Unknown plan request_id '{request_id}'\"\r\n with _tracker_lock:\r\n req[\"status\"] = 
\"approved\" if approve else \"rejected\"\r\n BUS.send(\r\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\r\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\r\n )\r\n return f\"Plan {req['status']} for '{req['from']}'\"\r\n\r\n\r\ndef _check_shutdown_status(request_id: str) -> str:\r\n with _tracker_lock:\r\n return json.dumps(shutdown_requests.get(request_id, {\"error\": \"not found\"}))\r\n\r\n\r\n# -- Lead tool dispatch (12 tools) --\r\nTOOL_HANDLERS = {\r\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\r\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\r\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\r\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\r\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\r\n \"list_teammates\": lambda **kw: TEAM.list_all(),\r\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\r\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\r\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\r\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\r\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\r\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\r\n}\r\n\r\n# these base tools are unchanged from s02\r\nTOOLS = [\r\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\r\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": 
{\"type\": \"integer\"}}, \"required\": [\"path\"]}},\r\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\r\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\r\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn a persistent teammate.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\r\n {\"name\": \"list_teammates\", \"description\": \"List all teammates.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\r\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\r\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\r\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\r\n {\"name\": \"shutdown_request\", \"description\": \"Request a teammate to shut down gracefully. 
Returns a request_id for tracking.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": {\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\r\n {\"name\": \"shutdown_response\", \"description\": \"Check the status of a shutdown request by request_id.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\r\n {\"name\": \"plan_approval\", \"description\": \"Approve or reject a teammate's plan. Provide request_id + approve + optional feedback.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\r\n]\r\n\r\n\r\ndef agent_loop(messages: list):\r\n while True:\r\n inbox = BUS.read_inbox(\"lead\")\r\n if inbox:\r\n messages.append({\r\n \"role\": \"user\",\r\n \"content\": f\"{json.dumps(inbox, indent=2)}\",\r\n })\r\n messages.append({\r\n \"role\": \"assistant\",\r\n \"content\": \"Noted inbox messages.\",\r\n })\r\n response = client.messages.create(\r\n model=MODEL,\r\n system=SYSTEM,\r\n messages=messages,\r\n tools=TOOLS,\r\n max_tokens=8000,\r\n )\r\n messages.append({\"role\": \"assistant\", \"content\": response.content})\r\n if response.stop_reason != \"tool_use\":\r\n return\r\n results = []\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n handler = TOOL_HANDLERS.get(block.name)\r\n try:\r\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\r\n except Exception as e:\r\n output = f\"Error: {e}\"\r\n print(f\"> {block.name}: {str(output)[:200]}\")\r\n results.append({\r\n \"type\": \"tool_result\",\r\n \"tool_use_id\": block.id,\r\n \"content\": str(output),\r\n })\r\n messages.append({\"role\": \"user\", \"content\": results})\r\n\r\n\r\nif __name__ == \"__main__\":\r\n history = []\r\n while True:\r\n try:\r\n query = 
input(\"\\033[36ms10 >> \\033[0m\")\r\n except (EOFError, KeyboardInterrupt):\r\n break\r\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\r\n break\r\n if query.strip() == \"/team\":\r\n print(TEAM.list_all())\r\n continue\r\n if query.strip() == \"/inbox\":\r\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\r\n continue\r\n history.append({\"role\": \"user\", \"content\": query})\r\n agent_loop(history)\r\n response_content = history[-1][\"content\"]\r\n if isinstance(response_content, list):\r\n for block in response_content:\r\n if hasattr(block, \"text\"):\r\n print(block.text)\r\n print()\r\n" }, { "id": "s11", "filename": "s11_autonomous_agents.py", "title": "Autonomous Agents", "subtitle": "Scan Board, Claim Tasks", - "loc": 494, + "loc": 499, "tools": [ "bash", "read_file", @@ -728,14 +728,14 @@ } ], "layer": "collaboration", - "source": "#!/usr/bin/env python3\n\"\"\"\ns11_autonomous_agents.py - Autonomous Agents\n\nIdle cycle with task board polling, auto-claiming unclaimed tasks, and\nidentity re-injection after context compression. Builds on s10's protocols.\n\n Teammate lifecycle:\n +-------+\n | spawn |\n +---+---+\n |\n v\n +-------+ tool_use +-------+\n | WORK | <----------- | LLM |\n +---+---+ +-------+\n |\n | stop_reason != tool_use\n v\n +--------+\n | IDLE | poll every 5s for up to 60s\n +---+----+\n |\n +---> check inbox -> message? -> resume WORK\n |\n +---> scan .tasks/ -> unclaimed? 
-> claim -> resume WORK\n |\n +---> timeout (60s) -> shutdown\n\n Identity re-injection after compression:\n messages = [identity_block, ...remaining...]\n \"You are 'coder', role: backend, team: my-team\"\n\nKey insight: \"The agent finds work itself.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\nTASKS_DIR = WORKDIR / \".tasks\"\n\nPOLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Teammates are autonomous -- they find work themselves.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval_response\",\n}\n\n# -- Request trackers --\nshutdown_requests = {}\nplan_requests = {}\n_tracker_lock = threading.Lock()\n_claim_lock = threading.Lock()\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. 
Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- Task board scanning --\ndef scan_unclaimed_tasks() -> list:\n TASKS_DIR.mkdir(exist_ok=True)\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n\n\ndef claim_task(task_id: int, owner: str) -> str:\n with _claim_lock:\n path = TASKS_DIR / f\"task_{task_id}.json\"\n if not path.exists():\n return f\"Error: Task {task_id} not found\"\n task = json.loads(path.read_text())\n task[\"owner\"] = owner\n task[\"status\"] = \"in_progress\"\n path.write_text(json.dumps(task, indent=2))\n return f\"Claimed task #{task_id} for {owner}\"\n\n\n# -- Identity re-injection after compression --\ndef make_identity_block(name: str, role: str, team_name: str) -> dict:\n return {\n \"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, team: {team_name}. 
Continue your work.\",\n }\n\n\n# -- Autonomous TeammateManager --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def _set_status(self, name: str, status: str):\n member = self._find_member(name)\n if member:\n member[\"status\"] = status\n self._save_config()\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _loop(self, name: str, role: str, prompt: str):\n team_name = self.config[\"team_name\"]\n sys_prompt = (\n f\"You are '{name}', role: {role}, team: {team_name}, at {WORKDIR}. \"\n f\"Use idle tool when you have no more work. 
You will auto-claim new tasks.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n\n while True:\n # -- WORK PHASE: standard agent loop --\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n self._set_status(name, \"idle\")\n return\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n idle_requested = False\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"idle\":\n idle_requested = True\n output = \"Entering idle phase. Will poll for new tasks.\"\n else:\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n if idle_requested:\n break\n\n # -- IDLE PHASE: poll for inbox messages and unclaimed tasks --\n self._set_status(name, \"idle\")\n resume = False\n polls = IDLE_TIMEOUT // max(POLL_INTERVAL, 1)\n for _ in range(polls):\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n resume = True\n break\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n claim_task(task[\"id\"], name)\n task_prompt = (\n f\"Task #{task['id']}: {task['subject']}\\n\"\n f\"{task.get('description', '')}\"\n )\n if 
len(messages) <= 3:\n messages.insert(0, make_identity_block(name, role, team_name))\n messages.insert(1, {\"role\": \"assistant\", \"content\": f\"I am {name}. Continuing.\"})\n messages.append({\"role\": \"user\", \"content\": task_prompt})\n messages.append({\"role\": \"assistant\", \"content\": f\"Claimed task #{task['id']}. Working on it.\"})\n resume = True\n break\n\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n if tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n with _tracker_lock:\n if req_id in shutdown_requests:\n shutdown_requests[req_id][\"status\"] = \"approved\" if args[\"approve\"] else \"rejected\"\n BUS.send(\n sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": args[\"approve\"]},\n )\n return f\"Shutdown {'approved' if args['approve'] else 'rejected'}\"\n if tool_name == \"plan_approval\":\n plan_text = args.get(\"plan\", \"\")\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n plan_requests[req_id] = {\"from\": sender, \"plan\": plan_text, \"status\": \"pending\"}\n BUS.send(\n sender, \"lead\", plan_text, \"plan_approval_response\",\n {\"request_id\": req_id, \"plan\": plan_text},\n )\n return f\"Plan submitted (request_id={req_id}). 
Waiting for approval.\"\n if tool_name == \"claim_task\":\n return claim_task(args[\"task_id\"], sender)\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"shutdown_response\", \"description\": \"Respond to a shutdown request.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"plan_approval\", 
\"description\": \"Submit a plan for lead approval.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\n {\"name\": \"idle\", \"description\": \"Signal that you have no more work. Enters idle polling phase.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the task board by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead-specific protocol handlers --\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\n \"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id},\n )\n return f\"Shutdown request {req_id} sent to '{teammate}'\"\n\n\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\n with _tracker_lock:\n req = plan_requests.get(request_id)\n if not req:\n return f\"Error: Unknown plan request_id '{request_id}'\"\n with _tracker_lock:\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\n )\n return f\"Plan {req['status']} for '{req['from']}'\"\n\n\ndef _check_shutdown_status(request_id: str) -> str:\n with _tracker_lock:\n return json.dumps(shutdown_requests.get(request_id, {\"error\": \"not found\"}))\n\n\n# -- Lead tool dispatch (14 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: 
_run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\n \"idle\": lambda **kw: \"Lead does not idle.\",\n \"claim_task\": lambda **kw: claim_task(kw[\"task_id\"], \"lead\"),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", 
\"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn an autonomous teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n {\"name\": \"shutdown_request\", \"description\": \"Request a teammate to shut down.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": {\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\n {\"name\": \"shutdown_response\", \"description\": \"Check shutdown request status.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Approve or reject a teammate's plan.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"idle\", \"description\": \"Enter idle state (for lead -- rarely used).\",\n 
\"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the board by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)}\",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms11 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n if query.strip() == \"/tasks\":\n TASKS_DIR.mkdir(exist_ok=True)\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n t = json.loads(f.read_text())\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}.get(t[\"status\"], \"[?]\")\n owner = f\" 
@{t['owner']}\" if t.get(\"owner\") else \"\"\n print(f\" {marker} #{t['id']}: {t['subject']}{owner}\")\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n" + "source": "#!/usr/bin/env python3\r\n\"\"\"\r\ns11_autonomous_agents.py - Autonomous Agents\r\n\r\nIdle cycle with task board polling, auto-claiming unclaimed tasks, and\r\nidentity re-injection after context compression. Builds on s10's protocols.\r\n\r\n Teammate lifecycle:\r\n +-------+\r\n | spawn |\r\n +---+---+\r\n |\r\n v\r\n +-------+ tool_use +-------+\r\n | WORK | <----------- | LLM |\r\n +---+---+ +-------+\r\n |\r\n | stop_reason != tool_use\r\n v\r\n +--------+\r\n | IDLE | poll every 5s for up to 60s\r\n +---+----+\r\n |\r\n +---> check inbox -> message? -> resume WORK\r\n |\r\n +---> scan .tasks/ -> unclaimed? -> claim -> resume WORK\r\n |\r\n +---> timeout (60s) -> shutdown\r\n\r\n Identity re-injection after compression:\r\n messages = [identity_block, ...remaining...]\r\n \"You are 'coder', role: backend, team: my-team\"\r\n\r\nKey insight: \"The agent finds work itself.\"\r\n\"\"\"\r\n\r\nimport json\r\nimport os\r\nimport subprocess\r\nimport threading\r\nimport time\r\nimport uuid\r\nfrom pathlib import Path\r\n\r\nfrom anthropic import Anthropic\r\nfrom dotenv import load_dotenv\r\n\r\nload_dotenv(override=True)\r\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\r\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\r\n\r\nWORKDIR = Path.cwd()\r\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\r\nMODEL = os.environ[\"MODEL_ID\"]\r\nTEAM_DIR = WORKDIR / \".team\"\r\nINBOX_DIR = TEAM_DIR / \"inbox\"\r\nTASKS_DIR = WORKDIR / \".tasks\"\r\n\r\nPOLL_INTERVAL = 5\r\nIDLE_TIMEOUT = 60\r\n\r\nSYSTEM = f\"You are a team lead at {WORKDIR}. 
Teammates are autonomous -- they find work themselves.\"\r\n\r\nVALID_MSG_TYPES = {\r\n \"message\",\r\n \"broadcast\",\r\n \"shutdown_request\",\r\n \"shutdown_response\",\r\n \"plan_approval_response\",\r\n}\r\n\r\n# -- Request trackers --\r\nshutdown_requests = {}\r\nplan_requests = {}\r\n_tracker_lock = threading.Lock()\r\n_claim_lock = threading.Lock()\r\n\r\n\r\n# -- MessageBus: JSONL inbox per teammate --\r\nclass MessageBus:\r\n def __init__(self, inbox_dir: Path):\r\n self.dir = inbox_dir\r\n self.dir.mkdir(parents=True, exist_ok=True)\r\n\r\n def send(self, sender: str, to: str, content: str,\r\n msg_type: str = \"message\", extra: dict = None) -> str:\r\n if msg_type not in VALID_MSG_TYPES:\r\n return f\"Error: Invalid type '{msg_type}'. Valid: {VALID_MSG_TYPES}\"\r\n msg = {\r\n \"type\": msg_type,\r\n \"from\": sender,\r\n \"content\": content,\r\n \"timestamp\": time.time(),\r\n }\r\n if extra:\r\n msg.update(extra)\r\n inbox_path = self.dir / f\"{to}.jsonl\"\r\n with open(inbox_path, \"a\") as f:\r\n f.write(json.dumps(msg) + \"\\n\")\r\n return f\"Sent {msg_type} to {to}\"\r\n\r\n def read_inbox(self, name: str) -> list:\r\n inbox_path = self.dir / f\"{name}.jsonl\"\r\n if not inbox_path.exists():\r\n return []\r\n messages = []\r\n for line in inbox_path.read_text().strip().splitlines():\r\n if line:\r\n messages.append(json.loads(line))\r\n inbox_path.write_text(\"\")\r\n return messages\r\n\r\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\r\n count = 0\r\n for name in teammates:\r\n if name != sender:\r\n self.send(sender, name, content, \"broadcast\")\r\n count += 1\r\n return f\"Broadcast to {count} teammates\"\r\n\r\n\r\nBUS = MessageBus(INBOX_DIR)\r\n\r\n\r\n# -- Task board scanning --\r\ndef scan_unclaimed_tasks() -> list:\r\n TASKS_DIR.mkdir(exist_ok=True)\r\n unclaimed = []\r\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\r\n task = json.loads(f.read_text())\r\n if (task.get(\"status\") == \"pending\"\r\n 
and not task.get(\"owner\")\r\n and not task.get(\"blockedBy\")):\r\n unclaimed.append(task)\r\n return unclaimed\r\n\r\n\r\ndef claim_task(task_id: int, owner: str) -> str:\r\n with _claim_lock:\r\n path = TASKS_DIR / f\"task_{task_id}.json\"\r\n if not path.exists():\r\n return f\"Error: Task {task_id} not found\"\r\n task = json.loads(path.read_text())\r\n task[\"owner\"] = owner\r\n task[\"status\"] = \"in_progress\"\r\n path.write_text(json.dumps(task, indent=2))\r\n return f\"Claimed task #{task_id} for {owner}\"\r\n\r\n\r\n# -- Identity re-injection after compression --\r\ndef make_identity_block(name: str, role: str, team_name: str) -> dict:\r\n return {\r\n \"role\": \"user\",\r\n \"content\": f\"You are '{name}', role: {role}, team: {team_name}. Continue your work.\",\r\n }\r\n\r\n\r\n# -- Autonomous TeammateManager --\r\nclass TeammateManager:\r\n def __init__(self, team_dir: Path):\r\n self.dir = team_dir\r\n self.dir.mkdir(exist_ok=True)\r\n self.config_path = self.dir / \"config.json\"\r\n self.config = self._load_config()\r\n self.threads = {}\r\n\r\n def _load_config(self) -> dict:\r\n if self.config_path.exists():\r\n return json.loads(self.config_path.read_text())\r\n return {\"team_name\": \"default\", \"members\": []}\r\n\r\n def _save_config(self):\r\n self.config_path.write_text(json.dumps(self.config, indent=2))\r\n\r\n def _find_member(self, name: str) -> dict:\r\n for m in self.config[\"members\"]:\r\n if m[\"name\"] == name:\r\n return m\r\n return None\r\n\r\n def _set_status(self, name: str, status: str):\r\n member = self._find_member(name)\r\n if member:\r\n member[\"status\"] = status\r\n self._save_config()\r\n\r\n def spawn(self, name: str, role: str, prompt: str) -> str:\r\n member = self._find_member(name)\r\n if member:\r\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\r\n return f\"Error: '{name}' is currently {member['status']}\"\r\n member[\"status\"] = \"working\"\r\n member[\"role\"] = role\r\n else:\r\n member = 
{\"name\": name, \"role\": role, \"status\": \"working\"}\r\n self.config[\"members\"].append(member)\r\n self._save_config()\r\n thread = threading.Thread(\r\n target=self._loop,\r\n args=(name, role, prompt),\r\n daemon=True,\r\n )\r\n self.threads[name] = thread\r\n thread.start()\r\n return f\"Spawned '{name}' (role: {role})\"\r\n\r\n def _loop(self, name: str, role: str, prompt: str):\r\n team_name = self.config[\"team_name\"]\r\n sys_prompt = (\r\n f\"You are '{name}', role: {role}, team: {team_name}, at {WORKDIR}. \"\r\n f\"Use idle tool when you have no more work. You will auto-claim new tasks.\"\r\n )\r\n messages = [{\"role\": \"user\", \"content\": prompt}]\r\n tools = self._teammate_tools()\r\n\r\n while True:\r\n # -- WORK PHASE: standard agent loop --\r\n for _ in range(50):\r\n inbox = BUS.read_inbox(name)\r\n for msg in inbox:\r\n if msg.get(\"type\") == \"shutdown_request\":\r\n self._set_status(name, \"shutdown\")\r\n return\r\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\r\n try:\r\n response = client.messages.create(\r\n model=MODEL,\r\n system=sys_prompt,\r\n messages=messages,\r\n tools=tools,\r\n max_tokens=8000,\r\n )\r\n except Exception:\r\n self._set_status(name, \"idle\")\r\n return\r\n messages.append({\"role\": \"assistant\", \"content\": response.content})\r\n if response.stop_reason != \"tool_use\":\r\n break\r\n results = []\r\n idle_requested = False\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n if block.name == \"idle\":\r\n idle_requested = True\r\n output = \"Entering idle phase. 
Will poll for new tasks.\"\r\n else:\r\n output = self._exec(name, block.name, block.input)\r\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\r\n results.append({\r\n \"type\": \"tool_result\",\r\n \"tool_use_id\": block.id,\r\n \"content\": str(output),\r\n })\r\n messages.append({\"role\": \"user\", \"content\": results})\r\n if idle_requested:\r\n break\r\n\r\n # -- IDLE PHASE: poll for inbox messages and unclaimed tasks --\r\n self._set_status(name, \"idle\")\r\n resume = False\r\n polls = IDLE_TIMEOUT // max(POLL_INTERVAL, 1)\r\n for _ in range(polls):\r\n time.sleep(POLL_INTERVAL)\r\n inbox = BUS.read_inbox(name)\r\n if inbox:\r\n for msg in inbox:\r\n if msg.get(\"type\") == \"shutdown_request\":\r\n self._set_status(name, \"shutdown\")\r\n return\r\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\r\n resume = True\r\n break\r\n unclaimed = scan_unclaimed_tasks()\r\n if unclaimed:\r\n task = unclaimed[0]\r\n claim_task(task[\"id\"], name)\r\n task_prompt = (\r\n f\"Task #{task['id']}: {task['subject']}\\n\"\r\n f\"{task.get('description', '')}\"\r\n )\r\n if len(messages) <= 3:\r\n messages.insert(0, make_identity_block(name, role, team_name))\r\n messages.insert(1, {\"role\": \"assistant\", \"content\": f\"I am {name}. Continuing.\"})\r\n messages.append({\"role\": \"user\", \"content\": task_prompt})\r\n messages.append({\"role\": \"assistant\", \"content\": f\"Claimed task #{task['id']}. 
Working on it.\"})\r\n resume = True\r\n break\r\n\r\n if not resume:\r\n self._set_status(name, \"shutdown\")\r\n return\r\n self._set_status(name, \"working\")\r\n\r\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\r\n # these base tools are unchanged from s02\r\n if tool_name == \"bash\":\r\n return _run_bash(args[\"command\"])\r\n if tool_name == \"read_file\":\r\n return _run_read(args[\"path\"])\r\n if tool_name == \"write_file\":\r\n return _run_write(args[\"path\"], args[\"content\"])\r\n if tool_name == \"edit_file\":\r\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\r\n if tool_name == \"send_message\":\r\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\r\n if tool_name == \"read_inbox\":\r\n return json.dumps(BUS.read_inbox(sender), indent=2)\r\n if tool_name == \"shutdown_response\":\r\n req_id = args[\"request_id\"]\r\n with _tracker_lock:\r\n if req_id in shutdown_requests:\r\n shutdown_requests[req_id][\"status\"] = \"approved\" if args[\"approve\"] else \"rejected\"\r\n BUS.send(\r\n sender, \"lead\", args.get(\"reason\", \"\"),\r\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": args[\"approve\"]},\r\n )\r\n return f\"Shutdown {'approved' if args['approve'] else 'rejected'}\"\r\n if tool_name == \"plan_approval\":\r\n plan_text = args.get(\"plan\", \"\")\r\n req_id = str(uuid.uuid4())[:8]\r\n with _tracker_lock:\r\n plan_requests[req_id] = {\"from\": sender, \"plan\": plan_text, \"status\": \"pending\"}\r\n BUS.send(\r\n sender, \"lead\", plan_text, \"plan_approval_response\",\r\n {\"request_id\": req_id, \"plan\": plan_text},\r\n )\r\n return f\"Plan submitted (request_id={req_id}). 
Waiting for approval.\"\r\n if tool_name == \"claim_task\":\r\n return claim_task(args[\"task_id\"], sender)\r\n return f\"Unknown tool: {tool_name}\"\r\n\r\n def _teammate_tools(self) -> list:\r\n # these base tools are unchanged from s02\r\n return [\r\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\r\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\r\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\r\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\r\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\r\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\r\n {\"name\": \"shutdown_response\", \"description\": \"Respond to a shutdown request.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\r\n 
{\"name\": \"plan_approval\", \"description\": \"Submit a plan for lead approval.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\r\n {\"name\": \"idle\", \"description\": \"Signal that you have no more work. Enters idle polling phase.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\r\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the task board by ID.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\r\n ]\r\n\r\n def list_all(self) -> str:\r\n if not self.config[\"members\"]:\r\n return \"No teammates.\"\r\n lines = [f\"Team: {self.config['team_name']}\"]\r\n for m in self.config[\"members\"]:\r\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\r\n return \"\\n\".join(lines)\r\n\r\n def member_names(self) -> list:\r\n return [m[\"name\"] for m in self.config[\"members\"]]\r\n\r\n\r\nTEAM = TeammateManager(TEAM_DIR)\r\n\r\n\r\n# -- Base tool implementations (these base tools are unchanged from s02) --\r\ndef _safe_path(p: str) -> Path:\r\n path = (WORKDIR / p).resolve()\r\n if not path.is_relative_to(WORKDIR):\r\n raise ValueError(f\"Path escapes workspace: {p}\")\r\n return path\r\n\r\n\r\ndef _run_bash(command: str) -> str:\r\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\r\n if any(d in command for d in dangerous):\r\n return \"Error: Dangerous command blocked\"\r\n try:\r\n r = subprocess.run(\r\n command, shell=True, cwd=WORKDIR,\r\n capture_output=True, text=True, timeout=120,\r\n )\r\n out = (r.stdout + r.stderr).strip()\r\n return out[:50000] if out else \"(no output)\"\r\n except subprocess.TimeoutExpired:\r\n return \"Error: Timeout (120s)\"\r\n\r\n\r\ndef _run_read(path: str, limit: int = None) -> str:\r\n try:\r\n lines = _safe_path(path).read_text().splitlines()\r\n if limit and limit < len(lines):\r\n lines = 
lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\r\n return \"\\n\".join(lines)[:50000]\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\n\r\ndef _run_write(path: str, content: str) -> str:\r\n try:\r\n fp = _safe_path(path)\r\n fp.parent.mkdir(parents=True, exist_ok=True)\r\n fp.write_text(content)\r\n return f\"Wrote {len(content)} bytes\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\n\r\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\r\n try:\r\n fp = _safe_path(path)\r\n c = fp.read_text()\r\n if old_text not in c:\r\n return f\"Error: Text not found in {path}\"\r\n fp.write_text(c.replace(old_text, new_text, 1))\r\n return f\"Edited {path}\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\n\r\n# -- Lead-specific protocol handlers --\r\ndef handle_shutdown_request(teammate: str) -> str:\r\n req_id = str(uuid.uuid4())[:8]\r\n with _tracker_lock:\r\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\r\n BUS.send(\r\n \"lead\", teammate, \"Please shut down gracefully.\",\r\n \"shutdown_request\", {\"request_id\": req_id},\r\n )\r\n return f\"Shutdown request {req_id} sent to '{teammate}'\"\r\n\r\n\r\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\r\n with _tracker_lock:\r\n req = plan_requests.get(request_id)\r\n if not req:\r\n return f\"Error: Unknown plan request_id '{request_id}'\"\r\n with _tracker_lock:\r\n req[\"status\"] = \"approved\" if approve else \"rejected\"\r\n BUS.send(\r\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\r\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\r\n )\r\n return f\"Plan {req['status']} for '{req['from']}'\"\r\n\r\n\r\ndef _check_shutdown_status(request_id: str) -> str:\r\n with _tracker_lock:\r\n return json.dumps(shutdown_requests.get(request_id, {\"error\": \"not found\"}))\r\n\r\n\r\n# -- Lead tool dispatch (14 tools) --\r\nTOOL_HANDLERS = {\r\n \"bash\": 
lambda **kw: _run_bash(kw[\"command\"]),\r\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\r\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\r\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\r\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\r\n \"list_teammates\": lambda **kw: TEAM.list_all(),\r\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\r\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\r\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\r\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\r\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\r\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\r\n \"idle\": lambda **kw: \"Lead does not idle.\",\r\n \"claim_task\": lambda **kw: claim_task(kw[\"task_id\"], \"lead\"),\r\n}\r\n\r\n# these base tools are unchanged from s02\r\nTOOLS = [\r\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\r\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\r\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\r\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\r\n 
\"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\r\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn an autonomous teammate.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\r\n {\"name\": \"list_teammates\", \"description\": \"List all teammates.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\r\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\r\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\r\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\r\n {\"name\": \"shutdown_request\", \"description\": \"Request a teammate to shut down.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": {\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\r\n {\"name\": \"shutdown_response\", \"description\": \"Check shutdown request status.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\r\n {\"name\": \"plan_approval\", \"description\": \"Approve or reject a teammate's plan.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": 
{\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\r\n {\"name\": \"idle\", \"description\": \"Enter idle state (for lead -- rarely used).\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\r\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the board by ID.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\r\n]\r\n\r\n\r\ndef agent_loop(messages: list):\r\n while True:\r\n inbox = BUS.read_inbox(\"lead\")\r\n if inbox:\r\n messages.append({\r\n \"role\": \"user\",\r\n \"content\": f\"{json.dumps(inbox, indent=2)}\",\r\n })\r\n messages.append({\r\n \"role\": \"assistant\",\r\n \"content\": \"Noted inbox messages.\",\r\n })\r\n response = client.messages.create(\r\n model=MODEL,\r\n system=SYSTEM,\r\n messages=messages,\r\n tools=TOOLS,\r\n max_tokens=8000,\r\n )\r\n messages.append({\"role\": \"assistant\", \"content\": response.content})\r\n if response.stop_reason != \"tool_use\":\r\n return\r\n results = []\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n handler = TOOL_HANDLERS.get(block.name)\r\n try:\r\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\r\n except Exception as e:\r\n output = f\"Error: {e}\"\r\n print(f\"> {block.name}: {str(output)[:200]}\")\r\n results.append({\r\n \"type\": \"tool_result\",\r\n \"tool_use_id\": block.id,\r\n \"content\": str(output),\r\n })\r\n messages.append({\"role\": \"user\", \"content\": results})\r\n\r\n\r\nif __name__ == \"__main__\":\r\n history = []\r\n while True:\r\n try:\r\n query = input(\"\\033[36ms11 >> \\033[0m\")\r\n except (EOFError, KeyboardInterrupt):\r\n break\r\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\r\n break\r\n if query.strip() == \"/team\":\r\n print(TEAM.list_all())\r\n continue\r\n if query.strip() == 
\"/inbox\":\r\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\r\n continue\r\n if query.strip() == \"/tasks\":\r\n TASKS_DIR.mkdir(exist_ok=True)\r\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\r\n t = json.loads(f.read_text())\r\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}.get(t[\"status\"], \"[?]\")\r\n owner = f\" @{t['owner']}\" if t.get(\"owner\") else \"\"\r\n print(f\" {marker} #{t['id']}: {t['subject']}{owner}\")\r\n continue\r\n history.append({\"role\": \"user\", \"content\": query})\r\n agent_loop(history)\r\n response_content = history[-1][\"content\"]\r\n if isinstance(response_content, list):\r\n for block in response_content:\r\n if hasattr(block, \"text\"):\r\n print(block.text)\r\n print()\r\n" }, { "id": "s12", "filename": "s12_worktree_task_isolation.py", "title": "Worktree + Task Isolation", "subtitle": "Isolate by Directory", - "loc": 689, + "loc": 694, "tools": [ "bash", "read_file", @@ -825,7 +825,7 @@ } ], "layer": "collaboration", - "source": "#!/usr/bin/env python3\n\"\"\"\ns12_worktree_task_isolation.py - Worktree + Task Isolation\n\nDirectory-level isolation for parallel task execution.\nTasks are the control plane and worktrees are the execution plane.\n\n .tasks/task_12.json\n {\n \"id\": 12,\n \"subject\": \"Implement auth refactor\",\n \"status\": \"in_progress\",\n \"worktree\": \"auth-refactor\"\n }\n\n .worktrees/index.json\n {\n \"worktrees\": [\n {\n \"name\": \"auth-refactor\",\n \"path\": \".../.worktrees/auth-refactor\",\n \"branch\": \"wt/auth-refactor\",\n \"task_id\": 12,\n \"status\": \"active\"\n }\n ]\n }\n\nKey insight: \"Isolate by directory, coordinate by task ID.\"\n\"\"\"\n\nimport json\nimport os\nimport re\nimport subprocess\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = 
Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n\ndef detect_repo_root(cwd: Path) -> Path | None:\n \"\"\"Return git repo root if cwd is inside a repo, else None.\"\"\"\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--show-toplevel\"],\n cwd=cwd,\n capture_output=True,\n text=True,\n timeout=10,\n )\n if r.returncode != 0:\n return None\n root = Path(r.stdout.strip())\n return root if root.exists() else None\n except Exception:\n return None\n\n\nREPO_ROOT = detect_repo_root(WORKDIR) or WORKDIR\n\nSYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Use task + worktree tools for multi-task work. \"\n \"For parallel or risky changes: create tasks, allocate worktree lanes, \"\n \"run commands in those lanes, then choose keep/remove for closeout. \"\n \"Use worktree_events when you need lifecycle visibility.\"\n)\n\n\n# -- EventBus: append-only lifecycle events for observability --\nclass EventBus:\n def __init__(self, event_log_path: Path):\n self.path = event_log_path\n self.path.parent.mkdir(parents=True, exist_ok=True)\n if not self.path.exists():\n self.path.write_text(\"\")\n\n def emit(\n self,\n event: str,\n task: dict | None = None,\n worktree: dict | None = None,\n error: str | None = None,\n ):\n payload = {\n \"event\": event,\n \"ts\": time.time(),\n \"task\": task or {},\n \"worktree\": worktree or {},\n }\n if error:\n payload[\"error\"] = error\n with self.path.open(\"a\", encoding=\"utf-8\") as f:\n f.write(json.dumps(payload) + \"\\n\")\n\n def list_recent(self, limit: int = 20) -> str:\n n = max(1, min(int(limit or 20), 200))\n lines = self.path.read_text(encoding=\"utf-8\").splitlines()\n recent = lines[-n:]\n items = []\n for line in recent:\n try:\n items.append(json.loads(line))\n except Exception:\n items.append({\"event\": \"parse_error\", \"raw\": line})\n return json.dumps(items, indent=2)\n\n\n# -- TaskManager: persistent task board with optional worktree 
binding --\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _max_id(self) -> int:\n ids = []\n for f in self.dir.glob(\"task_*.json\"):\n try:\n ids.append(int(f.stem.split(\"_\")[1]))\n except Exception:\n pass\n return max(ids) if ids else 0\n\n def _path(self, task_id: int) -> Path:\n return self.dir / f\"task_{task_id}.json\"\n\n def _load(self, task_id: int) -> dict:\n path = self._path(task_id)\n if not path.exists():\n raise ValueError(f\"Task {task_id} not found\")\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n self._path(task[\"id\"]).write_text(json.dumps(task, indent=2))\n\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id,\n \"subject\": subject,\n \"description\": description,\n \"status\": \"pending\",\n \"owner\": \"\",\n \"worktree\": \"\",\n \"blockedBy\": [],\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def get(self, task_id: int) -> str:\n return json.dumps(self._load(task_id), indent=2)\n\n def exists(self, task_id: int) -> bool:\n return self._path(task_id).exists()\n\n def update(self, task_id: int, status: str = None, owner: str = None) -> str:\n task = self._load(task_id)\n if status:\n if status not in (\"pending\", \"in_progress\", \"completed\"):\n raise ValueError(f\"Invalid status: {status}\")\n task[\"status\"] = status\n if owner is not None:\n task[\"owner\"] = owner\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def bind_worktree(self, task_id: int, worktree: str, owner: str = \"\") -> str:\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if owner:\n task[\"owner\"] = owner\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n task[\"updated_at\"] = 
time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def unbind_worktree(self, task_id: int) -> str:\n task = self._load(task_id)\n task[\"worktree\"] = \"\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def list_all(self) -> str:\n tasks = []\n for f in sorted(self.dir.glob(\"task_*.json\")):\n tasks.append(json.loads(f.read_text()))\n if not tasks:\n return \"No tasks.\"\n lines = []\n for t in tasks:\n marker = {\n \"pending\": \"[ ]\",\n \"in_progress\": \"[>]\",\n \"completed\": \"[x]\",\n }.get(t[\"status\"], \"[?]\")\n owner = f\" owner={t['owner']}\" if t.get(\"owner\") else \"\"\n wt = f\" wt={t['worktree']}\" if t.get(\"worktree\") else \"\"\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{owner}{wt}\")\n return \"\\n\".join(lines)\n\n\nTASKS = TaskManager(REPO_ROOT / \".tasks\")\nEVENTS = EventBus(REPO_ROOT / \".worktrees\" / \"events.jsonl\")\n\n\n# -- WorktreeManager: create/list/run/remove git worktrees + lifecycle index --\nclass WorktreeManager:\n def __init__(self, repo_root: Path, tasks: TaskManager, events: EventBus):\n self.repo_root = repo_root\n self.tasks = tasks\n self.events = events\n self.dir = repo_root / \".worktrees\"\n self.dir.mkdir(parents=True, exist_ok=True)\n self.index_path = self.dir / \"index.json\"\n if not self.index_path.exists():\n self.index_path.write_text(json.dumps({\"worktrees\": []}, indent=2))\n self.git_available = self._is_git_repo()\n\n def _is_git_repo(self) -> bool:\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--is-inside-work-tree\"],\n cwd=self.repo_root,\n capture_output=True,\n text=True,\n timeout=10,\n )\n return r.returncode == 0\n except Exception:\n return False\n\n def _run_git(self, args: list[str]) -> str:\n if not self.git_available:\n raise RuntimeError(\"Not in a git repository. 
worktree tools require git.\")\n r = subprocess.run(\n [\"git\", *args],\n cwd=self.repo_root,\n capture_output=True,\n text=True,\n timeout=120,\n )\n if r.returncode != 0:\n msg = (r.stdout + r.stderr).strip()\n raise RuntimeError(msg or f\"git {' '.join(args)} failed\")\n return (r.stdout + r.stderr).strip() or \"(no output)\"\n\n def _load_index(self) -> dict:\n return json.loads(self.index_path.read_text())\n\n def _save_index(self, data: dict):\n self.index_path.write_text(json.dumps(data, indent=2))\n\n def _find(self, name: str) -> dict | None:\n idx = self._load_index()\n for wt in idx.get(\"worktrees\", []):\n if wt.get(\"name\") == name:\n return wt\n return None\n\n def _validate_name(self, name: str):\n if not re.fullmatch(r\"[A-Za-z0-9._-]{1,40}\", name or \"\"):\n raise ValueError(\n \"Invalid worktree name. Use 1-40 chars: letters, numbers, ., _, -\"\n )\n\n def create(self, name: str, task_id: int = None, base_ref: str = \"HEAD\") -> str:\n self._validate_name(name)\n if self._find(name):\n raise ValueError(f\"Worktree '{name}' already exists in index\")\n if task_id is not None and not self.tasks.exists(task_id):\n raise ValueError(f\"Task {task_id} not found\")\n\n path = self.dir / name\n branch = f\"wt/{name}\"\n self.events.emit(\n \"worktree.create.before\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\"name\": name, \"base_ref\": base_ref},\n )\n try:\n self._run_git([\"worktree\", \"add\", \"-b\", branch, str(path), base_ref])\n\n entry = {\n \"name\": name,\n \"path\": str(path),\n \"branch\": branch,\n \"task_id\": task_id,\n \"status\": \"active\",\n \"created_at\": time.time(),\n }\n\n idx = self._load_index()\n idx[\"worktrees\"].append(entry)\n self._save_index(idx)\n\n if task_id is not None:\n self.tasks.bind_worktree(task_id, name)\n\n self.events.emit(\n \"worktree.create.after\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\n \"name\": name,\n \"path\": str(path),\n \"branch\": 
branch,\n \"status\": \"active\",\n },\n )\n return json.dumps(entry, indent=2)\n except Exception as e:\n self.events.emit(\n \"worktree.create.failed\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\"name\": name, \"base_ref\": base_ref},\n error=str(e),\n )\n raise\n\n def list_all(self) -> str:\n idx = self._load_index()\n wts = idx.get(\"worktrees\", [])\n if not wts:\n return \"No worktrees in index.\"\n lines = []\n for wt in wts:\n suffix = f\" task={wt['task_id']}\" if wt.get(\"task_id\") else \"\"\n lines.append(\n f\"[{wt.get('status', 'unknown')}] {wt['name']} -> \"\n f\"{wt['path']} ({wt.get('branch', '-')}){suffix}\"\n )\n return \"\\n\".join(lines)\n\n def status(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n r = subprocess.run(\n [\"git\", \"status\", \"--short\", \"--branch\"],\n cwd=path,\n capture_output=True,\n text=True,\n timeout=60,\n )\n text = (r.stdout + r.stderr).strip()\n return text or \"Clean worktree\"\n\n def run(self, name: str, command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n\n try:\n r = subprocess.run(\n command,\n shell=True,\n cwd=path,\n capture_output=True,\n text=True,\n timeout=300,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (300s)\"\n\n def remove(self, name: str, force: bool = False, complete_task: bool = False) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree 
'{name}'\"\n\n self.events.emit(\n \"worktree.remove.before\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\")},\n )\n try:\n args = [\"worktree\", \"remove\"]\n if force:\n args.append(\"--force\")\n args.append(wt[\"path\"])\n self._run_git(args)\n\n if complete_task and wt.get(\"task_id\") is not None:\n task_id = wt[\"task_id\"]\n before = json.loads(self.tasks.get(task_id))\n self.tasks.update(task_id, status=\"completed\")\n self.tasks.unbind_worktree(task_id)\n self.events.emit(\n \"task.completed\",\n task={\n \"id\": task_id,\n \"subject\": before.get(\"subject\", \"\"),\n \"status\": \"completed\",\n },\n worktree={\"name\": name},\n )\n\n idx = self._load_index()\n for item in idx.get(\"worktrees\", []):\n if item.get(\"name\") == name:\n item[\"status\"] = \"removed\"\n item[\"removed_at\"] = time.time()\n self._save_index(idx)\n\n self.events.emit(\n \"worktree.remove.after\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\"), \"status\": \"removed\"},\n )\n return f\"Removed worktree '{name}'\"\n except Exception as e:\n self.events.emit(\n \"worktree.remove.failed\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\")},\n error=str(e),\n )\n raise\n\n def keep(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n\n idx = self._load_index()\n kept = None\n for item in idx.get(\"worktrees\", []):\n if item.get(\"name\") == name:\n item[\"status\"] = \"kept\"\n item[\"kept_at\"] = time.time()\n kept = item\n self._save_index(idx)\n\n self.events.emit(\n \"worktree.keep\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\n \"name\": name,\n \"path\": wt.get(\"path\"),\n \"status\": \"kept\",\n },\n 
)\n return json.dumps(kept, indent=2) if kept else f\"Error: Unknown worktree '{name}'\"\n\n\nWORKTREES = WorktreeManager(REPO_ROOT, TASKS, EVENTS)\n\n\n# -- Base tools (kept minimal, same style as previous sessions) --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command,\n shell=True,\n cwd=WORKDIR,\n capture_output=True,\n text=True,\n timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"owner\")),\n \"task_bind_worktree\": lambda **kw: TASKS.bind_worktree(kw[\"task_id\"], kw[\"worktree\"], kw.get(\"owner\", \"\")),\n \"worktree_create\": lambda **kw: WORKTREES.create(kw[\"name\"], kw.get(\"task_id\"), kw.get(\"base_ref\", \"HEAD\")),\n \"worktree_list\": lambda **kw: WORKTREES.list_all(),\n \"worktree_status\": lambda **kw: WORKTREES.status(kw[\"name\"]),\n \"worktree_run\": lambda **kw: WORKTREES.run(kw[\"name\"], kw[\"command\"]),\n \"worktree_keep\": lambda **kw: WORKTREES.keep(kw[\"name\"]),\n \"worktree_remove\": lambda **kw: WORKTREES.remove(kw[\"name\"], kw.get(\"force\", False), kw.get(\"complete_task\", False)),\n \"worktree_events\": lambda **kw: EVENTS.list_recent(kw.get(\"limit\", 
20)),\n}\n\nTOOLS = [\n {\n \"name\": \"bash\",\n \"description\": \"Run a shell command in the current workspace (blocking).\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n },\n {\n \"name\": \"read_file\",\n \"description\": \"Read file contents.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n },\n \"required\": [\"path\"],\n },\n },\n {\n \"name\": \"write_file\",\n \"description\": \"Write content to file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"content\"],\n },\n },\n {\n \"name\": \"edit_file\",\n \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"old_text\", \"new_text\"],\n },\n },\n {\n \"name\": \"task_create\",\n \"description\": \"Create a new task on the shared task board.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n },\n \"required\": [\"subject\"],\n },\n },\n {\n \"name\": \"task_list\",\n \"description\": \"List all tasks with status, owner, and worktree binding.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}},\n },\n {\n \"name\": \"task_get\",\n \"description\": \"Get task details by ID.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"integer\"}},\n \"required\": [\"task_id\"],\n },\n },\n {\n \"name\": \"task_update\",\n \"description\": \"Update task status or owner.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n 
\"task_id\": {\"type\": \"integer\"},\n \"status\": {\n \"type\": \"string\",\n \"enum\": [\"pending\", \"in_progress\", \"completed\"],\n },\n \"owner\": {\"type\": \"string\"},\n },\n \"required\": [\"task_id\"],\n },\n },\n {\n \"name\": \"task_bind_worktree\",\n \"description\": \"Bind a task to a worktree name.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"task_id\": {\"type\": \"integer\"},\n \"worktree\": {\"type\": \"string\"},\n \"owner\": {\"type\": \"string\"},\n },\n \"required\": [\"task_id\", \"worktree\"],\n },\n },\n {\n \"name\": \"worktree_create\",\n \"description\": \"Create a git worktree and optionally bind it to a task.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"task_id\": {\"type\": \"integer\"},\n \"base_ref\": {\"type\": \"string\"},\n },\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_list\",\n \"description\": \"List worktrees tracked in .worktrees/index.json.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}},\n },\n {\n \"name\": \"worktree_status\",\n \"description\": \"Show git status for one worktree.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_run\",\n \"description\": \"Run a shell command in a named worktree directory.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"command\": {\"type\": \"string\"},\n },\n \"required\": [\"name\", \"command\"],\n },\n },\n {\n \"name\": \"worktree_remove\",\n \"description\": \"Remove a worktree and optionally mark its bound task completed.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"force\": {\"type\": \"boolean\"},\n \"complete_task\": {\"type\": \"boolean\"},\n },\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": 
\"worktree_keep\",\n \"description\": \"Mark a worktree as kept in lifecycle state without removing it.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_events\",\n \"description\": \"List recent worktree/task lifecycle events from .worktrees/events.jsonl.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"limit\": {\"type\": \"integer\"}},\n },\n },\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append(\n {\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n }\n )\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(f\"Repo root for s12: {REPO_ROOT}\")\n if not WORKTREES.git_available:\n print(\"Note: Not in a git repo. 
worktree_* tools will return errors.\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms12 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n" + "source": "#!/usr/bin/env python3\r\n\"\"\"\r\ns12_worktree_task_isolation.py - Worktree + Task Isolation\r\n\r\nDirectory-level isolation for parallel task execution.\r\nTasks are the control plane and worktrees are the execution plane.\r\n\r\n .tasks/task_12.json\r\n {\r\n \"id\": 12,\r\n \"subject\": \"Implement auth refactor\",\r\n \"status\": \"in_progress\",\r\n \"worktree\": \"auth-refactor\"\r\n }\r\n\r\n .worktrees/index.json\r\n {\r\n \"worktrees\": [\r\n {\r\n \"name\": \"auth-refactor\",\r\n \"path\": \".../.worktrees/auth-refactor\",\r\n \"branch\": \"wt/auth-refactor\",\r\n \"task_id\": 12,\r\n \"status\": \"active\"\r\n }\r\n ]\r\n }\r\n\r\nKey insight: \"Isolate by directory, coordinate by task ID.\"\r\n\"\"\"\r\n\r\nimport json\r\nimport os\r\nimport re\r\nimport subprocess\r\nimport time\r\nfrom pathlib import Path\r\n\r\nfrom anthropic import Anthropic\r\nfrom dotenv import load_dotenv\r\n\r\nload_dotenv(override=True)\r\n\r\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\r\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\r\n\r\nWORKDIR = Path.cwd()\r\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\r\nMODEL = os.environ[\"MODEL_ID\"]\r\n\r\n\r\ndef detect_repo_root(cwd: Path) -> Path | None:\r\n \"\"\"Return git repo root if cwd is inside a repo, else None.\"\"\"\r\n try:\r\n r = subprocess.run(\r\n [\"git\", \"rev-parse\", \"--show-toplevel\"],\r\n cwd=cwd,\r\n capture_output=True,\r\n text=True,\r\n timeout=10,\r\n )\r\n if r.returncode != 0:\r\n return None\r\n root = Path(r.stdout.strip())\r\n return root if root.exists() else None\r\n except Exception:\r\n return None\r\n\r\n\r\nREPO_ROOT = 
detect_repo_root(WORKDIR) or WORKDIR\r\n\r\nSYSTEM = (\r\n f\"You are a coding agent at {WORKDIR}. \"\r\n \"Use task + worktree tools for multi-task work. \"\r\n \"For parallel or risky changes: create tasks, allocate worktree lanes, \"\r\n \"run commands in those lanes, then choose keep/remove for closeout. \"\r\n \"Use worktree_events when you need lifecycle visibility.\"\r\n)\r\n\r\n\r\n# -- EventBus: append-only lifecycle events for observability --\r\nclass EventBus:\r\n def __init__(self, event_log_path: Path):\r\n self.path = event_log_path\r\n self.path.parent.mkdir(parents=True, exist_ok=True)\r\n if not self.path.exists():\r\n self.path.write_text(\"\")\r\n\r\n def emit(\r\n self,\r\n event: str,\r\n task: dict | None = None,\r\n worktree: dict | None = None,\r\n error: str | None = None,\r\n ):\r\n payload = {\r\n \"event\": event,\r\n \"ts\": time.time(),\r\n \"task\": task or {},\r\n \"worktree\": worktree or {},\r\n }\r\n if error:\r\n payload[\"error\"] = error\r\n with self.path.open(\"a\", encoding=\"utf-8\") as f:\r\n f.write(json.dumps(payload) + \"\\n\")\r\n\r\n def list_recent(self, limit: int = 20) -> str:\r\n n = max(1, min(int(limit or 20), 200))\r\n lines = self.path.read_text(encoding=\"utf-8\").splitlines()\r\n recent = lines[-n:]\r\n items = []\r\n for line in recent:\r\n try:\r\n items.append(json.loads(line))\r\n except Exception:\r\n items.append({\"event\": \"parse_error\", \"raw\": line})\r\n return json.dumps(items, indent=2)\r\n\r\n\r\n# -- TaskManager: persistent task board with optional worktree binding --\r\nclass TaskManager:\r\n def __init__(self, tasks_dir: Path):\r\n self.dir = tasks_dir\r\n self.dir.mkdir(parents=True, exist_ok=True)\r\n self._next_id = self._max_id() + 1\r\n\r\n def _max_id(self) -> int:\r\n ids = []\r\n for f in self.dir.glob(\"task_*.json\"):\r\n try:\r\n ids.append(int(f.stem.split(\"_\")[1]))\r\n except Exception:\r\n pass\r\n return max(ids) if ids else 0\r\n\r\n def _path(self, task_id: int) -> 
Path:\r\n return self.dir / f\"task_{task_id}.json\"\r\n\r\n def _load(self, task_id: int) -> dict:\r\n path = self._path(task_id)\r\n if not path.exists():\r\n raise ValueError(f\"Task {task_id} not found\")\r\n return json.loads(path.read_text())\r\n\r\n def _save(self, task: dict):\r\n self._path(task[\"id\"]).write_text(json.dumps(task, indent=2))\r\n\r\n def create(self, subject: str, description: str = \"\") -> str:\r\n task = {\r\n \"id\": self._next_id,\r\n \"subject\": subject,\r\n \"description\": description,\r\n \"status\": \"pending\",\r\n \"owner\": \"\",\r\n \"worktree\": \"\",\r\n \"blockedBy\": [],\r\n \"created_at\": time.time(),\r\n \"updated_at\": time.time(),\r\n }\r\n self._save(task)\r\n self._next_id += 1\r\n return json.dumps(task, indent=2)\r\n\r\n def get(self, task_id: int) -> str:\r\n return json.dumps(self._load(task_id), indent=2)\r\n\r\n def exists(self, task_id: int) -> bool:\r\n return self._path(task_id).exists()\r\n\r\n def update(self, task_id: int, status: str = None, owner: str = None) -> str:\r\n task = self._load(task_id)\r\n if status:\r\n if status not in (\"pending\", \"in_progress\", \"completed\"):\r\n raise ValueError(f\"Invalid status: {status}\")\r\n task[\"status\"] = status\r\n if owner is not None:\r\n task[\"owner\"] = owner\r\n task[\"updated_at\"] = time.time()\r\n self._save(task)\r\n return json.dumps(task, indent=2)\r\n\r\n def bind_worktree(self, task_id: int, worktree: str, owner: str = \"\") -> str:\r\n task = self._load(task_id)\r\n task[\"worktree\"] = worktree\r\n if owner:\r\n task[\"owner\"] = owner\r\n if task[\"status\"] == \"pending\":\r\n task[\"status\"] = \"in_progress\"\r\n task[\"updated_at\"] = time.time()\r\n self._save(task)\r\n return json.dumps(task, indent=2)\r\n\r\n def unbind_worktree(self, task_id: int) -> str:\r\n task = self._load(task_id)\r\n task[\"worktree\"] = \"\"\r\n task[\"updated_at\"] = time.time()\r\n self._save(task)\r\n return json.dumps(task, indent=2)\r\n\r\n def 
list_all(self) -> str:\r\n tasks = []\r\n for f in sorted(self.dir.glob(\"task_*.json\")):\r\n tasks.append(json.loads(f.read_text()))\r\n if not tasks:\r\n return \"No tasks.\"\r\n lines = []\r\n for t in tasks:\r\n marker = {\r\n \"pending\": \"[ ]\",\r\n \"in_progress\": \"[>]\",\r\n \"completed\": \"[x]\",\r\n }.get(t[\"status\"], \"[?]\")\r\n owner = f\" owner={t['owner']}\" if t.get(\"owner\") else \"\"\r\n wt = f\" wt={t['worktree']}\" if t.get(\"worktree\") else \"\"\r\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{owner}{wt}\")\r\n return \"\\n\".join(lines)\r\n\r\n\r\nTASKS = TaskManager(REPO_ROOT / \".tasks\")\r\nEVENTS = EventBus(REPO_ROOT / \".worktrees\" / \"events.jsonl\")\r\n\r\n\r\n# -- WorktreeManager: create/list/run/remove git worktrees + lifecycle index --\r\nclass WorktreeManager:\r\n def __init__(self, repo_root: Path, tasks: TaskManager, events: EventBus):\r\n self.repo_root = repo_root\r\n self.tasks = tasks\r\n self.events = events\r\n self.dir = repo_root / \".worktrees\"\r\n self.dir.mkdir(parents=True, exist_ok=True)\r\n self.index_path = self.dir / \"index.json\"\r\n if not self.index_path.exists():\r\n self.index_path.write_text(json.dumps({\"worktrees\": []}, indent=2))\r\n self.git_available = self._is_git_repo()\r\n\r\n def _is_git_repo(self) -> bool:\r\n try:\r\n r = subprocess.run(\r\n [\"git\", \"rev-parse\", \"--is-inside-work-tree\"],\r\n cwd=self.repo_root,\r\n capture_output=True,\r\n text=True,\r\n timeout=10,\r\n )\r\n return r.returncode == 0\r\n except Exception:\r\n return False\r\n\r\n def _run_git(self, args: list[str]) -> str:\r\n if not self.git_available:\r\n raise RuntimeError(\"Not in a git repository. 
worktree tools require git.\")\r\n r = subprocess.run(\r\n [\"git\", *args],\r\n cwd=self.repo_root,\r\n capture_output=True,\r\n text=True,\r\n timeout=120,\r\n )\r\n if r.returncode != 0:\r\n msg = (r.stdout + r.stderr).strip()\r\n raise RuntimeError(msg or f\"git {' '.join(args)} failed\")\r\n return (r.stdout + r.stderr).strip() or \"(no output)\"\r\n\r\n def _load_index(self) -> dict:\r\n return json.loads(self.index_path.read_text())\r\n\r\n def _save_index(self, data: dict):\r\n self.index_path.write_text(json.dumps(data, indent=2))\r\n\r\n def _find(self, name: str) -> dict | None:\r\n idx = self._load_index()\r\n for wt in idx.get(\"worktrees\", []):\r\n if wt.get(\"name\") == name:\r\n return wt\r\n return None\r\n\r\n def _validate_name(self, name: str):\r\n if not re.fullmatch(r\"[A-Za-z0-9._-]{1,40}\", name or \"\"):\r\n raise ValueError(\r\n \"Invalid worktree name. Use 1-40 chars: letters, numbers, ., _, -\"\r\n )\r\n\r\n def create(self, name: str, task_id: int = None, base_ref: str = \"HEAD\") -> str:\r\n self._validate_name(name)\r\n if self._find(name):\r\n raise ValueError(f\"Worktree '{name}' already exists in index\")\r\n if task_id is not None and not self.tasks.exists(task_id):\r\n raise ValueError(f\"Task {task_id} not found\")\r\n\r\n path = self.dir / name\r\n branch = f\"wt/{name}\"\r\n self.events.emit(\r\n \"worktree.create.before\",\r\n task={\"id\": task_id} if task_id is not None else {},\r\n worktree={\"name\": name, \"base_ref\": base_ref},\r\n )\r\n try:\r\n self._run_git([\"worktree\", \"add\", \"-b\", branch, str(path), base_ref])\r\n\r\n entry = {\r\n \"name\": name,\r\n \"path\": str(path),\r\n \"branch\": branch,\r\n \"task_id\": task_id,\r\n \"status\": \"active\",\r\n \"created_at\": time.time(),\r\n }\r\n\r\n idx = self._load_index()\r\n idx[\"worktrees\"].append(entry)\r\n self._save_index(idx)\r\n\r\n if task_id is not None:\r\n self.tasks.bind_worktree(task_id, name)\r\n\r\n self.events.emit(\r\n 
\"worktree.create.after\",\r\n task={\"id\": task_id} if task_id is not None else {},\r\n worktree={\r\n \"name\": name,\r\n \"path\": str(path),\r\n \"branch\": branch,\r\n \"status\": \"active\",\r\n },\r\n )\r\n return json.dumps(entry, indent=2)\r\n except Exception as e:\r\n self.events.emit(\r\n \"worktree.create.failed\",\r\n task={\"id\": task_id} if task_id is not None else {},\r\n worktree={\"name\": name, \"base_ref\": base_ref},\r\n error=str(e),\r\n )\r\n raise\r\n\r\n def list_all(self) -> str:\r\n idx = self._load_index()\r\n wts = idx.get(\"worktrees\", [])\r\n if not wts:\r\n return \"No worktrees in index.\"\r\n lines = []\r\n for wt in wts:\r\n suffix = f\" task={wt['task_id']}\" if wt.get(\"task_id\") else \"\"\r\n lines.append(\r\n f\"[{wt.get('status', 'unknown')}] {wt['name']} -> \"\r\n f\"{wt['path']} ({wt.get('branch', '-')}){suffix}\"\r\n )\r\n return \"\\n\".join(lines)\r\n\r\n def status(self, name: str) -> str:\r\n wt = self._find(name)\r\n if not wt:\r\n return f\"Error: Unknown worktree '{name}'\"\r\n path = Path(wt[\"path\"])\r\n if not path.exists():\r\n return f\"Error: Worktree path missing: {path}\"\r\n r = subprocess.run(\r\n [\"git\", \"status\", \"--short\", \"--branch\"],\r\n cwd=path,\r\n capture_output=True,\r\n text=True,\r\n timeout=60,\r\n )\r\n text = (r.stdout + r.stderr).strip()\r\n return text or \"Clean worktree\"\r\n\r\n def run(self, name: str, command: str) -> str:\r\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\r\n if any(d in command for d in dangerous):\r\n return \"Error: Dangerous command blocked\"\r\n\r\n wt = self._find(name)\r\n if not wt:\r\n return f\"Error: Unknown worktree '{name}'\"\r\n path = Path(wt[\"path\"])\r\n if not path.exists():\r\n return f\"Error: Worktree path missing: {path}\"\r\n\r\n try:\r\n r = subprocess.run(\r\n command,\r\n shell=True,\r\n cwd=path,\r\n capture_output=True,\r\n text=True,\r\n timeout=300,\r\n )\r\n out = (r.stdout + 
r.stderr).strip()\r\n return out[:50000] if out else \"(no output)\"\r\n except subprocess.TimeoutExpired:\r\n return \"Error: Timeout (300s)\"\r\n\r\n def remove(self, name: str, force: bool = False, complete_task: bool = False) -> str:\r\n wt = self._find(name)\r\n if not wt:\r\n return f\"Error: Unknown worktree '{name}'\"\r\n\r\n self.events.emit(\r\n \"worktree.remove.before\",\r\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\r\n worktree={\"name\": name, \"path\": wt.get(\"path\")},\r\n )\r\n try:\r\n args = [\"worktree\", \"remove\"]\r\n if force:\r\n args.append(\"--force\")\r\n args.append(wt[\"path\"])\r\n self._run_git(args)\r\n\r\n if complete_task and wt.get(\"task_id\") is not None:\r\n task_id = wt[\"task_id\"]\r\n before = json.loads(self.tasks.get(task_id))\r\n self.tasks.update(task_id, status=\"completed\")\r\n self.tasks.unbind_worktree(task_id)\r\n self.events.emit(\r\n \"task.completed\",\r\n task={\r\n \"id\": task_id,\r\n \"subject\": before.get(\"subject\", \"\"),\r\n \"status\": \"completed\",\r\n },\r\n worktree={\"name\": name},\r\n )\r\n\r\n idx = self._load_index()\r\n for item in idx.get(\"worktrees\", []):\r\n if item.get(\"name\") == name:\r\n item[\"status\"] = \"removed\"\r\n item[\"removed_at\"] = time.time()\r\n self._save_index(idx)\r\n\r\n self.events.emit(\r\n \"worktree.remove.after\",\r\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\r\n worktree={\"name\": name, \"path\": wt.get(\"path\"), \"status\": \"removed\"},\r\n )\r\n return f\"Removed worktree '{name}'\"\r\n except Exception as e:\r\n self.events.emit(\r\n \"worktree.remove.failed\",\r\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\r\n worktree={\"name\": name, \"path\": wt.get(\"path\")},\r\n error=str(e),\r\n )\r\n raise\r\n\r\n def keep(self, name: str) -> str:\r\n wt = self._find(name)\r\n if not wt:\r\n return f\"Error: Unknown worktree '{name}'\"\r\n\r\n idx = 
self._load_index()\r\n kept = None\r\n for item in idx.get(\"worktrees\", []):\r\n if item.get(\"name\") == name:\r\n item[\"status\"] = \"kept\"\r\n item[\"kept_at\"] = time.time()\r\n kept = item\r\n self._save_index(idx)\r\n\r\n self.events.emit(\r\n \"worktree.keep\",\r\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\r\n worktree={\r\n \"name\": name,\r\n \"path\": wt.get(\"path\"),\r\n \"status\": \"kept\",\r\n },\r\n )\r\n return json.dumps(kept, indent=2) if kept else f\"Error: Unknown worktree '{name}'\"\r\n\r\n\r\nWORKTREES = WorktreeManager(REPO_ROOT, TASKS, EVENTS)\r\n\r\n\r\n# -- Base tools (kept minimal, same style as previous sessions) --\r\ndef safe_path(p: str) -> Path:\r\n path = (WORKDIR / p).resolve()\r\n if not path.is_relative_to(WORKDIR):\r\n raise ValueError(f\"Path escapes workspace: {p}\")\r\n return path\r\n\r\n\r\ndef run_bash(command: str) -> str:\r\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\r\n if any(d in command for d in dangerous):\r\n return \"Error: Dangerous command blocked\"\r\n try:\r\n r = subprocess.run(\r\n command,\r\n shell=True,\r\n cwd=WORKDIR,\r\n capture_output=True,\r\n text=True,\r\n timeout=120,\r\n )\r\n out = (r.stdout + r.stderr).strip()\r\n return out[:50000] if out else \"(no output)\"\r\n except subprocess.TimeoutExpired:\r\n return \"Error: Timeout (120s)\"\r\n\r\n\r\ndef run_read(path: str, limit: int = None) -> str:\r\n try:\r\n lines = safe_path(path).read_text().splitlines()\r\n if limit and limit < len(lines):\r\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\r\n return \"\\n\".join(lines)[:50000]\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\n\r\ndef run_write(path: str, content: str) -> str:\r\n try:\r\n fp = safe_path(path)\r\n fp.parent.mkdir(parents=True, exist_ok=True)\r\n fp.write_text(content)\r\n return f\"Wrote {len(content)} bytes\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\n\r\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\r\n try:\r\n fp = safe_path(path)\r\n c = fp.read_text()\r\n if old_text not in c:\r\n return f\"Error: Text not found in {path}\"\r\n fp.write_text(c.replace(old_text, new_text, 1))\r\n return f\"Edited {path}\"\r\n except Exception as e:\r\n return f\"Error: {e}\"\r\n\r\n\r\nTOOL_HANDLERS = {\r\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\r\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\r\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\r\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\r\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\r\n \"task_list\": lambda **kw: TASKS.list_all(),\r\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\r\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"owner\")),\r\n \"task_bind_worktree\": lambda **kw: TASKS.bind_worktree(kw[\"task_id\"], kw[\"worktree\"], kw.get(\"owner\", \"\")),\r\n \"worktree_create\": lambda **kw: WORKTREES.create(kw[\"name\"], kw.get(\"task_id\"), kw.get(\"base_ref\", \"HEAD\")),\r\n \"worktree_list\": lambda **kw: WORKTREES.list_all(),\r\n \"worktree_status\": lambda **kw: WORKTREES.status(kw[\"name\"]),\r\n \"worktree_run\": lambda **kw: WORKTREES.run(kw[\"name\"], kw[\"command\"]),\r\n \"worktree_keep\": lambda **kw: WORKTREES.keep(kw[\"name\"]),\r\n \"worktree_remove\": lambda **kw: WORKTREES.remove(kw[\"name\"], kw.get(\"force\", False), 
kw.get(\"complete_task\", False)),\r\n \"worktree_events\": lambda **kw: EVENTS.list_recent(kw.get(\"limit\", 20)),\r\n}\r\n\r\nTOOLS = [\r\n {\r\n \"name\": \"bash\",\r\n \"description\": \"Run a shell command in the current workspace (blocking).\",\r\n \"input_schema\": {\r\n \"type\": \"object\",\r\n \"properties\": {\"command\": {\"type\": \"string\"}},\r\n \"required\": [\"command\"],\r\n },\r\n },\r\n {\r\n \"name\": \"read_file\",\r\n \"description\": \"Read file contents.\",\r\n \"input_schema\": {\r\n \"type\": \"object\",\r\n \"properties\": {\r\n \"path\": {\"type\": \"string\"},\r\n \"limit\": {\"type\": \"integer\"},\r\n },\r\n \"required\": [\"path\"],\r\n },\r\n },\r\n {\r\n \"name\": \"write_file\",\r\n \"description\": \"Write content to file.\",\r\n \"input_schema\": {\r\n \"type\": \"object\",\r\n \"properties\": {\r\n \"path\": {\"type\": \"string\"},\r\n \"content\": {\"type\": \"string\"},\r\n },\r\n \"required\": [\"path\", \"content\"],\r\n },\r\n },\r\n {\r\n \"name\": \"edit_file\",\r\n \"description\": \"Replace exact text in file.\",\r\n \"input_schema\": {\r\n \"type\": \"object\",\r\n \"properties\": {\r\n \"path\": {\"type\": \"string\"},\r\n \"old_text\": {\"type\": \"string\"},\r\n \"new_text\": {\"type\": \"string\"},\r\n },\r\n \"required\": [\"path\", \"old_text\", \"new_text\"],\r\n },\r\n },\r\n {\r\n \"name\": \"task_create\",\r\n \"description\": \"Create a new task on the shared task board.\",\r\n \"input_schema\": {\r\n \"type\": \"object\",\r\n \"properties\": {\r\n \"subject\": {\"type\": \"string\"},\r\n \"description\": {\"type\": \"string\"},\r\n },\r\n \"required\": [\"subject\"],\r\n },\r\n },\r\n {\r\n \"name\": \"task_list\",\r\n \"description\": \"List all tasks with status, owner, and worktree binding.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {}},\r\n },\r\n {\r\n \"name\": \"task_get\",\r\n \"description\": \"Get task details by ID.\",\r\n \"input_schema\": {\r\n \"type\": \"object\",\r\n 
\"properties\": {\"task_id\": {\"type\": \"integer\"}},\r\n \"required\": [\"task_id\"],\r\n },\r\n },\r\n {\r\n \"name\": \"task_update\",\r\n \"description\": \"Update task status or owner.\",\r\n \"input_schema\": {\r\n \"type\": \"object\",\r\n \"properties\": {\r\n \"task_id\": {\"type\": \"integer\"},\r\n \"status\": {\r\n \"type\": \"string\",\r\n \"enum\": [\"pending\", \"in_progress\", \"completed\"],\r\n },\r\n \"owner\": {\"type\": \"string\"},\r\n },\r\n \"required\": [\"task_id\"],\r\n },\r\n },\r\n {\r\n \"name\": \"task_bind_worktree\",\r\n \"description\": \"Bind a task to a worktree name.\",\r\n \"input_schema\": {\r\n \"type\": \"object\",\r\n \"properties\": {\r\n \"task_id\": {\"type\": \"integer\"},\r\n \"worktree\": {\"type\": \"string\"},\r\n \"owner\": {\"type\": \"string\"},\r\n },\r\n \"required\": [\"task_id\", \"worktree\"],\r\n },\r\n },\r\n {\r\n \"name\": \"worktree_create\",\r\n \"description\": \"Create a git worktree and optionally bind it to a task.\",\r\n \"input_schema\": {\r\n \"type\": \"object\",\r\n \"properties\": {\r\n \"name\": {\"type\": \"string\"},\r\n \"task_id\": {\"type\": \"integer\"},\r\n \"base_ref\": {\"type\": \"string\"},\r\n },\r\n \"required\": [\"name\"],\r\n },\r\n },\r\n {\r\n \"name\": \"worktree_list\",\r\n \"description\": \"List worktrees tracked in .worktrees/index.json.\",\r\n \"input_schema\": {\"type\": \"object\", \"properties\": {}},\r\n },\r\n {\r\n \"name\": \"worktree_status\",\r\n \"description\": \"Show git status for one worktree.\",\r\n \"input_schema\": {\r\n \"type\": \"object\",\r\n \"properties\": {\"name\": {\"type\": \"string\"}},\r\n \"required\": [\"name\"],\r\n },\r\n },\r\n {\r\n \"name\": \"worktree_run\",\r\n \"description\": \"Run a shell command in a named worktree directory.\",\r\n \"input_schema\": {\r\n \"type\": \"object\",\r\n \"properties\": {\r\n \"name\": {\"type\": \"string\"},\r\n \"command\": {\"type\": \"string\"},\r\n },\r\n \"required\": [\"name\", 
\"command\"],\r\n },\r\n },\r\n {\r\n \"name\": \"worktree_remove\",\r\n \"description\": \"Remove a worktree and optionally mark its bound task completed.\",\r\n \"input_schema\": {\r\n \"type\": \"object\",\r\n \"properties\": {\r\n \"name\": {\"type\": \"string\"},\r\n \"force\": {\"type\": \"boolean\"},\r\n \"complete_task\": {\"type\": \"boolean\"},\r\n },\r\n \"required\": [\"name\"],\r\n },\r\n },\r\n {\r\n \"name\": \"worktree_keep\",\r\n \"description\": \"Mark a worktree as kept in lifecycle state without removing it.\",\r\n \"input_schema\": {\r\n \"type\": \"object\",\r\n \"properties\": {\"name\": {\"type\": \"string\"}},\r\n \"required\": [\"name\"],\r\n },\r\n },\r\n {\r\n \"name\": \"worktree_events\",\r\n \"description\": \"List recent worktree/task lifecycle events from .worktrees/events.jsonl.\",\r\n \"input_schema\": {\r\n \"type\": \"object\",\r\n \"properties\": {\"limit\": {\"type\": \"integer\"}},\r\n },\r\n },\r\n]\r\n\r\n\r\ndef agent_loop(messages: list):\r\n while True:\r\n response = client.messages.create(\r\n model=MODEL,\r\n system=SYSTEM,\r\n messages=messages,\r\n tools=TOOLS,\r\n max_tokens=8000,\r\n )\r\n messages.append({\"role\": \"assistant\", \"content\": response.content})\r\n if response.stop_reason != \"tool_use\":\r\n return\r\n\r\n results = []\r\n for block in response.content:\r\n if block.type == \"tool_use\":\r\n handler = TOOL_HANDLERS.get(block.name)\r\n try:\r\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\r\n except Exception as e:\r\n output = f\"Error: {e}\"\r\n print(f\"> {block.name}: {str(output)[:200]}\")\r\n results.append(\r\n {\r\n \"type\": \"tool_result\",\r\n \"tool_use_id\": block.id,\r\n \"content\": str(output),\r\n }\r\n )\r\n messages.append({\"role\": \"user\", \"content\": results})\r\n\r\n\r\nif __name__ == \"__main__\":\r\n print(f\"Repo root for s12: {REPO_ROOT}\")\r\n if not WORKTREES.git_available:\r\n print(\"Note: Not in a git repo. 
worktree_* tools will return errors.\")\r\n\r\n history = []\r\n while True:\r\n try:\r\n query = input(\"\\033[36ms12 >> \\033[0m\")\r\n except (EOFError, KeyboardInterrupt):\r\n break\r\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\r\n break\r\n history.append({\"role\": \"user\", \"content\": query})\r\n agent_loop(history)\r\n response_content = history[-1][\"content\"]\r\n if isinstance(response_content, list):\r\n for block in response_content:\r\n if hasattr(block, \"text\"):\r\n print(block.text)\r\n print()\r\n" } ], "diffs": [ @@ -844,7 +844,7 @@ "write_file", "edit_file" ], - "locDelta": 31 + "locDelta": 36 }, { "from": "s02", diff --git a/web/src/data/scenario-ru.ts b/web/src/data/scenario-ru.ts new file mode 100644 index 000000000..7ec5cde85 --- /dev/null +++ b/web/src/data/scenario-ru.ts @@ -0,0 +1,461 @@ +export interface ScenarioRuOverride { + title?: string; + description?: string; + steps?: Array<{ + content?: string; + annotation?: string; + }>; +} + +export const SCENARIO_RU_OVERRIDES: Record = { + s02: { + title: `Инструменты`, + description: `Агент работает с инструментами чтения, записи, правки и bash`, + steps: [ + { + content: `Добавь функцию greet(name) в hello.py`, + annotation: `Пользователь просит изменить код`, + }, + { + content: `Сначала прочитаю текущий файл.`, + annotation: `Модель выбирает read_file вместо cat через bash`, + }, + { + content: `hello.py`, + annotation: `Отдельный инструмент чтения даёт более чистый доступ к файлу`, + }, + { + content: `print("Hello, World!")`, + annotation: `Содержимое файла возвращается модели`, + }, + { + content: `hello.py +--- +def greet(name): + print(f"Hello, {name}!") + +greet("World")`, + annotation: `write_file заменяет содержимое файла целиком`, + }, + { + content: `Файл успешно записан`, + annotation: `Запись подтверждена`, + }, + { + content: `Я обновил hello.py: добавил функцию greet(name), которая принимает имя как параметр.`, + annotation: `4 инструмента лучше 1: у каждого 
есть чёткая и узкая роль`, + }, + ], + }, + s03: { + title: `План задач`, + description: `Агент создаёт видимый план до начала выполнения`, + steps: [ + { + content: `Собери CLI-калькулятор с операциями add, subtract, multiply, divide`, + annotation: `Многошаговая задача, которой помогает явное планирование`, + }, + { + content: `[ + { "id": 1, "task": "Создать calc.py с разбором аргументов", "status": "in_progress" }, + { "id": 2, "task": "Реализовать 4 операции", "status": "pending" }, + { "id": 3, "task": "Добавить обработку ошибок", "status": "pending" }, + { "id": 4, "task": "Проверить все операции", "status": "pending" } +]`, + annotation: `TodoManager создаёт видимый план ДО того, как будет написан код`, + }, + { + content: `Список задач создан: 4 пункта`, + annotation: `План теперь виден и его можно отслеживать`, + }, + { + content: `calc.py +--- +import sys + +def calculate(op, a, b): + ops = {'+': a+b, '-': a-b, '*': a*b, '/': a/b} + return ops.get(op, 'Unknown operator') + +if __name__ == '__main__': + a, op, b = float(sys.argv[1]), sys.argv[2], float(sys.argv[3]) + print(calculate(op, a, b))`, + annotation: `Выполняются задачи #1 и #2: создаём файл и операции`, + }, + { + content: `Файл успешно записан`, + annotation: `Код записан`, + }, + { + content: `python calc.py 10 + 5`, + annotation: `Выполняется задача #4: тестирование`, + }, + { + content: `15.0`, + annotation: `Тест проходит`, + }, + { + content: `Калькулятор собран и проверен. 
Все 4 операции работают корректно.`, + annotation: `Видимый план повышает качество выполнения, потому что прогресс можно отслеживать`, + }, + ], + }, + s04: { + title: `Субагенты`, + description: `Агент порождает субагента для изолированного выполнения подзадачи`, + steps: [ + { + content: `Сделай рефакторинг калькулятора: раздели его на модули и добавь тесты`, + annotation: `Сложная задача, подходящая для делегирования`, + }, + { + content: `Тесты я делегирую субагенту, а сам займусь рефакторингом.`, + annotation: `Агент решает разделить работу через инструмент Task`, + }, + { + content: `Напиши unit-тесты для calc.py, покрыв все 4 операции и крайние случаи вроде деления на ноль`, + annotation: `Task запускает субагента с собственным изолированным контекстом`, + }, + { + content: `[Субагент запущен] Контекст: свежий messages[], те же инструменты, ограниченный промпт`, + annotation: `Субагент получает чистый контекст и не путается в разговоре родителя`, + }, + { + content: `Создан test_calc.py с 8 тест-кейсами. Все проходят.`, + annotation: `Субагент завершает работу и возвращает родителю краткую сводку`, + }, + { + content: `calc.py -> calc/operations.py, calc/cli.py`, + annotation: `Родитель продолжает свою часть работы: рефакторинг`, + }, + { + content: `Файлы успешно записаны`, + annotation: `Модули разделены`, + }, + { + content: `Я разделил код на calc/operations.py и calc/cli.py. Тесты написал субагент, и они все проходят.`, + annotation: `Изоляция контекста не даёт разным задачам смешиваться`, + }, + ], + }, + s05: { + title: `Навыки`, + description: `Агент подгружает внешние знания из файлов SKILL.md`, + steps: [ + { + content: `Создай новый проект Next.js с TypeScript`, + annotation: `Задача требует знаний о конкретном фреймворке`, + }, + { + content: `[Загрузчик навыков] Ищу файлы SKILL.md... 
Найдено: nextjs.SKILL.md`, + annotation: `Загрузчик навыков находит подходящий навык до запуска основного цикла`, + }, + { + content: `[Навык подключён] nextjs.SKILL.md -> добавлено сообщение tool_result`, + annotation: `Содержимое навыка попадает в tool_result, а не раздувает системный промпт`, + }, + { + content: `У меня есть знания по настройке Next.js. Давайте создам структуру проекта.`, + annotation: `Модель получила знания о фреймворке из файла навыка`, + }, + { + content: `npx create-next-app@latest my-app --typescript --tailwind --eslint --app --src-dir`, + annotation: `Используем знания из навыка для правильных флагов`, + }, + { + content: `Успех! Создан my-app в /workspace/my-app`, + annotation: `Проект создан с корректной конфигурацией`, + }, + { + content: `Проект Next.js создан с TypeScript, Tailwind CSS, ESLint, App Router и директорией src/.`, + annotation: `Навыки приходят через tool_result, а не через системный промпт, поэтому контекст остаётся компактным`, + }, + ], + }, + s06: { + title: `Сжатие контекста`, + description: `Агент сжимает контекст, когда число токенов подходит к порогу`, + steps: [ + { + content: `Проанализируй все Python-файлы в этом проекте и кратко опиши каждый`, + annotation: `Задача, которая породит очень большой контекст`, + }, + { + content: `find . -name '*.py' -type f`, + annotation: `Ищем все файлы для анализа`, + }, + { + content: `./src/main.py +./src/utils.py +./src/models.py +./tests/test_main.py +... 
(ещё 28 файлов)`, + annotation: `Найдено 32 файла: чтение их всех сильно раздует контекст`, + }, + { + content: `Я буду читать и разбирать каждый файл по порядку.`, + annotation: `Агент начинает обрабатывать файлы один за другим`, + }, + { + content: `./src/main.py`, + annotation: `Читаем файл 1 из 32...`, + }, + { + content: `[..., 450 строк кода ...]`, + annotation: `Большой фрагмент содержимого добавлен в messages[]`, + }, + { + content: `[ContextManager] Число токенов: 148,231 / порог 150,000 +Запуск трёхслойного сжатия: + 1. Убрать содержимое tool_result > 1000 символов -> заменить сводкой + 2. Объединить последовательные ответы ассистента + 3. Сжать старейшие 50% разговора в сводку +Результат: 148,231 -> 42,087 токенов`, + annotation: `Сжатие автоматически срабатывает при приближении к лимиту`, + }, + { + content: `Продолжаю анализ уже со сжатым контекстом. Файлы 1-18 сведены в краткую сводку, перехожу к оставшимся.`, + annotation: `Забывая старые tool_result, агент может работать сколь угодно долго`, + }, + ], + }, + s08: { + title: `Фоновые задачи`, + description: `Агент выполняет долгие задачи в фоновых потоках`, + steps: [ + { + content: `Запусти тесты и, пока они идут, исправь lint-ошибки`, + annotation: `Две задачи, которые можно выполнять параллельно`, + }, + { + content: `Запущу тесты в фоне и параллельно поправлю lint.`, + annotation: `Агент видит возможность для конкурентного выполнения`, + }, + { + content: `pytest tests/ -v --timeout=300`, + annotation: `Менеджер фоновых задач создаёт поток для долгого прогона тестов`, + }, + { + content: `[Менеджер фоновых задач] Поток запущен: task_001 +Выполняется: pytest tests/ -v --timeout=300 +Статус: in_progress`, + annotation: `Поток работает независимо, а основной цикл продолжается`, + }, + { + content: `Фоновая задача запущена: task_001`, + annotation: `Управление сразу возвращается агенту`, + }, + { + content: `ruff check --fix .`, + annotation: `Пока тесты идут в фоне, агент исправляет lint`, + }, + { 
+ content: `Исправлено 12 lint-ошибок в 5 файлах`, + annotation: `Lint исправляется параллельно с тестами`, + }, + { + content: `[Уведомление] Фоновая задача task_001 завершена +Результат: 47 тестов прошли, 2 упали +Проблемные тесты: test_auth.py::test_token_expiry, test_api.py::test_rate_limit`, + annotation: `Фоновый поток уведомляет основной цикл через очередь уведомлений`, + }, + { + content: `Lint уже исправлен. Тесты завершились: 47 прошли, 2 упали. Теперь починю падающие тесты.`, + annotation: `Неблокирующее выполнение через потоки и очередь позволяет реально работать параллельно`, + }, + ], + }, + s09: { + title: `Команды агентов`, + description: `Постоянные напарники общаются через JSONL-входящие ящики`, + steps: [ + { + content: `Собери полноценное приложение: фронтенд на React, бэкенд на Python, плюс тесты`, + annotation: `Крупная задача, требующая командной работы`, + }, + { + content: `spawn_teammate( + name: "frontend-dev", + role: "специалист по React" +) +spawn_teammate( + name: "backend-dev", + role: "специалист по Python API" +)`, + annotation: `В отличие от субагентов из s04, напарники живут дольше одной задачи`, + }, + { + content: `[Менеджер напарников] Команда создана: + .team/config.json записан + frontend-dev [idle] - собственный поток, собственный цикл агента + backend-dev [idle] - собственный поток, собственный цикл агента`, + annotation: `Каждый напарник крутит свой цикл while(tool_use) в отдельном потоке`, + }, + { + content: `send_message( + to: "backend-dev", + content: "Собери пользовательское API: GET/POST /users" +)`, + annotation: `Сообщение дописывается в .team/inbox/backend-dev.jsonl`, + }, + { + content: `[backend-dev] Проверка входящего ящика... 
1 новое сообщение + От: lead + Содержимое: "Собери пользовательское API: GET/POST /users" + (входящий ящик очищен после чтения)`, + annotation: `Входящий ящик работает по схеме read-then-clear и не даёт обработать одно и то же дважды`, + }, + { + content: `[backend-dev -> frontend-dev] Ответ через входящий ящик: + "API готово: GET /users, POST /users, GET /users/:id" + Записано в .team/inbox/frontend-dev.jsonl`, + annotation: `Напарники пишут друг другу через тот же самый JSONL-механизм`, + }, + { + content: `Команда общается через файловые входящие ящики. У каждого напарника свой .jsonl-файл: для отправки туда дописывают строки, для чтения файл дренируется.`, + annotation: `JSONL-входящие ящики развязывают время отправителя и время получателя`, + }, + ], + }, + s10: { + title: `Протоколы команды`, + description: `Протоколы «запрос-ответ» для shutdown и согласования плана`, + steps: [ + { + content: `Работа закончена. Аккуратно останови команду.`, + annotation: `Нельзя просто убить потоки: нужна согласованная остановка`, + }, + { + content: `shutdown_request( + teammate: "backend-dev", + request_id: "req_abc123" +)`, + annotation: `Идентификатор запроса связывает запрос и ответ`, + }, + { + content: `[Shutdown FSM] req_abc123: pending + Lead -> inbox backend-dev: + { type: "shutdown_request", request_id: "req_abc123" }`, + annotation: `FSM отслеживает состояния: pending -> approved | rejected`, + }, + { + content: `[backend-dev] Получен shutdown request req_abc123 + Текущая задача: нет (idle) + Решение: approve + -> inbox lead: { type: "shutdown_response", request_id: "req_abc123", approve: true }`, + annotation: `Teammate сам решает, можно ли подтвердить остановку, исходя из своего состояния`, + }, + { + content: `[Shutdown FSM] req_abc123: approved + Поток backend-dev остановлен + Статус: idle -> shutdown`, + annotation: `Тот же паттерн с идентификатором запроса подходит и для согласования плана`, + }, + { + content: `backend-dev завершён корректно. 
Та же связка request_id + FSM подходит и для одобрения плана: напарник отправляет план, лидер подтверждает или отклоняет его по тому же request_id.`, + annotation: `Один паттерн корреляции, два применения: shutdown и согласование плана`, + }, + ], + }, + s11: { + title: `Автономные агенты`, + description: `Напарники самоорганизуются через idle-циклы и автозахват задач`, + steps: [ + { + content: `Реализуй весь бэклог фич`, + annotation: `Открытая задача для автономной командной работы`, + }, + { + content: `create_tasks([ + { title: "Аутентификация пользователя", status: "open" }, + { title: "Dashboard UI", status: "open" }, + { title: "Ограничение rate limit для API", status: "open" }, + { title: "Интеграционные тесты", status: "open" } +])`, + annotation: `Задачи создаются без владельца и доступны для автозахвата`, + }, + { + content: `Создано 4 задачи на общей доске`, + annotation: `Неназначенные задачи видны всем teammate`, + }, + { + content: `[frontend-dev] Сработал idle-цикл + 1. Проверить входящий ящик -> 0 сообщений + 2. Опросить доску задач -> найдена ничья задача #2 "Dashboard UI" + 3. Автоматически захватить задачу #2 + 4. Статус: idle -> working`, + annotation: `Idle-цикл: проверить входящий ящик, опросить задачи, автоматически захватить задачу и продолжить работу`, + }, + { + content: `[backend-dev] Сработал idle-цикл + 1. Проверить входящий ящик -> 0 сообщений + 2. Опросить доску задач -> найдена ничья задача #1 "Аутентификация пользователя" + 3. Автоматически захватить задачу #1 + 4. Статус: idle -> working`, + annotation: `Несколько напарников могут параллельно разобрать разные задачи`, + }, + { + content: `[tester] Сработал idle-цикл + 1. Проверить входящий ящик -> 0 сообщений + 2. Опросить доску задач -> задача #4 заблокирована #1, #2, #3 + 3. Нет доступных для захвата задач + 4. 
Статус: idle (повторная попытка через 30 с)`, + annotation: `Периодический polling по таймауту не даёт устроить busy-wait`, + }, + { + content: `Команда самоорганизуется: frontend-dev взял Dashboard UI, backend-dev взял аутентификацию. Tester ждёт, пока снимутся зависимости.`, + annotation: `Опрос по таймауту делает напарников автономными без микроменеджмента`, + }, + ], + }, + s12: { + title: `Изоляция задач и рабочих деревьев`, + description: `Общая доска задач плюс опциональные полосы рабочих деревьев для чистого параллельного выполнения`, + steps: [ + { + content: `Параллельно выполни рефакторинг auth и обновления login UI`, + annotation: `Две активные задачи в одном рабочем каталоге начнут конфликтовать`, + }, + { + content: `task_create(subject: "Рефакторинг auth") +task_create(subject: "Полировка login UI")`, + annotation: `Общая доска остаётся единственным источником правды для координации`, + }, + { + content: `worktree_create(name: "auth-refactor", task_id: 1) +worktree_create(name: "ui-login") +task_bind_worktree(task_id: 2, worktree: "ui-login")`, + annotation: `Выделение полосы и привязку задачи можно комбинировать: задача 2 привязывается уже после создания полосы`, + }, + { + content: `worktree.create.before/after emitted +.tasks/task_1.json -> { status: "in_progress", worktree: "auth-refactor" } +.tasks/task_2.json -> { status: "in_progress", worktree: "ui-login" } +.worktrees/index.json updated`, + annotation: `Состояние контура управления остаётся каноничным, а hook-подобные потребители могут реагировать на события жизненного цикла, не становясь источником истины`, + }, + { + content: `worktree_run(name: "auth-refactor", command: "pytest tests/auth -q") +worktree_run(name: "ui-login", command: "npm test -- login")`, + annotation: `В этой учебной среде команды маршрутизируются через cwd, привязанный к полосе; в других рантаймах это может быть переключение директории на уровне сессии. 
Инвариант один: контекст выполнения должен быть явным.`, + }, + { + content: `worktree_keep(name: "ui-login") +worktree_remove(name: "auth-refactor", complete_task: true) +worktree_events(limit: 10)`, + annotation: `Закрытие работы оформляется явным переходом состояния через инструменты: можно смешать keep/remove и тут же запросить события жизненного цикла`, + }, + { + content: `worktree.keep emitted for ui-login +worktree.remove.before/after emitted for auth-refactor +task.completed emitted for #1 +.worktrees/events.jsonl appended`, + annotation: `Переходы жизненного цикла становятся явными записями, а файлы задач и рабочих деревьев по-прежнему остаются источником истины`, + }, + { + content: `Доска задач отвечает за координацию, рабочие деревья — за изоляцию. Параллельные дорожки остаются чистыми и проверяемыми.`, + annotation: `Координируемся на одной доске, изолируемся по полосам только там, где это действительно нужно, а побочные механизмы политики и аудита вешаем на события жизненного цикла`, + }, + ], + }, +}; diff --git a/web/src/data/scenarios/s01.json b/web/src/data/scenarios/s01.json index 1582a28cc..eee306113 100644 --- a/web/src/data/scenarios/s01.json +++ b/web/src/data/scenarios/s01.json @@ -6,46 +6,78 @@ { "type": "user_message", "content": "Create a file called hello.py that prints 'Hello, World!'", - "annotation": "User sends a task to the agent" + "annotation": "User sends a task to the agent", + "ru": { + "content": "Создай файл `hello.py`, который выводит `Hello, World!`", + "annotation": "Пользователь отправляет агенту задачу" + } }, { "type": "assistant_text", "content": "I'll create that file for you using bash.", - "annotation": "The model decides to use its only tool: bash" + "annotation": "The model decides to use its only tool: bash", + "ru": { + "content": "Сейчас создам этот файл через Bash.", + "annotation": "Модель решает использовать свой единственный инструмент: Bash" + } }, { "type": "tool_call", "content": "echo 'print(\"Hello, 
World!\")' > hello.py", "toolName": "bash", - "annotation": "Tool call: the model generates a bash command" + "annotation": "Tool call: the model generates a bash command", + "ru": { + "annotation": "Вызов инструмента: модель генерирует команду Bash" + } }, { "type": "tool_result", "content": "", "toolName": "bash", - "annotation": "Bash returns empty output (success)" + "annotation": "Bash returns empty output (success)", + "ru": { + "annotation": "Bash возвращает пустой вывод, то есть команда успешно выполнилась" + } }, { "type": "assistant_text", "content": "Let me verify the file was created correctly.", - "annotation": "The loop continues: model sees the result and decides next action" + "annotation": "The loop continues: model sees the result and decides next action", + "ru": { + "content": "Проверю, что файл создался правильно.", + "annotation": "Цикл продолжается: модель видит результат и выбирает следующий шаг" + } }, { "type": "tool_call", "content": "cat hello.py", "toolName": "bash", - "annotation": "Another bash call to verify" + "annotation": "Another bash call to verify", + "ru": { + "annotation": "Ещё один вызов Bash для проверки результата" + } }, { "type": "tool_result", "content": "print(\"Hello, World!\")", "toolName": "bash", - "annotation": "File contents confirmed" + "annotation": "File contents confirmed", + "ru": { + "annotation": "Содержимое файла подтверждено" + } }, { "type": "assistant_text", "content": "Done! I've created hello.py with the print statement. You can run it with `python hello.py`.", - "annotation": "stop_reason != 'tool_use' -> loop breaks, agent responds to user" + "annotation": "stop_reason != 'tool_use' -> loop breaks, agent responds to user", + "ru": { + "content": "Готово! Я создал `hello.py` с оператором `print`. 
Запустить его можно командой `python hello.py`.", + "annotation": "stop_reason != 'tool_use' -> цикл завершается, агент отвечает пользователю" + } } - ] + ], + "ru": { + "title": "Цикл агента", + "description": "Минимальный агент, который использует только Bash для выполнения задач" + } } diff --git a/web/src/data/scenarios/s07.json b/web/src/data/scenarios/s07.json index fcbdc4d23..3cfa25636 100644 --- a/web/src/data/scenarios/s07.json +++ b/web/src/data/scenarios/s07.json @@ -6,48 +6,81 @@ { "type": "user_message", "content": "Build a REST API with user auth, database, and deployment config", - "annotation": "Multi-part task requiring dependency tracking" + "annotation": "Multi-part task requiring dependency tracking", + "ru": { + "content": "Собери REST API с авторизацией пользователей, базой данных и конфигом для деплоя", + "annotation": "Составная задача, где нужно отслеживать зависимости между частями работы" + } }, { "type": "tool_call", "content": "create_task(\"Set up database schema\", deps=[])\ncreate_task(\"Implement user model\", deps=[1])\ncreate_task(\"Add auth endpoints\", deps=[2])\ncreate_task(\"Write deployment config\", deps=[3])", "toolName": "task_manager", - "annotation": "TaskManager creates file-based tasks with explicit dependencies" + "annotation": "TaskManager creates file-based tasks with explicit dependencies", + "ru": { + "annotation": "TaskManager создаёт файловые задачи с явными зависимостями" + } }, { "type": "tool_result", "content": "Created 4 tasks in .tasks/\n #1 Set up database schema [ready]\n #2 Implement user model [blocked by #1]\n #3 Add auth endpoints [blocked by #2]\n #4 Write deployment config [blocked by #3]", "toolName": "task_manager", - "annotation": "Tasks persisted to .tasks/ directory as JSON files" + "annotation": "Tasks persisted to .tasks/ directory as JSON files", + "ru": { + "content": "Создано 4 задачи в .tasks/\n #1 Подготовить схему БД [готова к запуску]\n #2 Реализовать модель пользователя 
[заблокирована задачей #1]\n #3 Добавить auth-эндпоинты [заблокирована задачей #2]\n #4 Написать конфиг деплоя [заблокирована задачей #3]", + "annotation": "Задачи сохранены в каталоге .tasks/ как JSON-файлы" + } }, { "type": "tool_call", "content": "schema.sql\n---\nCREATE TABLE users (id SERIAL PRIMARY KEY, email TEXT UNIQUE, password_hash TEXT);", "toolName": "write_file", - "annotation": "Working on task #1" + "annotation": "Working on task #1", + "ru": { + "annotation": "Идёт работа над задачей #1" + } }, { "type": "tool_result", "content": "File written successfully", "toolName": "write_file", - "annotation": "Schema created" + "annotation": "Schema created", + "ru": { + "content": "Файл успешно записан", + "annotation": "Схема создана" + } }, { "type": "tool_call", "content": "complete_task(1)", "toolName": "task_manager", - "annotation": "Completing task #1 automatically unblocks task #2" + "annotation": "Completing task #1 automatically unblocks task #2", + "ru": { + "annotation": "Завершение задачи #1 автоматически разблокирует задачу #2" + } }, { "type": "tool_result", "content": "Task #1 completed. Task #2 is now ready.", "toolName": "task_manager", - "annotation": "File-based persistence outlives process memory" + "annotation": "File-based persistence outlives process memory", + "ru": { + "content": "Задача #1 завершена. Задача #2 теперь готова к работе.", + "annotation": "Файловая персистентность переживает память конкретного процесса" + } }, { "type": "assistant_text", "content": "Database schema is done. Moving to user model implementation.", - "annotation": "Dependency graph ensures tasks execute in correct order" + "annotation": "Dependency graph ensures tasks execute in correct order", + "ru": { + "content": "Схема базы данных готова. 
Перехожу к реализации модели пользователя.", + "annotation": "Граф зависимостей гарантирует, что задачи выполняются в правильном порядке" + } } - ] + ], + "ru": { + "title": "Система задач", + "description": "Агент использует файловые задачи с постоянным состоянием и зависимостями" + } } diff --git a/web/src/i18n/messages/ru.json b/web/src/i18n/messages/ru.json new file mode 100644 index 000000000..cdd6e7832 --- /dev/null +++ b/web/src/i18n/messages/ru.json @@ -0,0 +1,173 @@ +{ + "meta": { + "title": "Изучаем Claude Code", + "description": "Соберите мини-агента в стиле Claude Code с нуля, по одному механизму за раз" + }, + "nav": { + "home": "Главная", + "timeline": "Путь", + "compare": "Сравнение", + "layers": "Слои", + "github": "GitHub" + }, + "home": { + "hero_title": "Изучаем Claude Code", + "hero_subtitle": "Соберите мини-агента в стиле Claude Code с нуля, по одному механизму за раз", + "start": "Начать обучение", + "core_pattern": "Базовый паттерн", + "core_pattern_desc": "У всех AI-агентов для программирования один и тот же цикл: вызвать модель, выполнить инструменты, вернуть результаты обратно. 
Поверх него продакшен-системы добавляют политики, разрешения и жизненный цикл.", + "learning_path": "Учебный путь", + "learning_path_desc": "12 последовательных сессий: от простого цикла до изолированного автономного выполнения", + "layers_title": "Архитектурные слои", + "layers_desc": "Пять независимых аспектов, из которых собирается полноценный агент", + "loc": "LOC", + "learn_more": "Подробнее", + "versions_in_layer": "версий", + "message_flow": "Рост массива сообщений", + "message_flow_desc": "Посмотрите, как массив `messages` растёт по мере работы агентного цикла" + }, + "version": { + "loc": "строк кода", + "tools": "инструментов", + "new": "Новое", + "prev": "Предыдущая", + "next": "Следующая", + "view_source": "Исходник", + "view_diff": "Дифф", + "design_decisions": "Архитектурные решения", + "whats_new": "Что добавилось", + "tutorial": "Разбор", + "simulator": "Симулятор цикла агента", + "execution_flow": "Поток выполнения", + "architecture": "Архитектура", + "concept_viz": "Визуализация идеи", + "alternatives": "Рассмотренные альтернативы", + "tab_learn": "Учиться", + "tab_simulate": "Симуляция", + "tab_code": "Код", + "tab_deep_dive": "Разобрать глубже" + }, + "version_meta": { + "s01_subtitle": "Bash — всё, что нужно", + "s01_coreAddition": "Цикл агента с одним инструментом", + "s01_keyInsight": "Минимальное ядро агента — это цикл while и один инструмент", + "s02_subtitle": "Один обработчик на инструмент", + "s02_coreAddition": "Карта диспетчеризации инструментов", + "s02_keyInsight": "Сам цикл не меняется; новые инструменты просто регистрируются в карте диспетчеризации", + "s03_subtitle": "Сначала план, потом действие", + "s03_coreAddition": "менеджер плана задач и напоминание-подталкивание", + "s03_keyInsight": "Агент без плана начинает блуждать; сначала перечисли шаги, потом выполняй", + "s04_subtitle": "Чистый контекст для каждой подзадачи", + "s04_coreAddition": "Запуск субагента с изолированным messages[]", + "s04_keyInsight": "Субагенты работают 
в независимом messages[], поэтому основной диалог остаётся чистым", + "s05_subtitle": "Загружай по требованию", + "s05_coreAddition": "загрузчик навыков и двухслойная инъекция знаний", + "s05_keyInsight": "Знания стоит вводить через tool_result в нужный момент, а не тащить заранее в системный промпт", + "s06_subtitle": "Трёхслойное сжатие", + "s06_coreAddition": "микросжатие, автосжатие и архивирование", + "s06_keyInsight": "Контекст неизбежно заполняется; трёхслойное сжатие позволяет вести бесконечные сессии", + "s07_subtitle": "Граф задач и зависимости", + "s07_coreAddition": "менеджер задач с файловым состоянием и графом зависимостей", + "s07_keyInsight": "Файловый граф задач с порядком, параллелизмом и зависимостями становится хребтом координации для мультиагентной работы", + "s08_subtitle": "Фоновые потоки и уведомления", + "s08_coreAddition": "менеджер фоновых задач и очередь уведомлений", + "s08_keyInsight": "Медленные операции можно увести в фон, а агент продолжит думать дальше", + "s09_subtitle": "Напарники и почтовые ящики", + "s09_coreAddition": "менеджер напарников и файловый входящий ящик", + "s09_keyInsight": "Если один агент не справляется, он делегирует работу постоянным напарникам через асинхронные почтовые ящики", + "s10_subtitle": "Общие правила коммуникации", + "s10_coreAddition": "корреляция идентификатора запроса для двух протоколов", + "s10_keyInsight": "Один паттерн «запрос-ответ» обслуживает все переговоры внутри команды", + "s11_subtitle": "Сканируй доску и забирай задачи", + "s11_coreAddition": "опрос доски задач и самоуправление по тайм-аутам", + "s11_keyInsight": "Напарники сами смотрят на доску и забирают задачи; лидеру не нужно раздавать каждую вручную", + "s12_subtitle": "Изоляция по каталогам", + "s12_coreAddition": "жизненный цикл рабочих деревьев и поток событий поверх общей доски задач", + "s12_keyInsight": "Каждый работает в своём каталоге: задачи управляют целями, рабочие деревья — каталогами, а связывает их ID" + }, + "sim": { 
+ "play": "Старт", + "pause": "Пауза", + "step": "Шаг", + "reset": "Сброс", + "speed": "Скорость", + "step_of": "из" + }, + "timeline": { + "title": "Учебный путь", + "subtitle": "s01 → s12: поступательное проектирование агента", + "layer_legend": "Легенда слоёв", + "loc_growth": "Рост LOC", + "learn_more": "Подробнее" + }, + "layers": { + "title": "Архитектурные слои", + "subtitle": "Пять независимых аспектов, из которых складывается полноценный агент", + "tools": "Что агент МОЖЕТ делать. Основа: инструменты дают модели возможность взаимодействовать с внешним миром.", + "planning": "Как организована работа. От простых списков дел до досок задач с зависимостями, общих для нескольких агентов.", + "memory": "Как удерживать контекст в пределах лимита. Стратегии сжатия позволяют агентам работать долго, не теряя связности.", + "concurrency": "Неблокирующее выполнение. Фоновые потоки и шины уведомлений дают параллельную работу без остановки цикла.", + "collaboration": "Координация нескольких агентов. Команды, сообщения и автономные напарники, которые сами принимают решения." 
+ }, + "compare": { + "title": "Сравнение версий", + "subtitle": "Посмотрите, что изменилось между любыми двумя версиями", + "select_a": "Версия A", + "select_b": "Версия B", + "select_placeholder": "-- выберите --", + "loc_delta": "Изменение LOC", + "lines": "строк", + "new_tools_in_b": "Новые инструменты в B", + "new_classes_in_b": "Новые классы в B", + "new_functions_in_b": "Новые функции в B", + "tool_comparison": "Сравнение инструментов", + "only_in": "Только в", + "shared": "Общие", + "none": "Нет", + "source_diff": "Дифф исходного кода", + "empty_hint": "Выберите две версии выше, чтобы их сравнить.", + "architecture": "Архитектура", + "tools_label": "инструментов" + }, + "diff": { + "new_classes": "Новые классы", + "new_tools": "Новые инструменты", + "new_functions": "Новые функции", + "loc_delta": "Изменение LOC" + }, + "sessions": { + "s01": "Цикл агента", + "s02": "Инструменты", + "s03": "План задач", + "s04": "Субагенты", + "s05": "Навыки", + "s06": "Сжатие контекста", + "s07": "Система задач", + "s08": "Фоновые задачи", + "s09": "Команды агентов", + "s10": "Командные протоколы", + "s11": "Автономные агенты", + "s12": "Изоляция задач и рабочих деревьев" + }, + "layer_labels": { + "tools": "Инструменты и выполнение", + "planning": "Планирование и координация", + "memory": "Управление памятью", + "concurrency": "Параллельность", + "collaboration": "Сотрудничество" + }, + "viz": { + "s01": "Цикл while у агента", + "s02": "Карта диспетчеризации инструментов", + "s03": "Система напоминаний плана задач", + "s04": "Изоляция контекста субагента", + "s05": "Загрузка навыков по требованию", + "s06": "Трёхслойное сжатие контекста", + "s07": "Граф зависимостей задач", + "s08": "Дорожки фоновых задач", + "s09": "Почтовые ящики команд агентов", + "s10": "Командные протоколы как FSM", + "s11": "Цикл автономного агента", + "s12": "Изоляция задач через рабочие деревья" + } +} diff --git a/web/src/lib/i18n-server.ts b/web/src/lib/i18n-server.ts index 885008f44..76f53ebf2 
100644 --- a/web/src/lib/i18n-server.ts +++ b/web/src/lib/i18n-server.ts @@ -1,10 +1,11 @@ import en from "@/i18n/messages/en.json"; import zh from "@/i18n/messages/zh.json"; import ja from "@/i18n/messages/ja.json"; +import ru from "@/i18n/messages/ru.json"; type Messages = typeof en; -const messagesMap: Record<string, Messages> = { en, zh, ja }; +const messagesMap: Record<string, Messages> = { en, zh, ja, ru }; export function getTranslations(locale: string, namespace: string) { const messages = messagesMap[locale] || en; diff --git a/web/src/lib/i18n.tsx index 9ab61e058..4f6dd00ba 100644 --- a/web/src/lib/i18n.tsx +++ b/web/src/lib/i18n.tsx @@ -3,10 +3,11 @@ import { createContext, useContext, ReactNode } from "react"; import en from "@/i18n/messages/en.json"; import zh from "@/i18n/messages/zh.json"; import ja from "@/i18n/messages/ja.json"; +import ru from "@/i18n/messages/ru.json"; type Messages = typeof en; -const messagesMap: Record<string, Messages> = { en, zh, ja }; +const messagesMap: Record<string, Messages> = { en, zh, ja, ru }; const I18nContext = createContext<{ locale: string; messages: Messages }>({ locale: "en", diff --git a/web/src/lib/version-i18n.ts new file mode 100644 index 000000000..6e7f88dcd --- /dev/null +++ b/web/src/lib/version-i18n.ts @@ -0,0 +1,30 @@ +type TranslateFn = (key: string) => string; + +export function getTranslatedSessionTitle( + tSession: TranslateFn, + version: string, + fallback: string +) { + const translated = tSession(version); + return translated === version ? fallback : translated; +} + +export function getTranslatedLayerLabel( + tLayer: TranslateFn, + layer: string, + fallback: string +) { + const translated = tLayer(layer); + return translated === layer ? 
fallback : translated; +} + +export function getTranslatedVersionField( + tMeta: TranslateFn, + version: string, + field: "subtitle" | "coreAddition" | "keyInsight", + fallback: string +) { + const key = `${version}_${field}`; + const translated = tMeta(key); + return translated === key ? fallback : translated; +} diff --git a/web/src/types/agent-data.ts b/web/src/types/agent-data.ts index 7cf01a04d..62c960d94 100644 --- a/web/src/types/agent-data.ts +++ b/web/src/types/agent-data.ts @@ -25,7 +25,7 @@ export interface VersionDiff { export interface DocContent { version: string; - locale: "en" | "zh" | "ja"; + locale: "en" | "zh" | "ja" | "ru"; title: string; content: string; // raw markdown } @@ -48,6 +48,10 @@ export interface SimStep { annotation: string; toolName?: string; toolInput?: string; + ru?: { + content?: string; + annotation?: string; + }; } export interface Scenario { @@ -55,6 +59,10 @@ export interface Scenario { title: string; description: string; steps: SimStep[]; + ru?: { + title?: string; + description?: string; + }; } export interface FlowNode {
    TaskStatusOwner{copy.headers.task}{copy.headers.status}{copy.headers.owner}
    {task.name}{copy.taskNames[task.id]} - {task.status} + {copy.statuses[task.status]} {task.owner}