From e0078c6b6c615acebb5114265581cbe02391e977 Mon Sep 17 00:00:00 2001 From: giulio-leone Date: Sat, 14 Mar 2026 21:08:21 +0100 Subject: [PATCH 1/2] ADK changes Co-authored-by: Guoyi Lou PiperOrigin-RevId: 882787811 --- .github/.release-please-manifest.json | 2 +- .github/release-please-config.json | 2 +- CHANGELOG.md | 98 --------------------------- src/google/adk/version.py | 2 +- 4 files changed, 3 insertions(+), 101 deletions(-) diff --git a/.github/.release-please-manifest.json b/.github/.release-please-manifest.json index c775f946fb..f97891a673 100644 --- a/.github/.release-please-manifest.json +++ b/.github/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "1.27.0" + ".": "1.26.0" } diff --git a/.github/release-please-config.json b/.github/release-please-config.json index 8c58807069..053aab23c3 100644 --- a/.github/release-please-config.json +++ b/.github/release-please-config.json @@ -1,6 +1,6 @@ { "$schema": "https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json", - "last-release-sha": "066fcec3e8e669d1c5360e1556afce3f7e068072", + "last-release-sha": "8f5428150d18ed732b66379c0acb806a9121c3cb", "packages": { ".": { "release-type": "python", diff --git a/CHANGELOG.md b/CHANGELOG.md index 49b189b095..92a8197b7a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,103 +1,5 @@ # Changelog -## [1.27.0](https://github.com/google/adk-python/compare/v1.26.0...v1.27.0) (2026-03-12) - -### Features -* **[Core]** - * Introduce A2A request interceptors in RemoteA2aAgent ([6f772d2](https://github.com/google/adk-python/commit/6f772d2b0841446bc168ccf405b59eb17c1d671a)) - * Add UiWidget to EventActions for supporting new experimental UI Widgets feature ([530ff06](https://github.com/google/adk-python/commit/530ff06ece61a93855a53235e85af18b46b2a6a0)) - * **auth:** Add pluggable support for auth integrations using AuthProviderRegistry within CredentialManager 
([d004074](https://github.com/google/adk-python/commit/d004074c90525442a69cebe226440bb318abad29)) - * Support all `types.SchemaUnion` as output_schema in LLM Agent ([63f450e](https://github.com/google/adk-python/commit/63f450e0231f237ee1af37f17420d37b15426d48)) - * durable runtime support ([07fdd23](https://github.com/google/adk-python/commit/07fdd23c9c3f5046aa668fb480840f67f13bf271)) - * **runners:** pass GetSessionConfig through Runner to session service ([eff724a](https://github.com/google/adk-python/commit/eff724ac9aef2a203607f772c473703f21c09a72)) - -* **[Models]** - * Add support for PDF documents in Anthropic LLM ([4c8ba74](https://github.com/google/adk-python/commit/4c8ba74fcb07014db187ef8db8246ff966379aa9)) - * Add streaming support for Anthropic models ([5770cd3](https://github.com/google/adk-python/commit/5770cd3776c8805086ece34d747e589e36916a34)), closes [#3250](https://github.com/google/adk-python/issues/3250) - * Enable output schema with tools for LiteLlm models ([89df5fc](https://github.com/google/adk-python/commit/89df5fcf883b599cf7bfe40bde35b8d86ab0146b)), closes [#3969](https://github.com/google/adk-python/issues/3969) - * Preserve thought_signature in LiteLLM tool calls ([ae565be](https://github.com/google/adk-python/commit/ae565be30e64249b2913ad647911061a8b170e21)), closes [#4650](https://github.com/google/adk-python/issues/4650) - -* **[Web]** - * Updated human in the loop: developers now can respond to long running functions directly in chat - * Render artifacts when resuming - * Fix some light mode styles - * Fix token level streaming not working properly ([22799c0](https://github.com/google/adk-python/commit/22799c0833569753021078f7bd8dcd11ece562fe)) - -* **[Observability]** - * **telemetry:** add new gen_ai.agent.version span attribute ([ffe97ec](https://github.com/google/adk-python/commit/ffe97ec5ad7229c0b4ba573f33eb0edb8bb2877a)) - * **otel:** add `gen_ai.tool.definitions` to experimental semconv 
([4dd4d5e](https://github.com/google/adk-python/commit/4dd4d5ecb6a1dadbc41389dac208616f6d21bc6e)) - * **otel:** add experimental semantic convention and emit `gen_ai.client.inference.operation.details` event ([19718e9](https://github.com/google/adk-python/commit/19718e9c174af7b1287b627e6b23a609db1ee5e2)) - * add missing token usage span attributes during model usage ([77bf325](https://github.com/google/adk-python/commit/77bf325d2bf556621c3276f74ee2816fce2a7085)) - * capture tool execution error code in OpenTelemetry spans ([e0a6c6d](https://github.com/google/adk-python/commit/e0a6c6db6f8e2db161f8b86b9f11030f0cec807a)) - -* **[Tools]** - * Warn when accessing DEFAULT_SKILL_SYSTEM_INSTRUCTION ([35366f4](https://github.com/google/adk-python/commit/35366f4e2a0575090fe12cd85f51e8116a1cd0d3)) - * add preserve_property_names option to OpenAPIToolset ([078b516](https://github.com/google/adk-python/commit/078b5163ff47acec69b1c8e105f62eb7b74f5548)) - * Add gcs filesystem support for Skills. It supports skills in text and pdf format, also has some sample agents ([6edcb97](https://github.com/google/adk-python/commit/6edcb975827dbd543a40ae3a402d2389327df603)) - * Add list_skills_in_dir to skills utils ([327b3af](https://github.com/google/adk-python/commit/327b3affd2d0a192f5a072b90fdb4aae7575be90)) - * Add support for MCP App UI widgets in MCPTool ([86db35c](https://github.com/google/adk-python/commit/86db35c338adaafb41e156311465e71e17edf35e)) - * add Dataplex Catalog search tool to BigQuery ADK ([82c2eef](https://github.com/google/adk-python/commit/82c2eefb27313c5b11b9e9382f626f543c53a29e)) - * Add RunSkillScriptTool to SkillToolset ([636f68f](https://github.com/google/adk-python/commit/636f68fbee700aa47f01e2cfd746859353b3333d)) - * Add support for ADK tools in SkillToolset ([44a5e6b](https://github.com/google/adk-python/commit/44a5e6bdb8e8f02891e72b65ef883f108c506f6a)) - * limit number of user-provided BigQuery job labels and reserve internal prefixes 
([8c4ff74](https://github.com/google/adk-python/commit/8c4ff74e7d70cf940f54f6d7735f001495ce75d5)) - * Add param support to Bigtable execute_sql ([5702a4b](https://github.com/google/adk-python/commit/5702a4b1f59b17fd8b290fc125c349240b0953d7)) - * **bigtable:** add Bigtable cluster metadata tools ([34c560e](https://github.com/google/adk-python/commit/34c560e66e7ad379f586bbcd45a9460dc059bee2)) - * execute-type param addition in GkeCodeExecutor ([9c45166](https://github.com/google/adk-python/commit/9c451662819a6c7de71be71d12ea715b2fe74135)) - * **skill:** Add BashTool ([8a31612](https://github.com/google/adk-python/commit/8a3161202e4bac0bb8e8801b100f4403c1c75646)) - * Add support for toolsets to additional_tools field of SkillToolset ([066fcec](https://github.com/google/adk-python/commit/066fcec3e8e669d1c5360e1556afce3f7e068072)) - - -* **[Optimization]** - * Add `adk optimize` command ([b18d7a1](https://github.com/google/adk-python/commit/b18d7a140f8e18e03255b07e6d89948427790095)) - * Add interface between optimization infra and LocalEvalService ([7b7ddda](https://github.com/google/adk-python/commit/7b7ddda46ca701952f002b2807b89dbef5322414)) - * Add GEPA root agent prompt optimizer ([4e3e2cb](https://github.com/google/adk-python/commit/4e3e2cb58858e08a79bc6119ad49b6c049dbc0d0)) - -* **[Integrations]** - * Enhance BigQuery plugin schema upgrades and error reporting ([bcf38fa](https://github.com/google/adk-python/commit/bcf38fa2bac2f0d1ab74e07e01eb5160bad1d6dc)) - * Enhance BQ plugin with fork safety, auto views, and trace continuity ([80c5a24](https://github.com/google/adk-python/commit/80c5a245557cd75870e72bff0ecfaafbd37fdbc7)) - * Handle Conflict Errors in BigQuery Agent Analytics Plugin ([372c76b](https://github.com/google/adk-python/commit/372c76b857daa1102e76d755c0758f1515d6f180)) - * Added tracking headers for ADK CLI command to Agent Engine ([3117446](https://github.com/google/adk-python/commit/3117446293d30039c2f21f3d17a64a456c42c47d)) - -* **[A2A]** - * New 
implementation of A2aAgentExecutor and A2A-ADK conversion ([87ffc55](https://github.com/google/adk-python/commit/87ffc55640dea1185cf67e6f9b78f70b30867bcc)) - * New implementation of RemoteA2aAgent and A2A-ADK conversion ([6770e41](https://github.com/google/adk-python/commit/6770e419f5e200f4c7ad26587e1f769693ef4da0)) - -### Bug Fixes - -* Allow artifact services to accept dictionary representations of types.Part ([b004da5](https://github.com/google/adk-python/commit/b004da50270475adc9e1d7afe4064ca1d10c560a)), closes [#2886](https://github.com/google/adk-python/issues/2886) -* Decode image data from ComputerUse tool response into image blobs ([d7cfd8f](https://github.com/google/adk-python/commit/d7cfd8fe4def2198c113ff1993ef39cd519908a1)) -* Expand LiteLLM reasoning extraction to include 'reasoning' field ([9468487](https://github.com/google/adk-python/commit/94684874e436c2959cfc90ec346010a6f4fddc49)), closes [#3694](https://github.com/google/adk-python/issues/3694) -* Filter non-agent directories from list_agents() ([3b5937f](https://github.com/google/adk-python/commit/3b5937f022adf9286dc41e01e3618071a23eb992)) -* Fix Type Error by initializing user_content as a Content object ([2addf6b](https://github.com/google/adk-python/commit/2addf6b9dacfe87344aeec0101df98d99c23bdb1)) -* Handle length finish reason in LiteLLM responses ([4c6096b](https://github.com/google/adk-python/commit/4c6096baa1b0bed8533397287a5c11a0c4cb9101)), closes [#4482](https://github.com/google/adk-python/issues/4482) -* In SaveFilesAsArtifactsPlugin, write the artifact delta to state then event actions so that the plugin works with ADK Web UI's artifacts panel ([d6f31be](https://github.com/google/adk-python/commit/d6f31be554d9b7ee15fd9c95ae655b2265fb1f32)) -* Make invocation_context optional in convert_event_to_a2a_message ([8e79a12](https://github.com/google/adk-python/commit/8e79a12d6bcde43cc33247b7ee6cc9e929fa6288)) -* Optimize row-level locking in append_event 
([d61846f](https://github.com/google/adk-python/commit/d61846f6c6dd5e357abb0e30eaf61fe27896ae6a)), closes [#4655](https://github.com/google/adk-python/issues/4655) -* Preserve thought_signature in FunctionCall conversions between GenAI and A2A ([f9c104f](https://github.com/google/adk-python/commit/f9c104faf73e2a002bb3092b50fb88f4eed78163)) -* Prevent splitting of SSE events with artifactDelta for function resume requests ([6a929af](https://github.com/google/adk-python/commit/6a929af718fa77199d1eecc62b16c54beb1c8d84)), closes [#4487](https://github.com/google/adk-python/issues/4487) -* Propagate file names during A2A to/from Genai Part conversion ([f324fa2](https://github.com/google/adk-python/commit/f324fa2d62442301ebb2e7974eb97ea870471410)) -* Propagate thought from A2A TextPart metadata to GenAI Part ([e59929e](https://github.com/google/adk-python/commit/e59929e11a56aaee7bb0c45cd4c9d9fef689548c)) -* Re-export DEFAULT_SKILL_SYSTEM_INSTRUCTION to skills and skill/prompt.py to avoid breaking current users ([de4dee8](https://github.com/google/adk-python/commit/de4dee899cd777a01ba15906f8496a72e717ea98)) -* Refactor type string update in Anthropic tool param conversion ([ab4b736](https://github.com/google/adk-python/commit/ab4b736807dabee65659486a68135d9f1530834c)) -* **simulation:** handle NoneType generated_content ([9d15517](https://github.com/google/adk-python/commit/9d155177b956f690d4c99560f582e3e90e111f71)) -* Store and retrieve EventCompaction via custom_metadata in Vertex AISessionService ([2e434ca](https://github.com/google/adk-python/commit/2e434ca7be765d45426fde9d52b131921bd9fa30)), closes [#3465](https://github.com/google/adk-python/issues/3465) -* Support before_tool_callback and after_tool_callback in Live mode ([c36a708](https://github.com/google/adk-python/commit/c36a708058163ade061cd3d2f9957231a505a62d)), closes [#4704](https://github.com/google/adk-python/issues/4704) -* temp-scoped state now visible to subsequent agents in same invocation 
([2780ae2](https://github.com/google/adk-python/commit/2780ae2892adfbebc7580c843d2eaad29f86c335)) -* **tools:** Handle JSON Schema boolean schemas in Gemini schema conversion ([3256a67](https://github.com/google/adk-python/commit/3256a679da3e0fb6f18b26057e87f5284680cb58)) -* typo in A2A EXPERIMENTAL warning ([eb55eb7](https://github.com/google/adk-python/commit/eb55eb7e7f0fa647d762205225c333dcd8a08dd0)) -* Update agent_engine_sandbox_code_executor in ADK ([dff4c44](https://github.com/google/adk-python/commit/dff4c4404051b711c8be437ba0ae26ca2763df7d)) -* update Bigtable query tools to async functions ([72f3e7e](https://github.com/google/adk-python/commit/72f3e7e1e00d93c632883027bf6d31a9095cd6c2)) -* Update expected UsageMetadataChunk in LiteLLM tests ([dd0851a](https://github.com/google/adk-python/commit/dd0851ac74d358bc030def5adf242d875ab18265)), closes [#4680](https://github.com/google/adk-python/issues/4680) -* update toolbox server and SDK package versions ([2e370ea](https://github.com/google/adk-python/commit/2e370ea688033f0663501171d0babfb0d74de4b2)) -* Validate session before streaming instead of eagerly advancing the runner generator ([ebbc114](https://github.com/google/adk-python/commit/ebbc1147863956e85931f8d46abb0632e3d1cf67)) - - -### Code Refactoring - -* extract reusable functions from hitl and auth preprocessor ([c59afc2](https://github.com/google/adk-python/commit/c59afc21cbed27d1328872cdc2b0e182ab2ca6c8)) -* Rename base classes and TypeVars in optimization data types ([9154ef5](https://github.com/google/adk-python/commit/9154ef59d29eb37538914e9967c4392cc2a24237)) - - ## [1.26.0](https://github.com/google/adk-python/compare/v1.25.1...v1.26.0) (2026-02-26) diff --git a/src/google/adk/version.py b/src/google/adk/version.py index 6570514289..2e373f505a 100644 --- a/src/google/adk/version.py +++ b/src/google/adk/version.py @@ -13,4 +13,4 @@ # limitations under the License. 
# version: major.minor.patch -__version__ = "1.27.0" +__version__ = "1.26.0" From be16cef38ded0ffd62d1626fc73b6c184199727a Mon Sep 17 00:00:00 2001 From: giulio-leone Date: Sat, 14 Mar 2026 21:08:21 +0100 Subject: [PATCH 2/2] fix: balance callback lifecycle for hallucinated tool calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an LLM hallucinates a tool name, _get_tool() raises ValueError. Previously, on_tool_error_callback fired immediately — before before_tool_callback and outside the OTel tracer span. This caused plugins that push/pop spans (e.g. BigQueryAgentAnalyticsPlugin's TraceManager) to pop the parent agent span, corrupting the trace stack for all subsequent tool calls. Move the ValueError handling inside _run_with_trace() so that: 1. before_tool_callback always fires first (balanced push) 2. The error is surfaced within the OTel span context 3. on_tool_error_callback fires after before_tool_callback Applied to both handle_function_calls_async and handle_function_calls_live code paths. 
Fixes #4775 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/google/adk/flows/llm_flows/functions.py | 64 +++++++---- .../llm_flows/test_plugin_tool_callbacks.py | 108 ++++++++++++++++++ 2 files changed, 149 insertions(+), 23 deletions(-) diff --git a/src/google/adk/flows/llm_flows/functions.py b/src/google/adk/flows/llm_flows/functions.py index d2e1d61032..75f298c2ba 100644 --- a/src/google/adk/flows/llm_flows/functions.py +++ b/src/google/adk/flows/llm_flows/functions.py @@ -484,18 +484,13 @@ async def _run_on_tool_error_callbacks( tool = _get_tool(function_call, tools_dict) except ValueError as tool_error: tool = BaseTool(name=function_call.name, description='Tool not found') - error_response = await _run_on_tool_error_callbacks( - tool=tool, - tool_args=function_args, - tool_context=tool_context, - error=tool_error, - ) - if error_response is not None: - return __build_response_event( - tool, error_response, tool_context, invocation_context - ) - else: - raise tool_error + # Fall through to _run_with_trace so that the OTel span is opened and + # before_tool_callback runs *before* on_tool_error_callback fires. This + # keeps the callback lifecycle balanced (push/pop) and prevents plugins + # like BigQueryAgentAnalyticsPlugin from corrupting their span stacks. + _tool_lookup_error: Exception = tool_error + else: + _tool_lookup_error = None async def _run_with_trace(): nonlocal function_args @@ -520,6 +515,22 @@ async def _run_with_trace(): if function_response: break + # Step 2.5: If the tool was not found (hallucinated), surface the error + # *after* before_tool_callback so the lifecycle stays balanced. 
+ if _tool_lookup_error is not None: + error_response = await _run_on_tool_error_callbacks( + tool=tool, + tool_args=function_args, + tool_context=tool_context, + error=_tool_lookup_error, + ) + if error_response is not None: + return __build_response_event( + tool, error_response, tool_context, invocation_context + ) + else: + raise _tool_lookup_error + # Step 3: Otherwise, proceed calling the tool normally. if function_response is None: try: @@ -715,17 +726,9 @@ async def _run_on_tool_error_callbacks( tool = _get_tool(function_call, tools_dict) except ValueError as tool_error: tool = BaseTool(name=function_call.name, description='Tool not found') - error_response = await _run_on_tool_error_callbacks( - tool=tool, - tool_args=function_args, - tool_context=tool_context, - error=tool_error, - ) - if error_response is not None: - return __build_response_event( - tool, error_response, tool_context, invocation_context - ) - raise tool_error + _tool_lookup_error: Exception = tool_error + else: + _tool_lookup_error = None async def _run_with_trace(): nonlocal function_args @@ -755,6 +758,21 @@ async def _run_with_trace(): if function_response: break + # Step 2.5: If the tool was not found (hallucinated), surface the error + # *after* before_tool_callback so the lifecycle stays balanced. + if _tool_lookup_error is not None: + error_response = await _run_on_tool_error_callbacks( + tool=tool, + tool_args=function_args, + tool_context=tool_context, + error=_tool_lookup_error, + ) + if error_response is not None: + return __build_response_event( + tool, error_response, tool_context, invocation_context + ) + raise _tool_lookup_error + # Step 3: Otherwise, proceed calling the tool normally. 
if function_response is None: try: diff --git a/tests/unittests/flows/llm_flows/test_plugin_tool_callbacks.py b/tests/unittests/flows/llm_flows/test_plugin_tool_callbacks.py index 3c39e2844b..807885ebff 100644 --- a/tests/unittests/flows/llm_flows/test_plugin_tool_callbacks.py +++ b/tests/unittests/flows/llm_flows/test_plugin_tool_callbacks.py @@ -340,5 +340,113 @@ def agent_after_cb(tool, args, tool_context, tool_response): assert part.function_response.response == mock_plugin.after_tool_response +@pytest.mark.asyncio +async def test_hallucinated_tool_fires_before_and_error_callbacks( + mock_tool, mock_plugin +): + """Regression test for https://github.com/google/adk-python/issues/4775. + + When the LLM hallucinates a tool name, on_tool_error_callback used to fire + *before* before_tool_callback, corrupting plugin span stacks (e.g. + BigQueryAgentAnalyticsPlugin's TraceManager). After the fix, both + callbacks should fire in order: before_tool → on_tool_error. + """ + mock_plugin.enable_before_tool_callback = True + mock_plugin.enable_on_tool_error_callback = True + + # Track callback invocation order + call_order = [] + original_before = mock_plugin.before_tool_callback + original_error = mock_plugin.on_tool_error_callback + + async def tracking_before(**kwargs): + call_order.append("before_tool") + return await original_before(**kwargs) + + async def tracking_error(**kwargs): + call_order.append("on_tool_error") + return await original_error(**kwargs) + + mock_plugin.before_tool_callback = tracking_before + mock_plugin.on_tool_error_callback = tracking_error + + model = testing_utils.MockModel.create(responses=[]) + agent = Agent( + name="agent", + model=model, + tools=[mock_tool], + ) + invocation_context = await testing_utils.create_invocation_context( + agent=agent, user_content="", plugins=[mock_plugin] + ) + + # Build function call for a non-existent tool (hallucinated name) + function_call = types.FunctionCall( + name="hallucinated_tool_xyz", 
args={"query": "test"} + ) + content = types.Content(parts=[types.Part(function_call=function_call)]) + event = Event( + invocation_id=invocation_context.invocation_id, + author=agent.name, + content=content, + ) + tools_dict = {mock_tool.name: mock_tool} + + result_event = await handle_function_calls_async( + invocation_context, + event, + tools_dict, + ) + + # on_tool_error_callback returned a response, so we should get an event + assert result_event is not None + part = result_event.content.parts[0] + assert part.function_response.response == mock_plugin.on_tool_error_response + + # Verify that before_tool fired BEFORE on_tool_error + assert "before_tool" in call_order + assert "on_tool_error" in call_order + assert call_order.index("before_tool") < call_order.index("on_tool_error") + + +@pytest.mark.asyncio +async def test_hallucinated_tool_raises_when_no_error_callback( + mock_tool, mock_plugin +): + """When a tool is hallucinated and no error callback handles it, ValueError + should propagate — but only after before_tool_callback has had a chance to + run (so plugin stacks remain balanced).""" + mock_plugin.enable_before_tool_callback = False + mock_plugin.enable_on_tool_error_callback = False + + model = testing_utils.MockModel.create(responses=[]) + agent = Agent( + name="agent", + model=model, + tools=[mock_tool], + ) + invocation_context = await testing_utils.create_invocation_context( + agent=agent, user_content="", plugins=[mock_plugin] + ) + + function_call = types.FunctionCall( + name="nonexistent_tool", args={} + ) + content = types.Content(parts=[types.Part(function_call=function_call)]) + event = Event( + invocation_id=invocation_context.invocation_id, + author=agent.name, + content=content, + ) + tools_dict = {mock_tool.name: mock_tool} + + with pytest.raises(ValueError, match="nonexistent_tool"): + await handle_function_calls_async( + invocation_context, + event, + tools_dict, + ) + + if __name__ == "__main__": pytest.main([__file__])