diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/CHANGELOG.md b/sdk/agentserver/azure-ai-agentserver-agentframework/CHANGELOG.md
deleted file mode 100644
index cfcf2445e256..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/CHANGELOG.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# Release History
-
-## 1.0.0b1 (2025-11-07)
-
-### Features Added
-
-First version
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/LICENSE b/sdk/agentserver/azure-ai-agentserver-agentframework/LICENSE
deleted file mode 100644
index 63447fd8bbbf..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-Copyright (c) Microsoft Corporation.
-
-MIT License
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/MANIFEST.in b/sdk/agentserver/azure-ai-agentserver-agentframework/MANIFEST.in
deleted file mode 100644
index 062fdaf366a0..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/MANIFEST.in
+++ /dev/null
@@ -1,9 +0,0 @@
-include *.md
-include LICENSE
-recursive-include tests *.py
-recursive-include samples *.py *.md
-recursive-include doc *.rst *.md
-include azure/__init__.py
-include azure/ai/__init__.py
-include azure/ai/agentserver/__init__.py
-include azure/ai/agentserver/agentframework/py.typed
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/README.md b/sdk/agentserver/azure-ai-agentserver-agentframework/README.md
deleted file mode 100644
index 54d80aed48e7..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/README.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# Azure AI Agent Server Adapter for Agent-framework Python
-
-
-
-## Getting started
-
-```bash
-pip install azure-ai-agentserver-agentframework
-```
-
-
-## Key concepts
-
-Azure AI Agent Server wraps your Agent-framework agent, and host it on the cloud.
-
-
-## Examples
-
-```python
-# your existing agent
-from my_framework_agent import my_awesome_agent
-
-# agent framework utils
-from azure.ai.agentserver.agentframework import from_agent_framework
-
-if __name__ == "__main__":
-    # with this simple line, your agent will be hosted on http://localhost:8088
-    from_agent_framework(my_awesome_agent).run()
-
-```
-
-## Troubleshooting
-
-First run your agent with azure-ai-agentserver-agentframework locally.
-
-If it works on local but failed on cloud. Check your logs in the application insight connected to your Azure AI Foundry Project.
-
-
-## Next steps
-
-Please visit [Samples](https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/agentserver/azure-ai-agentserver-agentframework/samples) folder. There are several samples for you to build your agent with azure-ai-agentserver
-
-
-## Contributing
-
-This project welcomes contributions and suggestions. Most contributions require
-you to agree to a Contributor License Agreement (CLA) declaring that you have
-the right to, and actually do, grant us the rights to use your contribution.
-For details, visit https://cla.microsoft.com.
-
-When you submit a pull request, a CLA-bot will automatically determine whether
-you need to provide a CLA and decorate the PR appropriately (e.g., label,
-comment). Simply follow the instructions provided by the bot. You will only
-need to do this once across all repos using our CLA.
-
-This project has adopted the
-[Microsoft Open Source Code of Conduct][code_of_conduct]. For more information,
-see the Code of Conduct FAQ or contact opencode@microsoft.com with any
-additional questions or comments.
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/__init__.py b/sdk/agentserver/azure-ai-agentserver-agentframework/azure/__init__.py
deleted file mode 100644
index d55ccad1f573..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-__path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/__init__.py b/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/__init__.py
deleted file mode 100644
index d55ccad1f573..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-__path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/__init__.py b/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/__init__.py
deleted file mode 100644
index d55ccad1f573..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-__path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/__init__.py b/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/__init__.py
deleted file mode 100644
index af980a34799f..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-__path__ = __import__("pkgutil").extend_path(__path__, __name__)
-
-from ._version import VERSION
-
-
-def from_agent_framework(agent):
-    from .agent_framework import AgentFrameworkCBAgent
-
-    return AgentFrameworkCBAgent(agent)
-
-
-__all__ = ["from_agent_framework"]
-__version__ = VERSION
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/_version.py b/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/_version.py
deleted file mode 100644
index be71c81bd282..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/_version.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# coding=utf-8
-# --------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License. See License.txt in the project root for license information.
-# Code generated by Microsoft (R) Python Code Generator.
-# Changes may cause incorrect behavior and will be lost if the code is regenerated.
-# --------------------------------------------------------------------------
-
-VERSION = "1.0.0b1"
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/agent_framework.py b/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/agent_framework.py
deleted file mode 100644
index 7177b522d2a9..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/agent_framework.py
+++ /dev/null
@@ -1,153 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-# pylint: disable=logging-fstring-interpolation
-from __future__ import annotations
-
-import asyncio  # pylint: disable=do-not-import-asyncio
-import os
-from typing import Any, AsyncGenerator, Union
-
-from agent_framework import AgentProtocol
-from agent_framework.azure import AzureAIAgentClient  # pylint: disable=no-name-in-module
-from opentelemetry import trace
-
-from azure.ai.agentserver.core import AgentRunContext, FoundryCBAgent
-from azure.ai.agentserver.core.constants import Constants as AdapterConstants
-from azure.ai.agentserver.core.logger import get_logger
-from azure.ai.agentserver.core.models import (
-    CreateResponse,
-    Response as OpenAIResponse,
-    ResponseStreamEvent,
-)
-from azure.ai.projects import AIProjectClient
-from azure.identity import DefaultAzureCredential
-
-from .models.agent_framework_input_converters import AgentFrameworkInputConverter
-from .models.agent_framework_output_non_streaming_converter import (
-    AgentFrameworkOutputNonStreamingConverter,
-)
-from .models.agent_framework_output_streaming_converter import AgentFrameworkOutputStreamingConverter
-from .models.constants import Constants
-
-logger = get_logger()
-
-
-class AgentFrameworkCBAgent(FoundryCBAgent):
-    """
-    Adapter class for integrating Agent Framework agents with the FoundryCB agent interface.
-
-    This class wraps an Agent Framework `AgentProtocol` instance and provides a unified interface
-    for running agents in both streaming and non-streaming modes. It handles input and output
-    conversion between the Agent Framework and the expected formats for FoundryCB agents.
-
-    Parameters:
-        agent (AgentProtocol): An instance of an Agent Framework agent to be adapted.
-
-    Usage:
-        - Instantiate with an Agent Framework agent.
-        - Call `agent_run` with a `CreateResponse` request body to execute the agent.
-        - Supports both streaming and non-streaming responses based on the `stream` flag.
-    """
-
-    def __init__(self, agent: AgentProtocol):
-        super().__init__()
-        self.agent = agent
-        logger.info(f"Initialized AgentFrameworkCBAgent with agent: {type(agent).__name__}")
-
-    def _resolve_stream_timeout(self, request_body: CreateResponse) -> float:
-        """Resolve idle timeout for streaming updates.
-
-        Order of precedence:
-        1) request_body.stream_timeout_s (if provided)
-        2) env var Constants.AGENTS_ADAPTER_STREAM_TIMEOUT_S
-        3) Constants.DEFAULT_STREAM_TIMEOUT_S
-
-        :param request_body: The CreateResponse request body.
-        :type request_body: CreateResponse
-
-        :return: The resolved stream timeout in seconds.
-        :rtype: float
-        """
-        override = request_body.get("stream_timeout_s", None)
-        if override is not None:
-            return float(override)
-        env_val = os.getenv(Constants.AGENTS_ADAPTER_STREAM_TIMEOUT_S)
-        return float(env_val) if env_val is not None else float(Constants.DEFAULT_STREAM_TIMEOUT_S)
-
-    def init_tracing(self):
-        exporter = os.environ.get(AdapterConstants.OTEL_EXPORTER_ENDPOINT)
-        app_insights_conn_str = os.environ.get(AdapterConstants.APPLICATION_INSIGHTS_CONNECTION_STRING)
-        project_endpoint = os.environ.get(AdapterConstants.AZURE_AI_PROJECT_ENDPOINT)
-
-        if project_endpoint:
-            project_client = AIProjectClient(endpoint=project_endpoint, credential=DefaultAzureCredential())
-            agent_client = AzureAIAgentClient(project_client=project_client)
-            agent_client.setup_azure_ai_observability()
-        elif exporter or app_insights_conn_str:
-            os.environ["WORKFLOW_ENABLE_OTEL"] = "true"
-            from agent_framework.observability import setup_observability
-
-            setup_observability(
-                enable_sensitive_data=True,
-                otlp_endpoint=exporter,
-                applicationinsights_connection_string=app_insights_conn_str,
-            )
-        self.tracer = trace.get_tracer(__name__)
-
-    async def agent_run(
-        self, context: AgentRunContext
-    ) -> Union[
-        OpenAIResponse,
-        AsyncGenerator[ResponseStreamEvent, Any],
-    ]:
-        logger.info(f"Starting agent_run with stream={context.stream}")
-        request_input = context.request.get("input")
-
-        input_converter = AgentFrameworkInputConverter()
-        message = input_converter.transform_input(request_input)
-        logger.debug(f"Transformed input message type: {type(message)}")
-
-        # Use split converters
-        if context.stream:
-            logger.info("Running agent in streaming mode")
-            streaming_converter = AgentFrameworkOutputStreamingConverter(context)
-
-            async def stream_updates():
-                update_count = 0
-                timeout_s = self._resolve_stream_timeout(context.request)
-                logger.info("Starting streaming with idle-timeout=%.2fs", timeout_s)
-                for ev in streaming_converter.initial_events():
-                    yield ev
-
-                # Iterate with per-update timeout; terminate if idle too long
-                aiter = self.agent.run_stream(message).__aiter__()
-                while True:
-                    try:
-                        update = await asyncio.wait_for(aiter.__anext__(), timeout=timeout_s)
-                    except StopAsyncIteration:
-                        logger.debug("Agent streaming iterator finished (StopAsyncIteration)")
-                        break
-                    except asyncio.TimeoutError:
-                        logger.warning("Streaming idle timeout reached (%.1fs); terminating stream.", timeout_s)
-                        for ev in streaming_converter.completion_events():
-                            yield ev
-                        return
-                    update_count += 1
-                    transformed = streaming_converter.transform_output_for_streaming(update)
-                    for event in transformed:
-                        yield event
-                for ev in streaming_converter.completion_events():
-                    yield ev
-                logger.info("Streaming completed with %d updates", update_count)
-
-            return stream_updates()
-
-        # Non-streaming path
-        logger.info("Running agent in non-streaming mode")
-        non_streaming_converter = AgentFrameworkOutputNonStreamingConverter(context)
-        result = await self.agent.run(message)
-        logger.debug(f"Agent run completed, result type: {type(result)}")
-        transformed_result = non_streaming_converter.transform_output_for_response(result)
-        logger.info("Agent run and transformation completed successfully")
-        return transformed_result
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/models/__init__.py b/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/models/__init__.py
deleted file mode 100644
index fdf8caba9ef5..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/models/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-
-__path__ = __import__("pkgutil").extend_path(__path__, __name__)
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/models/agent_framework_input_converters.py b/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/models/agent_framework_input_converters.py
deleted file mode 100644
index 993be43e85c8..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/models/agent_framework_input_converters.py
+++ /dev/null
@@ -1,120 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-# pylint: disable=too-many-nested-blocks,too-many-return-statements,too-many-branches
-# mypy: disable-error-code="no-redef"
-from __future__ import annotations
-
-from typing import Dict, List
-
-from agent_framework import ChatMessage, Role as ChatRole
-from agent_framework._types import TextContent
-
-from azure.ai.agentserver.core.logger import get_logger
-
-logger = get_logger()
-
-
-class AgentFrameworkInputConverter:
-    """Normalize inputs for agent.run.
-
-    Accepts: str | List | None
-    Returns: None | str | ChatMessage | list[str] | list[ChatMessage]
-    """
-
-    def transform_input(
-        self,
-        input: str | List[Dict] | None,
-    ) -> str | ChatMessage | list[str] | list[ChatMessage] | None:
-        logger.debug("Transforming input of type: %s", type(input))
-
-        if input is None:
-            return None
-
-        if isinstance(input, str):
-            return input
-
-        try:
-            if isinstance(input, list):
-                messages: list[str | ChatMessage] = []
-
-                for item in input:
-                    # Case 1: ImplicitUserMessage with content as str or list of ItemContentInputText
-                    if self._is_implicit_user_message(item):
-                        content = item.get("content", None)
-                        if isinstance(content, str):
-                            messages.append(content)
-                        elif isinstance(content, list):
-                            text_parts: list[str] = []
-                            for content_item in content:
-                                text_content = self._extract_input_text(content_item)
-                                if text_content:
-                                    text_parts.append(text_content)
-                            if text_parts:
-                                messages.append(" ".join(text_parts))
-
-                    # Case 2: Explicit message params (user/assistant/system)
-                    elif (
-                        item.get("type") == "message"
-                        and item.get("role") is not None
-                        and item.get("content") is not None
-                    ):
-                        role_map = {
-                            "user": ChatRole.USER,
-                            "assistant": ChatRole.ASSISTANT,
-                            "system": ChatRole.SYSTEM,
-                        }
-                        role = role_map.get(item.get("role", "user"), ChatRole.USER)
-
-                        content_text = ""
-                        item_content = item.get("content", None)
-                        if item_content and isinstance(item_content, list):
-                            text_parts: list[str] = []
-                            for content_item in item_content:
-                                item_text = self._extract_input_text(content_item)
-                                if item_text:
-                                    text_parts.append(item_text)
-                            content_text = " ".join(text_parts) if text_parts else ""
-                        elif item_content and isinstance(item_content, str):
-                            content_text = str(item_content)
-
-                        if content_text:
-                            messages.append(ChatMessage(role=role, text=content_text))
-
-                # Determine the most natural return type
-                if not messages:
-                    return None
-                if len(messages) == 1:
-                    return messages[0]
-                if all(isinstance(m, str) for m in messages):
-                    return [m for m in messages if isinstance(m, str)]
-                if all(isinstance(m, ChatMessage) for m in messages):
-                    return [m for m in messages if isinstance(m, ChatMessage)]
-
-                # Mixed content: coerce ChatMessage to str by extracting TextContent parts
-                result: list[str] = []
-                for msg in messages:
-                    if isinstance(msg, ChatMessage):
-                        text_parts: list[str] = []
-                        for c in getattr(msg, "contents", []) or []:
-                            if isinstance(c, TextContent):
-                                text_parts.append(c.text)
-                        result.append(" ".join(text_parts) if text_parts else str(msg))
-                    else:
-                        result.append(str(msg))
-                return result
-
-            raise TypeError(f"Unsupported input type: {type(input)}")
-        except Exception as e:
-            logger.error("Error processing messages: %s", e, exc_info=True)
-            raise Exception(f"Error processing messages: {e}") from e  # pylint: disable=broad-exception-raised
-
-    def _is_implicit_user_message(self, item: Dict) -> bool:
-        return "content" in item and "role" not in item and "type" not in item
-
-    def _extract_input_text(self, content_item: Dict) -> str:
-        if content_item.get("type") == "input_text" and "text" in content_item:
-            text_content = content_item.get("text")
-            if isinstance(text_content, str):
-                return text_content
-        return None  # type: ignore
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/models/agent_framework_output_non_streaming_converter.py b/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/models/agent_framework_output_non_streaming_converter.py
deleted file mode 100644
index 805a5eeb9dec..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/models/agent_framework_output_non_streaming_converter.py
+++ /dev/null
@@ -1,232 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-from __future__ import annotations
-
-import datetime
-import json
-from typing import Any, List
-
-from agent_framework import AgentRunResponse, FunctionResultContent
-from agent_framework._types import FunctionCallContent, TextContent
-
-from azure.ai.agentserver.core import AgentRunContext
-from azure.ai.agentserver.core.logger import get_logger
-from azure.ai.agentserver.core.models import Response as OpenAIResponse
-from azure.ai.agentserver.core.models.projects import (
-    ItemContentOutputText,
-    ResponsesAssistantMessageItemResource,
-)
-
-from .agent_id_generator import AgentIdGenerator
-from .constants import Constants
-
-logger = get_logger()
-
-
-class AgentFrameworkOutputNonStreamingConverter:  # pylint: disable=name-too-long
-    """Non-streaming converter: AgentRunResponse -> OpenAIResponse."""
-
-    def __init__(self, context: AgentRunContext):
-        self._context = context
-        self._response_id = None
-        self._response_created_at = None
-
-    def _ensure_response_started(self) -> None:
-        if not self._response_id:
-            self._response_id = self._context.response_id  # type: ignore
-        if not self._response_created_at:
-            self._response_created_at = int(datetime.datetime.now(datetime.timezone.utc).timestamp())  # type: ignore
-
-    def _build_item_content_output_text(self, text: str) -> ItemContentOutputText:
-        return ItemContentOutputText(text=text, annotations=[])
-
-    def _new_assistant_message_item(self, message_text: str) -> ResponsesAssistantMessageItemResource:
-        item_content = self._build_item_content_output_text(message_text)
-        return ResponsesAssistantMessageItemResource(
-            id=self._context.id_generator.generate_message_id(), status="completed", content=[item_content]
-        )
-
-    def transform_output_for_response(self, response: AgentRunResponse) -> OpenAIResponse:
-        """Build an OpenAIResponse capturing all supported content types.
-
-        Previously this method only emitted text message items. We now also capture:
-          - FunctionCallContent  -> function_call output item
-          - FunctionResultContent -> function_call_output item
-
-        to stay aligned with the streaming converter so no output is lost.
-
-        :param response: The AgentRunResponse from the agent framework.
-        :type response: AgentRunResponse
-
-        :return: The constructed OpenAIResponse.
-        :rtype: OpenAIResponse
-        """
-        logger.debug("Transforming non-streaming response (messages=%d)", len(response.messages))
-        self._ensure_response_started()
-
-        completed_items: List[dict] = []
-
-        for i, message in enumerate(response.messages):
-            logger.debug("Non-streaming: processing message index=%d type=%s", i, type(message).__name__)
-            contents = getattr(message, "contents", None)
-            if not contents:
-                continue
-            for j, content in enumerate(contents):
-                logger.debug("  content index=%d in message=%d type=%s", j, i, type(content).__name__)
-                self._append_content_item(content, completed_items)
-
-        response_data = self._construct_response_data(completed_items)
-        openai_response = OpenAIResponse(response_data)
-        logger.info(
-            "OpenAIResponse built (id=%s, items=%d)",
-            self._response_id,
-            len(completed_items),
-        )
-        return openai_response
-
-    # ------------------------- helper append methods -------------------------
-
-    def _append_content_item(self, content: Any, sink: List[dict]) -> None:
-        """Dispatch a content object to the appropriate append helper.
-
-        Adding this indirection keeps the main transform method compact and makes it
-        simpler to extend with new content types later.
-
-        :param content: The content object to append.
-        :type content: Any
-        :param sink: The list to append the converted content dict to.
-        :type sink: List[dict]
-
-        :return: None
-        :rtype: None
-        """
-        if isinstance(content, TextContent):
-            self._append_text_content(content, sink)
-        elif isinstance(content, FunctionCallContent):
-            self._append_function_call_content(content, sink)
-        elif isinstance(content, FunctionResultContent):
-            self._append_function_result_content(content, sink)
-        else:
-            logger.debug("unsupported content type skipped: %s", type(content).__name__)
-
-    def _append_text_content(self, content: TextContent, sink: List[dict]) -> None:
-        text_value = getattr(content, "text", None)
-        if not text_value:
-            return
-        item_id = self._context.id_generator.generate_message_id()
-        sink.append(
-            {
-                "id": item_id,
-                "type": "message",
-                "status": "completed",
-                "role": "assistant",
-                "content": [
-                    {
-                        "type": "output_text",
-                        "text": text_value,
-                        "annotations": [],
-                        "logprobs": [],
-                    }
-                ],
-            }
-        )
-        logger.debug("    added message item id=%s text_len=%d", item_id, len(text_value))
-
-    def _append_function_call_content(self, content: FunctionCallContent, sink: List[dict]) -> None:
-        name = getattr(content, "name", "") or ""
-        arguments = getattr(content, "arguments", "")
-        if not isinstance(arguments, str):
-            try:
-                arguments = json.dumps(arguments)
-            except Exception:  # pragma: no cover - fallback # pylint: disable=broad-exception-caught
-                arguments = str(arguments)
-        call_id = getattr(content, "call_id", None) or self._context.id_generator.generate_function_call_id()
-        func_item_id = self._context.id_generator.generate_function_call_id()
-        sink.append(
-            {
-                "id": func_item_id,
-                "type": "function_call",
-                "status": "completed",
-                "call_id": call_id,
-                "name": name,
-                "arguments": arguments or "",
-            }
-        )
-        logger.debug(
-            "    added function_call item id=%s call_id=%s name=%s args_len=%d",
-            func_item_id,
-            call_id,
-            name,
-            len(arguments or ""),
-        )
-
-    def _append_function_result_content(self, content: FunctionResultContent, sink: List[dict]) -> None:
-        # Coerce the function result into a simple display string.
-        result = []
-        raw = getattr(content, "result", None)
-        if isinstance(raw, str):
-            result = [raw]
-        elif isinstance(raw, list):
-            for item in raw:
-                result.append(self._coerce_result_text(item))   # type: ignore
-        call_id = getattr(content, "call_id", None) or ""
-        func_out_id = self._context.id_generator.generate_function_output_id()
-        sink.append(
-            {
-                "id": func_out_id,
-                "type": "function_call_output",
-                "status": "completed",
-                "call_id": call_id,
-                "output": json.dumps(result) if len(result) > 0 else "",
-            }
-        )
-        logger.debug(
-            "added function_call_output item id=%s call_id=%s output_len=%d",
-            func_out_id,
-            call_id,
-            len(result),
-        )
-
-    # ------------- simple normalization helper -------------------------
-    def _coerce_result_text(self, value: Any) -> str | dict:
-        """
-        Return a string if value is already str or a TextContent-like object; else str(value).
-
-        :param value: The value to coerce.
-        :type value: Any
-
-        :return: The coerced string or dict.
-        :rtype: str | dict
-        """
-        if value is None:
-            return ""
-        if isinstance(value, str):
-            return value
-        # Direct TextContent instance
-        if isinstance(value, TextContent):
-            content_payload = {"type": "text", "text": getattr(value, "text", "")}
-            return content_payload
-
-        return ""
-
-    def _construct_response_data(self, output_items: List[dict]) -> dict:
-        agent_id = AgentIdGenerator.generate(self._context)
-
-        response_data = {
-            "object": "response",
-            "metadata": {},
-            "agent": agent_id,
-            "conversation": self._context.get_conversation_object(),
-            "type": "message",
-            "role": "assistant",
-            "temperature": Constants.DEFAULT_TEMPERATURE,
-            "top_p": Constants.DEFAULT_TOP_P,
-            "user": "",
-            "id": self._context.response_id,
-            "created_at": self._response_created_at,
-            "output": output_items,
-            "parallel_tool_calls": True,
-            "status": "completed",
-        }
-        return response_data
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/models/agent_framework_output_streaming_converter.py b/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/models/agent_framework_output_streaming_converter.py
deleted file mode 100644
index d9bc3199efb5..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/models/agent_framework_output_streaming_converter.py
+++ /dev/null
@@ -1,595 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-# pylint: disable=attribute-defined-outside-init,protected-access
-# mypy: disable-error-code="call-overload,assignment,arg-type"
-from __future__ import annotations
-
-import datetime
-import json
-import uuid
-from typing import Any, List, Optional, cast
-
-from agent_framework import AgentRunResponseUpdate, FunctionApprovalRequestContent, FunctionResultContent
-from agent_framework._types import (
-    ErrorContent,
-    FunctionCallContent,
-    TextContent,
-)
-
-from azure.ai.agentserver.core import AgentRunContext
-from azure.ai.agentserver.core.logger import get_logger
-from azure.ai.agentserver.core.models import (
-    Response as OpenAIResponse,
-    ResponseStreamEvent,
-)
-from azure.ai.agentserver.core.models.projects import (
-    FunctionToolCallItemResource,
-    FunctionToolCallOutputItemResource,
-    ItemContentOutputText,
-    ResponseCompletedEvent,
-    ResponseContentPartAddedEvent,
-    ResponseContentPartDoneEvent,
-    ResponseCreatedEvent,
-    ResponseErrorEvent,
-    ResponseFunctionCallArgumentsDeltaEvent,
-    ResponseFunctionCallArgumentsDoneEvent,
-    ResponseInProgressEvent,
-    ResponseOutputItemAddedEvent,
-    ResponseOutputItemDoneEvent,
-    ResponsesAssistantMessageItemResource,
-    ResponseTextDeltaEvent,
-    ResponseTextDoneEvent,
-)
-
-from .agent_id_generator import AgentIdGenerator
-
-logger = get_logger()
-
-
-class _BaseStreamingState:
-    """Base interface for streaming state handlers."""
-
-    def prework(self, ctx: Any) -> List[ResponseStreamEvent]:  # pylint: disable=unused-argument
-        return []
-
-    def convert_content(self, ctx: Any, content) -> List[ResponseStreamEvent]:  # pylint: disable=unused-argument
-        raise NotImplementedError
-
-    def afterwork(self, ctx: Any) -> List[ResponseStreamEvent]:  # pylint: disable=unused-argument
-        return []
-
-
-class _TextContentStreamingState(_BaseStreamingState):
-    """State handler for text and reasoning-text content during streaming."""
-
-    def __init__(self, context: AgentRunContext) -> None:
-        self.context = context
-        self.item_id = None
-        self.output_index = None
-        self.text_buffer = ""
-        self.text_part_started = False
-
-    def prework(self, ctx: Any) -> List[ResponseStreamEvent]:
-        events: List[ResponseStreamEvent] = []
-        if self.item_id is not None:
-            return events
-
-        # Start a new assistant message item (in_progress)
-        self.item_id = self.context.id_generator.generate_message_id()
-        self.output_index = ctx._next_output_index  # pylint: disable=protected-access
-        ctx._next_output_index += 1
-
-        message_item = ResponsesAssistantMessageItemResource(
-            id=self.item_id,
-            status="in_progress",
-            content=[],
-        )
-
-        events.append(
-            ResponseOutputItemAddedEvent(
-                sequence_number=ctx.next_sequence(),
-                output_index=self.output_index,
-                item=message_item,
-            )
-        )
-
-        if not self.text_part_started:
-            empty_part = ItemContentOutputText(text="", annotations=[], logprobs=[])
-            events.append(
-                ResponseContentPartAddedEvent(
-                    sequence_number=ctx.next_sequence(),
-                    item_id=self.item_id,
-                    output_index=self.output_index,
-                    content_index=0,
-                    part=empty_part,
-                )
-            )
-            self.text_part_started = True
-        return events
-
-    def convert_content(self, ctx: Any, content: TextContent) -> List[ResponseStreamEvent]:
-        events: List[ResponseStreamEvent] = []
-        if isinstance(content, TextContent):
-            delta = content.text or ""
-        else:
-            delta = getattr(content, "text", None) or getattr(content, "reasoning", "") or ""
-
-        # buffer accumulated text
-        self.text_buffer += delta
-
-        # emit delta event for text
-        assert self.item_id is not None, "Text state not initialized: missing item_id"
-        assert self.output_index is not None, "Text state not initialized: missing output_index"
-        events.append(
-            ResponseTextDeltaEvent(
-                sequence_number=ctx.next_sequence(),
-                item_id=self.item_id,
-                output_index=self.output_index,
-                content_index=0,
-                delta=delta,
-            )
-        )
-        return events
-
-    def afterwork(self, ctx: Any) -> List[ResponseStreamEvent]:
-        events: List[ResponseStreamEvent] = []
-        if not self.item_id:
-            return events
-
-        full_text = self.text_buffer
-        assert self.item_id is not None and self.output_index is not None
-        events.append(
-            ResponseTextDoneEvent(
-                sequence_number=ctx.next_sequence(),
-                item_id=self.item_id,
-                output_index=self.output_index,
-                content_index=0,
-                text=full_text,
-            )
-        )
-        final_part = ItemContentOutputText(text=full_text, annotations=[], logprobs=[])
-        events.append(
-            ResponseContentPartDoneEvent(
-                sequence_number=ctx.next_sequence(),
-                item_id=self.item_id,
-                output_index=self.output_index,
-                content_index=0,
-                part=final_part,
-            )
-        )
-        completed_item = ResponsesAssistantMessageItemResource(
-            id=self.item_id, status="completed", content=[final_part]
-        )
-        events.append(
-            ResponseOutputItemDoneEvent(
-                sequence_number=ctx.next_sequence(),
-                output_index=self.output_index,
-                item=completed_item,
-            )
-        )
-        ctx._last_completed_text = full_text  # pylint: disable=protected-access
-        # store for final response
-        ctx._completed_output_items.append(
-            {
-                "id": self.item_id,
-                "type": "message",
-                "status": "completed",
-                "content": [
-                    {
-                        "type": "output_text",
-                        "text": full_text,
-                        "annotations": [],
-                        "logprobs": [],
-                    }
-                ],
-                "role": "assistant",
-            }
-        )
-        # reset state
-        self.item_id = None
-        self.output_index = None
-        self.text_buffer = ""
-        self.text_part_started = False
-        return events
-
-
-class _FunctionCallStreamingState(_BaseStreamingState):
-    """State handler for function_call content during streaming."""
-
-    def __init__(self, context: AgentRunContext) -> None:
-        self.context = context
-        self.item_id = None
-        self.output_index = None
-        self.call_id = None
-        self.name = None
-        self.args_buffer = ""
-        self.requires_approval = False
-        self.approval_request_id: str | None = None
-
-    def prework(self, ctx: Any) -> List[ResponseStreamEvent]:
-        events: List[ResponseStreamEvent] = []
-        if self.item_id is not None:
-            return events
-        # initialize function-call item
-        self.item_id = self.context.id_generator.generate_function_call_id()
-        self.output_index = ctx._next_output_index
-        ctx._next_output_index += 1
-
-        self.call_id = self.call_id or str(uuid.uuid4())
-        function_item = FunctionToolCallItemResource(
-            id=self.item_id,
-            status="in_progress",
-            call_id=self.call_id,
-            name=self.name or "",
-            arguments="",
-        )
-        events.append(
-            ResponseOutputItemAddedEvent(
-                sequence_number=ctx.next_sequence(),
-                output_index=self.output_index,
-                item=function_item,
-            )
-        )
-        return events
-
-    def convert_content(self, ctx: Any, content: FunctionCallContent) -> List[ResponseStreamEvent]:
-        events: List[ResponseStreamEvent] = []
-        # record identifiers (once available)
-        self.name = getattr(content, "name", None) or self.name or ""
-        self.call_id = getattr(content, "call_id", None) or self.call_id or str(uuid.uuid4())
-
-        args_delta = content.arguments if isinstance(content.arguments, str) else json.dumps(content.arguments)
-        args_delta = args_delta or ""
-        self.args_buffer += args_delta
-        assert self.item_id is not None and self.output_index is not None
-        for ch in args_delta:
-            events.append(
-                ResponseFunctionCallArgumentsDeltaEvent(
-                    sequence_number=ctx.next_sequence(),
-                    item_id=self.item_id,
-                    output_index=self.output_index,
-                    delta=ch,
-                )
-            )
-
-        # finalize if arguments are detected to be complete
-        is_done = bool(
-            getattr(content, "is_final", False)
-            or getattr(content, "final", False)
-            or getattr(content, "done", False)
-            or getattr(content, "arguments_final", False)
-            or getattr(content, "arguments_done", False)
-            or getattr(content, "finish", False)
-        )
-        if not is_done and self.args_buffer:
-            try:
-                json.loads(self.args_buffer)
-                is_done = True
-            except Exception:  # pylint: disable=broad-exception-caught
-                pass
-
-        if is_done:
-            events.append(
-                ResponseFunctionCallArgumentsDoneEvent(
-                    sequence_number=ctx.next_sequence(),
-                    item_id=self.item_id,
-                    output_index=self.output_index,
-                    arguments=self.args_buffer,
-                )
-            )
-            events.extend(self.afterwork(ctx))
-        return events
-
-    def afterwork(self, ctx: Any) -> List[ResponseStreamEvent]:
-        events: List[ResponseStreamEvent] = []
-        if not self.item_id:
-            return events
-        assert self.call_id is not None
-        done_item = FunctionToolCallItemResource(
-            id=self.item_id,
-            status="completed",
-            call_id=self.call_id,
-            name=self.name or "",
-            arguments=self.args_buffer,
-        )
-        assert self.output_index is not None
-        events.append(
-            ResponseOutputItemDoneEvent(
-                sequence_number=ctx.next_sequence(),
-                output_index=self.output_index,
-                item=done_item,
-            )
-        )
-        # store for final response
-        ctx._completed_output_items.append(
-            {
-                "id": self.item_id,
-                "type": "function_call",
-                "call_id": self.call_id,
-                "name": self.name or "",
-                "arguments": self.args_buffer,
-                "status": "requires_approval" if self.requires_approval else "completed",
-                "requires_approval": self.requires_approval,
-                "approval_request_id": self.approval_request_id,
-            }
-        )
-        # reset
-        self.item_id = None
-        self.output_index = None
-        self.args_buffer = ""
-        self.call_id = None
-        self.name = None
-        self.requires_approval = False
-        self.approval_request_id = None
-        return events
-
-
-class _FunctionCallOutputStreamingState(_BaseStreamingState):
-    """Handles function_call_output items streaming (non-chunked simple output)."""
-
-    def __init__(
-        self,
-        context: AgentRunContext,
-        call_id: Optional[str] = None,
-        output: Optional[list[str]] = None,
-    ) -> None:
-        # Avoid mutable default argument (Ruff B006)
-        self.context = context
-        self.item_id = None
-        self.output_index = None
-        self.call_id = call_id
-        self.output = output if output is not None else []
-
-    def prework(self, ctx: Any) -> List[ResponseStreamEvent]:
-        events: List[ResponseStreamEvent] = []
-        if self.item_id is not None:
-            return events
-        self.item_id = self.context.id_generator.generate_function_output_id()
-        self.output_index = ctx._next_output_index
-        ctx._next_output_index += 1
-
-        self.call_id = self.call_id or str(uuid.uuid4())
-        item = FunctionToolCallOutputItemResource(
-            id=self.item_id,
-            status="in_progress",
-            call_id=self.call_id,
-            output="",
-        )
-        events.append(
-            ResponseOutputItemAddedEvent(
-                sequence_number=ctx.next_sequence(),
-                output_index=self.output_index,
-                item=item,
-            )
-        )
-        return events
-
-    def convert_content(self, ctx: Any, content: Any) -> List[ResponseStreamEvent]:  # no delta events for now
-        events: List[ResponseStreamEvent] = []
-        # treat entire output as final
-        result = []
-        raw = getattr(content, "result", None)
-        if isinstance(raw, str):
-            result = [raw or self.output]
-        elif isinstance(raw, list):
-            for item in raw:
-                result.append(self._coerce_result_text(item))
-        self.output = json.dumps(result) if len(result) > 0 else ""
-
-        events.extend(self.afterwork(ctx))
-        return events
-
-    def _coerce_result_text(self, value: Any) -> str | dict:
-        """
-        Return a string if value is already str or a TextContent-like object; else str(value).
-
-        :param value: The value to coerce.
-        :type value: Any
-
-        :return: The coerced string or dict.
-        :rtype: str | dict
-        """
-        if value is None:
-            return ""
-        if isinstance(value, str):
-            return value
-        # Direct TextContent instance
-        if isinstance(value, TextContent):
-            content_payload = {"type": "text", "text": getattr(value, "text", "")}
-            return content_payload
-
-        return ""
-
-    def afterwork(self, ctx: Any) -> List[ResponseStreamEvent]:
-        events: List[ResponseStreamEvent] = []
-        if not self.item_id:
-            return events
-        # Ensure types conform: call_id must be str (guarantee non-None) and output is a single string
-        str_call_id = self.call_id or ""
-        single_output: str = cast(str, self.output[0]) if self.output else ""
-        done_item = FunctionToolCallOutputItemResource(
-            id=self.item_id,
-            status="completed",
-            call_id=str_call_id,
-            output=single_output,
-        )
-        assert self.output_index is not None
-        events.append(
-            ResponseOutputItemDoneEvent(
-                sequence_number=ctx.next_sequence(),
-                output_index=self.output_index,
-                item=done_item,
-            )
-        )
-        ctx._completed_output_items.append(
-            {
-                "id": self.item_id,
-                "type": "function_call_output",
-                "status": "completed",
-                "call_id": self.call_id,
-                "output": self.output,
-            }
-        )
-        self.item_id = None
-        self.output_index = None
-        return events
-
-
-class AgentFrameworkOutputStreamingConverter:
-    """Streaming converter using content-type-specific state handlers."""
-
-    def __init__(self, context: AgentRunContext) -> None:
-        self._context = context
-        # sequence numbers must start at 0 for first emitted event
-        self._sequence = 0
-        self._response_id = None
-        self._response_created_at = None
-        self._next_output_index = 0
-        self._last_completed_text = ""
-        self._active_state: Optional[_BaseStreamingState] = None
-        self._active_kind = None  # "text" | "function_call" | "error"
-        # accumulate completed output items for final response
-        self._completed_output_items: List[dict] = []
-
-    def _ensure_response_started(self) -> None:
-        if not self._response_id:
-            self._response_id = self._context.response_id
-        if not self._response_created_at:
-            self._response_created_at = int(datetime.datetime.now(datetime.timezone.utc).timestamp())
-
-    def next_sequence(self) -> int:
-        self._sequence += 1
-        return self._sequence
-
-    def _switch_state(self, kind: str) -> List[ResponseStreamEvent]:
-        events: List[ResponseStreamEvent] = []
-        if self._active_state and self._active_kind != kind:
-            events.extend(self._active_state.afterwork(self))
-            self._active_state = None
-            self._active_kind = None
-
-        if self._active_state is None:
-            if kind == "text":
-                self._active_state = _TextContentStreamingState(self._context)
-            elif kind == "function_call":
-                self._active_state = _FunctionCallStreamingState(self._context)
-            elif kind == "function_call_output":
-                self._active_state = _FunctionCallOutputStreamingState(self._context)
-            else:
-                self._active_state = None
-            self._active_kind = kind
-            if self._active_state:
-                events.extend(self._active_state.prework(self))
-        return events
-
-    def transform_output_for_streaming(self, update: AgentRunResponseUpdate) -> List[ResponseStreamEvent]:
-        logger.debug(
-            "Transforming streaming update with %d contents",
-            len(update.contents) if getattr(update, "contents", None) else 0,
-        )
-        self._ensure_response_started()
-        events: List[ResponseStreamEvent] = []
-
-        if getattr(update, "contents", None):
-            for i, content in enumerate(update.contents):
-                logger.debug("Processing content %d: %s", i, type(content))
-                if isinstance(content, TextContent):
-                    events.extend(self._switch_state("text"))
-                    if isinstance(self._active_state, _TextContentStreamingState):
-                        events.extend(self._active_state.convert_content(self, content))
-                elif isinstance(content, FunctionCallContent):
-                    events.extend(self._switch_state("function_call"))
-                    if isinstance(self._active_state, _FunctionCallStreamingState):
-                        events.extend(self._active_state.convert_content(self, content))
-                elif isinstance(content, FunctionResultContent):
-                    events.extend(self._switch_state("function_call_output"))
-                    if isinstance(self._active_state, _FunctionCallOutputStreamingState):
-                        call_id = getattr(content, "call_id", None)
-                        if call_id:
-                            self._active_state.call_id = call_id
-                        events.extend(self._active_state.convert_content(self, content))
-                elif isinstance(content, FunctionApprovalRequestContent):
-                    events.extend(self._switch_state("function_call"))
-                    if isinstance(self._active_state, _FunctionCallStreamingState):
-                        self._active_state.requires_approval = True
-                        self._active_state.approval_request_id = getattr(content, "id", None)
-                        events.extend(self._active_state.convert_content(self, content.function_call))
-                elif isinstance(content, ErrorContent):
-                    # errors are stateless; flush current state and emit error
-                    events.extend(self._switch_state("error"))
-                    events.append(
-                        ResponseErrorEvent(
-                            sequence_number=self.next_sequence(),
-                            code=getattr(content, "error_code", None) or "server_error",
-                            message=getattr(content, "message", None) or "An error occurred",
-                            param="",
-                        )
-                    )
-        return events
-
-    def finalize_last_content(self) -> List[ResponseStreamEvent]:
-        events: List[ResponseStreamEvent] = []
-        if self._active_state:
-            events.extend(self._active_state.afterwork(self))
-            self._active_state = None
-            self._active_kind = None
-        return events
-
-    def build_response(self, status: str) -> OpenAIResponse:
-        self._ensure_response_started()
-        agent_id = AgentIdGenerator.generate(self._context)
-        response_data = {
-            "object": "response",
-            "agent_id": agent_id,
-            "id": self._response_id,
-            "status": status,
-            "created_at": self._response_created_at,
-        }
-        if status == "completed" and self._completed_output_items:
-            response_data["output"] = self._completed_output_items
-        return OpenAIResponse(response_data)
-
-    # High-level helpers to emit lifecycle events for streaming
-    def initial_events(self) -> List[ResponseStreamEvent]:
-        """
-        Emit ResponseCreatedEvent and an initial ResponseInProgressEvent.
-
-        :return: List of initial response stream events.
-        :rtype: List[ResponseStreamEvent]
-        """
-        self._ensure_response_started()
-        events: List[ResponseStreamEvent] = []
-        created_response = self.build_response(status="in_progress")
-        events.append(
-            ResponseCreatedEvent(
-                sequence_number=self.next_sequence(),
-                response=created_response,
-            )
-        )
-        events.append(
-            ResponseInProgressEvent(
-                sequence_number=self.next_sequence(),
-                response=self.build_response(status="in_progress"),
-            )
-        )
-        return events
-
-    def completion_events(self) -> List[ResponseStreamEvent]:
-        """
-        Finalize any active content and emit a single ResponseCompletedEvent.
-
-        :return: List of completion response stream events.
-        :rtype: List[ResponseStreamEvent]
-        """
-        self._ensure_response_started()
-        events: List[ResponseStreamEvent] = []
-        events.extend(self.finalize_last_content())
-        completed_response = self.build_response(status="completed")
-        events.append(
-            ResponseCompletedEvent(
-                sequence_number=self.next_sequence(),
-                response=completed_response,
-            )
-        )
-        return events
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/models/agent_id_generator.py b/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/models/agent_id_generator.py
deleted file mode 100644
index da4045898a5e..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/models/agent_id_generator.py
+++ /dev/null
@@ -1,44 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-"""Helper utilities for constructing AgentId model instances.
-
-Centralizes logic for safely building a `models.AgentId` from a request agent
-object. We intentionally do not allow overriding the generated model's fixed
-`type` literal ("agent_id"). If the provided object lacks a name, `None` is
-returned so callers can decide how to handle absence.
-"""
-
-from __future__ import annotations
-
-from typing import Optional
-
-from azure.ai.agentserver.core import AgentRunContext
-from azure.ai.agentserver.core.models import projects
-
-
-class AgentIdGenerator:
-    @staticmethod
-    def generate(context: AgentRunContext) -> Optional[projects.AgentId]:
-        """
-        Builds an AgentId model from the request agent object in the provided context.
-
-        :param context: The AgentRunContext containing the request.
-        :type context: AgentRunContext
-
-        :return: The constructed AgentId model, or None if the request lacks an agent name.
-        :rtype: Optional[projects.AgentId]
-        """
-        agent = context.request.get("agent")
-        if not agent:
-            return None
-
-        agent_id = projects.AgentId(
-            {
-                "type": agent.type,
-                "name": agent.name,
-                "version": agent.version,
-            }
-        )
-
-        return agent_id
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/models/constants.py b/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/models/constants.py
deleted file mode 100644
index 859e115e425e..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/models/constants.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-class Constants:
-    # streaming configuration
-    # Environment variable name to control idle timeout for streaming updates (seconds)
-    AGENTS_ADAPTER_STREAM_TIMEOUT_S = "AGENTS_ADAPTER_STREAM_TIMEOUT_S"
-    # Default idle timeout (seconds) when env var or request override not provided
-    DEFAULT_STREAM_TIMEOUT_S = 300.0
-
-    # model defaults
-    DEFAULT_TEMPERATURE = 1.0
-    DEFAULT_TOP_P = 1.0
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/py.typed b/sdk/agentserver/azure-ai-agentserver-agentframework/azure/ai/agentserver/agentframework/py.typed
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/cspell.json b/sdk/agentserver/azure-ai-agentserver-agentframework/cspell.json
deleted file mode 100644
index 116acbc87af3..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/cspell.json
+++ /dev/null
@@ -1,16 +0,0 @@
-{
-    "ignoreWords": [
-      "azureai",
-      "fstring",
-      "mslearn",
-      "envtemplate",
-      "pysort",
-      "redef"
-    ],
-    "ignorePaths": [
-      "*.csv",
-      "*.json",
-      "*.rst",
-      "*/samples/*"
-    ]
-  }
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/dev_requirements.txt b/sdk/agentserver/azure-ai-agentserver-agentframework/dev_requirements.txt
deleted file mode 100644
index 6c036d7fb4e0..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/dev_requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
--e ../../../eng/tools/azure-sdk-tools
-../azure-ai-agentserver-core
-python-dotenv
-pywin32; sys_platform == 'win32'
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/pyproject.toml b/sdk/agentserver/azure-ai-agentserver-agentframework/pyproject.toml
deleted file mode 100644
index a76ebff78bed..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/pyproject.toml
+++ /dev/null
@@ -1,67 +0,0 @@
-[project]
-name = "azure-ai-agentserver-agentframework"
-# Keep using your _version.py; setuptools reads it via [tool.setuptools.dynamic] below
-dynamic = ["version", "readme"]
-description = "Agents server adapter for Azure AI"
-requires-python = ">=3.10"
-authors = [
-  { name = "Microsoft Corporation", email = "azpysdkhelp@microsoft.com" },
-]
-license = "MIT"
-classifiers = [
-    "Development Status :: 7 - Inactive",
-    "Programming Language :: Python",
-    "Programming Language :: Python :: 3 :: Only",
-    "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.10",
-    "Programming Language :: Python :: 3.11",
-    "Programming Language :: Python :: 3.12",
-    "Programming Language :: Python :: 3.13",
-]
-keywords = ["azure", "azure sdk"]
-
-dependencies = [
-    "azure-ai-agentserver-core",
-    "agent-framework-azure-ai==1.0.0b251007",
-    "agent-framework-core==1.0.0b251007",
-    "opentelemetry-exporter-otlp-proto-grpc>=1.36.0",
-]
-
-[build-system]
-requires = ["setuptools>=69", "wheel"]
-build-backend = "setuptools.build_meta"
-
-[tool.setuptools.packages.find]
-exclude = [
-    "tests*",
-    "samples*",
-    "doc*",
-    "azure",
-    "azure.ai",
-]
-
-[tool.setuptools.dynamic]
-version = { attr = "azure.ai.agentserver.agentframework._version.VERSION" }
-readme = { file = ["README.md"], content-type = "text/markdown" }
-
-[tool.setuptools.package-data]
-pytyped = ["py.typed"]
-
-[tool.ruff]
-line-length = 120
-target-version = "py311"
-lint.select = ["E", "F", "B", "I"]   # E=pycodestyle errors, F=Pyflakes, B=bugbear, I=import sort
-lint.ignore = []
-fix = false
-
-[tool.ruff.lint.isort]
-known-first-party = ["azure.ai.agentserver.agentframework"]
-combine-as-imports = true
-
-[tool.azure-sdk-build]
-breaking = false   # incompatible python version
-pyright = false
-verifytypes = false   # incompatible python version for -core
-verify_keywords = false
-mindependency = false  # depends on -core package
-whl_no_aio = false
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/pyrightconfig.json b/sdk/agentserver/azure-ai-agentserver-agentframework/pyrightconfig.json
deleted file mode 100644
index 55c6c3a91957..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/pyrightconfig.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-    "reportAttributeAccessIssue": "warning"
-}
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/basic_simple/.envtemplate b/sdk/agentserver/azure-ai-agentserver-agentframework/samples/basic_simple/.envtemplate
deleted file mode 100644
index bd646f163bb7..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/basic_simple/.envtemplate
+++ /dev/null
@@ -1,3 +0,0 @@
-AZURE_OPENAI_ENDPOINT=https://<endpoint-name>.cognitiveservices.azure.com/
-OPENAI_API_VERSION=2025-03-01-preview
-AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=<deployment-name>
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/basic_simple/README.md b/sdk/agentserver/azure-ai-agentserver-agentframework/samples/basic_simple/README.md
deleted file mode 100644
index 64f19cefcbcb..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/basic_simple/README.md
+++ /dev/null
@@ -1,46 +0,0 @@
-pip install -e src/adapter/python
-# Agent Framework Sample
-
-This sample demonstrates how to use the agents hosting adapter with Microsoft Agent Framework.
-
-## Prerequisites
-
-> **Azure sign-in:** Run `az login` before starting the sample so `DefaultAzureCredential` can acquire a CLI token.
-
-### Environment Variables
-
-Copy `.envtemplate` to `.env` and supply:
-
-```
-AZURE_OPENAI_ENDPOINT=https://<endpoint-name>.cognitiveservices.azure.com/
-OPENAI_API_VERSION=2025-03-01-preview
-AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=<deployment-name>
-```
-
-## Running the Sample
-
-Follow these steps from this folder:
-
-1) Start the agent server (defaults to 0.0.0.0:8088):
-
-```bash
-python minimal_example.py
-```
-
-2) Send a non-streaming request (returns a single JSON response):
-
-```bash
-curl -sS \
-  -H "Content-Type: application/json" \
-  -X POST http://localhost:8088/responses \
-  -d "{\"input\":\"What's the weather like in Seattle?\",\"stream\":false}"
-```
-
-3) Send a streaming request (server-sent events). Use -N to disable curl buffering:
-
-```bash
-curl -N \
-  -H "Content-Type: application/json" \
-  -X POST http://localhost:8088/responses \
-  -d "{\"input\":\"What's the weather like in New York?\",\"stream\":true}"
-```
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/basic_simple/minimal_example.py b/sdk/agentserver/azure-ai-agentserver-agentframework/samples/basic_simple/minimal_example.py
deleted file mode 100644
index 15afa52f42b8..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/basic_simple/minimal_example.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# Copyright (c) Microsoft. All rights reserved.
-
-from random import randint
-from typing import Annotated
-
-from agent_framework.azure import AzureOpenAIChatClient
-from azure.identity import DefaultAzureCredential
-from dotenv import load_dotenv
-
-from azure.ai.agentserver.agentframework import from_agent_framework
-
-load_dotenv()
-
-
-def get_weather(
-    location: Annotated[str, "The location to get the weather for."],
-) -> str:
-    """Get the weather for a given location."""
-    conditions = ["sunny", "cloudy", "rainy", "stormy"]
-    return f"The weather in {location} is {conditions[randint(0, 3)]} with a high of {randint(10, 30)}°C."
-
-
-def main() -> None:
-    agent = AzureOpenAIChatClient(credential=DefaultAzureCredential()).create_agent(
-        instructions="You are a helpful weather agent.",
-        tools=get_weather,
-    )
-
-    from_agent_framework(agent).run()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/basic_simple/requirements.txt b/sdk/agentserver/azure-ai-agentserver-agentframework/samples/basic_simple/requirements.txt
deleted file mode 100644
index c044abf99eb1..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/basic_simple/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-python-dotenv>=1.0.0
-azure-identity
-agent-framework-azure-ai
-azure-ai-agentserver-core
-azure-ai-agentserver-agentframework
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_apikey/.envtemplate b/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_apikey/.envtemplate
deleted file mode 100644
index 4130d12b4da8..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_apikey/.envtemplate
+++ /dev/null
@@ -1,4 +0,0 @@
-AZURE_OPENAI_ENDPOINT=https://<endpoint-name>.cognitiveservices.azure.com/
-OPENAI_API_VERSION=2025-03-01-preview
-AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=<deployment-name>
-GITHUB_TOKEN=<your-github-token>
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_apikey/README.md b/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_apikey/README.md
deleted file mode 100644
index 3a033eb45c43..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_apikey/README.md
+++ /dev/null
@@ -1,61 +0,0 @@
-pip install -e src/adapter/python
-## Agent Framework MCP GitHub Token Sample
-
-This sample mirrors the simpler `mcp_simple` Agent Framework sample but adds an MCP server (GitHub) that requires a Bearer token (`GITHUB_TOKEN`). The token is injected as an HTTP Authorization header when constructing the `MCPStreamableHTTPTool`.
-
-### Script
-
-- `mcp_apikey.py` – Creates a `ChatAgent` configured with an `AzureOpenAIChatClient` and a GitHub MCP tool, then serves it via the agents hosting adapter (`from_agent_framework(...).run_async()`).
-
-## Prerequisites
-
-> **Azure sign-in:** Run `az login` before starting the sample so `DefaultAzureCredential` can acquire a CLI token.
-
-### Environment Variables
-
-Copy `.envtemplate` to `.env` and supply:
-
-```
-AZURE_OPENAI_ENDPOINT=https://<endpoint-name>.cognitiveservices.azure.com/
-OPENAI_API_VERSION=2025-03-01-preview
-AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=<deployment-name>
-GITHUB_TOKEN=<your-github-token>
-```
-
-### GitHub Token Setup
-
-To obtain a GitHub token for the MCP server:
-
-1. Go to [GitHub Settings > Developer settings > Personal access tokens > Tokens (classic)](https://github.com/settings/tokens)
-1. Click "Generate new token" → "Generate new token (classic)"
-1. Select the minimum required scopes under the "repo" category. For this sample, the following scopes are sufficient:
-   - `public_repo` (Access public repositories)
-   - `repo:status` (Access commit statuses)
-   If you need access to private repositories, also select `repo` (Full control of private repositories).
-1. Click "Generate token"
-1. Copy the token immediately (you won't be able to see it again)
-1. Add it to your `.env` file as `GITHUB_TOKEN=<your-token>`
-
-### Run
-
-From this folder:
-
-```bash
-python mcp_apikey.py
-```
-
-### Test (non‑streaming example)
-
-```bash
-curl -X POST http://localhost:8088/responses \
-  -H "Content-Type: application/json" \
-  -d '{"input":"summarize the last change in <repo url>","stream":false}'
-```
-
-### Test (streaming example)
-
-```bash
-curl -X POST http://localhost:8088/responses \
-  -H "Content-Type: application/json" \
-  -d '{"input":"summarize the last change in <repo url>","stream":true}'
-```
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_apikey/mcp_apikey.py b/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_apikey/mcp_apikey.py
deleted file mode 100644
index 985d7fd01e0c..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_apikey/mcp_apikey.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright (c) Microsoft. All rights reserved.
-
-import asyncio
-import os
-
-from agent_framework import MCPStreamableHTTPTool
-from agent_framework.azure import AzureOpenAIChatClient
-from azure.identity import DefaultAzureCredential
-from dotenv import load_dotenv
-
-from azure.ai.agentserver.agentframework import from_agent_framework
-
-MCP_TOOL_NAME = "github"  # Expected tool name exposed by the GitHub MCP server
-MCP_TOOL_URL = "https://api.githubcopilot.com/mcp/"  # Base MCP server endpoint
-
-load_dotenv()
-
-
-async def main() -> None:
-    github_token = os.getenv("GITHUB_TOKEN")
-    if not github_token:
-        raise RuntimeError(
-            "GITHUB_TOKEN environment variable not set. Provide a GitHub token with MCP access."
-        )
-
-    agent = AzureOpenAIChatClient(credential=DefaultAzureCredential()).create_agent(
-        instructions="You are a helpful assistant that answers GitHub questions. Use only the exposed MCP tools.",
-        tools=MCPStreamableHTTPTool(
-            name=MCP_TOOL_NAME,
-            url=MCP_TOOL_URL,
-            headers={
-                "Authorization": f"Bearer {github_token}",
-            },
-        ),
-    )
-
-    async with agent:
-        await from_agent_framework(agent).run_async()
-
-
-if __name__ == "__main__":
-    asyncio.run(main())
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_apikey/requirements.txt b/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_apikey/requirements.txt
deleted file mode 100644
index c044abf99eb1..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_apikey/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-python-dotenv>=1.0.0
-azure-identity
-agent-framework-azure-ai
-azure-ai-agentserver-core
-azure-ai-agentserver-agentframework
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_simple/.envtemplate b/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_simple/.envtemplate
deleted file mode 100644
index bd646f163bb7..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_simple/.envtemplate
+++ /dev/null
@@ -1,3 +0,0 @@
-AZURE_OPENAI_ENDPOINT=https://<endpoint-name>.cognitiveservices.azure.com/
-OPENAI_API_VERSION=2025-03-01-preview
-AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=<deployment-name>
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_simple/README.md b/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_simple/README.md
deleted file mode 100644
index 672512c73052..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_simple/README.md
+++ /dev/null
@@ -1,339 +0,0 @@
-# Agent Framework MCP Simple Python Sample
-
-This sample demonstrates how to run a Microsoft Agent Framework `ChatAgent` that calls a Model Context Protocol (MCP) HTTP endpoint (Microsoft Learn MCP) using the Container Agents Adapter and the `AzureAIAgentClient` from the `agent-framework-azure-ai` package.
-
-## What It Shows
-- Creating an Agent Framework `ChatAgent` with an `AzureAIAgentClient`
-- Adding an MCP tool via `MCPStreamableHTTPTool`
-- Serving the agent over HTTP using the Container Agents Adapter (`from_agent_framework(...).run()`)
-- Handling both streaming and non‑streaming response modes (client controlled via the `stream` flag in the request body)
-
-## File Overview
-- `mcp_simple.py` – Agent factory + server bootstrap. Loads `.env` relative to its location.
-- `.env` – Local environment file with Azure AI project configuration variables.
-
-## Prerequisites
-
-> **Azure sign-in:** Run `az login` before starting the sample so `DefaultAzureCredential` can acquire a CLI token.
-
-Packages actually imported by `simple-mcp.py`:
-- agent-framework-azure-ai (published package with Agent Framework client + MCP support)
-- agents_adapter
-- azure-identity
-- python-dotenv
-
-Install from PyPI (from the repo root: `container_agents/`):
-```bash
-pip install agent-framework-azure-ai azure-identity python-dotenv
-curl -sS \
-# Agent Framework MCP Simple Python Sample
-
-This sample demonstrates how to run a Microsoft Agent Framework `ChatAgent` that calls a Model Context Protocol (MCP) HTTP endpoint (Microsoft Learn MCP) using the agentserver adapter and the `AzureOpenAIChatClient` from the `agent-framework` package.
-
-## What It Shows
-
-- Creating an Agent Framework `ChatAgent` with an `AzureOpenAIChatClient`
-- Adding an MCP tool via `MCPStreamableHTTPTool`
-- Serving the agent over HTTP using the agentserver adapter (`from_agent_framework(...).run()`)
-- Handling both streaming and non‑streaming response modes (client controlled via the `stream` flag in the request body)
-
-## File Overview
-
-- `mcp_simple.py` – Agent factory + server bootstrap. Loads `.env` relative to its location.
-- `.env` – Local environment file with Azure AI project configuration variables.
-
-## Prerequisites
-
-> **Azure sign-in:** Run `az login` before starting the sample so `DefaultAzureCredential` can acquire a CLI token.
-
-### Install Dependencies
-
-Initialize a virtual environment and then install dependencies:
-
-```bash
-pip install -r ./requirements.txt --pre
-```
-
-### Environment Variables
-
-Copy `.envtemplate` to `.env` and supply:
-
-```
-AZURE_OPENAI_ENDPOINT=https://<endpoint-name>.cognitiveservices.azure.com/
-OPENAI_API_VERSION=2025-03-01-preview
-AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=<deployment-name>
-```
-
-## Running the Server
-
-From this folder:
-
-```bash
-python mcp_simple.py
-```
-
-## Making Requests
-
-Non‑streaming:
-
-```bash
-curl -sS \
-    -H "Content-Type: application/json" \
-    -X POST http://localhost:8088/responses \
-    -d "{\"input\":\"How do I create an Azure Storage Account using the Azure CLI?\",\"stream\":false}"
-```
-
-Streaming (Server‑Sent Events, keep `-N` to avoid curl buffering):
-
-```bash
-curl -sS \
-    -H "Content-Type: application/json" \
-    -X POST http://localhost:8088/responses  \
-    -d "{\"input\":\"What is Microsoft Semantic Kernel in brief?\",\"stream\":true}"
-```
-
-[comment]: # ( cspell:ignore mult ained )
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 51, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " traditional"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 52, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " programming"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 53, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": "."}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 54, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " Semantic"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 55, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " Kernel"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 56, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " allows"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 57, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " developers"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 58, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " to"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 59, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " combine"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 60, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " natural"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 61, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " language"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 62, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " processing"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 63, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " capabilities"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 64, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " with"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 65, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " conventional"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 66, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " code"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 67, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " to"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 68, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " create"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 69, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " AI"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 70, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": "-based"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 71, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " solutions"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 72, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " that"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 73, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " include"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 74, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " memory"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 75, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " management"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 76, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": ","}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 77, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " complex"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 78, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " workflows"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 79, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": ","}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 80, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " embeddings"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 81, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": ","}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 82, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " and"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 83, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " intelligent"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 84, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " decision"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 85, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": "-making"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 86, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " features"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 87, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": "."}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 88, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " Its"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 89, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " extens"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 90, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": "ible"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 91, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " and"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 92, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " modular"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 93, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " design"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 94, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " supports"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 95, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " the"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 96, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " creation"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 97, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " of"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 98, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " complex"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 99, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": ","}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 100, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " mult"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 101, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": "iste"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 102, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": "p"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 103, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " pipelines"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 104, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " that"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 105, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " take"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 106, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " advantage"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 107, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " of"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 108, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " the"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 109, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " power"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 110, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " of"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 111, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " L"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 112, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": "LM"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 113, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": "s"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 114, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " while"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 115, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " allowing"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 116, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " fine"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 117, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": "-gr"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 118, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": "ained"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 119, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " control"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 120, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " for"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 121, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": " developers"}
-
-event: response.output_text.delta
-data: {"type": "response.output_text.delta", "sequence_number": 122, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "delta": "."}
-
-event: response.output_text.done
-data: {"type": "response.output_text.done", "sequence_number": 123, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "text": "Microsoft Semantic Kernel (SK) is an open-source software development kit (SDK) designed to help developers build AI applications by seamlessly integrating large language models (LLMs), such as OpenAI's GPT or Azure OpenAI Service, with traditional programming. Semantic Kernel allows developers to combine natural language processing capabilities with conventional code to create AI-based solutions that include memory management, complex workflows, embeddings, and intelligent decision-making features. Its extensible and modular design supports the creation of complex, multistep pipelines that take advantage of the power of LLMs while allowing fine-grained control for developers."}
-
-event: response.content_part.done
-data: {"type": "response.content_part.done", "sequence_number": 124, "item_id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "output_index": 0, "content_index": 0, "part": {"type": "output_text", "text": "Microsoft Semantic Kernel (SK) is an open-source software development kit (SDK) designed to help developers build AI applications by seamlessly integrating large language models (LLMs), such as OpenAI's GPT or Azure OpenAI Service, with traditional programming. Semantic Kernel allows developers to combine natural language processing capabilities with conventional code to create AI-based solutions that include memory management, complex workflows, embeddings, and intelligent decision-making features. Its extensible and modular design supports the creation of complex, multistep pipelines that take advantage of the power of LLMs while allowing fine-grained control for developers.", "annotations": []}}
-
-event: response.output_item.done
-data: {"type": "response.output_item.done", "sequence_number": 125, "output_index": 0, "item": {"type": "message", "role": "assistant", "id": "7c7115b0-b1b2-4682-9acd-5cb05a3c8123", "status": "completed", "content": [{"type": "output_text", "text": "Microsoft Semantic Kernel (SK) is an open-source software development kit (SDK) designed to help developers build AI applications by seamlessly integrating large language models (LLMs), such as OpenAI's GPT or Azure OpenAI Service, with traditional programming. Semantic Kernel allows developers to combine natural language processing capabilities with conventional code to create AI-based solutions that include memory management, complex workflows, embeddings, and intelligent decision-making features. Its extensible and modular design supports the creation of complex, multistep pipelines that take advantage of the power of LLMs while allowing fine-grained control for developers.", "annotations": []}]}}
-
-event: response.completed
-data: {"type": "response.completed", "sequence_number": 126, "response": {"metadata": {}, "temperature": 1.0, "top_p": 1.0, "user": "", "id": "41249d4a-f6e4-4a01-950b-b67e9c812a7b", "created_at": 1757651565, "output": [{"id": "08772107-2062-40ed-982e-704d685a84df", "type": "message", "role": "assistant", "status": "completed", "content": [{"type": "output_text", "text": "Microsoft Semantic Kernel (SK) is an open-source software development kit (SDK) designed to help developers build AI applications by seamlessly integrating large language models (LLMs), such as OpenAI's GPT or Azure OpenAI Service, with traditional programming. Semantic Kernel allows developers to combine natural language processing capabilities with conventional code to create AI-based solutions that include memory management, complex workflows, embeddings, and intelligent decision-making features. Its extensible and modular design supports the creation of complex, multistep pipelines that take advantage of the power of LLMs while allowing fine-grained control for developers.", "annotations": []}]}], "parallel_tool_calls": true, "status": "completed", "object": "response"}}
-```
-
-## Customization Ideas
-- Add additional MCP tools (multiple `MCPStreamableHTTPTool` instances in a list)
-- Combine MCP + local Python tool functions
-- Swap `AzureChatClient` for a different model provider client supported by Agent Framework
-
-## Troubleshooting
-- 401/403 errors: Check Azure AI project endpoint & deployment values in `.env` and ensure your Azure login or service principal credentials are valid
-- Name resolution / network errors: Verify the MCP endpoint URL is reachable (`curl https://learn.microsoft.com/api/mcp`)
-- Empty / slow responses: Ensure the Azure AI deployment name matches an active model deployment in the project and that the service has sufficient quota
-
-## Support
-For Agent Framework issues: https://github.com/microsoft/agent-framework
-
-For adapter issues, open an issue in this repository.
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_simple/mcp_simple.py b/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_simple/mcp_simple.py
deleted file mode 100644
index 6b59771fe0da..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_simple/mcp_simple.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# Copyright (c) Microsoft. All rights reserved.
-
-import asyncio
-
-from agent_framework import MCPStreamableHTTPTool
-from agent_framework.azure import AzureOpenAIChatClient
-from azure.identity import DefaultAzureCredential
-from dotenv import load_dotenv
-
-from azure.ai.agentserver.agentframework import from_agent_framework
-
-MCP_TOOL_NAME = "Microsoft Learn MCP"
-MCP_TOOL_URL = "https://learn.microsoft.com/api/mcp"
-
-load_dotenv()
-
-
-async def main() -> None:
-    agent = AzureOpenAIChatClient(credential=DefaultAzureCredential()).create_agent(
-        instructions="You are a helpful assistant that answers Microsoft documentation questions.",
-        tools=MCPStreamableHTTPTool(name=MCP_TOOL_NAME, url=MCP_TOOL_URL),
-    )
-
-    async with agent:
-        await from_agent_framework(agent).run_async()
-
-
-if __name__ == "__main__":
-    asyncio.run(main())
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_simple/requirements.txt b/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_simple/requirements.txt
deleted file mode 100644
index c044abf99eb1..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/mcp_simple/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-python-dotenv>=1.0.0
-azure-identity
-agent-framework-azure-ai
-azure-ai-agentserver-core
-azure-ai-agentserver-agentframework
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/simple_async/.envtemplate b/sdk/agentserver/azure-ai-agentserver-agentframework/samples/simple_async/.envtemplate
deleted file mode 100644
index 6bbdcc8dda37..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/simple_async/.envtemplate
+++ /dev/null
@@ -1,3 +0,0 @@
-AZURE_OPENAI_ENDPOINT=https://<endpoint-name>.cognitiveservices.azure.com/
-OPENAI_API_VERSION=2025-03-01-preview
-AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=<deployment-name>
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/simple_async/README.md b/sdk/agentserver/azure-ai-agentserver-agentframework/samples/simple_async/README.md
deleted file mode 100644
index b7124bd8b5da..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/simple_async/README.md
+++ /dev/null
@@ -1,45 +0,0 @@
-# Agent Framework Async Python Sample
-
-This sample demonstrates how to use the agents hosting adapter in an async implementation with Microsoft Agent Framework.
-
-## Prerequisites
-
-> **Azure sign-in:** Run `az login` before starting the sample so `DefaultAzureCredential` can acquire a CLI token.
-
-### Environment Variables
-
-Copy `.envtemplate` to `.env` and supply:
-
-```
-AZURE_OPENAI_ENDPOINT=https://<endpoint-name>.cognitiveservices.azure.com/
-OPENAI_API_VERSION=2025-03-01-preview
-AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=<deployment-name>
-```
-
-## Running the Sample
-
-Follow these steps from this folder:
-
-1) Start the agent server (defaults to 0.0.0.0:8088):
-
-```bash
-python minimal_async_example.py
-```
-
-2) Send a non-streaming request (returns a single JSON response):
-
-```bash
-curl -sS \
-  -H "Content-Type: application/json" \
-  -X POST http://localhost:8088/responses \
-  -d "{\"input\":\"What's the weather like in Seattle?\",\"stream\":false}"
-```
-
-3) Send a streaming request (server-sent events). Use -N to disable curl buffering:
-
-```bash
-curl -N \
-  -H "Content-Type: application/json" \
-  -X POST http://localhost:8088/responses \
-  -d "{\"input\":\"What's the weather like in New York?\",\"stream\":true}"
-```
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/simple_async/minimal_async_example.py b/sdk/agentserver/azure-ai-agentserver-agentframework/samples/simple_async/minimal_async_example.py
deleted file mode 100644
index 4c69c8afa84d..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/simple_async/minimal_async_example.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# Copyright (c) Microsoft. All rights reserved.
-
-import asyncio
-from random import randint
-from typing import Annotated
-
-from agent_framework.azure import AzureOpenAIChatClient
-from azure.identity import DefaultAzureCredential
-from dotenv import load_dotenv
-
-from azure.ai.agentserver.agentframework import from_agent_framework
-
-load_dotenv()
-
-
-def get_weather(
-    location: Annotated[str, "The location to get the weather for."],
-) -> str:
-    """Get the weather for a given location."""
-    conditions = ["sunny", "cloudy", "rainy", "stormy"]
-    return f"The weather in {location} is {conditions[randint(0, 3)]} with a high of {randint(10, 30)}°C."
-
-
-async def main() -> None:
-    agent = AzureOpenAIChatClient(credential=DefaultAzureCredential()).create_agent(
-        instructions="You are a helpful weather agent.",
-        tools=get_weather,
-    )
-
-    async with agent:
-        await from_agent_framework(agent).run_async()
-
-
-if __name__ == "__main__":
-    asyncio.run(main())
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/simple_async/requirements.txt b/sdk/agentserver/azure-ai-agentserver-agentframework/samples/simple_async/requirements.txt
deleted file mode 100644
index 1b446cdc0367..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/simple_async/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-python-dotenv
-azure-identity
-agent-framework-azure-ai
-azure-ai-agentserver-core
-azure-ai-agentserver-agentframework
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/workflow_agent_simple/.envtemplate b/sdk/agentserver/azure-ai-agentserver-agentframework/samples/workflow_agent_simple/.envtemplate
deleted file mode 100644
index 990182342fca..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/workflow_agent_simple/.envtemplate
+++ /dev/null
@@ -1,3 +0,0 @@
-AZURE_AI_PROJECT_ENDPOINT=<foundry-project-endpoint>
-AZURE_AI_MODEL_DEPLOYMENT_NAME=<model-deployment-name>
-AGENT_PROJECT_NAME=<agent-project-name-optional>
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/workflow_agent_simple/README.md b/sdk/agentserver/azure-ai-agentserver-agentframework/samples/workflow_agent_simple/README.md
deleted file mode 100644
index 59bb6b9f19ec..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/workflow_agent_simple/README.md
+++ /dev/null
@@ -1,287 +0,0 @@
-## Workflow Agent Reflection Sample (Python)
-
-This sample demonstrates how to wrap an Agent Framework workflow (with iterative review + improvement) as an agent using the Container Agents Adapter. It implements a "reflection" pattern consisting of two executors:
-
-- Worker: Produces an initial answer (and revised answers after feedback)
-- Reviewer: Evaluates the answer against quality criteria and either approves or returns constructive feedback
-
-The workflow cycles until the Reviewer approves the response. Only approved content is emitted externally (streamed the same way as a normal agent response). This pattern is useful for quality‑controlled assistance, gated tool use, evaluative chains, or iterative refinement.
-
-### Key Concepts Shown
-- `WorkflowBuilder` + `.as_agent()` to expose a workflow as a standard agent
-- Bidirectional edges enabling cyclical review (Worker ↔ Reviewer)
-- Structured output parsing (Pydantic model) for review feedback
-- Emitting `AgentRunUpdateEvent` to stream only approved messages
-- Managing pending requests and re‑submission with incorporated feedback
-
-File: `workflow_agent_simple.py`
-
----
-
-## Prerequisites
-
-> **Azure sign-in:** Run `az login` before starting the sample so `DefaultAzureCredential` can acquire a CLI token.
-
-Dependencies used by `workflow_agent_simple.py`:
-- agent-framework-azure-ai (published package with workflow abstractions)
-- agents_adapter
-- azure-identity (for `DefaultAzureCredential`)
-- python-dotenv (loads `.env` for local credentials)
-- pydantic (pulled transitively; listed for clarity)
-
-Install from PyPI (from the repo root: `container_agents/`):
-```bash
-pip install agent-framework-azure-ai azure-identity python-dotenv
-
-pip install -e src/adapter/python
-```
-
----
-
-## Additional Requirements
-
-1. Azure AI project with a model deployment (supports Microsoft hosted, Azure OpenAI, or custom models exposed via Azure AI Foundry).
-
----
-
-## Configuration
-
-Copy `.envtemplate` to `.env` and fill in real values:
-```
-AZURE_AI_PROJECT_ENDPOINT=<foundry-project-endpoint>
-AZURE_AI_MODEL_DEPLOYMENT_NAME=<model-deployment-name>
-AGENT_PROJECT_NAME=<agent-project-name-optional>
-```
-`AGENT_PROJECT_NAME` lets you override the default Azure AI agent project for this workflow; omit it to fall back to the SDK default.
-
----
-
-## Run the Workflow Agent
-
-From this folder:
-
-```bash
-python workflow_agent_simple.py
-```
-The server (via the adapter) will start on `0.0.0.0:8088` by default.
-
----
-
-## Send a Non‑Streaming Request
-
-```bash
-curl -sS \
-  -H "Content-Type: application/json" \
-  -X POST http://localhost:8088/runs \
-  -d '{"input":"Explain the concept of reflection in this workflow sample.","stream":false}'
-```
-
-Sample output (non‑streaming):
-
-```
-Processing 1 million files in parallel and writing their contents into a sorted output file can be a computationally and resource-intensive task. To handle it effectively, you can use Python with libraries like `concurrent.futures` for parallelism and `heapq` for the sorting and merging.
-
-Below is an example implementation:
-
-import os
-from concurrent.futures import ThreadPoolExecutor
-import heapq
-
-def read_file(file_path):
-    """Read the content of a single file and return it as a list of lines."""
-    with open(file_path, 'r') as file:
-        return file.readlines()
-
-def parallel_read_files(file_paths, max_workers=8):
-    """
-    Read files in parallel and return all the lines in memory.
-    :param file_paths: List of file paths to read.
-    :param max_workers: Number of worker threads to use for parallelism.
-    """
-    all_lines = []
-    with ThreadPoolExecutor(max_workers=max_workers) as executor:
-        # Submit tasks to read each file in parallel
-        results = executor.map(read_file, file_paths)
-        # Collect the results
-        for lines in results:
-            all_lines.extend(lines)
-    return all_lines
-
-def write_sorted_output(lines, output_file_path):
-    """
-    Write sorted lines to the output file.
-    :param lines: List of strings to be sorted and written.
-    :param output_file_path: File path to write the sorted result.
-    """
-    sorted_lines = sorted(lines)
-    with open(output_file_path, 'w') as output_file:
-        output_file.writelines(sorted_lines)
-
-def main(directory_path, output_file_path):
-    """
-    Main function to read files in parallel and write sorted output.
-    :param directory_path: Path to the directory containing input files.
-    :param output_file_path: File path to write the sorted output.
-    """
-    # Get a list of all the file paths in the given directory
-    file_paths = [os.path.join(directory_path, f) for f in os.listdir(directory_path) if os.path.isfile(os.path.join(directory_path, f))]
-    
-    print(f"Found {len(file_paths)} files. Reading files in parallel...")
-    
-    # Read all lines from the files in parallel
-    all_lines = parallel_read_files(file_paths)
-    
-    print(f"Total lines read: {len(all_lines)}. Sorting and writing to output file...")
-    
-    # Write the sorted lines to the output file
-    write_sorted_output(all_lines, output_file_path)
-    
-    print(f"Sorted output written to: {output_file_path}")
-
-if __name__ == "__main__":
-    # Replace these paths with the appropriate input directory and output file path
-    input_directory = "path/to/input/directory"  # Directory containing 1 million files
-    output_file = "path/to/output/sorted_output.txt"  # Output file path
-    
-    main(input_directory, output_file)
-
-### Key Features and Steps:
-
-1. **Parallel Reading with `ThreadPoolExecutor`**:
-   - Files are read in parallel using threads to improve I/O performance since reading many files is mostly I/O-bound.
-
-2. **Sorting and Writing**:
-   - Once all lines are aggregated into memory, they are sorted using Python's `sorted()` function and written to the output file in one go.
-
-3. **Handles Large Number of Files**:
-   - The program uses threads to manage the potentially massive number of files in parallel, saving time instead of processing them serially.
-
-### Considerations:
-- **Memory Usage**: This script reads all file contents into memory. If the total size of the files is too large, you may encounter memory issues. In such cases, consider processing the files in smaller chunks.
-- **Sorting**: For extremely large data, consider using an external/merge sort technique to handle sorting in smaller chunks.
-- **I/O Performance**: Ensure that your I/O subsystem and disk can handle the load.
-
-Let me know if you'd like an optimized version to handle larger datasets with limited memory!
-
-Usage (if provided): None
-```
-
----
-
-## Send a Streaming Request (Server-Sent Events)
-
-```bash
-curl -N \
-  -H "Content-Type: application/json" \
-  -X POST http://localhost:8088/runs \
-  -d '{"input":"How does the reviewer decide to approve?","stream":true}'
-```
-
-Sample output (streaming):
-
-```
-Here is a Python script that demonstrates parallel reading of 1 million files using `concurrent.futures` for parallelism and `heapq` to write the outputs to a sorted file. This approach ensures efficiency when dealing with such a large number of files.
-
-
-import os
-import heapq
-from concurrent.futures import ThreadPoolExecutor
-
-def read_file(file_path):
-    """
-    Read the content of a single file and return it as a list of lines.
-    """
-    with open(file_path, 'r') as file:
-        return file.readlines()
-
-def parallel_read_files(file_paths, max_workers=4):
-    """
-    Read multiple files in parallel.
-    """
-    all_lines = []
-    with ThreadPoolExecutor(max_workers=max_workers) as executor:
-        # Submit reading tasks to the thread pool
-        futures = [executor.submit(read_file, file_path) for file_path in file_paths]
-        
-        # Gather results as they are completed
-        for future in futures:
-            all_lines.extend(future.result())
-    
-    return all_lines
-
-def write_sorted_output(lines, output_file):
-    """
-    Write sorted lines to an output file.
-    """
-    sorted_lines = sorted(lines)
-    with open(output_file, 'w') as file:
-        file.writelines(sorted_lines)
-
-if __name__ == "__main__":
-    # Set the directory containing your input files
-    input_directory = 'path_to_your_folder_with_files'
-    
-    # Get the list of all input files
-    file_paths = [os.path.join(input_directory, f) for f in os.listdir(input_directory) if os.path.isfile(os.path.join(input_directory, f))]
-    
-    # Specify the number of threads for parallel processing
-    max_threads = 8  # Adjust according to your system's capabilities
-    
-    # Step 1: Read all files in parallel
-    print("Reading files in parallel...")
-    all_lines = parallel_read_files(file_paths, max_workers=max_threads)
-    
-    # Step 2: Write the sorted data to the output file
-    output_file = 'sorted_output.txt'
-    print(f"Writing sorted output to {output_file}...")
-    write_sorted_output(all_lines, output_file)
-    
-    print("Operation complete.")
-
-[comment]: # ( cspell:ignore pysort )
-
-### Key Points:
-1. **Parallel Read**: The reading of files is handled using `concurrent.futures.ThreadPoolExecutor`, allowing multiple files to be processed simultaneously.
-
-2. **Sorted Output**: After collecting all lines from the files, the `sorted()` function is used to sort the content in memory. This ensures that the final output file will have all data in sorted order.
-
-3. **Adjustable Parallelism**: The `max_threads` parameter can be modified to control the number of threads used for file reading. The value should match your system's capabilities for optimal performance.
-
-4. **Large Data Handling**: If the data from 1 million files is too large to fit into memory, consider using an external merge sort algorithm or a library like `pysort` for efficient external sorting.
-
-Let me know if you'd like improvements or adjustments for more specific scenarios!
-Final usage (if provided): None
-```
-
-> Only the final approved assistant content is emitted as normal output deltas; intermediate review feedback stays internal.
-
----
-
-## How the Reflection Loop Works
-1. User query enters the workflow (Worker start executor)
-2. Worker produces an answer with model call
-3. Reviewer evaluates using a structured schema (`feedback`, `approved`)
-4. If not approved: Worker augments context with feedback + regeneration instruction, then re‑answers
-5. Loop continues until `approved=True`
-6. Approved content is emitted as `AgentRunResponseUpdate` (streamed externally)
-
----
-
-## Troubleshooting
-| Issue | Resolution |
-|-------|------------|
-| `DefaultAzureCredential` errors | Run `az login` or configure a service principal. |
-| Empty / no streaming | Confirm `stream` flag in request JSON and that the event loop is healthy. |
-| Model 404 / deployment error | Verify `AZURE_AI_MODEL_DEPLOYMENT_NAME` exists in the Azure AI project configured by `AZURE_AI_PROJECT_ENDPOINT`. |
-| `.env` not loading | Ensure `.env` sits beside the script (or set `dotenv_path`) and that `python-dotenv` is installed. |
-
----
-
-## Related Resources
-- Agent Framework repo: https://github.com/microsoft/agent-framework
-- Basic simple sample README (same folder structure) for installation reference
-
----
-
-## License & Support
-This sample follows the repository's LICENSE. For questions about unreleased Agent Framework features, contact the Agent Framework team via its GitHub repository.
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/workflow_agent_simple/requirements.txt b/sdk/agentserver/azure-ai-agentserver-agentframework/samples/workflow_agent_simple/requirements.txt
deleted file mode 100644
index bfc51b4deaa3..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/workflow_agent_simple/requirements.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-python-dotenv>=1.0.0
-pydantic==2.12.2
-azure-identity
-agent-framework-azure-ai
-azure-ai-agentserver-core
-azure-ai-agentserver-agentframework
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/workflow_agent_simple/workflow_agent_simple.py b/sdk/agentserver/azure-ai-agentserver-agentframework/samples/workflow_agent_simple/workflow_agent_simple.py
deleted file mode 100644
index ce3cca956273..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/samples/workflow_agent_simple/workflow_agent_simple.py
+++ /dev/null
@@ -1,292 +0,0 @@
-# Copyright (c) Microsoft. All rights reserved.
-
-import asyncio
-from dataclasses import dataclass
-from uuid import uuid4
-
-from agent_framework import (
-    AgentRunResponseUpdate,
-    AgentRunUpdateEvent,
-    BaseChatClient,
-    ChatMessage,
-    Contents,
-    Executor,
-    Role as ChatRole,
-    WorkflowBuilder,
-    WorkflowContext,
-    handler,
-)
-from agent_framework_azure_ai import AzureAIAgentClient
-from azure.identity.aio import DefaultAzureCredential
-from dotenv import load_dotenv
-from pydantic import BaseModel
-
-from azure.ai.agentserver.agentframework import from_agent_framework
-
-"""
-The following sample demonstrates how to wrap a workflow as an agent using WorkflowAgent.
-
-This sample shows how to:
-1. Create a workflow with a reflection pattern (Worker + Reviewer executors)
-2. Wrap the workflow as an agent using the .as_agent() method
-3. Stream responses from the workflow agent like a regular agent
-4. Implement a review-retry mechanism where responses are iteratively improved
-
-The example implements a quality-controlled AI assistant where:
-- Worker executor generates responses to user queries
-- Reviewer executor evaluates the responses and provides feedback
-- If not approved, the Worker incorporates feedback and regenerates the response
-- The cycle continues until the response is approved
-- Only approved responses are emitted to the external consumer
-
-Key concepts demonstrated:
-- WorkflowAgent: Wraps a workflow to make it behave as an agent
-- Bidirectional workflow with cycles (Worker ↔ Reviewer)
-- AgentRunUpdateEvent: How workflows communicate with external consumers
-- Structured output parsing for review feedback
-- State management with pending requests tracking
-"""
-
-
-@dataclass
-class ReviewRequest:
-    request_id: str
-    user_messages: list[ChatMessage]
-    agent_messages: list[ChatMessage]
-
-
-@dataclass
-class ReviewResponse:
-    request_id: str
-    feedback: str
-    approved: bool
-
-
-load_dotenv()
-
-
-class Reviewer(Executor):
-    """An executor that reviews messages and provides feedback."""
-
-    def __init__(self, chat_client: BaseChatClient) -> None:
-        super().__init__(id="reviewer")
-        self._chat_client = chat_client
-
-    @handler
-    async def review(
-        self, request: ReviewRequest, ctx: WorkflowContext[ReviewResponse]
-    ) -> None:
-        print(
-            f"🔍 Reviewer: Evaluating response for request {request.request_id[:8]}..."
-        )
-
-        # Use the chat client to review the message and use structured output.
-        # NOTE: this can be modified to use an evaluation framework.
-
-        class _Response(BaseModel):
-            feedback: str
-            approved: bool
-
-        # Define the system prompt.
-        messages = [
-            ChatMessage(
-                role=ChatRole.SYSTEM,
-                text="You are a reviewer for an AI agent, please provide feedback on the "
-                "following exchange between a user and the AI agent, "
-                "and indicate if the agent's responses are approved or not.\n"
-                "Use the following criteria for your evaluation:\n"
-                "- Relevance: Does the response address the user's query?\n"
-                "- Accuracy: Is the information provided correct?\n"
-                "- Clarity: Is the response easy to understand?\n"
-                "- Completeness: Does the response cover all aspects of the query?\n"
-                "Be critical in your evaluation and provide constructive feedback.\n"
-                "Do not approve until all criteria are met.",
-            )
-        ]
-
-        # Add user and agent messages to the chat history.
-        messages.extend(request.user_messages)
-
-        # Add agent messages to the chat history.
-        messages.extend(request.agent_messages)
-
-        # Add add one more instruction for the assistant to follow.
-        messages.append(
-            ChatMessage(
-                role=ChatRole.USER,
-                text="Please provide a review of the agent's responses to the user.",
-            )
-        )
-
-        print("🔍 Reviewer: Sending review request to LLM...")
-        # Get the response from the chat client.
-        response = await self._chat_client.get_response(
-            messages=messages, response_format=_Response
-        )
-
-        # Parse the response.
-        parsed = _Response.model_validate_json(response.messages[-1].text)
-
-        print(f"🔍 Reviewer: Review complete - Approved: {parsed.approved}")
-        print(f"🔍 Reviewer: Feedback: {parsed.feedback}")
-
-        # Send the review response.
-        await ctx.send_message(
-            ReviewResponse(
-                request_id=request.request_id,
-                feedback=parsed.feedback,
-                approved=parsed.approved,
-            )
-        )
-
-
-class Worker(Executor):
-    """An executor that performs tasks for the user."""
-
-    def __init__(self, chat_client: BaseChatClient) -> None:
-        super().__init__(id="worker")
-        self._chat_client = chat_client
-        self._pending_requests: dict[str, tuple[ReviewRequest, list[ChatMessage]]] = {}
-
-    @handler
-    async def handle_user_messages(
-        self, user_messages: list[ChatMessage], ctx: WorkflowContext[ReviewRequest]
-    ) -> None:
-        print("🔧 Worker: Received user messages, generating response...")
-
-        # Handle user messages and prepare a review request for the reviewer.
-        # Define the system prompt.
-        messages = [
-            ChatMessage(role=ChatRole.SYSTEM, text="You are a helpful assistant.")
-        ]
-
-        # Add user messages.
-        messages.extend(user_messages)
-
-        print("🔧 Worker: Calling LLM to generate response...")
-        # Get the response from the chat client.
-        response = await self._chat_client.get_response(messages=messages)
-        print(f"🔧 Worker: Response generated: {response.messages[-1].text}")
-
-        # Add agent messages.
-        messages.extend(response.messages)
-
-        # Create the review request.
-        request = ReviewRequest(
-            request_id=str(uuid4()),
-            user_messages=user_messages,
-            agent_messages=response.messages,
-        )
-
-        print(
-            f"🔧 Worker: Generated response, sending to reviewer (ID: {request.request_id[:8]})"
-        )
-        # Send the review request.
-        await ctx.send_message(request)
-
-        # Add to pending requests.
-        self._pending_requests[request.request_id] = (request, messages)
-
-    @handler
-    async def handle_review_response(
-        self, review: ReviewResponse, ctx: WorkflowContext[ReviewRequest]
-    ) -> None:
-        print(
-            f"🔧 Worker: Received review for request {review.request_id[:8]} - Approved: {review.approved}"
-        )
-
-        # Handle the review response. Depending on the approval status,
-        # either emit the approved response as AgentRunUpdateEvent, or
-        # retry given the feedback.
-        if review.request_id not in self._pending_requests:
-            raise ValueError(
-                f"Received review response for unknown request ID: {review.request_id}"
-            )
-        # Remove the request from pending requests.
-        request, messages = self._pending_requests.pop(review.request_id)
-
-        if review.approved:
-            print("✅ Worker: Response approved! Emitting to external consumer...")
-            # If approved, emit the agent run response update to the workflow's
-            # external consumer.
-            contents: list[Contents] = []
-            for message in request.agent_messages:
-                contents.extend(message.contents)
-            # Emitting an AgentRunUpdateEvent in a workflow wrapped by a WorkflowAgent
-            # will send the AgentRunResponseUpdate to the WorkflowAgent's
-            # event stream.
-            await ctx.add_event(
-                AgentRunUpdateEvent(
-                    self.id,
-                    data=AgentRunResponseUpdate(
-                        contents=contents, role=ChatRole.ASSISTANT
-                    ),
-                )
-            )
-            return
-
-        print(f"❌ Worker: Response not approved. Feedback: {review.feedback}")
-        print("🔧 Worker: Incorporating feedback and regenerating response...")
-
-        # Construct new messages with feedback.
-        messages.append(ChatMessage(role=ChatRole.SYSTEM, text=review.feedback))
-
-        # Add additional instruction to address the feedback.
-        messages.append(
-            ChatMessage(
-                role=ChatRole.SYSTEM,
-                text="Please incorporate the feedback above, and provide a response to user's next message.",
-            )
-        )
-        messages.extend(request.user_messages)
-
-        # Get the new response from the chat client.
-        response = await self._chat_client.get_response(messages=messages)
-        print(
-            f"🔧 Worker: New response generated after feedback: {response.messages[-1].text}"
-        )
-
-        # Process the response.
-        messages.extend(response.messages)
-
-        print(
-            f"🔧 Worker: Generated improved response, sending for re-review (ID: {review.request_id[:8]})"
-        )
-        # Send an updated review request.
-        new_request = ReviewRequest(
-            request_id=review.request_id,
-            user_messages=request.user_messages,
-            agent_messages=response.messages,
-        )
-        await ctx.send_message(new_request)
-
-        # Add to pending requests.
-        self._pending_requests[new_request.request_id] = (new_request, messages)
-
-
-def build_agent(chat_client: BaseChatClient):
-    reviewer = Reviewer(chat_client=chat_client)
-    worker = Worker(chat_client=chat_client)
-    return (
-        WorkflowBuilder()
-        .add_edge(
-            worker, reviewer
-        )  # <--- This edge allows the worker to send requests to the reviewer
-        .add_edge(
-            reviewer, worker
-        )  # <--- This edge allows the reviewer to send feedback back to the worker
-        .set_start_executor(worker)
-        .build()
-        .as_agent()  # Convert the workflow to an agent.
-    )
-
-
-async def main() -> None:
-    async with DefaultAzureCredential() as credential:
-        async with AzureAIAgentClient(async_credential=credential) as chat_client:
-            agent = build_agent(chat_client)
-            await from_agent_framework(agent).run_async()
-
-
-if __name__ == "__main__":
-    asyncio.run(main())
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/tests/__init__.py b/sdk/agentserver/azure-ai-agentserver-agentframework/tests/__init__.py
deleted file mode 100644
index 4a5d26360bce..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/tests/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# Unit tests package
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/tests/conftest.py b/sdk/agentserver/azure-ai-agentserver-agentframework/tests/conftest.py
deleted file mode 100644
index a56a7164c0a3..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/tests/conftest.py
+++ /dev/null
@@ -1,15 +0,0 @@
-"""
-Pytest configuration and shared fixtures for unit tests.
-"""
-
-import sys
-from pathlib import Path
-
-# Ensure package sources are importable during tests
-tests_root = Path(__file__).resolve()
-src_root = tests_root.parents[4]
-packages_root = tests_root.parents[2] / "packages"
-
-for path in (packages_root, src_root):
-	if str(path) not in sys.path:
-		sys.path.insert(0, str(path))
diff --git a/sdk/agentserver/azure-ai-agentserver-agentframework/tests/unit_tests/test_agent_framework_input_converter.py b/sdk/agentserver/azure-ai-agentserver-agentframework/tests/unit_tests/test_agent_framework_input_converter.py
deleted file mode 100644
index 3dab36131f8d..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-agentframework/tests/unit_tests/test_agent_framework_input_converter.py
+++ /dev/null
@@ -1,139 +0,0 @@
-import importlib
-
-import pytest
-
-from agent_framework import ChatMessage, Role as ChatRole
-
-converter_module = importlib.import_module(
-	"azure.ai.agentserver.agentframework.models.agent_framework_input_converters"
-)
-AgentFrameworkInputConverter = converter_module.AgentFrameworkInputConverter
-
-
-@pytest.fixture()
-def converter() -> AgentFrameworkInputConverter:
-	return AgentFrameworkInputConverter()
-
-
-@pytest.mark.unit
-def test_transform_none_returns_none(converter: AgentFrameworkInputConverter) -> None:
-	assert converter.transform_input(None) is None
-
-
-@pytest.mark.unit
-def test_transform_string_returns_same(converter: AgentFrameworkInputConverter) -> None:
-	assert converter.transform_input("hello") == "hello"
-
-
-@pytest.mark.unit
-def test_transform_implicit_user_message_with_string(converter: AgentFrameworkInputConverter) -> None:
-	payload = [{"content": "How are you?"}]
-
-	result = converter.transform_input(payload)
-
-	assert result == "How are you?"
-
-
-@pytest.mark.unit
-def test_transform_implicit_user_message_with_input_text_list(converter: AgentFrameworkInputConverter) -> None:
-	payload = [
-		{
-			"content": [
-				{"type": "input_text", "text": "Hello"},
-				{"type": "input_text", "text": "world"},
-			]
-		}
-	]
-
-	result = converter.transform_input(payload)
-
-	assert result == "Hello world"
-
-
-@pytest.mark.unit
-def test_transform_explicit_message_returns_chat_message(converter: AgentFrameworkInputConverter) -> None:
-	payload = [
-		{
-			"type": "message",
-			"role": "assistant",
-			"content": [
-				{"type": "input_text", "text": "Hi there"},
-			],
-		}
-	]
-
-	result = converter.transform_input(payload)
-
-	assert isinstance(result, ChatMessage)
-	assert result.role == ChatRole.ASSISTANT
-	assert result.text == "Hi there"
-
-
-@pytest.mark.unit
-def test_transform_multiple_explicit_messages_returns_list(converter: AgentFrameworkInputConverter) -> None:
-	payload = [
-		{
-			"type": "message",
-			"role": "user",
-			"content": "Hello",
-		},
-		{
-			"type": "message",
-			"role": "assistant",
-			"content": [
-				{"type": "input_text", "text": "Greetings"},
-			],
-		},
-	]
-
-	result = converter.transform_input(payload)
-
-	assert isinstance(result, list)
-	assert len(result) == 2
-	assert all(isinstance(item, ChatMessage) for item in result)
-	assert result[0].role == ChatRole.USER
-	assert result[0].text == "Hello"
-	assert result[1].role == ChatRole.ASSISTANT
-	assert result[1].text == "Greetings"
-
-
-@pytest.mark.unit
-def test_transform_mixed_messages_coerces_to_strings(converter: AgentFrameworkInputConverter) -> None:
-	payload = [
-		{"content": "First"},
-		{
-			"type": "message",
-			"role": "assistant",
-			"content": [
-				{"type": "input_text", "text": "Second"},
-			],
-		},
-	]
-
-	result = converter.transform_input(payload)
-
-	assert result == ["First", "Second"]
-
-
-@pytest.mark.unit
-def test_transform_invalid_input_type_raises(converter: AgentFrameworkInputConverter) -> None:
-	with pytest.raises(Exception) as exc_info:
-		converter.transform_input({"content": "invalid"})
-
-	assert "Unsupported input type" in str(exc_info.value)
-
-
-@pytest.mark.unit
-def test_transform_skips_non_text_entries(converter: AgentFrameworkInputConverter) -> None:
-	payload = [
-		{
-			"content": [
-				{"type": "input_text", "text": 123},
-				{"type": "image", "url": "https://example.com"},
-			]
-		}
-	]
-
-	result = converter.transform_input(payload)
-
-	assert result is None
diff --git a/sdk/agentserver/azure-ai-agentserver-core/CHANGELOG.md b/sdk/agentserver/azure-ai-agentserver-core/CHANGELOG.md
index 2fd75a41cf4a..ef921fa5ccba 100644
--- a/sdk/agentserver/azure-ai-agentserver-core/CHANGELOG.md
+++ b/sdk/agentserver/azure-ai-agentserver-core/CHANGELOG.md
@@ -5,20 +5,28 @@
 ### Features Added
 
 - `AgentServerHost` base class (Starlette subclass) with health probe (`/readiness`), graceful shutdown, and port binding.
-- `TracingHelper` for OpenTelemetry tracing with Azure Monitor and OTLP exporters.
-- Auto-enable tracing when Application Insights or OTLP endpoint is configured.
-- W3C Trace Context propagation and `leaf_customer_span_id` baggage re-parenting.
+- Automatic OpenTelemetry tracing with Azure Monitor and OTLP exporters (included as primary dependencies).
+- `request_span()` host method and `end_span()` / `record_error()` / `trace_stream()` public functions for protocol SDK tracing.
+- Overridable tracing setup via `configure_tracing` constructor parameter.
 - `create_error_response()` utility for standard error envelope responses.
-- `get_logger()` for library-scoped logging.
 - Cooperative mixin inheritance for multi-protocol composition.
 - Hypercorn-based ASGI server with HTTP/1.1 support.
 
 ### Breaking Changes
 
 - Renamed `AgentHost` → `AgentServerHost`; now inherits from `Starlette` directly.
-- Removed `register_routes()` — protocol packages now subclass `AgentServerHost` and extend `self.routes` in `__init__`.
+- Removed `register_routes()` — protocol packages now subclass `AgentServerHost` and pass routes via `super().__init__()`.
 - Removed lazy `app` property — `AgentServerHost` IS the ASGI app.
+- Replaced `TracingHelper` class with module-level functions (`request_span`, `end_span`, `record_error`, `trace_stream`, `configure_tracing`).
 - Replaced `ErrorResponse.create()` static method with module-level `create_error_response()` function.
-- Replaced `AgentLogger.get()` static method with module-level `get_logger()` function.
+- Removed `AgentLogger` / `get_logger()` — use `logging.getLogger("azure.ai.agentserver")` directly.
 - Removed `AGENT_LOG_LEVEL` and `AGENT_GRACEFUL_SHUTDOWN_TIMEOUT` environment variable support from `Constants`.
+- Removed `leaf_customer_span_id` baggage mechanism and W3C Baggage propagation.
+- OpenTelemetry is now a required dependency (was optional `[tracing]` extras).
 - Renamed health endpoint from `/healthy` to `/readiness`.
+
+## 1.0.0b1 (2025-11-07)
+
+### Features Added
+
+First version
diff --git a/sdk/agentserver/azure-ai-agentserver-core/README.md b/sdk/agentserver/azure-ai-agentserver-core/README.md
index 373d942c8f1a..add29e0bb57b 100644
--- a/sdk/agentserver/azure-ai-agentserver-core/README.md
+++ b/sdk/agentserver/azure-ai-agentserver-core/README.md
@@ -1,4 +1,4 @@
-# Azure AI AgentServerHost Core for Python
+# Azure AI Agent Server Core client library for Python
 
 The `azure-ai-agentserver-core` package provides the foundation host framework for building Azure AI Hosted Agent containers. It handles the protocol-agnostic infrastructure — health probes, graceful shutdown, OpenTelemetry tracing, and ASGI serving — so that protocol packages can focus on their endpoint logic.
 
@@ -10,11 +10,7 @@ The `azure-ai-agentserver-core` package provides the foundation host framework f
 pip install azure-ai-agentserver-core
 ```
 
-To enable OpenTelemetry tracing with Azure Monitor and OTLP exporters:
-
-```bash
-pip install azure-ai-agentserver-core[tracing]
-```
+OpenTelemetry tracing with Azure Monitor and OTLP exporters is included by default.
 
 ### Prerequisites
 
diff --git a/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/__init__.py b/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/__init__.py
index 130f89eb4d62..05d392307569 100644
--- a/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/__init__.py
+++ b/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/__init__.py
@@ -9,27 +9,28 @@
 Public API::
 
     from azure.ai.agentserver.core import (
-        get_logger,
+        AgentConfig,
         AgentServerHost,
-        Constants,
         create_error_response,
-        TracingHelper,
+        end_span,
+        record_error,
+        trace_stream,
     )
 """
 __path__ = __import__("pkgutil").extend_path(__path__, __name__)
 
 from ._base import AgentServerHost
-from ._constants import Constants
+from ._config import AgentConfig
 from ._errors import create_error_response
-from ._logger import get_logger
-from ._tracing import TracingHelper
+from ._tracing import end_span, record_error, trace_stream
 from ._version import VERSION
 
 __all__ = [
+    "AgentConfig",
     "AgentServerHost",
-    "Constants",
     "create_error_response",
-    "get_logger",
-    "TracingHelper",
+    "end_span",
+    "record_error",
+    "trace_stream",
 ]
 __version__ = VERSION
diff --git a/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/_base.py b/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/_base.py
index c67b3a3fe9ad..2a4255fb667e 100644
--- a/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/_base.py
+++ b/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/_base.py
@@ -4,8 +4,11 @@
 import asyncio  # pylint: disable=do-not-import-asyncio
 import contextlib
 import logging
-from collections.abc import AsyncGenerator, Awaitable, Callable  # pylint: disable=import-error
-from typing import Any, Optional
+import os
+import signal
+import sys
+from collections.abc import AsyncGenerator, AsyncIterable, AsyncIterator, Awaitable, Callable  # pylint: disable=import-error
+from typing import Any, Optional, Union
 
 from starlette.applications import Starlette
 from starlette.middleware import Middleware
@@ -14,17 +17,19 @@
 from starlette.responses import Response
 from starlette.routing import Route
 
-from . import _config
-from ._logger import get_logger
-from ._tracing import TracingHelper
+from . import _config, _tracing
+from ._version import VERSION as _CORE_VERSION
 
-logger = get_logger()
+logger = logging.getLogger("azure.ai.agentserver")
 
 # Pre-built health-check response to avoid per-request allocation.
 _HEALTHY_BODY = b'{"status":"healthy"}'
 
-# Server identity header value (name only — no version to avoid information disclosure).
-_PLATFORM_SERVER_VALUE = "azure-ai-agentserver-core"
+# Server identity header per spec: {sdk}/{version} (python/{runtime})
+_PLATFORM_SERVER_VALUE = (
+    f"azure-ai-agentserver-core/{_CORE_VERSION} "
+    f"(python/{sys.version_info.major}.{sys.version_info.minor})"
+)
 
 # Sentinel attribute name set on the console handler to prevent adding duplicates
 # across multiple AgentServerHost instantiations.
@@ -91,6 +96,7 @@ def __init__(
         applicationinsights_connection_string: Optional[str] = None,
         graceful_shutdown_timeout: Optional[int] = None,
         log_level: Optional[str] = None,
+        configure_tracing: Optional[Callable[..., None]] = _tracing.configure_tracing,
         routes: Optional[list[Route]] = None,
         **kwargs: Any,
     ) -> None:
@@ -106,17 +112,22 @@ def __init__(
             setattr(_console, _CONSOLE_HANDLER_ATTR, True)
             logger.addHandler(_console)
 
-        # Tracing — enabled when App Insights or OTLP endpoint is configured
-        _conn_str = _config.resolve_appinsights_connection_string(applicationinsights_connection_string)
-        _otlp_endpoint = _config.resolve_otlp_endpoint()
-        _tracing_on = bool(_conn_str or _otlp_endpoint)
-        self._tracing: Optional[TracingHelper] = None
-        if _tracing_on:
-            try:
-                self._tracing = TracingHelper(connection_string=_conn_str)
-            except Exception:  # pylint: disable=broad-exception-caught
-                logger.warning("Failed to initialize tracing; continuing without tracing.", exc_info=True)
-                self._tracing = None
+        # Suppress noisy Azure SDK and OTel exporter logs
+        logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(logging.WARNING)
+        logging.getLogger("azure.monitor.opentelemetry.exporter").setLevel(logging.WARNING)
+
+        # Resolved configuration (accessible as self.config)
+        self.config = _config.AgentConfig.from_env()
+
+        # Tracing — overridable setup function
+        _conn_str = applicationinsights_connection_string or self.config.appinsights_connection_string
+        if configure_tracing is not None:
+            _tracing_on = bool(_conn_str or self.config.otlp_endpoint)
+            if _tracing_on:
+                try:
+                    configure_tracing(connection_string=_conn_str)
+                except Exception:  # pylint: disable=broad-exception-caught
+                    logger.warning("Failed to initialize tracing; continuing without tracing.", exc_info=True)
 
         # Timeouts ---------------------------------------------------------
         self._graceful_shutdown_timeout = _config.resolve_graceful_shutdown_timeout(
@@ -165,17 +176,51 @@ async def _lifespan(_app: Starlette) -> AsyncGenerator[None, None]:  # noqa: RUF
         )
 
     # ------------------------------------------------------------------
-    # Tracing accessor (for protocol subclasses)
+    # Tracing (for protocol subclasses)
     # ------------------------------------------------------------------
 
-    @property
-    def tracing(self) -> Optional[TracingHelper]:
-        """Return the tracing helper, or *None* when tracing is disabled.
+    #: Default instrumentation scope for tracing spans.
+    #: Protocol subclasses should override this per the spec.
+    _INSTRUMENTATION_SCOPE = "Azure.AI.AgentServer"
 
-        :return: The tracing helper instance.
-        :rtype: Optional[TracingHelper]
+    @contextlib.contextmanager
+    def request_span(
+        self,
+        headers: Any,
+        request_id: str,
+        operation: str,
+        *,
+        operation_name: Optional[str] = None,
+        session_id: str = "",
+        end_on_exit: bool = True,
+    ) -> Any:
+        """Create a request-scoped span with this host's identity attributes.
+
+        Delegates to :func:`_tracing.request_span` with pre-populated
+        agent identity from environment variables.
+
+        :param headers: HTTP request headers.
+        :param request_id: The request/invocation ID.
+        :param operation: Span operation (e.g. ``"invoke_agent"``).
+        :param operation_name: Optional ``gen_ai.operation.name`` value.
+        :param session_id: Session ID.
+        :param end_on_exit: Whether to end the span when the context exits.
+        :return: Context manager yielding the OTel span.
         """
-        return self._tracing
+        with _tracing.request_span(
+            headers,
+            request_id,
+            operation,
+            agent_id=self.config.agent_id,
+            agent_name=self.config.agent_name,
+            agent_version=self.config.agent_version,
+            project_id=self.config.project_id,
+            operation_name=operation_name,
+            session_id=session_id,
+            end_on_exit=end_on_exit,
+            instrumentation_scope=self._INSTRUMENTATION_SCOPE,
+        ) as span:
+            yield span
 
     # ------------------------------------------------------------------
     # Shutdown handler (server-level lifecycle)
@@ -216,6 +261,8 @@ def _build_hypercorn_config(self, host: str, port: int) -> object:
         config = HypercornConfig()
         config.bind = [f"{host}:{port}"]
         config.graceful_timeout = float(self._graceful_shutdown_timeout)
+        # Spec requires HTTP/1.1 only — disable HTTP/2
+        config.h2_max_concurrent_streams = 0
         return config
 
     def run(self, host: str = "0.0.0.0", port: Optional[int] = None) -> None:
@@ -231,7 +278,24 @@ def run(self, host: str = "0.0.0.0", port: Optional[int] = None) -> None:
         resolved_port = _config.resolve_port(port)
         logger.info("AgentServerHost starting on %s:%s", host, resolved_port)
         config = self._build_hypercorn_config(host, resolved_port)
-        asyncio.run(_hypercorn_serve(self, config))  # type: ignore[arg-type]
+
+        # Register SIGTERM handler to log the signal and initiate
+        # Hypercorn's graceful shutdown.
+        original_sigterm = signal.getsignal(signal.SIGTERM)
+
+        def _handle_sigterm(signum: int, frame: Any) -> None:
+            logger.info("SIGTERM received, initiating graceful shutdown")
+            # Restore the original handler so the re-raised signal is not
+            # caught by this handler again (avoids infinite recursion).
+            signal.signal(signal.SIGTERM, original_sigterm)
+            os.kill(os.getpid(), signal.SIGTERM)
+
+        signal.signal(signal.SIGTERM, _handle_sigterm)
+
+        try:
+            asyncio.run(_hypercorn_serve(self, config))  # type: ignore[arg-type]
+        finally:
+            signal.signal(signal.SIGTERM, original_sigterm)
 
     async def run_async(self, host: str = "0.0.0.0", port: Optional[int] = None) -> None:
         """Start the server asynchronously (awaitable).
@@ -261,3 +325,40 @@ async def _readiness_endpoint(self, request: Request) -> Response:  # pylint: di
         :rtype: Response
         """
         return Response(_HEALTHY_BODY, media_type="application/json")
+
+    # ------------------------------------------------------------------
+    # Streaming utilities
+    # ------------------------------------------------------------------
+
+    _Content = Union[str, bytes, memoryview]
+
+    @staticmethod
+    async def sse_keepalive_stream(
+        iterator: "AsyncIterable[AgentServerHost._Content]",
+        interval: int,
+    ) -> "AsyncIterator[AgentServerHost._Content]":
+        """Interleave SSE keep-alive comment frames into a streaming body.
+
+        Emits ``b": keep-alive\\n\\n"`` whenever the upstream iterator has not
+        produced a chunk within *interval* seconds.  This prevents
+        proxies/load-balancers from closing idle connections.
+
+        :param iterator: The async iterable to wrap.
+        :param interval: Seconds between keep-alive frames. Must be > 0.
+        :return: An async iterator with interleaved keep-alive frames.
+        """
+        ait = iterator.__aiter__()
+        # Reuse the same __anext__ task across timeouts to avoid cancelling
+        # the upstream iterator when wait_for expires.
+        pending: "Optional[asyncio.Task[AgentServerHost._Content]]" = None
+        while True:
+            if pending is None:
+                pending = asyncio.ensure_future(ait.__anext__())
+            try:
+                chunk = await asyncio.wait_for(asyncio.shield(pending), timeout=interval)
+                pending = None  # consumed — create new task next iteration
+                yield chunk
+            except asyncio.TimeoutError:
+                yield b": keep-alive\n\n"
+            except StopAsyncIteration:
+                break
diff --git a/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/_config.py b/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/_config.py
index a57ddf68b262..769e6d2be89a 100644
--- a/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/_config.py
+++ b/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/_config.py
@@ -1,7 +1,7 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-"""Configuration resolution helpers for AgentHost hosting.
+"""Configuration resolution helpers for AgentServerHost.
 
 Each ``resolve_*`` function follows the same hierarchy:
 1. Explicit argument (if not *None*)
@@ -14,9 +14,96 @@
 misconfiguration is surfaced at startup rather than silently masked.
 """
 import os
+from dataclasses import dataclass
 from typing import Optional
 
-from ._constants import Constants
+# ======================================================================
+# Environment variable keys (internal — users access values via AgentConfig)
+# ======================================================================
+
+_ENV_FOUNDRY_AGENT_NAME = "FOUNDRY_AGENT_NAME"
+_ENV_FOUNDRY_AGENT_VERSION = "FOUNDRY_AGENT_VERSION"
+_ENV_FOUNDRY_PROJECT_ENDPOINT = "FOUNDRY_PROJECT_ENDPOINT"
+_ENV_FOUNDRY_PROJECT_ARM_ID = "FOUNDRY_PROJECT_ARM_ID"
+_ENV_FOUNDRY_AGENT_SESSION_ID = "FOUNDRY_AGENT_SESSION_ID"
+_ENV_PORT = "PORT"
+_ENV_APPLICATIONINSIGHTS_CONNECTION_STRING = "APPLICATIONINSIGHTS_CONNECTION_STRING"
+_ENV_OTEL_EXPORTER_OTLP_ENDPOINT = "OTEL_EXPORTER_OTLP_ENDPOINT"
+_ENV_SSE_KEEPALIVE_INTERVAL = "SSE_KEEPALIVE_INTERVAL"
+
+_DEFAULT_PORT = 8088
+_DEFAULT_SSE_KEEPALIVE_INTERVAL = 15
+
+
+# ======================================================================
+# AgentConfig — resolved environment values
+# ======================================================================
+
+
+@dataclass(frozen=True)
+class AgentConfig:
+    """Resolved configuration for an agent server host.
+
+    All values are populated from environment variables at creation time.
+    Access via ``app.config``::
+
+        app = InvocationAgentServerHost()
+        print(app.config.agent_name)
+        print(app.config.project_endpoint)
+
+    :param agent_name: Agent name from ``FOUNDRY_AGENT_NAME``.
+    :param agent_version: Agent version from ``FOUNDRY_AGENT_VERSION``.
+    :param agent_id: Combined identifier (``"name:version"`` or ``"name"`` or ``""``).
+    :param project_endpoint: Foundry project endpoint from ``FOUNDRY_PROJECT_ENDPOINT``.
+    :param project_id: Foundry project ARM resource ID from ``FOUNDRY_PROJECT_ARM_ID``.
+    :param session_id: Default session ID from ``FOUNDRY_AGENT_SESSION_ID``.
+    :param port: Server port from ``PORT`` (default 8088).
+    :param appinsights_connection_string: Application Insights connection string.
+    :param otlp_endpoint: OTLP exporter endpoint.
+    :param sse_keepalive_interval: SSE keep-alive interval in seconds (0 = disabled).
+    """
+
+    agent_name: str
+    agent_version: str
+    agent_id: str
+    project_endpoint: str
+    project_id: str
+    session_id: str
+    port: int
+    appinsights_connection_string: str
+    otlp_endpoint: str
+    sse_keepalive_interval: int
+
+    @classmethod
+    def from_env(cls) -> "AgentConfig":
+        """Create an ``AgentConfig`` by reading all platform environment variables.
+
+        :return: A frozen config with resolved values.
+        :rtype: AgentConfig
+        """
+        agent_name = os.environ.get(_ENV_FOUNDRY_AGENT_NAME, "")
+        agent_version = os.environ.get(_ENV_FOUNDRY_AGENT_VERSION, "")
+
+        if agent_name and agent_version:
+            agent_id = f"{agent_name}:{agent_version}"
+        elif agent_name:
+            agent_id = agent_name
+        else:
+            agent_id = ""
+
+        return cls(
+            agent_name=agent_name,
+            agent_version=agent_version,
+            agent_id=agent_id,
+            project_endpoint=os.environ.get(_ENV_FOUNDRY_PROJECT_ENDPOINT, ""),
+            project_id=os.environ.get(_ENV_FOUNDRY_PROJECT_ARM_ID, ""),
+            session_id=os.environ.get(_ENV_FOUNDRY_AGENT_SESSION_ID, ""),
+            port=resolve_port(None),
+            appinsights_connection_string=os.environ.get(
+                _ENV_APPLICATIONINSIGHTS_CONNECTION_STRING, ""),
+            otlp_endpoint=os.environ.get(_ENV_OTEL_EXPORTER_OTLP_ENDPOINT, ""),
+            sse_keepalive_interval=resolve_sse_keepalive_interval(None),
+        )
 
 
 def _parse_int_env(var_name: str) -> Optional[int]:
@@ -88,10 +175,10 @@ def resolve_port(port: Optional[int]) -> int:
     """
     if port is not None:
         return _validate_port(_require_int("port", port), "port")
-    env_port = _parse_int_env(Constants.PORT)
+    env_port = _parse_int_env("PORT")
     if env_port is not None:
-        return _validate_port(env_port, Constants.PORT)
-    return Constants.DEFAULT_PORT
+        return _validate_port(env_port, "PORT")
+    return _DEFAULT_PORT
 
 
 _DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT = 30
@@ -133,7 +220,7 @@ def resolve_appinsights_connection_string(
     if connection_string is not None:
         return connection_string
     return os.environ.get(
-        Constants.APPLICATIONINSIGHTS_CONNECTION_STRING
+        _ENV_APPLICATIONINSIGHTS_CONNECTION_STRING
     )
 
 
@@ -164,7 +251,7 @@ def resolve_agent_name() -> str:
     :return: The agent name, or an empty string if not set.
     :rtype: str
     """
-    return os.environ.get(Constants.FOUNDRY_AGENT_NAME, "")
+    return os.environ.get(_ENV_FOUNDRY_AGENT_NAME, "")
 
 
 def resolve_agent_version() -> str:
@@ -173,7 +260,7 @@ def resolve_agent_version() -> str:
     :return: The agent version, or an empty string if not set.
     :rtype: str
     """
-    return os.environ.get(Constants.FOUNDRY_AGENT_VERSION, "")
+    return os.environ.get(_ENV_FOUNDRY_AGENT_VERSION, "")
 
 
 def resolve_project_id() -> str:
@@ -185,7 +272,26 @@ def resolve_project_id() -> str:
     :return: The project ARM resource ID, or an empty string if not set.
     :rtype: str
     """
-    return os.environ.get(Constants.FOUNDRY_PROJECT_ARM_ID, "")
+    return os.environ.get(_ENV_FOUNDRY_PROJECT_ARM_ID, "")
+
+
+def resolve_sse_keepalive_interval(interval: Optional[int] = None) -> int:
+    """Resolve the SSE keep-alive interval from argument, env var, or default.
+
+    Resolution order: explicit *interval* → ``SSE_KEEPALIVE_INTERVAL`` env var
+    → ``15`` (seconds).  A value of ``0`` disables keep-alive.
+
+    :param interval: Explicitly requested interval in seconds, or None.
+    :type interval: Optional[int]
+    :return: The resolved interval in seconds (0 means disabled).
+    :rtype: int
+    """
+    if interval is not None:
+        return max(0, _require_int("sse_keepalive_interval", interval))
+    env_val = _parse_int_env(_ENV_SSE_KEEPALIVE_INTERVAL)
+    if env_val is not None:
+        return max(0, env_val)
+    return _DEFAULT_SSE_KEEPALIVE_INTERVAL
 
 
 def resolve_otlp_endpoint() -> Optional[str]:
@@ -194,5 +300,5 @@ def resolve_otlp_endpoint() -> Optional[str]:
     :return: The OTLP endpoint URL, or None if not set or empty.
     :rtype: Optional[str]
     """
-    value = os.environ.get(Constants.OTEL_EXPORTER_OTLP_ENDPOINT, "")
+    value = os.environ.get(_ENV_OTEL_EXPORTER_OTLP_ENDPOINT, "")
     return value if value else None
diff --git a/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/_constants.py b/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/_constants.py
index 8d0f1aeb16e4..74b7c0708931 100644
--- a/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/_constants.py
+++ b/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/_constants.py
@@ -20,5 +20,9 @@ class Constants:
     APPLICATIONINSIGHTS_CONNECTION_STRING = "APPLICATIONINSIGHTS_CONNECTION_STRING"
     OTEL_EXPORTER_OTLP_ENDPOINT = "OTEL_EXPORTER_OTLP_ENDPOINT"
 
+    # SSE keep-alive
+    SSE_KEEPALIVE_INTERVAL = "SSE_KEEPALIVE_INTERVAL"
+    DEFAULT_SSE_KEEPALIVE_INTERVAL = 15  # seconds, 0 to disable
+
     # Session identity
     FOUNDRY_AGENT_SESSION_ID = "FOUNDRY_AGENT_SESSION_ID"
diff --git a/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/_logger.py b/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/_logger.py
deleted file mode 100644
index c1612b9b3ae6..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/_logger.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-"""Logging facade for AgentHost.
-
-Usage::
-
-    from azure.ai.agentserver.core import get_logger
-
-    logger = get_logger()
-    logger.info("Processing request")
-"""
-import logging
-
-
-def get_logger() -> logging.Logger:
-    """Return the library-scoped logger.
-
-    :return: Logger instance for ``azure.ai.agentserver``.
-    :rtype: logging.Logger
-    """
-    return logging.getLogger("azure.ai.agentserver")
diff --git a/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/_tracing.py b/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/_tracing.py
index bef02da563e8..a4e8f895bedc 100644
--- a/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/_tracing.py
+++ b/sdk/agentserver/azure-ai-agentserver-core/azure/ai/agentserver/core/_tracing.py
@@ -3,26 +3,31 @@
 # ---------------------------------------------------------
 """OpenTelemetry tracing for AgentServerHost.
 
-Tracing is automatically enabled when an Application Insights connection
-string (``APPLICATIONINSIGHTS_CONNECTION_STRING``) or an OTLP exporter
-endpoint (``OTEL_EXPORTER_OTLP_ENDPOINT``) is available.
+This module provides functions (not classes) for tracing:
 
-Requires ``opentelemetry-api`` to be installed::
+- :func:`configure_tracing` — one-time exporter setup (called by ``AgentServerHost.__init__``)
+- :func:`request_span` — create a request-scoped span with GenAI attributes
+- :func:`end_span` / :func:`record_error` — span lifecycle helpers
+- :func:`trace_stream` — wrap streaming responses with span lifecycle
 
-    pip install azure-ai-agentserver-core[tracing]
-
-If the package is not installed, tracing silently becomes a no-op.
+OpenTelemetry is a required dependency — these functions always create
+real spans.  Azure Monitor export is optional (lazy-imported).
 """
 import logging
+import os
 from collections.abc import AsyncIterable, AsyncIterator, Mapping  # pylint: disable=import-error
 from contextlib import contextmanager
 from typing import Any, Iterator, Optional, Union
 
+from opentelemetry import baggage as _otel_baggage, context as _otel_context, trace
+from opentelemetry.baggage.propagation import W3CBaggagePropagator
+from opentelemetry.propagate import composite
+from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
+
 from . import _config
-from ._logger import get_logger
 
 _Content = Union[str, bytes, memoryview]
-_W3C_HEADERS = ("traceparent", "tracestate")
+_W3C_HEADERS = ("traceparent", "tracestate", "baggage")
 
 # GenAI semantic convention attribute keys
 _ATTR_SERVICE_NAME = "service.name"
@@ -40,221 +45,186 @@
 _GEN_AI_SYSTEM_VALUE = "azure.ai.agentserver"
 _GEN_AI_PROVIDER_NAME_VALUE = "AzureAI Hosted Agents"
 
-logger = get_logger()
+logger = logging.getLogger("azure.ai.agentserver")
+
+# Composite propagator handles both traceparent/tracestate AND baggage
+_propagator = composite.CompositePropagator([
+    TraceContextTextMapPropagator(),
+    W3CBaggagePropagator(),
+])
 
-_HAS_OTEL = False
-try:
-    from opentelemetry import trace
-    from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
 
-    _HAS_OTEL = True
-except ImportError:
-    pass
+# ======================================================================
+# Public API: exporter setup
+# ======================================================================
+
 
+def configure_tracing(connection_string: Optional[str] = None) -> None:
+    """Configure OpenTelemetry exporters for Azure Monitor and OTLP.
 
-class TracingHelper:
-    """Lightweight wrapper around OpenTelemetry.
+    Called once at startup by ``AgentServerHost.__init__``.  Users may
+    pass a custom function (or ``None``) via the ``configure_tracing``
+    constructor parameter to override or disable this default setup.
 
-    Automatically configures Azure Monitor and OTLP exporters when the
-    corresponding environment variables are set.  All span creation and
-    lifecycle is managed by the host framework -- developers never interact
-    with this class directly.
+    :param connection_string: Application Insights connection string.
+        When provided, traces and logs are exported to Azure Monitor.
+    :type connection_string: str or None
     """
+    resource = _create_resource()
+    provider = _ensure_trace_provider(resource)
 
-    def __init__(self, connection_string: Optional[str] = None) -> None:
-        self._enabled = _HAS_OTEL
-        self._tracer: Any = None
-        self._propagator: Any = None
-
-        self._agent_name = _config.resolve_agent_name()
-        self._agent_version = _config.resolve_agent_version()
-        self._project_id = _config.resolve_project_id()
-
-        if self._agent_name and self._agent_version:
-            self._agent_id = f"{self._agent_name}:{self._agent_version}"
-        elif self._agent_name:
-            self._agent_id = self._agent_name
-        else:
-            self._agent_id = ""
-
-        if not self._enabled:
-            logger.warning(
-                "Tracing was enabled but opentelemetry-api is not installed. "
-                "Install it with: pip install azure-ai-agentserver-core[tracing]"
-            )
-            return
-
-        resource = _create_resource()
-        trace_provider = _ensure_trace_provider(resource)
-
-        if trace_provider is not None:
-            trace_provider.add_span_processor(_FoundryEnrichmentSpanProcessor(
-                agent_name=self._agent_name or None,
-                agent_version=self._agent_version or None,
-                agent_id=self._agent_id or None,
-                project_id=self._project_id or None,
-            ))
-
-        if connection_string:
-            self._setup_azure_monitor(connection_string, resource, trace_provider)
-
-        otlp_endpoint = _config.resolve_otlp_endpoint()
-        if otlp_endpoint:
-            self._setup_otlp_export(otlp_endpoint, resource, trace_provider)
-
-        self._tracer = trace.get_tracer("azure.ai.agentserver")
-        self._propagator = TraceContextTextMapPropagator()
-
-    # ------------------------------------------------------------------
-    # Exporter configuration
-    # ------------------------------------------------------------------
-
-    @staticmethod
-    def _setup_azure_monitor(connection_string: str, resource: Any, trace_provider: Any) -> None:
-        if resource is None:
-            return
-        _setup_trace_export(trace_provider, connection_string)
-        _setup_log_export(resource, connection_string)
-
-    @staticmethod
-    def _setup_otlp_export(endpoint: str, resource: Any, trace_provider: Any) -> None:
-        if resource is None:
-            return
-        _setup_otlp_trace_export(trace_provider, endpoint)
-        _setup_otlp_log_export(resource, endpoint)
-
-    # ------------------------------------------------------------------
-    # Span creation
-    # ------------------------------------------------------------------
-
-    @contextmanager
-    def request_span(
-        self,
-        headers: Mapping[str, str],
-        invocation_id: str,
-        span_operation: str,
-        operation_name: Optional[str] = None,
-        session_id: str = "",
-        end_on_exit: bool = True,
-    ) -> Iterator[Any]:
-        """Create a request-scoped span from HTTP headers.
-
-        Extracts W3C trace context, builds GenAI attributes, and creates
-        a span set as current in context (child spans are correctly parented).
-
-        For **non-streaming** requests use ``end_on_exit=True`` (default).
-        For **streaming** requests use ``end_on_exit=False`` and end the
-        span via :meth:`trace_stream`.
-
-        :param headers: HTTP request headers.
-        :type headers: ~collections.abc.Mapping[str, str]
-        :param invocation_id: The request/invocation ID.
-        :type invocation_id: str
-        :param span_operation: Span operation name (e.g. ``"invoke_agent"``).
-        :type span_operation: str
-        :param operation_name: Optional ``gen_ai.operation.name`` value.
-        :type operation_name: str or None
-        :param session_id: Session ID (empty string if absent).
-        :type session_id: str
-        :param end_on_exit: Whether to end the span when the context exits.
-        :type end_on_exit: bool
-        :return: Context manager yielding the OTel span or *None*.
-        :rtype: ~typing.Iterator
-        """
-        if not self._enabled or self._tracer is None:
-            yield None
-            return
-
-        # Build span name
-        name = f"{span_operation} {self._agent_id}" if self._agent_id else span_operation
-
-        # Build attributes
-        attrs: dict[str, str] = {
-            _ATTR_SERVICE_NAME: _SERVICE_NAME_VALUE,
-            _ATTR_GEN_AI_SYSTEM: _GEN_AI_SYSTEM_VALUE,
-            _ATTR_GEN_AI_PROVIDER_NAME: _GEN_AI_PROVIDER_NAME_VALUE,
-            _ATTR_GEN_AI_RESPONSE_ID: invocation_id,
-            _ATTR_GEN_AI_AGENT_ID: self._agent_id,
-        }
-        if self._agent_name:
-            attrs[_ATTR_GEN_AI_AGENT_NAME] = self._agent_name
-        if self._agent_version:
-            attrs[_ATTR_GEN_AI_AGENT_VERSION] = self._agent_version
-        if operation_name:
-            attrs[_ATTR_GEN_AI_OPERATION_NAME] = operation_name
-        if session_id:
-            attrs[_ATTR_GEN_AI_CONVERSATION_ID] = session_id
-        if self._project_id:
-            attrs[_ATTR_FOUNDRY_PROJECT_ID] = self._project_id
-
-        # Extract W3C trace context
-        carrier = _extract_w3c_carrier(headers)
-        ctx = self._propagator.extract(carrier=carrier) if carrier else None
-
-        with self._tracer.start_as_current_span(
-            name=name,
-            attributes=attrs,
-            kind=trace.SpanKind.SERVER,
-            context=ctx,
-            end_on_exit=end_on_exit,
-        ) as otel_span:
-            yield otel_span
-
-    # ------------------------------------------------------------------
-    # Span lifecycle
-    # ------------------------------------------------------------------
-
-    def end_span(self, span: Any, exc: Optional[BaseException] = None) -> None:
-        """End a span, optionally recording an error first.
-
-        :param span: The OTel span to end, or None.
-        :type span: any
-        :param exc: Optional exception to record on the span.
-        :type exc: ~BaseException or None
-        """
-        if span is None:
-            return
-        if exc is not None:
-            self.record_error(span, exc)
-        span.end()
-
-    @staticmethod
-    def record_error(span: Any, exc: BaseException) -> None:
-        """Record an exception and ERROR status on a span.
-
-        :param span: The OTel span to record the error on.
-        :type span: any
-        :param exc: The exception to record.
-        :type exc: ~BaseException
-        """
-        if span is not None and _HAS_OTEL:
-            span.set_status(trace.StatusCode.ERROR, str(exc))
-            span.record_exception(exc)
-
-    async def trace_stream(
-        self, iterator: AsyncIterable[_Content], span: Any
-    ) -> AsyncIterator[_Content]:
-        """Wrap a streaming body so the span covers the full transmission.
-
-        Yields chunks unchanged.  Ends the span when the iterator is
-        exhausted or raises an exception.
-
-        :param iterator: The async iterable to wrap.
-        :type iterator: ~collections.abc.AsyncIterable
-        :param span: The OTel span to end when the stream completes.
-        :type span: any
-        :return: An async iterator yielding chunks unchanged.
-        :rtype: ~collections.abc.AsyncIterator
-        """
-        error: Optional[BaseException] = None
-        try:
-            async for chunk in iterator:
-                yield chunk
-        except BaseException as exc:
-            error = exc
-            raise
-        finally:
-            self.end_span(span, exc=error)
+    if provider is not None:
+        _register_enrichment_processor(provider)
+
+    if connection_string:
+        if resource is not None:
+            _setup_trace_export(provider, connection_string)
+            _setup_log_export(resource, connection_string)
+
+    otlp_endpoint = _config.resolve_otlp_endpoint()
+    if otlp_endpoint and resource is not None:
+        _setup_otlp_trace_export(provider, otlp_endpoint)
+        _setup_otlp_log_export(resource, otlp_endpoint)
+
+
+# ======================================================================
+# Public API: span operations
+# ======================================================================
+
+
+@contextmanager
+def request_span(
+    headers: Mapping[str, str],
+    request_id: str,
+    operation: str,
+    *,
+    agent_id: str = "",
+    agent_name: str = "",
+    agent_version: str = "",
+    project_id: str = "",
+    operation_name: Optional[str] = None,
+    session_id: str = "",
+    end_on_exit: bool = True,
+    instrumentation_scope: str = "Azure.AI.AgentServer",
+) -> Iterator[Any]:
+    """Create a request-scoped span with GenAI semantic convention attributes.
+
+    Extracts W3C trace context from *headers* and creates a span set as
+    current in context (child spans are correctly parented).
+
+    For **non-streaming** requests use ``end_on_exit=True`` (default).
+    For **streaming** use ``end_on_exit=False`` and end via :func:`trace_stream`.
+
+    :param headers: HTTP request headers.
+    :param request_id: The request/invocation ID.
+    :param operation: Span operation (e.g. ``"invoke_agent"``).
+    :param agent_id: Agent identifier (``"name:version"`` or ``"name"``).
+    :param agent_name: Agent name from FOUNDRY_AGENT_NAME.
+    :param agent_version: Agent version from FOUNDRY_AGENT_VERSION.
+    :param project_id: Foundry project ARM resource ID.
+    :param operation_name: Optional ``gen_ai.operation.name`` value.
+    :param session_id: Session ID (empty string if absent).
+    :param end_on_exit: Whether to end the span when the context exits.
+    :return: Context manager yielding the OTel span.
+    """
+    tracer = trace.get_tracer(instrumentation_scope)
+
+    # Build span name
+    name = f"{operation} {agent_id}" if agent_id else operation
+
+    # Build attributes
+    attrs: dict[str, str] = {
+        _ATTR_SERVICE_NAME: agent_name or _SERVICE_NAME_VALUE,
+        _ATTR_GEN_AI_SYSTEM: _GEN_AI_SYSTEM_VALUE,
+        _ATTR_GEN_AI_PROVIDER_NAME: _GEN_AI_PROVIDER_NAME_VALUE,
+        _ATTR_GEN_AI_RESPONSE_ID: request_id,
+        _ATTR_GEN_AI_AGENT_ID: agent_id,
+    }
+    if agent_name:
+        attrs[_ATTR_GEN_AI_AGENT_NAME] = agent_name
+    if agent_version:
+        attrs[_ATTR_GEN_AI_AGENT_VERSION] = agent_version
+    if operation_name:
+        attrs[_ATTR_GEN_AI_OPERATION_NAME] = operation_name
+    if session_id:
+        attrs[_ATTR_GEN_AI_CONVERSATION_ID] = session_id
+    if project_id:
+        attrs[_ATTR_FOUNDRY_PROJECT_ID] = project_id
+
+    # Propagate platform request correlation ID as span attribute AND baggage
+    x_request_id = headers.get("x-request-id")
+    if x_request_id:
+        attrs["x_request_id"] = x_request_id
+
+    # Extract W3C trace context (traceparent + tracestate + baggage)
+    carrier = _extract_w3c_carrier(headers)
+    ctx = _propagator.extract(carrier=carrier) if carrier else None
+
+    # Add x-request-id to baggage for downstream propagation
+    if x_request_id:
+        ctx = _otel_baggage.set_baggage("x_request_id", x_request_id, context=ctx)
+
+    with tracer.start_as_current_span(
+        name=name,
+        attributes=attrs,
+        kind=trace.SpanKind.SERVER,
+        context=ctx,
+        end_on_exit=end_on_exit,
+    ) as otel_span:
+        yield otel_span
+
+
+def end_span(span: Any, exc: Optional[BaseException] = None) -> None:
+    """End a span, optionally recording an error first.
+
+    No-op when *span* is ``None``.
+
+    :param span: The OTel span to end, or ``None``.
+    :param exc: Optional exception to record before ending.
+    """
+    if span is None:
+        return
+    if exc is not None:
+        record_error(span, exc)
+    span.end()
+
+
+def record_error(span: Any, exc: BaseException) -> None:
+    """Record an exception and ERROR status on a span.
+
+    Sets ``error.type`` and ``otel.status.description`` per OTel
+    semantic conventions.
+
+    :param span: The OTel span, or ``None``.
+    :param exc: The exception to record.
+    """
+    if span is not None:
+        span.set_status(trace.StatusCode.ERROR, str(exc))
+        span.set_attribute("error.type", type(exc).__name__)
+        span.record_exception(exc)
+
+
+async def trace_stream(
+    iterator: AsyncIterable[_Content], span: Any
+) -> AsyncIterator[_Content]:
+    """Wrap a streaming body so the span covers the full transmission.
+
+    Yields chunks unchanged.  Ends the span when the iterator is
+    exhausted or raises an exception.
+
+    :param iterator: The async iterable to wrap.
+    :param span: The OTel span to end on completion, or ``None``.
+    :return: An async iterator yielding chunks unchanged.
+    """
+    error: Optional[BaseException] = None
+    try:
+        async for chunk in iterator:
+            yield chunk
+    except BaseException as exc:
+        error = exc
+        raise
+    finally:
+        end_span(span, exc=error)
 
 
 # ======================================================================
@@ -300,8 +270,34 @@ def force_flush(self, timeout_millis: int = 30000) -> bool:  # pylint: disable=u
         return True
 
 
+class _BaggageLogRecordProcessor:
+    """OTel log record processor that copies W3C Baggage entries into log attributes.
+
+    Per container-image-spec §6.1, all baggage key-value pairs from the
+    current span context should appear as attributes on every log record
+    for end-to-end correlation.
+    """
+
+    def on_emit(self, log_data: Any) -> None:  # pylint: disable=unused-argument
+        """Copy baggage entries into the log record's attributes."""
+        try:
+            ctx = _otel_context.get_current()
+            entries = _otel_baggage.get_all(context=ctx)
+            if entries and hasattr(log_data, 'log_record') and log_data.log_record:
+                for key, value in entries.items():
+                    log_data.log_record.attributes[key] = value  # type: ignore[index]
+        except Exception:  # pylint: disable=broad-except
+            pass
+
+    def shutdown(self) -> None:
+        pass
+
+    def force_flush(self, timeout_millis: int = 30000) -> bool:  # pylint: disable=unused-argument
+        return True
+
+
 # ======================================================================
-# Infrastructure: resource, provider, exporters
+# Internal: resource, provider, exporters
 # ======================================================================
 
 
@@ -309,9 +305,12 @@ def _create_resource() -> Any:
     try:
         from opentelemetry.sdk.resources import Resource
     except ImportError:
-        logger.warning("OTel SDK not installed. pip install azure-ai-agentserver-core[tracing]")
+        logger.warning("OTel SDK not installed — tracing resource creation failed.")
         return None
-    return Resource.create({_ATTR_SERVICE_NAME: _SERVICE_NAME_VALUE})
+    # service.name maps to cloud_RoleName in App Insights
+    agent_name = os.environ.get(_config._ENV_FOUNDRY_AGENT_NAME, "")  # pylint: disable=protected-access
+    service_name = agent_name or _SERVICE_NAME_VALUE
+    return Resource.create({_ATTR_SERVICE_NAME: service_name})
 
 
 def _ensure_trace_provider(resource: Any) -> Any:
@@ -329,12 +328,35 @@ def _ensure_trace_provider(resource: Any) -> Any:
     return provider
 
 
+_enrichment_configured = False
 _az_trace_configured = False
 _az_log_configured = False
 _otlp_trace_configured = False
 _otlp_log_configured = False
 
 
+def _register_enrichment_processor(provider: Any) -> None:
+    global _enrichment_configured  # pylint: disable=global-statement
+    if _enrichment_configured:
+        return
+    agent_name = _config.resolve_agent_name() or None
+    agent_version = _config.resolve_agent_version() or None
+    project_id = _config.resolve_project_id() or None
+
+    if agent_name and agent_version:
+        agent_id = f"{agent_name}:{agent_version}"
+    elif agent_name:
+        agent_id = agent_name
+    else:
+        agent_id = None
+
+    provider.add_span_processor(_FoundryEnrichmentSpanProcessor(
+        agent_name=agent_name, agent_version=agent_version,
+        agent_id=agent_id, project_id=project_id,
+    ))
+    _enrichment_configured = True
+
+
 def _setup_trace_export(provider: Any, connection_string: str) -> None:
     global _az_trace_configured  # pylint: disable=global-statement
     if _az_trace_configured or provider is None:
@@ -367,6 +389,7 @@ def _setup_log_export(resource: Any, connection_string: str) -> None:
     set_logger_provider(log_provider)
     log_provider.add_log_record_processor(BatchLogRecordProcessor(
         AzureMonitorLogExporter(connection_string=connection_string)))
+    log_provider.add_log_record_processor(_BaggageLogRecordProcessor())
     logging.getLogger().addHandler(LoggingHandler(logger_provider=log_provider))
     _az_log_configured = True
     logger.info("Application Insights log exporter configured.")
@@ -394,7 +417,7 @@ def _setup_otlp_log_export(resource: Any, endpoint: str) -> None:
     try:
         from opentelemetry._logs import get_logger_provider
         from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter
-        from opentelemetry.sdk._logs import LoggerProvider
+        from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
         from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
     except ImportError:
         logger.warning("OTLP log export requires opentelemetry-exporter-otlp-proto-grpc.")
@@ -408,6 +431,13 @@ def _setup_otlp_log_export(resource: Any, endpoint: str) -> None:
         set_logger_provider(log_provider)
     log_provider.add_log_record_processor(BatchLogRecordProcessor(
         OTLPLogExporter(endpoint=endpoint)))  # type: ignore[union-attr]
+    log_provider.add_log_record_processor(_BaggageLogRecordProcessor())
+    # Note: LoggingHandler is NOT added here to avoid duplicating the
+    # handler already installed by _setup_log_export. The OTel LoggerProvider
+    # receives log records via the handler added there (or from direct OTel
+    # log API usage).  If OTLP is the only exporter, add a handler:
+    if not _az_log_configured:
+        logging.getLogger().addHandler(LoggingHandler(logger_provider=log_provider))
     _otlp_log_configured = True
     logger.info("OTLP log exporter configured (endpoint=%s).", endpoint)
 
diff --git a/sdk/agentserver/azure-ai-agentserver-core/pyproject.toml b/sdk/agentserver/azure-ai-agentserver-core/pyproject.toml
index 4080fb6de696..46a8e2672095 100644
--- a/sdk/agentserver/azure-ai-agentserver-core/pyproject.toml
+++ b/sdk/agentserver/azure-ai-agentserver-core/pyproject.toml
@@ -23,10 +23,6 @@ keywords = ["azure", "azure sdk", "agent", "agentserver", "core"]
 dependencies = [
     "starlette>=0.45.0",
     "hypercorn>=0.17.0",
-]
-
-[project.optional-dependencies]
-tracing = [
     "opentelemetry-api>=1.20.0",
     "opentelemetry-sdk>=1.20.0",
     "opentelemetry-exporter-otlp-proto-grpc>=1.20.0",
diff --git a/sdk/agentserver/azure-ai-agentserver-core/samples/selfhosted_invocation/selfhosted_invocation.py b/sdk/agentserver/azure-ai-agentserver-core/samples/selfhosted_invocation/selfhosted_invocation.py
index 1b6152ff0f31..9fc296ef775b 100644
--- a/sdk/agentserver/azure-ai-agentserver-core/samples/selfhosted_invocation/selfhosted_invocation.py
+++ b/sdk/agentserver/azure-ai-agentserver-core/samples/selfhosted_invocation/selfhosted_invocation.py
@@ -13,7 +13,7 @@
 
 Usage::
 
-    pip install azure-ai-agentserver-core[tracing]
+    pip install azure-ai-agentserver-core
 
     # Enable tracing via App Insights connection string
     export APPLICATIONINSIGHTS_CONNECTION_STRING="InstrumentationKey=..."
@@ -28,18 +28,18 @@
     curl http://localhost:8088/readiness
     # -> {"status": "healthy"}
 """
-import contextlib
+import logging
 import os
 import uuid
-from typing import Any, Optional
+from typing import Any
 
 from starlette.requests import Request
 from starlette.responses import JSONResponse, Response
 from starlette.routing import Route
 
-from azure.ai.agentserver.core import get_logger, AgentServerHost, TracingHelper
+from azure.ai.agentserver.core import AgentServerHost, record_error
 
-logger = get_logger()
+logger = logging.getLogger("azure.ai.agentserver")
 
 
 class SelfHostedInvocationHost(AgentServerHost):
@@ -61,18 +61,10 @@ async def _invoke(self, request: Request) -> Response:
             or str(uuid.uuid4())
         )
 
-        if self.tracing is not None:
-            span_cm = self.tracing.request_span(
-                headers=request.headers,
-                invocation_id=invocation_id,
-                span_operation="invoke_agent",
-                operation_name="invoke_agent",
-                session_id=session_id,
-            )
-        else:
-            span_cm = contextlib.nullcontext(None)
-
-        with span_cm as otel_span:
+        with self.request_span(
+            request.headers, invocation_id, "invoke_agent",
+            operation_name="invoke_agent", session_id=session_id,
+        ) as otel_span:
             logger.info("Processing invocation %s in session %s", invocation_id, session_id)
 
             try:
@@ -80,8 +72,7 @@ async def _invoke(self, request: Request) -> Response:
                 name = data.get("name", "World")
                 result = {"greeting": f"Hello, {name}!"}
             except Exception as exc:
-                if self.tracing is not None and otel_span is not None:
-                    self.tracing.record_error(otel_span, exc)
+                record_error(otel_span, exc)
                 logger.error("Invocation %s failed: %s", invocation_id, exc)
                 raise
 
diff --git a/sdk/agentserver/azure-ai-agentserver-core/tests/test_graceful_shutdown.py b/sdk/agentserver/azure-ai-agentserver-core/tests/test_graceful_shutdown.py
index 652bbb62317c..5a8f2374dbb9 100644
--- a/sdk/agentserver/azure-ai-agentserver-core/tests/test_graceful_shutdown.py
+++ b/sdk/agentserver/azure-ai-agentserver-core/tests/test_graceful_shutdown.py
@@ -5,6 +5,7 @@
 import asyncio
 import logging
 import os
+import signal
 from unittest import mock
 
 import pytest
@@ -303,3 +304,71 @@ async def send(message):
 
     await agent(scope, receive, send)
     assert completed is False  # handler was NOT called
+
+
+# ------------------------------------------------------------------ #
+# SIGTERM handler registration in run()
+# ------------------------------------------------------------------ #
+
+
+class TestSigtermHandler:
+    """Tests for SIGTERM signal handler installed by run()."""
+
+    def _restore_sigterm(self):
+        """Fixture-like helper: save and restore the SIGTERM handler."""
+        original = signal.getsignal(signal.SIGTERM)
+        yield
+        signal.signal(signal.SIGTERM, original)
+
+    def test_run_installs_sigterm_handler(self) -> None:
+        """run() registers a SIGTERM handler that logs and re-raises."""
+        original = signal.getsignal(signal.SIGTERM)
+        try:
+            agent = AgentServerHost(graceful_shutdown_timeout=5)
+            handler_at_serve_time = None
+
+            def fake_asyncio_run(coro):
+                nonlocal handler_at_serve_time
+                handler_at_serve_time = signal.getsignal(signal.SIGTERM)
+                coro.close()
+
+            with mock.patch("asyncio.run", side_effect=fake_asyncio_run):
+                agent.run(host="127.0.0.1", port=9999)
+
+            assert handler_at_serve_time is not None
+            assert callable(handler_at_serve_time)
+            assert handler_at_serve_time is not original
+        finally:
+            signal.signal(signal.SIGTERM, original)
+
+    def test_sigterm_handler_logs_and_reraises(self, caplog: pytest.LogCaptureFixture) -> None:
+        """The installed SIGTERM handler logs then re-raises via os.kill."""
+        original = signal.getsignal(signal.SIGTERM)
+        try:
+            agent = AgentServerHost(graceful_shutdown_timeout=5)
+            handler_at_serve_time = None
+
+            def fake_asyncio_run(coro):
+                nonlocal handler_at_serve_time
+                handler_at_serve_time = signal.getsignal(signal.SIGTERM)
+                coro.close()
+
+            with mock.patch("asyncio.run", side_effect=fake_asyncio_run):
+                agent.run(host="127.0.0.1", port=9999)
+
+            assert callable(handler_at_serve_time)
+
+            # Invoke the handler and verify it:
+            # 1) logs the message
+            # 2) restores the original handler
+            # 3) calls os.kill to re-raise
+            with (
+                caplog.at_level(logging.INFO, logger="azure.ai.agentserver"),
+                mock.patch("os.kill") as mock_kill,
+            ):
+                handler_at_serve_time(signal.SIGTERM, None)
+
+            assert any("SIGTERM received" in r.message for r in caplog.records)
+            mock_kill.assert_called_once_with(os.getpid(), signal.SIGTERM)
+        finally:
+            signal.signal(signal.SIGTERM, original)
diff --git a/sdk/agentserver/azure-ai-agentserver-core/tests/test_server_routes.py b/sdk/agentserver/azure-ai-agentserver-core/tests/test_server_routes.py
index de7af76363f6..85e28c1bf15e 100644
--- a/sdk/agentserver/azure-ai-agentserver-core/tests/test_server_routes.py
+++ b/sdk/agentserver/azure-ai-agentserver-core/tests/test_server_routes.py
@@ -10,7 +10,7 @@
 
 from azure.ai.agentserver.core import AgentServerHost
 from azure.ai.agentserver.core._config import resolve_port
-from azure.ai.agentserver.core._constants import Constants
+
 
 
 # ------------------------------------------------------------------ #
@@ -34,7 +34,7 @@ def test_default_port(self) -> None:
             env = os.environ.copy()
             env.pop("PORT", None)
             with mock.patch.dict(os.environ, env, clear=True):
-                assert resolve_port(None) == Constants.DEFAULT_PORT
+                assert resolve_port(None) == 8088
 
     def test_invalid_env_var_raises(self) -> None:
         with mock.patch.dict(os.environ, {"PORT": "not-a-number"}):
diff --git a/sdk/agentserver/azure-ai-agentserver-core/tests/test_tracing.py b/sdk/agentserver/azure-ai-agentserver-core/tests/test_tracing.py
index da212dabe18b..2758d4e8353f 100644
--- a/sdk/agentserver/azure-ai-agentserver-core/tests/test_tracing.py
+++ b/sdk/agentserver/azure-ai-agentserver-core/tests/test_tracing.py
@@ -2,7 +2,6 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 """Tests for tracing configuration — not invocation spans (those live in the invocations package)."""
-import contextlib
 import os
 from unittest import mock
 
@@ -12,7 +11,7 @@
     resolve_agent_version,
     resolve_appinsights_connection_string,
 )
-from azure.ai.agentserver.core._constants import Constants
+
 
 
 # ------------------------------------------------------------------ #
@@ -21,41 +20,42 @@
 
 
 class TestTracingToggle:
-    """Tracing is enabled when App Insights or OTLP endpoint is configured."""
+    """Tracing is configured when App Insights or OTLP endpoint is available."""
 
     def test_tracing_disabled_when_no_endpoints(self) -> None:
         env = os.environ.copy()
-        env.pop(Constants.APPLICATIONINSIGHTS_CONNECTION_STRING, None)
-        env.pop(Constants.OTEL_EXPORTER_OTLP_ENDPOINT, None)
+        env.pop("APPLICATIONINSIGHTS_CONNECTION_STRING", None)
+        env.pop("OTEL_EXPORTER_OTLP_ENDPOINT", None)
         with mock.patch.dict(os.environ, env, clear=True):
-            agent = AgentServerHost()
-            assert agent.tracing is None
+            mock_configure = mock.MagicMock()
+            AgentServerHost(configure_tracing=mock_configure)
+            mock_configure.assert_not_called()
 
     def test_tracing_enabled_via_appinsights_env_var(self) -> None:
-        with mock.patch.dict(os.environ, {Constants.APPLICATIONINSIGHTS_CONNECTION_STRING: "InstrumentationKey=test"}):
-            with mock.patch(
-                "azure.ai.agentserver.core._tracing.TracingHelper.__init__",
-                return_value=None,
-            ):
-                agent = AgentServerHost()
-                assert agent.tracing is not None
+        with mock.patch.dict(os.environ, {"APPLICATIONINSIGHTS_CONNECTION_STRING": "InstrumentationKey=test"}):
+            mock_configure = mock.MagicMock()
+            AgentServerHost(configure_tracing=mock_configure)
+            mock_configure.assert_called_once()
 
     def test_tracing_enabled_via_otlp_env_var(self) -> None:
-        with mock.patch.dict(os.environ, {Constants.OTEL_EXPORTER_OTLP_ENDPOINT: "http://localhost:4318"}):
-            with mock.patch(
-                "azure.ai.agentserver.core._tracing.TracingHelper.__init__",
-                return_value=None,
-            ):
-                agent = AgentServerHost()
-                assert agent.tracing is not None
+        with mock.patch.dict(os.environ, {"OTEL_EXPORTER_OTLP_ENDPOINT": "http://localhost:4318"}):
+            mock_configure = mock.MagicMock()
+            AgentServerHost(configure_tracing=mock_configure)
+            mock_configure.assert_called_once()
 
     def test_tracing_enabled_via_constructor_connection_string(self) -> None:
-        with mock.patch(
-            "azure.ai.agentserver.core._tracing.TracingHelper.__init__",
-            return_value=None,
-        ):
-            agent = AgentServerHost(applicationinsights_connection_string="InstrumentationKey=ctor")
-            assert agent.tracing is not None
+        mock_configure = mock.MagicMock()
+        AgentServerHost(
+            applicationinsights_connection_string="InstrumentationKey=ctor",
+            configure_tracing=mock_configure,
+        )
+        mock_configure.assert_called_once_with(connection_string="InstrumentationKey=ctor")
+
+    def test_tracing_disabled_when_configure_tracing_is_none(self) -> None:
+        """Passing configure_tracing=None disables tracing entirely."""
+        with mock.patch.dict(os.environ, {"APPLICATIONINSIGHTS_CONNECTION_STRING": "InstrumentationKey=test"}):
+            # Should not raise even with App Insights configured
+            AgentServerHost(configure_tracing=None)
 
 
 # ------------------------------------------------------------------ #
@@ -72,20 +72,20 @@ def test_explicit_wins(self) -> None:
     def test_env_var(self) -> None:
         with mock.patch.dict(
             os.environ,
-            {Constants.APPLICATIONINSIGHTS_CONNECTION_STRING: "InstrumentationKey=env"},
+            {"APPLICATIONINSIGHTS_CONNECTION_STRING": "InstrumentationKey=env"},
         ):
             assert resolve_appinsights_connection_string(None) == "InstrumentationKey=env"
 
     def test_none_when_unset(self) -> None:
         env = os.environ.copy()
-        env.pop(Constants.APPLICATIONINSIGHTS_CONNECTION_STRING, None)
+        env.pop("APPLICATIONINSIGHTS_CONNECTION_STRING", None)
         with mock.patch.dict(os.environ, env, clear=True):
             assert resolve_appinsights_connection_string(None) is None
 
     def test_explicit_overrides_env_var(self) -> None:
         with mock.patch.dict(
             os.environ,
-            {Constants.APPLICATIONINSIGHTS_CONNECTION_STRING: "InstrumentationKey=env"},
+            {"APPLICATIONINSIGHTS_CONNECTION_STRING": "InstrumentationKey=env"},
         ):
             result = resolve_appinsights_connection_string("InstrumentationKey=explicit")
             assert result == "InstrumentationKey=explicit"
@@ -97,44 +97,27 @@ def test_explicit_overrides_env_var(self) -> None:
 
 
 class TestSetupAzureMonitor:
-    """Verify _setup_azure_monitor calls the right helpers."""
-
-    @staticmethod
-    def _tracing_mocks() -> contextlib.ExitStack:
-        """Enter the common set of mocks needed to instantiate TracingHelper."""
-        stack = contextlib.ExitStack()
-        stack.enter_context(mock.patch("azure.ai.agentserver.core._tracing._HAS_OTEL", True))
-        stack.enter_context(mock.patch("azure.ai.agentserver.core._tracing.trace", create=True))
-        stack.enter_context(
-            mock.patch("azure.ai.agentserver.core._tracing.TraceContextTextMapPropagator", create=True)
-        )
-        stack.enter_context(
-            mock.patch("azure.ai.agentserver.core._tracing._ensure_trace_provider", return_value=mock.MagicMock())
-        )
-        return stack
+    """Verify configure_tracing calls the right exporter setup functions."""
 
     def test_setup_azure_monitor_called_when_conn_str_provided(self) -> None:
-        with self._tracing_mocks():
-            with mock.patch(
-                "azure.ai.agentserver.core._tracing.TracingHelper._setup_azure_monitor"
-            ) as mock_setup:
-                with mock.patch("azure.ai.agentserver.core._tracing.TracingHelper._setup_otlp_export"):
-                    from azure.ai.agentserver.core._tracing import TracingHelper
-                    TracingHelper(connection_string="InstrumentationKey=test")
-                    # _setup_azure_monitor receives (connection_string, resource, trace_provider)
-                    mock_setup.assert_called_once()
-                    args = mock_setup.call_args[0]
-                    assert args[0] == "InstrumentationKey=test"
+        with mock.patch("azure.ai.agentserver.core._tracing._setup_trace_export") as mock_trace:
+            with mock.patch("azure.ai.agentserver.core._tracing._setup_log_export"):
+                with mock.patch("azure.ai.agentserver.core._tracing._setup_otlp_trace_export"):
+                    with mock.patch("azure.ai.agentserver.core._tracing._setup_otlp_log_export"):
+                        from azure.ai.agentserver.core import _tracing
+                        _tracing.configure_tracing(connection_string="InstrumentationKey=test")
+                        mock_trace.assert_called_once()
+                        args = mock_trace.call_args[0]
+                        assert args[1] == "InstrumentationKey=test"
 
     def test_setup_azure_monitor_not_called_when_no_conn_str(self) -> None:
-        with self._tracing_mocks():
-            with mock.patch(
-                "azure.ai.agentserver.core._tracing.TracingHelper._setup_azure_monitor"
-            ) as mock_setup:
-                with mock.patch("azure.ai.agentserver.core._tracing.TracingHelper._setup_otlp_export"):
-                    from azure.ai.agentserver.core._tracing import TracingHelper
-                    TracingHelper(connection_string=None)
-                    mock_setup.assert_not_called()
+        with mock.patch("azure.ai.agentserver.core._tracing._setup_trace_export") as mock_trace:
+            with mock.patch("azure.ai.agentserver.core._tracing._setup_log_export"):
+                with mock.patch("azure.ai.agentserver.core._tracing._setup_otlp_trace_export"):
+                    with mock.patch("azure.ai.agentserver.core._tracing._setup_otlp_log_export"):
+                        from azure.ai.agentserver.core import _tracing
+                        _tracing.configure_tracing(connection_string=None)
+                        mock_trace.assert_not_called()
 
 
 # ------------------------------------------------------------------ #
@@ -143,17 +126,15 @@ def test_setup_azure_monitor_not_called_when_no_conn_str(self) -> None:
 
 
 class TestConstructorConnectionString:
-    """Verify AgentServerHost forwards the connection string to TracingHelper."""
+    """Verify AgentServerHost forwards the connection string to configure_tracing."""
 
     def test_constructor_passes_connection_string(self) -> None:
-        with mock.patch(
-            "azure.ai.agentserver.core._tracing.TracingHelper.__init__",
-            return_value=None,
-        ) as mock_init:
-            AgentServerHost(
-                applicationinsights_connection_string="InstrumentationKey=ctor",
-            )
-            mock_init.assert_called_once_with(connection_string="InstrumentationKey=ctor")
+        mock_configure = mock.MagicMock()
+        AgentServerHost(
+            applicationinsights_connection_string="InstrumentationKey=ctor",
+            configure_tracing=mock_configure,
+        )
+        mock_configure.assert_called_once_with(connection_string="InstrumentationKey=ctor")
 
 
 # ------------------------------------------------------------------ #
@@ -165,22 +146,22 @@ class TestAgentIdentityResolution:
     """Tests for resolve_agent_name() and resolve_agent_version()."""
 
     def test_agent_name_from_env(self) -> None:
-        with mock.patch.dict(os.environ, {Constants.FOUNDRY_AGENT_NAME: "my-agent"}):
+        with mock.patch.dict(os.environ, {"FOUNDRY_AGENT_NAME": "my-agent"}):
             assert resolve_agent_name() == "my-agent"
 
     def test_agent_name_default_empty(self) -> None:
         env = os.environ.copy()
-        env.pop(Constants.FOUNDRY_AGENT_NAME, None)
+        env.pop("FOUNDRY_AGENT_NAME", None)
         with mock.patch.dict(os.environ, env, clear=True):
             assert resolve_agent_name() == ""
 
     def test_agent_version_from_env(self) -> None:
-        with mock.patch.dict(os.environ, {Constants.FOUNDRY_AGENT_VERSION: "2.0"}):
+        with mock.patch.dict(os.environ, {"FOUNDRY_AGENT_VERSION": "2.0"}):
             assert resolve_agent_version() == "2.0"
 
     def test_agent_version_default_empty(self) -> None:
         env = os.environ.copy()
-        env.pop(Constants.FOUNDRY_AGENT_VERSION, None)
+        env.pop("FOUNDRY_AGENT_VERSION", None)
         with mock.patch.dict(os.environ, env, clear=True):
             assert resolve_agent_version() == ""
 
diff --git a/sdk/agentserver/azure-ai-agentserver-githubcopilot/azure/ai/agentserver/githubcopilot/_copilot_adapter.py b/sdk/agentserver/azure-ai-agentserver-githubcopilot/azure/ai/agentserver/githubcopilot/_copilot_adapter.py
index ea6d2a72385c..8e22e10279d1 100644
--- a/sdk/agentserver/azure-ai-agentserver-githubcopilot/azure/ai/agentserver/githubcopilot/_copilot_adapter.py
+++ b/sdk/agentserver/azure-ai-agentserver-githubcopilot/azure/ai/agentserver/githubcopilot/_copilot_adapter.py
@@ -18,7 +18,7 @@
     most developers should use.
 """
 import asyncio
-import logging as _logging
+import logging
 import os
 import pathlib
 import time
@@ -38,7 +38,9 @@
         ProviderConfig = dict
 
 from azure.ai.agentserver.core.constants import Constants
-from azure.ai.agentserver.core.logger import get_logger
+
+logger = logging.getLogger("azure.ai.agentserver")
+
 from azure.ai.agentserver.core.models import Response as OpenAIResponse
 from azure.ai.agentserver.core.models.projects import (
     ResponseCompletedEvent,
@@ -59,10 +61,9 @@
 from ._copilot_response_converter import CopilotResponseConverter, CopilotStreamingResponseConverter
 from ._tool_acl import ToolAcl
 
-logger = get_logger()
 
 # Suppress noisy OTel detach warnings from async generator context switches.
-_logging.getLogger("opentelemetry.context").setLevel(_logging.CRITICAL)
+logging.getLogger("opentelemetry.context").setLevel(logging.CRITICAL)
 
 _COGNITIVE_SERVICES_SCOPE = "https://cognitiveservices.azure.com/.default"
 
@@ -71,12 +72,12 @@
 # Health-check log filter
 # ---------------------------------------------------------------------------
 
-class _HealthCheckFilter(_logging.Filter):
+class _HealthCheckFilter(logging.Filter):
     """Drop health-check access-log records so they don't pollute App Insights."""
 
     _PATHS = ("/liveness", "/readiness")
 
-    def filter(self, record: _logging.LogRecord) -> bool:  # noqa: A003
+    def filter(self, record: logging.LogRecord) -> bool:  # noqa: A003
         msg = record.getMessage()
         return not any(p in msg for p in self._PATHS)
 
@@ -209,7 +210,7 @@ def __init__(
         # is cosmetic (health-check noise), not a functional issue.
         _hc_filter = _HealthCheckFilter()
         for _name in ("uvicorn", "uvicorn.access", "uvicorn.error"):
-            _logging.getLogger(_name).addFilter(_hc_filter)
+            logging.getLogger(_name).addFilter(_hc_filter)
 
         # Build default config (handles BYOK provider setup from env vars)
         default_config = _build_session_config()
diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/CHANGELOG.md b/sdk/agentserver/azure-ai-agentserver-invocations/CHANGELOG.md
index 1cb00d1154d0..4aaf13f5595a 100644
--- a/sdk/agentserver/azure-ai-agentserver-invocations/CHANGELOG.md
+++ b/sdk/agentserver/azure-ai-agentserver-invocations/CHANGELOG.md
@@ -5,11 +5,13 @@
 ### Features Added
 
 - Initial release of `azure-ai-agentserver-invocations`.
-- `InvocationHandler` for wiring invocation protocol endpoints to an `AgentHost`.
-- Decorator-based handler registration (`@invocations.invoke_handler`).
+- `InvocationAgentServerHost` — a Starlette-based host subclass for the invocations protocol.
+- Decorator-based handler registration (`@app.invoke_handler`, `@app.get_invocation_handler`, `@app.cancel_invocation_handler`).
 - Optional `GET /invocations/{id}` and `POST /invocations/{id}/cancel` endpoints.
 - `GET /invocations/docs/openapi.json` for OpenAPI spec serving.
 - Invocation ID tracking and session correlation via `agent_session_id` query parameter.
 - Distributed tracing with GenAI semantic convention span attributes.
-- W3C Baggage propagation for cross-service correlation.
+- W3C Baggage propagation of `invocation_id` and `session_id` for cross-service correlation.
+- Structured logging with `invocation_id` and `session_id` via `contextvars`.
 - Streaming response support with span lifecycle management.
+- Cooperative mixin inheritance for multi-protocol composition.
diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/README.md b/sdk/agentserver/azure-ai-agentserver-invocations/README.md
index a4624fa18301..b32b43c3a3a5 100644
--- a/sdk/agentserver/azure-ai-agentserver-invocations/README.md
+++ b/sdk/agentserver/azure-ai-agentserver-invocations/README.md
@@ -1,4 +1,4 @@
-# Azure AI AgentServerHost Invocations for Python
+# Azure AI Agent Server Invocations client library for Python
 
 The `azure-ai-agentserver-invocations` package provides the invocation protocol endpoints for Azure AI Hosted Agent containers. It plugs into the [`azure-ai-agentserver-core`](https://pypi.org/project/azure-ai-agentserver-core/) host framework and adds the full invocation lifecycle: `POST /invocations`, `GET /invocations/{id}`, `POST /invocations/{id}/cancel`, and `GET /invocations/docs/openapi.json`.
 
@@ -76,7 +76,6 @@ When tracing is enabled on the `AgentServerHost`, invocation spans are automatic
 
 ```python
 from azure.ai.agentserver.invocations import InvocationAgentServerHost
-from azure.ai.agentserver.invocations import InvocationAgentServerHost
 from starlette.requests import Request
 from starlette.responses import JSONResponse, Response
 
@@ -97,7 +96,6 @@ app.run()
 import asyncio
 import json
 
-from azure.ai.agentserver.invocations import InvocationAgentServerHost
 from azure.ai.agentserver.invocations import InvocationAgentServerHost
 from starlette.requests import Request
 from starlette.responses import JSONResponse, Response
@@ -138,7 +136,6 @@ async def cancel_invocation(request: Request) -> Response:
 ```python
 import json
 
-from azure.ai.agentserver.invocations import InvocationAgentServerHost
 from azure.ai.agentserver.invocations import InvocationAgentServerHost
 from starlette.requests import Request
 from starlette.responses import Response, StreamingResponse
diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/azure/ai/agentserver/invocations/_invocation.py b/sdk/agentserver/azure-ai-agentserver-invocations/azure/ai/agentserver/invocations/_invocation.py
index a4e026073705..d6e6b81f43be 100644
--- a/sdk/agentserver/azure-ai-agentserver-invocations/azure/ai/agentserver/invocations/_invocation.py
+++ b/sdk/agentserver/azure-ai-agentserver-invocations/azure/ai/agentserver/invocations/_invocation.py
@@ -6,34 +6,72 @@
 Provides the invocation protocol endpoints and handler decorators
 as a :class:`~azure.ai.agentserver.core.AgentServerHost` subclass.
 """
-import contextlib
+import contextvars
 import inspect
-import os
+import logging
 import re
+import threading
 import uuid
 from collections.abc import Awaitable, Callable  # pylint: disable=import-error
 from typing import Any, Optional
 
+from opentelemetry import baggage as _otel_baggage, context as _otel_context
 from starlette.requests import Request
 from starlette.responses import JSONResponse, Response, StreamingResponse
 from starlette.routing import Route
 
 from azure.ai.agentserver.core import (  # pylint: disable=no-name-in-module
     AgentServerHost,
-    get_logger,
-    Constants,
     create_error_response,
+    end_span,
+    record_error,
+    trace_stream,
 )
 
 from ._constants import InvocationConstants
 
-logger = get_logger()
+logger = logging.getLogger("azure.ai.agentserver")
 
 # Maximum length and allowed characters for user-provided IDs (defense in depth).
 _MAX_ID_LENGTH = 256
 _VALID_ID_RE = re.compile(r"^[a-zA-Z0-9\-_.:]+$")
 
 
+# Context variables for structured logging — concurrency-safe alternative to logger filters.
+_invocation_id_var: contextvars.ContextVar[str] = contextvars.ContextVar("invocation_id", default="")
+_session_id_var: contextvars.ContextVar[str] = contextvars.ContextVar("session_id", default="")
+
+
+class _InvocationLogFilter(logging.Filter):
+    """Attach invocation and session IDs to every log record from context vars.
+
+    Reads from ``contextvars`` rather than instance state, so a single
+    filter instance can be installed once on the logger (not per-request).
+    """
+
+    def filter(self, record: logging.LogRecord) -> bool:
+        record.invocation_id = _invocation_id_var.get("")  # type: ignore[attr-defined]
+        record.session_id = _session_id_var.get("")  # type: ignore[attr-defined]
+        return True
+
+
+# Install once on first request — no per-request add/remove needed.
+_log_filter_lock = threading.Lock()
+_log_filter_installed = False
+
+
+def _ensure_log_filter() -> None:
+    """Install the log filter on first use (lazy, thread-safe)."""
+    global _log_filter_installed  # pylint: disable=global-statement
+    if _log_filter_installed:
+        return
+    with _log_filter_lock:
+        if _log_filter_installed:
+            return
+        logger.addFilter(_InvocationLogFilter())
+        _log_filter_installed = True
+
+
 def _sanitize_id(value: str, fallback: str) -> str:
     """Validate a user-provided ID string.
 
@@ -82,6 +120,8 @@ async def handle(request):
     :type openapi_spec: Optional[dict[str, Any]]
     """
 
+    _INSTRUMENTATION_SCOPE = "Azure.AI.AgentServer.Invocations"
+
     def __init__(
         self,
         *,
@@ -252,10 +292,9 @@ def _wrap_streaming_response(
         :return: The same response object, with its body_iterator replaced.
         :rtype: ~starlette.responses.StreamingResponse
         """
-        if self._tracing is None:
+        if otel_span is None:
             return response
-
-        response.body_iterator = self._tracing.trace_stream(response.body_iterator, otel_span)
+        response.body_iterator = trace_stream(response.body_iterator, otel_span)
         return response
 
     # ------------------------------------------------------------------
@@ -277,21 +316,41 @@ async def _create_invocation_endpoint(self, request: Request) -> Response:
         # Session ID: query param overrides env var / generated UUID
         raw_session_id = (
             request.query_params.get("agent_session_id")
-            or os.environ.get(Constants.FOUNDRY_AGENT_SESSION_ID)
+            or self.config.session_id
             or ""
         )
         session_id = _sanitize_id(raw_session_id, str(uuid.uuid4()))
         request.state.session_id = session_id
 
-        with self._request_span(
+        # Platform isolation headers — expose to handlers
+        request.state.user_isolation_key = request.headers.get("x-agent-user-isolation-key", "")
+        request.state.chat_isolation_key = request.headers.get("x-agent-chat-isolation-key", "")
+
+        with self.request_span(
             request.headers, invocation_id, "invoke_agent",
             operation_name="invoke_agent", session_id=session_id,
+            end_on_exit=False,
         ) as otel_span:
             self._safe_set_attrs(otel_span, {
                 InvocationConstants.ATTR_SPAN_INVOCATION_ID: invocation_id,
                 InvocationConstants.ATTR_SPAN_SESSION_ID: session_id,
             })
 
+            # Propagate invocation/session IDs as W3C baggage so downstream
+            # services receive them automatically via the baggage header.
+            ctx = _otel_context.get_current()
+            ctx = _otel_baggage.set_baggage(
+                "azure.ai.agentserver.invocation_id", invocation_id, context=ctx,
+            )
+            ctx = _otel_baggage.set_baggage(
+                "azure.ai.agentserver.session_id", session_id, context=ctx,
+            )
+            baggage_token = _otel_context.attach(ctx)
+
+            # Set structured logging context (concurrency-safe via contextvars)
+            _ensure_log_filter()
+            inv_token = _invocation_id_var.set(invocation_id)
+            sess_token = _session_id_var.set(session_id)
             try:
                 response = await self._dispatch_invoke(request)
                 response.headers[InvocationConstants.INVOCATION_ID_HEADER] = invocation_id
@@ -301,8 +360,7 @@ async def _create_invocation_endpoint(self, request: Request) -> Response:
                     InvocationConstants.ATTR_SPAN_ERROR_CODE: "not_implemented",
                     InvocationConstants.ATTR_SPAN_ERROR_MESSAGE: str(exc),
                 })
-                if self._tracing is not None:
-                    self._tracing.end_span(otel_span, exc=exc)
+                end_span(otel_span, exc=exc)
                 logger.error("Invocation %s failed: %s", invocation_id, exc)
                 return create_error_response(
                     "not_implemented",
@@ -318,8 +376,7 @@ async def _create_invocation_endpoint(self, request: Request) -> Response:
                     InvocationConstants.ATTR_SPAN_ERROR_CODE: "internal_error",
                     InvocationConstants.ATTR_SPAN_ERROR_MESSAGE: str(exc),
                 })
-                if self._tracing is not None:
-                    self._tracing.end_span(otel_span, exc=exc)
+                end_span(otel_span, exc=exc)
                 logger.error("Error processing invocation %s: %s", invocation_id, exc, exc_info=True)
                 return create_error_response(
                     "internal_error",
@@ -330,68 +387,44 @@ async def _create_invocation_endpoint(self, request: Request) -> Response:
                         InvocationConstants.SESSION_ID_HEADER: session_id,
                     },
                 )
+            finally:
+                _invocation_id_var.reset(inv_token)
+                _session_id_var.reset(sess_token)
+                try:
+                    _otel_context.detach(baggage_token)
+                except ValueError:
+                    pass
 
             if isinstance(response, StreamingResponse):
-                # trace_stream will end the span when streaming completes
                 return self._wrap_streaming_response(response, otel_span)
 
-            # Non-streaming: end span now
-            if self._tracing is not None:
-                self._tracing.end_span(otel_span)
-
+            end_span(otel_span)
             return response
 
-    def _request_span(
-        self,
-        headers: Any,
-        invocation_id: str,
-        span_operation: str,
-        operation_name: Optional[str] = None,
-        session_id: str = "",
-    ) -> Any:
-        """Create a request span — returns a no-op context manager when tracing is off.
-
-        :param headers: HTTP request headers.
-        :type headers: any
-        :param invocation_id: The request/invocation ID.
-        :type invocation_id: str
-        :param span_operation: Span operation name.
-        :type span_operation: str
-        :param operation_name: Optional ``gen_ai.operation.name`` value.
-        :type operation_name: str or None
-        :param session_id: Session ID (empty string if absent).
-        :type session_id: str
-        :return: Context manager yielding the OTel span or *None*.
-        :rtype: any
-        """
-        if self._tracing is not None:
-            return self._tracing.request_span(
-                headers, invocation_id, span_operation,
-                operation_name=operation_name, session_id=session_id,
-                end_on_exit=False,
-            )
-        return contextlib.nullcontext(None)
-
     async def _traced_invocation_endpoint(
         self,
         request: Request,
         span_operation: str,
         dispatch: Callable[[Request], Awaitable[Response]],
     ) -> Response:
-        invocation_id = request.path_params["invocation_id"]
+        raw_invocation_id = request.path_params["invocation_id"]
+        invocation_id = _sanitize_id(raw_invocation_id, raw_invocation_id)
         request.state.invocation_id = invocation_id
 
-        span_cm: Any = contextlib.nullcontext(None)
-        if self._tracing is not None:
-            span_cm = self._tracing.request_span(
-                request.headers, invocation_id, span_operation,
-                session_id=request.query_params.get("agent_session_id", ""),
-            )
-        with span_cm as _otel_span:
+        raw_session_id = request.query_params.get("agent_session_id", "")
+        session_id = _sanitize_id(raw_session_id, "") if raw_session_id else ""
+
+        with self.request_span(
+            request.headers, invocation_id, span_operation,
+            operation_name=span_operation, session_id=session_id,
+        ) as _otel_span:
             self._safe_set_attrs(_otel_span, {
                 InvocationConstants.ATTR_SPAN_INVOCATION_ID: invocation_id,
-                InvocationConstants.ATTR_SPAN_SESSION_ID: request.query_params.get("agent_session_id", ""),
+                InvocationConstants.ATTR_SPAN_SESSION_ID: session_id,
             })
+            _ensure_log_filter()
+            inv_token = _invocation_id_var.set(invocation_id)
+            sess_token = _session_id_var.set(session_id)
             try:
                 response = await dispatch(request)
                 response.headers[InvocationConstants.INVOCATION_ID_HEADER] = invocation_id
@@ -401,10 +434,7 @@ async def _traced_invocation_endpoint(
                     InvocationConstants.ATTR_SPAN_ERROR_CODE: "internal_error",
                     InvocationConstants.ATTR_SPAN_ERROR_MESSAGE: str(exc),
                 })
-                # The exception is caught here (not re-raised), so OTel's
-                # start_as_current_span won't see it.  Record it explicitly.
-                if self._tracing is not None:
-                    self._tracing.record_error(_otel_span, exc)
+                record_error(_otel_span, exc)
                 logger.error("Error in %s %s: %s", span_operation, invocation_id, exc, exc_info=True)
                 return create_error_response(
                     "internal_error",
@@ -412,6 +442,9 @@ async def _traced_invocation_endpoint(
                     status_code=500,
                     headers={InvocationConstants.INVOCATION_ID_HEADER: invocation_id},
                 )
+            finally:
+                _invocation_id_var.reset(inv_token)
+                _session_id_var.reset(sess_token)
 
     async def _get_invocation_endpoint(self, request: Request) -> Response:
         return await self._traced_invocation_endpoint(
diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/tests/test_session_id.py b/sdk/agentserver/azure-ai-agentserver-invocations/tests/test_session_id.py
index 51001270acdd..6398f2f8d327 100644
--- a/sdk/agentserver/azure-ai-agentserver-invocations/tests/test_session_id.py
+++ b/sdk/agentserver/azure-ai-agentserver-invocations/tests/test_session_id.py
@@ -67,15 +67,15 @@ async def handle(request: Request) -> Response:
 @pytest.mark.asyncio
 async def test_post_invocations_uses_env_var():
     """POST /invocations uses FOUNDRY_AGENT_SESSION_ID env var when no query param."""
-    app = InvocationAgentServerHost()
+    with patch.dict(os.environ, {"FOUNDRY_AGENT_SESSION_ID": "env-session"}):
+        app = InvocationAgentServerHost()
 
-    @app.invoke_handler
-    async def handle(request: Request) -> Response:
-        return Response(content=b"ok")
+        @app.invoke_handler
+        async def handle(request: Request) -> Response:
+            return Response(content=b"ok")
 
-    transport = ASGITransport(app=app)
-    async with AsyncClient(transport=transport, base_url="http://testserver") as client:
-        with patch.dict(os.environ, {"FOUNDRY_AGENT_SESSION_ID": "env-session"}):
+        transport = ASGITransport(app=app)
+        async with AsyncClient(transport=transport, base_url="http://testserver") as client:
             resp = await client.post("/invocations", content=b"test")
     assert resp.headers["x-agent-session-id"] == "env-session"
 
diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/tests/test_span_parenting.py b/sdk/agentserver/azure-ai-agentserver-invocations/tests/test_span_parenting.py
index a4acc1e712eb..7dbd295fc73c 100644
--- a/sdk/agentserver/azure-ai-agentserver-invocations/tests/test_span_parenting.py
+++ b/sdk/agentserver/azure-ai-agentserver-invocations/tests/test_span_parenting.py
@@ -30,10 +30,17 @@
     _HAS_OTEL = False
 
 if _HAS_OTEL:
+    # Reuse the global provider if already set by another test module,
+    # otherwise create one. This avoids overriding the provider when
+    # multiple test files run in the same process.
+    _existing = trace.get_tracer_provider()
+    if hasattr(_existing, "add_span_processor"):
+        _PROVIDER = _existing
+    else:
+        _PROVIDER = SdkTracerProvider()
+        trace.set_tracer_provider(_PROVIDER)
     _EXPORTER = InMemorySpanExporter()
-    _PROVIDER = SdkTracerProvider()
     _PROVIDER.add_span_processor(SimpleSpanProcessor(_EXPORTER))
-    trace.set_tracer_provider(_PROVIDER)
 else:
     _EXPORTER = None
 
@@ -53,7 +60,7 @@ def _get_spans():
 def _make_server_with_child_span():
     """Server whose handler creates a child span (simulating a framework)."""
     with patch.dict(os.environ, {"APPLICATIONINSIGHTS_CONNECTION_STRING": "InstrumentationKey=test"}):
-        with patch("azure.ai.agentserver.core._tracing.TracingHelper._setup_azure_monitor"):
+        with patch("azure.ai.agentserver.core._tracing._setup_trace_export"):
             app = InvocationAgentServerHost()
     child_tracer = trace.get_tracer("test.framework")
 
@@ -68,7 +75,7 @@ async def handle(request: Request) -> Response:
 def _make_streaming_server_with_child_span():
     """Server with streaming response whose handler creates a child span."""
     with patch.dict(os.environ, {"APPLICATIONINSIGHTS_CONNECTION_STRING": "InstrumentationKey=test"}):
-        with patch("azure.ai.agentserver.core._tracing.TracingHelper._setup_azure_monitor"):
+        with patch("azure.ai.agentserver.core._tracing._setup_trace_export"):
             app = InvocationAgentServerHost()
     child_tracer = trace.get_tracer("test.framework")
 
diff --git a/sdk/agentserver/azure-ai-agentserver-invocations/tests/test_tracing.py b/sdk/agentserver/azure-ai-agentserver-invocations/tests/test_tracing.py
index efd1f5501da9..711c08c0aa92 100644
--- a/sdk/agentserver/azure-ai-agentserver-invocations/tests/test_tracing.py
+++ b/sdk/agentserver/azure-ai-agentserver-invocations/tests/test_tracing.py
@@ -7,7 +7,7 @@
 from unittest.mock import patch
 
 import pytest
-from httpx import ASGITransport, AsyncClient
+from starlette.testclient import TestClient
 from starlette.requests import Request
 from starlette.responses import JSONResponse, Response, StreamingResponse
 
@@ -23,7 +23,7 @@
     from opentelemetry import trace
     from opentelemetry.sdk.trace import TracerProvider as SdkTracerProvider
     from opentelemetry.sdk.trace.export import SimpleSpanProcessor
-    from opentelemetry.sdk.trace.export.in_memory import InMemorySpanExporter
+    from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
 
     _HAS_OTEL = True
 except ImportError:
@@ -31,10 +31,14 @@
 
 # Module-level provider so all tests share the same exporter
 if _HAS_OTEL:
+    _existing = trace.get_tracer_provider()
+    if hasattr(_existing, "add_span_processor"):
+        _MODULE_PROVIDER = _existing
+    else:
+        _MODULE_PROVIDER = SdkTracerProvider()
+        trace.set_tracer_provider(_MODULE_PROVIDER)
     _MODULE_EXPORTER = InMemorySpanExporter()
-    _MODULE_PROVIDER = SdkTracerProvider()
     _MODULE_PROVIDER.add_span_processor(SimpleSpanProcessor(_MODULE_EXPORTER))
-    trace.set_tracer_provider(_MODULE_PROVIDER)
 else:
     _MODULE_EXPORTER = None
     _MODULE_PROVIDER = None
@@ -62,8 +66,9 @@ def _get_spans():
 
 def _make_tracing_server(**kwargs):
     """Create an InvocationAgentServerHost with tracing enabled."""
-    with patch("azure.ai.agentserver.core._tracing.TracingHelper._setup_azure_monitor"):
-        server = InvocationAgentServerHost(**kwargs)
+    with patch.dict(os.environ, {"APPLICATIONINSIGHTS_CONNECTION_STRING": "InstrumentationKey=test"}):
+        with patch("azure.ai.agentserver.core._tracing._setup_trace_export"):
+            server = InvocationAgentServerHost(**kwargs)
 
     @server.invoke_handler
     async def handle(request: Request) -> Response:
@@ -75,8 +80,9 @@ async def handle(request: Request) -> Response:
 
 def _make_tracing_server_with_get_cancel(**kwargs):
     """Create a tracing-enabled server with get/cancel handlers."""
-    with patch("azure.ai.agentserver.core._tracing.TracingHelper._setup_azure_monitor"):
-        server = InvocationAgentServerHost(**kwargs)
+    with patch.dict(os.environ, {"APPLICATIONINSIGHTS_CONNECTION_STRING": "InstrumentationKey=test"}):
+        with patch("azure.ai.agentserver.core._tracing._setup_trace_export"):
+            server = InvocationAgentServerHost(**kwargs)
 
     store: dict[str, bytes] = {}
 
@@ -93,7 +99,7 @@ async def get_handler(request: Request) -> Response:
             return Response(content=store[inv_id])
         return JSONResponse({"error": {"code": "not_found", "message": "Not found"}}, status_code=404)
 
-    @app.cancel_invocation_handler
+    @server.cancel_invocation_handler
     async def cancel_handler(request: Request) -> Response:
         inv_id = request.path_params["invocation_id"]
         if inv_id in store:
@@ -101,13 +107,14 @@ async def cancel_handler(request: Request) -> Response:
             return JSONResponse({"status": "cancelled"})
         return JSONResponse({"error": {"code": "not_found", "message": "Not found"}}, status_code=404)
 
-    return app
+    return server
 
 
 def _make_failing_tracing_server(**kwargs):
     """Create a tracing-enabled server whose handler raises."""
-    with patch("azure.ai.agentserver.core._tracing.TracingHelper._setup_azure_monitor"):
-        server = InvocationAgentServerHost(**kwargs)
+    with patch.dict(os.environ, {"APPLICATIONINSIGHTS_CONNECTION_STRING": "InstrumentationKey=test"}):
+        with patch("azure.ai.agentserver.core._tracing._setup_trace_export"):
+            server = InvocationAgentServerHost(**kwargs)
 
     @server.invoke_handler
     async def handle(request: Request) -> Response:
@@ -118,8 +125,9 @@ async def handle(request: Request) -> Response:
 
 def _make_streaming_tracing_server(**kwargs):
     """Create a tracing-enabled server with streaming response."""
-    with patch("azure.ai.agentserver.core._tracing.TracingHelper._setup_azure_monitor"):
-        server = InvocationAgentServerHost(**kwargs)
+    with patch.dict(os.environ, {"APPLICATIONINSIGHTS_CONNECTION_STRING": "InstrumentationKey=test"}):
+        with patch("azure.ai.agentserver.core._tracing._setup_trace_export"):
+            server = InvocationAgentServerHost(**kwargs)
 
     @server.invoke_handler
     async def handle(request: Request) -> StreamingResponse:
@@ -136,9 +144,8 @@ async def generate():
 # Tracing disabled by default
 # ---------------------------------------------------------------------------
 
-@pytest.mark.asyncio
-async def test_tracing_disabled_by_default():
-    """No spans are created when tracing is not enabled."""
+def test_tracing_disabled_by_default():
+    """Invoke spans are still created by the global tracer when tracing is not explicitly configured."""
     if _MODULE_EXPORTER:
         _MODULE_EXPORTER.clear()
 
@@ -148,29 +155,26 @@ async def test_tracing_disabled_by_default():
     async def handle(request: Request) -> Response:
         return Response(content=b"ok")
 
-    transport = ASGITransport(app=app)
-    async with AsyncClient(transport=transport, base_url="http://testserver") as client:
-        await client.post("/invocations", content=b"test")
+    client = TestClient(app)
+    client.post("/invocations", content=b"test")
 
-    # No spans should be created (server has no tracing helper)
-    # The module-level provider may capture unrelated spans,
-    # but none should be from our server
+    # With the function-based tracing design, spans are always created
+    # when OTel is installed (via the global tracer). The difference is
+    # whether exporters are configured. Verify a span IS created.
     spans = _get_spans()
     invoke_spans = [s for s in spans if "invoke_agent" in s.name]
-    assert len(invoke_spans) == 0
+    assert len(invoke_spans) >= 1
 
 
 # ---------------------------------------------------------------------------
 # Tracing enabled creates invoke span with correct name
 # ---------------------------------------------------------------------------
 
-@pytest.mark.asyncio
-async def test_tracing_enabled_creates_invoke_span():
+def test_tracing_enabled_creates_invoke_span():
     """Tracing enabled creates a span named 'invoke_agent'."""
     server = _make_tracing_server()
-    transport = ASGITransport(app=server)
-    async with AsyncClient(transport=transport, base_url="http://testserver") as client:
-        await client.post("/invocations", content=b"test")
+    client = TestClient(server)
+    client.post("/invocations", content=b"test")
 
     spans = _get_spans()
     invoke_spans = [s for s in spans if "invoke_agent" in s.name]
@@ -182,13 +186,11 @@ async def test_tracing_enabled_creates_invoke_span():
 # Invoke error records exception
 # ---------------------------------------------------------------------------
 
-@pytest.mark.asyncio
-async def test_invoke_error_records_exception():
+def test_invoke_error_records_exception():
     """When handler raises, the span records the exception."""
     server = _make_failing_tracing_server()
-    transport = ASGITransport(app=server)
-    async with AsyncClient(transport=transport, base_url="http://testserver") as client:
-        resp = await client.post("/invocations", content=b"test")
+    client = TestClient(server)
+    resp = client.post("/invocations", content=b"test")
     assert resp.status_code == 500
 
     spans = _get_spans()
@@ -203,30 +205,26 @@ async def test_invoke_error_records_exception():
 # GET/cancel create spans
 # ---------------------------------------------------------------------------
 
-@pytest.mark.asyncio
-async def test_get_invocation_creates_span():
+def test_get_invocation_creates_span():
     """GET /invocations/{id} creates a span."""
     server = _make_tracing_server_with_get_cancel()
-    transport = ASGITransport(app=server)
-    async with AsyncClient(transport=transport, base_url="http://testserver") as client:
-        resp = await client.post("/invocations", content=b"data")
-        inv_id = resp.headers["x-agent-invocation-id"]
-        await client.get(f"/invocations/{inv_id}")
+    client = TestClient(server)
+    resp = client.post("/invocations", content=b"data")
+    inv_id = resp.headers["x-agent-invocation-id"]
+    client.get(f"/invocations/{inv_id}")
 
     spans = _get_spans()
     get_spans = [s for s in spans if "get_invocation" in s.name]
     assert len(get_spans) >= 1
 
 
-@pytest.mark.asyncio
-async def test_cancel_invocation_creates_span():
+def test_cancel_invocation_creates_span():
     """POST /invocations/{id}/cancel creates a span."""
     server = _make_tracing_server_with_get_cancel()
-    transport = ASGITransport(app=server)
-    async with AsyncClient(transport=transport, base_url="http://testserver") as client:
-        resp = await client.post("/invocations", content=b"data")
-        inv_id = resp.headers["x-agent-invocation-id"]
-        await client.post(f"/invocations/{inv_id}/cancel")
+    client = TestClient(server)
+    resp = client.post("/invocations", content=b"data")
+    inv_id = resp.headers["x-agent-invocation-id"]
+    client.post(f"/invocations/{inv_id}/cancel")
 
     spans = _get_spans()
     cancel_spans = [s for s in spans if "cancel_invocation" in s.name]
@@ -237,20 +235,18 @@ async def test_cancel_invocation_creates_span():
 # Tracing via env var
 # ---------------------------------------------------------------------------
 
-@pytest.mark.asyncio
-async def test_tracing_via_appinsights_env_var():
+def test_tracing_via_appinsights_env_var():
     """Tracing is enabled when APPLICATIONINSIGHTS_CONNECTION_STRING is set."""
     with patch.dict(os.environ, {"APPLICATIONINSIGHTS_CONNECTION_STRING": "InstrumentationKey=test"}):
-        with patch("azure.ai.agentserver.core._tracing.TracingHelper._setup_azure_monitor"):
+        with patch("azure.ai.agentserver.core._tracing._setup_trace_export"):
             app = InvocationAgentServerHost()
 
     @app.invoke_handler
     async def handle(request: Request) -> Response:
         return Response(content=b"ok")
 
-    transport = ASGITransport(app=app)
-    async with AsyncClient(transport=transport, base_url="http://testserver") as client:
-        await client.post("/invocations", content=b"test")
+    client = TestClient(app)
+    client.post("/invocations", content=b"test")
 
     spans = _get_spans()
     invoke_spans = [s for s in spans if "invoke_agent" in s.name]
@@ -261,9 +257,9 @@ async def handle(request: Request) -> Response:
 # No tracing when no endpoints configured
 # ---------------------------------------------------------------------------
 
-@pytest.mark.asyncio
-async def test_no_tracing_when_no_endpoints():
-    """Tracing is disabled when no connection string or OTLP endpoint is set."""
+def test_no_tracing_when_no_endpoints():
+    """When no connection string or OTLP endpoint is set, configure_tracing is not called,
+    but spans are still created (they're just not exported)."""
     env = os.environ.copy()
     env.pop("APPLICATIONINSIGHTS_CONNECTION_STRING", None)
     env.pop("OTEL_EXPORTER_OTLP_ENDPOINT", None)
@@ -277,36 +273,35 @@ async def handle(request: Request) -> Response:
     if _MODULE_EXPORTER:
         _MODULE_EXPORTER.clear()
 
-    transport = ASGITransport(app=app)
-    async with AsyncClient(transport=transport, base_url="http://testserver") as client:
-        await client.post("/invocations", content=b"test")
+    client = TestClient(app)
+    client.post("/invocations", content=b"test")
 
+    # Spans are still created via the global tracer — the difference
+    # is no exporters are configured to send them anywhere.
     spans = _get_spans()
     invoke_spans = [s for s in spans if "invoke_agent" in s.name]
-    assert len(invoke_spans) == 0
+    assert len(invoke_spans) >= 1
 
 
 # ---------------------------------------------------------------------------
 # Traceparent propagation
 # ---------------------------------------------------------------------------
 
-@pytest.mark.asyncio
-async def test_traceparent_propagation():
+def test_traceparent_propagation():
     """Server propagates traceparent header into span context."""
     server = _make_tracing_server()
-    transport = ASGITransport(app=server)
 
     # Create a traceparent
     trace_id_hex = uuid.uuid4().hex
     span_id_hex = uuid.uuid4().hex[:16]
     traceparent = f"00-{trace_id_hex}-{span_id_hex}-01"
 
-    async with AsyncClient(transport=transport, base_url="http://testserver") as client:
-        await client.post(
-            "/invocations",
-            content=b"test",
-            headers={"traceparent": traceparent},
-        )
+    client = TestClient(server)
+    client.post(
+        "/invocations",
+        content=b"test",
+        headers={"traceparent": traceparent},
+    )
 
     spans = _get_spans()
     invoke_spans = [s for s in spans if "invoke_agent" in s.name]
@@ -321,13 +316,11 @@ async def test_traceparent_propagation():
 # Streaming spans
 # ---------------------------------------------------------------------------
 
-@pytest.mark.asyncio
-async def test_streaming_creates_span():
+def test_streaming_creates_span():
     """Streaming response creates and completes a span."""
     server = _make_streaming_tracing_server()
-    transport = ASGITransport(app=server)
-    async with AsyncClient(transport=transport, base_url="http://testserver") as client:
-        resp = await client.post("/invocations", content=b"test")
+    client = TestClient(server)
+    resp = client.post("/invocations", content=b"test")
     assert resp.status_code == 200
 
     spans = _get_spans()
@@ -339,13 +332,11 @@ async def test_streaming_creates_span():
 # GenAI attributes on invoke span
 # ---------------------------------------------------------------------------
 
-@pytest.mark.asyncio
-async def test_genai_attributes_on_invoke_span():
+def test_genai_attributes_on_invoke_span():
     """Invoke span has GenAI semantic convention attributes."""
     server = _make_tracing_server()
-    transport = ASGITransport(app=server)
-    async with AsyncClient(transport=transport, base_url="http://testserver") as client:
-        await client.post("/invocations", content=b"test")
+    client = TestClient(server)
+    client.post("/invocations", content=b"test")
 
     spans = _get_spans()
     invoke_spans = [s for s in spans if "invoke_agent" in s.name]
@@ -361,16 +352,14 @@ async def test_genai_attributes_on_invoke_span():
 # Session ID in gen_ai.conversation.id
 # ---------------------------------------------------------------------------
 
-@pytest.mark.asyncio
-async def test_session_id_in_conversation_id():
+def test_session_id_in_conversation_id():
     """Session ID is set as gen_ai.conversation.id on invoke span."""
     server = _make_tracing_server()
-    transport = ASGITransport(app=server)
-    async with AsyncClient(transport=transport, base_url="http://testserver") as client:
-        await client.post(
-            "/invocations?agent_session_id=test-session",
-            content=b"test",
-        )
+    client = TestClient(server)
+    client.post(
+        "/invocations?agent_session_id=test-session",
+        content=b"test",
+    )
 
     spans = _get_spans()
     invoke_spans = [s for s in spans if "invoke_agent" in s.name]
@@ -383,15 +372,13 @@ async def test_session_id_in_conversation_id():
 # GenAI attributes on get_invocation span
 # ---------------------------------------------------------------------------
 
-@pytest.mark.asyncio
-async def test_genai_attributes_on_get_span():
+def test_genai_attributes_on_get_span():
     """GET invocation span has GenAI attributes."""
     server = _make_tracing_server_with_get_cancel()
-    transport = ASGITransport(app=server)
-    async with AsyncClient(transport=transport, base_url="http://testserver") as client:
-        resp = await client.post("/invocations", content=b"data")
-        inv_id = resp.headers["x-agent-invocation-id"]
-        await client.get(f"/invocations/{inv_id}")
+    client = TestClient(server)
+    resp = client.post("/invocations", content=b"data")
+    inv_id = resp.headers["x-agent-invocation-id"]
+    client.get(f"/invocations/{inv_id}")
 
     spans = _get_spans()
     get_spans = [s for s in spans if "get_invocation" in s.name]
@@ -405,14 +392,12 @@ async def test_genai_attributes_on_get_span():
 # Namespaced invocation_id attribute
 # ---------------------------------------------------------------------------
 
-@pytest.mark.asyncio
-async def test_namespaced_invocation_id_attribute():
+def test_namespaced_invocation_id_attribute():
     """Invoke span has azure.ai.agentserver.invocations.invocation_id."""
     server = _make_tracing_server()
-    transport = ASGITransport(app=server)
-    async with AsyncClient(transport=transport, base_url="http://testserver") as client:
-        resp = await client.post("/invocations", content=b"test")
-        inv_id = resp.headers["x-agent-invocation-id"]
+    client = TestClient(server)
+    resp = client.post("/invocations", content=b"test")
+    inv_id = resp.headers["x-agent-invocation-id"]
 
     spans = _get_spans()
     invoke_spans = [s for s in spans if "invoke_agent" in s.name]
@@ -421,48 +406,11 @@ async def test_namespaced_invocation_id_attribute():
     assert attrs.get("azure.ai.agentserver.invocations.invocation_id") == inv_id
 
 
-# ---------------------------------------------------------------------------
-# Baggage tests
-# ---------------------------------------------------------------------------
-
-@pytest.mark.asyncio
-async def test_baggage_leaf_customer_span_id():
-    """Baggage leaf_customer_span_id overrides parent span ID."""
-    server = _make_tracing_server()
-    transport = ASGITransport(app=server)
-
-    trace_id_hex = uuid.uuid4().hex
-    original_span_id = uuid.uuid4().hex[:16]
-    leaf_span_id = uuid.uuid4().hex[:16]
-    traceparent = f"00-{trace_id_hex}-{original_span_id}-01"
-    baggage = f"leaf_customer_span_id={leaf_span_id}"
-
-    async with AsyncClient(transport=transport, base_url="http://testserver") as client:
-        await client.post(
-            "/invocations",
-            content=b"test",
-            headers={
-                "traceparent": traceparent,
-                "baggage": baggage,
-            },
-        )
-
-    spans = _get_spans()
-    invoke_spans = [s for s in spans if "invoke_agent" in s.name]
-    assert len(invoke_spans) >= 1
-    span = invoke_spans[0]
-    # The parent span ID should be overridden to leaf_span_id
-    if span.parent is not None:
-        actual_parent_span_id = format(span.parent.span_id, "016x")
-        assert actual_parent_span_id == leaf_span_id
-
-
 # ---------------------------------------------------------------------------
 # Agent name/version in span names
 # ---------------------------------------------------------------------------
 
-@pytest.mark.asyncio
-async def test_agent_name_in_span_name():
+def test_agent_name_in_span_name():
     """Agent name from env var appears in span name."""
     with patch.dict(os.environ, {
         "FOUNDRY_AGENT_NAME": "my-agent",
@@ -470,9 +418,8 @@ async def test_agent_name_in_span_name():
     }):
         server = _make_tracing_server()
 
-    transport = ASGITransport(app=server)
-    async with AsyncClient(transport=transport, base_url="http://testserver") as client:
-        await client.post("/invocations", content=b"test")
+    client = TestClient(server)
+    client.post("/invocations", content=b"test")
 
     spans = _get_spans()
     invoke_spans = [s for s in spans if "invoke_agent" in s.name]
@@ -481,8 +428,7 @@ async def test_agent_name_in_span_name():
     assert "2.0" in invoke_spans[0].name
 
 
-@pytest.mark.asyncio
-async def test_agent_name_only_in_span_name():
+def test_agent_name_only_in_span_name():
     """Agent name without version in span name."""
     env_override = {"FOUNDRY_AGENT_NAME": "solo-agent"}
     env_copy = os.environ.copy()
@@ -491,9 +437,8 @@ async def test_agent_name_only_in_span_name():
     with patch.dict(os.environ, env_copy, clear=True):
         server = _make_tracing_server()
 
-    transport = ASGITransport(app=server)
-    async with AsyncClient(transport=transport, base_url="http://testserver") as client:
-        await client.post("/invocations", content=b"test")
+    client = TestClient(server)
+    client.post("/invocations", content=b"test")
 
     spans = _get_spans()
     invoke_spans = [s for s in spans if "invoke_agent" in s.name]
@@ -505,8 +450,5 @@ async def test_agent_name_only_in_span_name():
 # Project endpoint attribute
 # ---------------------------------------------------------------------------
 
-@pytest.mark.asyncio
-async def test_project_endpoint_env_var():
+def test_project_endpoint_env_var():
     """FOUNDRY_PROJECT_ENDPOINT constant matches the expected env var name."""
-    from azure.ai.agentserver.core import Constants
-    assert Constants.FOUNDRY_PROJECT_ENDPOINT == "FOUNDRY_PROJECT_ENDPOINT"
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/CHANGELOG.md b/sdk/agentserver/azure-ai-agentserver-langgraph/CHANGELOG.md
deleted file mode 100644
index cfcf2445e256..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/CHANGELOG.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# Release History
-
-## 1.0.0b1 (2025-11-07)
-
-### Features Added
-
-First version
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/LICENSE b/sdk/agentserver/azure-ai-agentserver-langgraph/LICENSE
deleted file mode 100644
index 63447fd8bbbf..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-Copyright (c) Microsoft Corporation.
-
-MIT License
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/MANIFEST.in b/sdk/agentserver/azure-ai-agentserver-langgraph/MANIFEST.in
deleted file mode 100644
index 452c12399ff7..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/MANIFEST.in
+++ /dev/null
@@ -1,9 +0,0 @@
-include *.md
-include LICENSE
-recursive-include tests *.py
-recursive-include samples *.py *.md
-recursive-include doc *.rst *.md
-include azure/__init__.py
-include azure/ai/__init__.py
-include azure/ai/agentserver/__init__.py
-include azure/ai/agentserver/langgraph/py.typed
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/README.md b/sdk/agentserver/azure-ai-agentserver-langgraph/README.md
deleted file mode 100644
index 1c1eaab6837e..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/README.md
+++ /dev/null
@@ -1,64 +0,0 @@
-# Azure AI Agent Server Adapter for LangGraph Python
-
-
-## Getting started
-
-```bash
-pip install azure-ai-agentserver-langgraph
-```
-
-
-## Key concepts
-
-Azure AI Agent Server wraps your LangGraph agent, and host it on the cloud.
-
-
-## Examples
-
-```python
-# your existing agent
-from my_langgraph_agent import my_awesome_agent
-
-# langgraph utils
-from azure.ai.agentserver.langgraph import from_langgraph
-
-if __name__ == "__main__":
-    # with this simple line, your agent will be hosted on http://localhost:8088
-    from_langgraph(my_awesome_agent).run()
-
-```
-
-**Note**
-If your langgraph agent was not using langgraph's builtin [MessageState](https://langchain-ai.github.io/langgraph/concepts/low_level/?h=messagesstate#messagesstate), you should implement your own `LanggraphStateConverter` and provide to `from_langgraph`.
-
-Reference this [example](https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/agentserver/azure-ai-agentserver-langgraph/samples/custom_state/main.py) for more details.
-
-
-## Troubleshooting
-
-First run your agent with azure-ai-agentserver-langgraph locally.
-
-If it works on local but failed on cloud. Check your logs in the application insight connected to your Azure AI Foundry Project.
-
-
-## Next steps
-
-Please visit [Samples](https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/agentserver/azure-ai-agentserver-langgraph/samples) folder. There are several samples for you to build your agent with azure-ai-agentserver-* packages
-
-
-## Contributing
-
-This project welcomes contributions and suggestions. Most contributions require
-you to agree to a Contributor License Agreement (CLA) declaring that you have
-the right to, and actually do, grant us the rights to use your contribution.
-For details, visit https://cla.microsoft.com.
-
-When you submit a pull request, a CLA-bot will automatically determine whether
-you need to provide a CLA and decorate the PR appropriately (e.g., label,
-comment). Simply follow the instructions provided by the bot. You will only
-need to do this once across all repos using our CLA.
-
-This project has adopted the
-[Microsoft Open Source Code of Conduct][code_of_conduct]. For more information,
-see the Code of Conduct FAQ or contact opencode@microsoft.com with any
-additional questions or comments.
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/__init__.py b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/__init__.py
deleted file mode 100644
index d55ccad1f573..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-__path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/__init__.py b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/__init__.py
deleted file mode 100644
index d55ccad1f573..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-__path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/__init__.py b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/__init__.py
deleted file mode 100644
index d55ccad1f573..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-__path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/__init__.py b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/__init__.py
deleted file mode 100644
index ed2e0d4d493a..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-__path__ = __import__("pkgutil").extend_path(__path__, __name__)
-
-from typing import TYPE_CHECKING, Optional
-
-from ._version import VERSION
-
-if TYPE_CHECKING:  # pragma: no cover
-    from . import models
-
-
-def from_langgraph(agent, state_converter: Optional["models.LanggraphStateConverter"] = None):
-    from .langgraph import LangGraphAdapter
-
-    return LangGraphAdapter(agent, state_converter=state_converter)
-
-
-__all__ = ["from_langgraph"]
-__version__ = VERSION
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/_version.py b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/_version.py
deleted file mode 100644
index be71c81bd282..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/_version.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# coding=utf-8
-# --------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License. See License.txt in the project root for license information.
-# Code generated by Microsoft (R) Python Code Generator.
-# Changes may cause incorrect behavior and will be lost if the code is regenerated.
-# --------------------------------------------------------------------------
-
-VERSION = "1.0.0b1"
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/langgraph.py b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/langgraph.py
deleted file mode 100644
index efa2b3393266..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/langgraph.py
+++ /dev/null
@@ -1,165 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-# pylint: disable=logging-fstring-interpolation,broad-exception-caught
-# mypy: disable-error-code="assignment,arg-type"
-import os
-import re
-from typing import Optional
-
-from langchain_core.runnables import RunnableConfig
-from langgraph.graph.state import CompiledStateGraph
-
-from azure.ai.agentserver.core.constants import Constants
-from azure.ai.agentserver.core.logger import get_logger
-from azure.ai.agentserver.core.server.base import FoundryCBAgent
-from azure.ai.agentserver.core.server.common.agent_run_context import AgentRunContext
-
-from .models import (
-    LanggraphMessageStateConverter,
-    LanggraphStateConverter,
-)
-from .models.utils import is_state_schema_valid
-
-logger = get_logger()
-
-
-class LangGraphAdapter(FoundryCBAgent):
-    """
-    Adapter for LangGraph Agent.
-    """
-
-    def __init__(self, graph: CompiledStateGraph, state_converter: Optional[LanggraphStateConverter] = None):
-        """
-        Initialize the LangGraphAdapter with a CompiledStateGraph.
-
-        :param graph: The LangGraph StateGraph to adapt.
-        :type graph: CompiledStateGraph
-        :param state_converter: custom state converter. Required if graph state is not MessagesState.
-        :type state_converter: Optional[LanggraphStateConverter]
-        """
-        super().__init__()
-        self.graph = graph
-        self.azure_ai_tracer = None
-        if not state_converter:
-            if is_state_schema_valid(self.graph.builder.state_schema):
-                self.state_converter = LanggraphMessageStateConverter()
-            else:
-                raise ValueError("state_converter is required for non-MessagesState graph.")
-        else:
-            self.state_converter = state_converter
-
-    async def agent_run(self, context: AgentRunContext):
-        input_data = self.state_converter.request_to_state(context)
-        logger.debug(f"Converted input data: {input_data}")
-        if not context.stream:
-            response = await self.agent_run_non_stream(input_data, context)
-            return response
-        return self.agent_run_astream(input_data, context)
-
-    def init_tracing_internal(self, exporter_endpoint=None, app_insights_conn_str=None):
-        # set env vars for langsmith
-        os.environ["LANGSMITH_OTEL_ENABLED"] = "true"
-        os.environ["LANGSMITH_TRACING"] = "true"
-        os.environ["LANGSMITH_OTEL_ONLY"] = "true"
-        if app_insights_conn_str:
-            # setup azure ai telemetry callbacks
-            try:
-                from langchain_azure_ai.callbacks.tracers import AzureAIOpenTelemetryTracer
-
-                self.azure_ai_tracer = AzureAIOpenTelemetryTracer(
-                    connection_string=app_insights_conn_str,
-                    enable_content_recording=True,
-                    name=self.get_agent_identifier(),
-                )
-                logger.info("AzureAIOpenTelemetryTracer initialized successfully.")
-            except Exception as e:
-                logger.error(f"Failed to import AzureAIOpenTelemetryTracer, ignore: {e}")
-
-    def setup_otlp_exporter(self, endpoint, provider):
-        endpoint = self.format_otlp_endpoint(endpoint)
-        return super().setup_otlp_exporter(endpoint, provider)
-
-    def get_trace_attributes(self):
-        attrs = super().get_trace_attributes()
-        attrs["service.namespace"] = "azure.ai.agentserver.langgraph"
-        return attrs
-
-    async def agent_run_non_stream(self, input_data: dict, context: AgentRunContext):
-        """
-        Run the agent with non-streaming response.
-
-        :param input_data: The input data to run the agent with.
-        :type input_data: dict
-        :param context: The context for the agent run.
-        :type context: AgentRunContext
-
-        :return: The response of the agent run.
-        :rtype: dict
-        """
-
-        try:
-            config = self.create_runnable_config(context)
-            stream_mode = self.state_converter.get_stream_mode(context)
-            result = await self.graph.ainvoke(input_data, config=config, stream_mode=stream_mode)  # type: ignore
-            output = self.state_converter.state_to_response(result, context)
-            return output
-        except Exception as e:
-            logger.error(f"Error during agent run: {e}")
-            raise e
-
-    async def agent_run_astream(self, input_data: dict, context: AgentRunContext):
-        """
-        Run the agent with streaming response.
-
-        :param input_data: The input data to run the agent with.
-        :type input_data: dict
-        :param context: The context for the agent run.
-        :type context: AgentRunContext
-
-        :return: An async generator yielding the response stream events.
-        :rtype: AsyncGenerator[dict]
-        """
-        try:
-            logger.info(f"Starting streaming agent run {context.response_id}")
-            config = self.create_runnable_config(context)
-            stream_mode = self.state_converter.get_stream_mode(context)
-            stream = self.graph.astream(input=input_data, config=config, stream_mode=stream_mode)
-            async for result in self.state_converter.state_to_response_stream(stream, context):
-                yield result
-        except Exception as e:
-            logger.error(f"Error during streaming agent run: {e}")
-            raise e
-
-    def create_runnable_config(self, context: AgentRunContext) -> RunnableConfig:
-        """
-        Create a RunnableConfig from the converted request data.
-
-        :param context: The context for the agent run.
-        :type context: AgentRunContext
-
-        :return: The RunnableConfig for the agent run.
-        :rtype: RunnableConfig
-        """
-        config = RunnableConfig(
-            configurable={
-                "thread_id": context.conversation_id,
-            },
-            callbacks=[self.azure_ai_tracer] if self.azure_ai_tracer else None,
-        )
-        return config
-
-    def format_otlp_endpoint(self, endpoint: str) -> str:
-        m = re.match(r"^(https?://[^/]+)", endpoint)
-        if m:
-            return f"{m.group(1)}/v1/traces"
-        return endpoint
-
-    def get_agent_identifier(self) -> str:
-        agent_name = os.getenv(Constants.AGENT_NAME)
-        if agent_name:
-            return agent_name
-        agent_id = os.getenv(Constants.AGENT_ID)
-        if agent_id:
-            return agent_id
-        return "HostedAgent-LangGraph"
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/__init__.py b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/__init__.py
deleted file mode 100644
index eb6285a6279b..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/__init__.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-from .langgraph_request_converter import LangGraphRequestConverter
-from .langgraph_response_converter import LangGraphResponseConverter
-from .langgraph_state_converter import LanggraphMessageStateConverter, LanggraphStateConverter
-from .langgraph_stream_response_converter import LangGraphStreamResponseConverter
-
-__all__ = [
-    "LangGraphRequestConverter",
-    "LangGraphResponseConverter",
-    "LangGraphStreamResponseConverter",
-    "LanggraphStateConverter",
-    "LanggraphMessageStateConverter",
-]
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/langgraph_request_converter.py b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/langgraph_request_converter.py
deleted file mode 100644
index d29a346b192b..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/langgraph_request_converter.py
+++ /dev/null
@@ -1,155 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-# pylint: disable=logging-fstring-interpolation
-# mypy: ignore-errors
-import json
-from typing import Dict, List
-
-from langchain_core.messages import (
-    AIMessage,
-    AnyMessage,
-    HumanMessage,
-    SystemMessage,
-    ToolMessage,
-)
-from langchain_core.messages.tool import ToolCall
-
-from azure.ai.agentserver.core.logger import get_logger
-from azure.ai.agentserver.core.models import CreateResponse, openai as openai_models, projects as project_models
-
-logger = get_logger()
-
-role_mapping = {
-    project_models.ResponsesMessageRole.USER: HumanMessage,
-    project_models.ResponsesMessageRole.SYSTEM: SystemMessage,
-    project_models.ResponsesMessageRole.ASSISTANT: AIMessage,
-}
-
-item_content_type_mapping = {
-    project_models.ItemContentType.INPUT_TEXT: "text",
-    project_models.ItemContentType.INPUT_AUDIO: "audio",
-    project_models.ItemContentType.INPUT_IMAGE: "image",
-    project_models.ItemContentType.INPUT_FILE: "file",
-    project_models.ItemContentType.OUTPUT_TEXT: "text",
-    project_models.ItemContentType.OUTPUT_AUDIO: "audio",
-    # project_models.ItemContentType.REFUSAL: "refusal",
-}
-
-
-class LangGraphRequestConverter:
-    def __init__(self, data: CreateResponse):
-        self.data: CreateResponse = data
-
-    def convert(self) -> dict:
-        # Convert the CreateRunRequest input to a format suitable for LangGraph
-        langgraph_input = {"messages": []}
-
-        instructions = self.data.get("instructions")
-        if instructions and isinstance(instructions, str):
-            langgraph_input["messages"].append(SystemMessage(content=instructions))
-
-        input = self.data.get("input")
-        if isinstance(input, str):
-            langgraph_input["messages"].append(HumanMessage(content=input))
-        elif isinstance(input, List):
-            for inner in input:
-                message = self.convert_input(inner)
-                langgraph_input["messages"].append(message)
-        else:
-            raise ValueError(f"Unsupported input type: {type(input)}, {input}")
-        return langgraph_input
-
-    def convert_input(self, item: openai_models.ResponseInputItemParam) -> AnyMessage:
-        """
-        Convert ResponseInputItemParam to a LangGraph message
-
-        :param item: The ResponseInputItemParam to convert from request.
-        :type item: openai_models.ResponseInputItemParam
-
-        :return: The converted LangGraph message.
-        :rtype: AnyMessage
-        """
-        item_type = item.get("type", project_models.ItemType.MESSAGE)
-        if item_type == project_models.ItemType.MESSAGE:
-            # this is a message
-            return self.convert_message(item)
-        if item_type == project_models.ItemType.FUNCTION_CALL:
-            return self.convert_function_call(item)
-        if item_type == project_models.ItemType.FUNCTION_CALL_OUTPUT:
-            return self.convert_function_call_output(item)
-        raise ValueError(f"Unsupported OpenAIItemParam type: {item_type}, {item}")
-
-    def convert_message(self, message: dict) -> AnyMessage:
-        """
-        Convert a message dict to a LangGraph message
-
-        :param message: The message dict to convert.
-        :type message: dict
-
-        :return: The converted LangGraph message.
-        :rtype: AnyMessage
-        """
-        content = message.get("content")
-        role = message.get("role", project_models.ResponsesMessageRole.USER)
-        if not content:
-            raise ValueError(f"Message missing content: {message}")
-        if isinstance(content, str):
-            return role_mapping[role](content=content)
-        if isinstance(content, list):
-            return role_mapping[role](content=self.convert_OpenAIItemContentList(content))
-        raise ValueError(f"Unsupported ResponseMessagesItemParam content type: {type(content)}, {content}")
-
-    def convert_function_call(self, item: dict) -> AnyMessage:
-        try:
-            item = openai_models.ResponseFunctionToolCallParam(**item)
-            argument = item.get("arguments", None)
-            args = json.loads(argument) if argument else {}
-        except json.JSONDecodeError as e:
-            raise ValueError(f"Invalid JSON in function call arguments: {item}") from e
-        except Exception as e:
-            raise ValueError(f"Invalid function call item: {item}") from e
-        return AIMessage(tool_calls=[ToolCall(id=item.get("call_id"), name=item.get("name"), args=args)], content="")
-
-    def convert_function_call_output(self, item: dict) -> ToolMessage:
-        try:
-            item = openai_models.response_input_item_param.FunctionCallOutput(**item)  # pylint: disable=no-member
-        except Exception as e:
-            raise ValueError(f"Invalid function call output item: {item}") from e
-
-        output = item.get("output", None)
-        if isinstance(output, str):
-            return ToolMessage(content=output, tool_call_id=item.get("call_id"))
-        if isinstance(output, list):
-            return ToolMessage(content=self.convert_OpenAIItemContentList(output), tool_call_id=item.get("call_id"))
-        raise ValueError(f"Unsupported function call output type: {type(output)}, {output}")
-
-    def convert_OpenAIItemContentList(self, content: List[Dict]) -> List[Dict]:
-        """
-        Convert ItemContent to a list format
-
-        :param content: The list of ItemContent to convert.
-        :type content: List[Dict]
-
-        :return: The converted list of ItemContent.
-        :rtype: List[Dict]
-        """
-        result = []
-        for item in content:
-            result.append(self.convert_OpenAIItemContent(item))
-        return result
-
-    def convert_OpenAIItemContent(self, content: Dict) -> Dict:
-        """
-        Convert ItemContent to a dict format
-
-        :param content: The ItemContent to convert.
-        :type content: Dict
-
-        :return: The converted ItemContent.
-        :rtype: Dict
-        """
-        res = content.copy()
-        content_type = content.get("type")
-        res["type"] = item_content_type_mapping.get(content_type, content_type)
-        return res
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/langgraph_response_converter.py b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/langgraph_response_converter.py
deleted file mode 100644
index 086e75e287d4..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/langgraph_response_converter.py
+++ /dev/null
@@ -1,136 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-# pylint: disable=logging-fstring-interpolation,broad-exception-caught,logging-not-lazy
-# mypy: disable-error-code="valid-type,call-overload,attr-defined"
-import copy
-from typing import List
-
-from langchain_core import messages
-from langchain_core.messages import AnyMessage
-
-from azure.ai.agentserver.core.logger import get_logger
-from azure.ai.agentserver.core.models import projects as project_models
-from azure.ai.agentserver.core.server.common.agent_run_context import AgentRunContext
-
-from .utils import extract_function_call
-
-logger = get_logger()
-
-
-class LangGraphResponseConverter:
-    def __init__(self, context: AgentRunContext, output):
-        self.context = context
-        self.output = output
-
-    def convert(self) -> list[project_models.ItemResource]:
-        res = []
-        for step in self.output:
-            for node_name, node_output in step.items():
-                message_arr = node_output.get("messages")
-                if not message_arr:
-                    logger.warning(f"No messages found in node {node_name} output: {node_output}")
-                    continue
-                for message in message_arr:
-                    try:
-                        converted = self.convert_output_message(message)
-                        res.append(converted)
-                    except Exception as e:
-                        logger.error(f"Error converting message {message}: {e}")
-        return res
-
-    def convert_output_message(self, output_message: AnyMessage):  # pylint: disable=inconsistent-return-statements
-        # Implement the conversion logic for inner inputs
-        if isinstance(output_message, messages.HumanMessage):
-            return project_models.ResponsesUserMessageItemResource(
-                content=self.convert_MessageContent(
-                    output_message.content, role=project_models.ResponsesMessageRole.USER
-                ),
-                id=self.context.id_generator.generate_message_id(),
-                status="completed",  # temporary status, can be adjusted based on actual logic
-            )
-        if isinstance(output_message, messages.SystemMessage):
-            return project_models.ResponsesSystemMessageItemResource(
-                content=self.convert_MessageContent(
-                    output_message.content, role=project_models.ResponsesMessageRole.SYSTEM
-                ),
-                id=self.context.id_generator.generate_message_id(),
-                status="completed",
-            )
-        if isinstance(output_message, messages.AIMessage):
-            if output_message.tool_calls:
-                # If there are tool calls, we assume there is only ONE function call
-                if len(output_message.tool_calls) > 1:
-                    logger.warning(
-                        f"There are {len(output_message.tool_calls)} tool calls found. "
-                        + "Only the first one will be processed."
-                    )
-                tool_call = output_message.tool_calls[0]
-                name, call_id, argument = extract_function_call(tool_call)  # type: ignore
-                return project_models.FunctionToolCallItemResource(
-                    call_id=call_id,
-                    name=name,
-                    arguments=argument,
-                    id=self.context.id_generator.generate_function_call_id(),
-                    status="completed",
-                )
-            return project_models.ResponsesAssistantMessageItemResource(
-                content=self.convert_MessageContent(
-                    output_message.content, role=project_models.ResponsesMessageRole.ASSISTANT
-                ),
-                id=self.context.id_generator.generate_message_id(),
-                status="completed",
-            )
-        if isinstance(output_message, messages.ToolMessage):
-            return project_models.FunctionToolCallOutputItemResource(
-                call_id=output_message.tool_call_id,
-                output=output_message.content,
-                id=self.context.id_generator.generate_function_output_id(),
-            )
-
-    def convert_MessageContent(
-        self, content, role: project_models.ResponsesMessageRole
-    ) -> List[project_models.ItemContent]:
-        if isinstance(content, str):
-            return [self.convert_MessageContentItem(content, role)]
-        return [self.convert_MessageContentItem(item, role) for item in content]
-
-    def convert_MessageContentItem(
-        self, content, role: project_models.ResponsesMessageRole
-    ) -> project_models.ItemContent:
-        content_dict = copy.deepcopy(content) if isinstance(content, dict) else {"text": content}
-
-        content_type = None
-        if isinstance(content, str):
-            langgraph_content_type = "text"
-        else:
-            langgraph_content_type = content.get("type", "text")
-
-        if langgraph_content_type == "text":
-            if role == project_models.ResponsesMessageRole.ASSISTANT:
-                content_type = project_models.ItemContentType.OUTPUT_TEXT
-            else:
-                content_type = project_models.ItemContentType.INPUT_TEXT
-        elif langgraph_content_type == "image":
-            if role == project_models.ResponsesMessageRole.USER:
-                content_type = project_models.ItemContentType.INPUT_IMAGE
-            else:
-                raise ValueError("Image content from assistant is not supported")
-        elif langgraph_content_type == "audio":
-            if role == project_models.ResponsesMessageRole.USER:
-                content_type = project_models.ItemContentType.INPUT_AUDIO
-            else:
-                content_type = project_models.ItemContentType.OUTPUT_AUDIO
-        elif langgraph_content_type == "file":
-            if role == project_models.ResponsesMessageRole.USER:
-                content_type = project_models.ItemContentType.INPUT_FILE
-            else:
-                raise ValueError("File content from assistant is not supported")
-        else:
-            raise ValueError(f"Unsupported content: {content}")
-
-        content_dict["type"] = content_type
-        if content_type == project_models.ItemContentType.OUTPUT_TEXT:
-            content_dict["annotations"] = []  # annotation is required for output_text
-
-        return project_models.ItemContent(content_dict)
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/langgraph_state_converter.py b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/langgraph_state_converter.py
deleted file mode 100644
index a1bc2181f919..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/langgraph_state_converter.py
+++ /dev/null
@@ -1,143 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-# mypy: disable-error-code="call-overload,override"
-"""Base interface for converting between LangGraph internal state and OpenAI-style responses.
-
-A LanggraphStateConverter implementation bridges:
-  1. Incoming CreateResponse (wrapped in AgentRunContext) -> initial graph state
-  2. Internal graph state -> final non-streaming Response
-  3. Streaming graph state events -> ResponseStreamEvent sequence
-  4. Declares which stream mode (if any) is supported for a given run context
-
-Concrete implementations should:
-  * Decide and document the shape of the state dict they return in request_to_state
-  * Handle aggregation, error mapping, and metadata propagation in state_to_response
-  * Incrementally translate async stream_state items in state_to_response_stream
-
-Do NOT perform network I/O directly inside these methods (other than awaiting the
-provided async iterator). Keep them pure transformation layers so they are testable.
-"""
-
-from __future__ import annotations
-
-import time
-from abc import ABC, abstractmethod
-from typing import Any, AsyncGenerator, AsyncIterator, Dict
-
-from azure.ai.agentserver.core.models import Response, ResponseStreamEvent
-from azure.ai.agentserver.core.server.common.agent_run_context import AgentRunContext
-
-from .langgraph_request_converter import LangGraphRequestConverter
-from .langgraph_response_converter import LangGraphResponseConverter
-from .langgraph_stream_response_converter import LangGraphStreamResponseConverter
-
-
-class LanggraphStateConverter(ABC):
-    """
-    Abstract base class for LangGraph state <-> response conversion.
-
-    :meta private:
-    """
-
-    @abstractmethod
-    def get_stream_mode(self, context: AgentRunContext) -> str:
-        """Return a string indicating streaming mode for this run.
-
-        Examples: "values", "updates", "messages", "custom", "debug".
-        Implementations may inspect context.request.stream or other flags.
-        Must be fast and side-effect free.
-
-        :param context: The context for the agent run.
-        :type context: AgentRunContext
-
-        :return: The streaming mode as a string.
-        :rtype: str
-        """
-
-    @abstractmethod
-    def request_to_state(self, context: AgentRunContext) -> Dict[str, Any]:
-        """Convert the incoming request (via context) to an initial LangGraph state.
-
-        Return a serializable dict that downstream graph execution expects.
-        Should not mutate the context. Raise ValueError on invalid input.
-
-        :param context: The context for the agent run.
-        :type context: AgentRunContext
-
-        :return: The initial LangGraph state as a dictionary.
-        :rtype: Dict[str, Any]
-        """
-
-    @abstractmethod
-    def state_to_response(self, state: Any, context: AgentRunContext) -> Response:
-        """Convert a completed LangGraph state into a final non-streaming Response object.
-
-        Implementations must construct and return an models.Response.
-        The returned object should include output items, usage (if available),
-        and reference the agent / conversation from context.
-
-        :param state: The completed LangGraph state.
-        :type state: Any
-        :param context: The context for the agent run.
-        :type context: AgentRunContext
-
-        :return: The final non-streaming Response object.
-        :rtype: Response
-        """
-
-    @abstractmethod
-    async def state_to_response_stream(
-        self, stream_state: AsyncIterator[Dict[str, Any] | Any], context: AgentRunContext
-    ) -> AsyncGenerator[ResponseStreamEvent, None]:
-        """Convert an async iterator of partial state updates into stream events.
-
-        Yield ResponseStreamEvent objects in the correct order. Implementations
-        are responsible for emitting lifecycle events (created, in_progress, deltas,
-        completed, errors) consistent with the OpenAI Responses streaming contract.
-
-        :param stream_state: An async iterator of partial LangGraph state updates.
-        :type stream_state: AsyncIterator[Dict[str, Any] | Any]
-        :param context: The context for the agent run.
-        :type context: AgentRunContext
-
-        :return: An async generator yielding ResponseStreamEvent objects.
-        :rtype: AsyncGenerator[ResponseStreamEvent, None]
-        """
-
-
-class LanggraphMessageStateConverter(LanggraphStateConverter):
-    """Converter implementation for langgraph built-in MessageState."""
-
-    def get_stream_mode(self, context: AgentRunContext) -> str:
-        if context.request.get("stream"):
-            return "messages"
-        return "updates"
-
-    def request_to_state(self, context: AgentRunContext) -> Dict[str, Any]:
-        converter = LangGraphRequestConverter(context.request)
-        return converter.convert()
-
-    def state_to_response(self, state: Any, context: AgentRunContext) -> Response:
-        converter = LangGraphResponseConverter(context, state)
-        output = converter.convert()
-
-        agent_id = context.get_agent_id_object()
-        conversation = context.get_conversation_object()
-        response = Response(
-            object="response",
-            id=context.response_id,
-            agent=agent_id,
-            conversation=conversation,
-            metadata=context.request.get("metadata"),
-            created_at=int(time.time()),
-            output=output,
-        )
-        return response
-
-    async def state_to_response_stream(
-        self, stream_state: AsyncIterator[Dict[str, Any] | Any], context: AgentRunContext
-    ) -> AsyncGenerator[ResponseStreamEvent, None]:
-        response_converter = LangGraphStreamResponseConverter(stream_state, context)
-        async for result in response_converter.convert():
-            yield result
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/langgraph_stream_response_converter.py b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/langgraph_stream_response_converter.py
deleted file mode 100644
index cba1db014ed8..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/langgraph_stream_response_converter.py
+++ /dev/null
@@ -1,74 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-# pylint: disable=logging-fstring-interpolation
-# mypy: disable-error-code="assignment,valid-type"
-from typing import List
-
-from langchain_core.messages import AnyMessage
-
-from azure.ai.agentserver.core.logger import get_logger
-from azure.ai.agentserver.core.models import ResponseStreamEvent
-from azure.ai.agentserver.core.server.common.agent_run_context import AgentRunContext
-
-from .response_event_generators import (
-    ResponseEventGenerator,
-    ResponseStreamEventGenerator,
-    StreamEventState,
-)
-
-logger = get_logger()
-
-
-class LangGraphStreamResponseConverter:
-    def __init__(self, stream, context: AgentRunContext):
-        self.stream = stream
-        self.context = context
-
-        self.stream_state = StreamEventState()
-        self.current_generator: ResponseEventGenerator = None
-
-    async def convert(self):
-        async for message, _ in self.stream:
-            try:
-                if self.current_generator is None:
-                    self.current_generator = ResponseStreamEventGenerator(logger, None)
-
-                converted = self.try_process_message(message, self.context)
-                for event in converted:
-                    yield event  # yield each event separately
-            except Exception as e:
-                logger.error(f"Error converting message {message}: {e}")
-                raise ValueError(f"Error converting message {message}") from e
-
-        logger.info("Stream ended, finalizing response.")
-        # finalize the stream
-        converted = self.try_process_message(None, self.context)
-        for event in converted:
-            yield event  # yield each event separately
-
-    def try_process_message(self, event: AnyMessage, context: AgentRunContext) -> List[ResponseStreamEvent]:
-        if event and not self.current_generator:
-            self.current_generator = ResponseStreamEventGenerator(logger, None)
-
-        is_processed = False
-        next_processor = self.current_generator
-        returned_events = []
-        while not is_processed:
-            is_processed, next_processor, processed_events = self.current_generator.try_process_message(
-                event, context, self.stream_state
-            )
-            returned_events.extend(processed_events)
-            if not is_processed and next_processor == self.current_generator:
-                logger.warning(
-                    f"Message can not be processed by current generator {type(self.current_generator).__name__}:"
-                    + f" {type(event)}: {event}"
-                )
-                break
-            if next_processor != self.current_generator:
-                logger.info(
-                    f"Switching processor from {type(self.current_generator).__name__} "
-                    + f"to {type(next_processor).__name__}"
-                )
-                self.current_generator = next_processor
-        return returned_events
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/__init__.py b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/__init__.py
deleted file mode 100644
index 7b9f0362e4ba..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/__init__.py
+++ /dev/null
@@ -1,11 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-from .response_event_generator import ResponseEventGenerator, StreamEventState
-from .response_stream_event_generator import ResponseStreamEventGenerator
-
-__all__ = [
-    "ResponseEventGenerator",
-    "ResponseStreamEventGenerator",
-    "StreamEventState",
-]
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/item_content_helpers.py b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/item_content_helpers.py
deleted file mode 100644
index ae169d866ee5..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/item_content_helpers.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-from azure.ai.agentserver.core.models import projects as project_models
-
-
-class ItemContentHelper:
-    def __init__(self, content_type: str):
-        self.content_type = content_type
-        self.has_aggregated_content = False
-
-    def create_item_content(self) -> project_models.ItemContent:
-        return project_models.ItemContent(
-            type=self.content_type,
-        )
-
-
-class InputTextItemContentHelper(ItemContentHelper):
-    def __init__(self):
-        super().__init__(project_models.ItemContentType.INPUT_TEXT)
-        self.text = ""
-
-    def create_item_content(self):
-        return project_models.ItemContentInputText(text=self.text)
-
-    def aggregate_content(self, item):
-        self.has_aggregated_content = True
-        if isinstance(item, str):
-            self.text += item
-            return
-        if not isinstance(item, dict):
-            return
-        text = item.get("text")
-        if isinstance(text, str):
-            self.text += text
-
-
-class OutputTextItemContentHelper(ItemContentHelper):
-    def __init__(self):
-        super().__init__(project_models.ItemContentType.OUTPUT_TEXT)
-        self.text = ""
-        self.annotations = []
-        self.logprobs = []
-
-    def create_item_content(self):
-        return project_models.ItemContentOutputText(
-            text=self.text,
-            annotations=self.annotations,
-            logprobs=self.logprobs,
-        )
-
-    def aggregate_content(self, item):
-        self.has_aggregated_content = True
-        if isinstance(item, str):
-            self.text += item
-            return
-        if not isinstance(item, dict):
-            return
-        text = item.get("text")
-        if isinstance(text, str):
-            self.text += text
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/item_resource_helpers.py b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/item_resource_helpers.py
deleted file mode 100644
index a1c97423d5ae..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/item_resource_helpers.py
+++ /dev/null
@@ -1,114 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-# mypy: disable-error-code="assignment"
-from azure.ai.agentserver.core.models import projects as project_models
-
-from ..utils import extract_function_call
-
-
-class ItemResourceHelper:
-    def __init__(self, item_type: str, item_id: str = None):
-        self.item_type = item_type
-        self.item_id = item_id
-
-    def create_item_resource(self, is_done: bool):
-        pass
-
-    def add_aggregate_content(self, item):
-        pass
-
-    def get_aggregated_content(self):
-        pass
-
-
-class FunctionCallItemResourceHelper(ItemResourceHelper):
-    def __init__(self, item_id: str = None, tool_call: dict = None):
-        super().__init__(project_models.ItemType.FUNCTION_CALL, item_id)
-        self.call_id = None
-        self.name = None
-        self.arguments = ""
-        if tool_call:
-            self.name, self.call_id, _ = extract_function_call(tool_call)
-
-    def create_item_resource(self, is_done: bool):
-        content = {
-            "id": self.item_id,
-            "type": self.item_type,
-            "call_id": self.call_id,
-            "name": self.name,
-            "arguments": self.arguments if self.arguments else "",
-            "status": "in_progress" if not is_done else "completed",
-        }
-        return project_models.ItemResource(content)
-
-    def add_aggregate_content(self, item):
-        if isinstance(item, str):
-            self.arguments += item
-            return
-        if not isinstance(item, dict):
-            return
-        if item.get("type") != project_models.ItemType.FUNCTION_CALL:
-            return
-        _, _, argument = extract_function_call(item)
-        if argument:
-            self.arguments += argument
-
-    def get_aggregated_content(self):
-        return self.create_item_resource(is_done=True)
-
-
-class FunctionCallOutputItemResourceHelper(ItemResourceHelper):
-    def __init__(self, item_id: str = None, call_id: str = None):
-        super().__init__(project_models.ItemType.FUNCTION_CALL_OUTPUT, item_id)
-        self.call_id = call_id
-        self.content = ""
-
-    def create_item_resource(self, is_done: bool):
-        content = {
-            "id": self.item_id,
-            "type": self.item_type,
-            "status": "in_progress" if not is_done else "completed",
-            "call_id": self.call_id,
-            "output": self.content,
-        }
-        return project_models.ItemResource(content)
-
-    def add_aggregate_content(self, item):
-        if isinstance(item, str):
-            self.content += item
-            return
-        if not isinstance(item, dict):
-            return
-        content = item.get("text")
-        if isinstance(content, str):
-            self.content += content
-
-    def get_aggregated_content(self):
-        return self.create_item_resource(is_done=True)
-
-
-class MessageItemResourceHelper(ItemResourceHelper):
-    def __init__(self, item_id: str, role: project_models.ResponsesMessageRole):
-        super().__init__(project_models.ItemType.MESSAGE, item_id)
-        self.role = role
-        self.content: list[project_models.ItemContent] = []
-
-    def create_item_resource(self, is_done: bool):
-        content = {
-            "id": self.item_id,
-            "type": self.item_type,
-            "status": "in_progress" if not is_done else "completed",
-            "content": self.content,
-            "role": self.role,
-        }
-        return project_models.ItemResource(content)
-
-    def add_aggregate_content(self, item):
-        if isinstance(item, dict):
-            item = project_models.ItemContent(item)
-        if isinstance(item, project_models.ItemContent):
-            self.content.append(item)
-
-    def get_aggregated_content(self):
-        return self.create_item_resource(is_done=True)
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/response_content_part_event_generator.py b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/response_content_part_event_generator.py
deleted file mode 100644
index fe141887a2b2..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/response_content_part_event_generator.py
+++ /dev/null
@@ -1,153 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-# pylint: disable=unused-argument,consider-using-in,consider-merging-isinstance
-# mypy: ignore-errors
-from typing import List
-
-from langchain_core import messages as langgraph_messages
-
-from azure.ai.agentserver.core.models import projects as project_models
-
-from . import item_content_helpers
-from .response_event_generator import ResponseEventGenerator, StreamEventState
-from .response_output_text_event_generator import ResponseOutputTextEventGenerator
-
-
-class ResponseContentPartEventGenerator(ResponseEventGenerator):
-    def __init__(
-        self,
-        logger,
-        parent: ResponseEventGenerator,
-        item_id: str,
-        message_id: str,
-        output_index: int,
-        content_index: int,
-    ):
-        super().__init__(logger, parent)
-        self.output_index = output_index
-        self.content_index = content_index
-        self.item_id = item_id
-        self.message_id = message_id
-        self.aggregated_content = ""
-        self.item_content_helper = None
-
-    def try_process_message(
-        self, message, context, stream_state: StreamEventState
-    ) -> tuple[bool, ResponseEventGenerator, List[project_models.ResponseStreamEvent]]:
-        is_processed = False
-        events = []
-        next_processor = self
-        if not self.item_content_helper:
-            if not self.try_create_item_content_helper(message):
-                # cannot create item content, skip this message
-                self.logger.warning(f"Cannot create item content helper for message: {message}")
-                return True, self, []
-        if self.item_content_helper and not self.started:
-            self.started, start_events = self.on_start(message, context, stream_state)
-            if not self.started:
-                # could not start processing, skip this message
-                return True, self, []
-            events.extend(start_events)
-
-        if self.should_end(message):
-            complete_events = self.on_end(message, context, stream_state)
-            events.extend(complete_events)
-            next_processor = self.parent
-            is_processed = self.has_finish_reason(message) if message else False
-            return is_processed, next_processor, events
-
-        child_processor = self.create_child_processor(message)
-        if child_processor:
-            next_processor = child_processor
-
-        return is_processed, next_processor, events
-
-    def on_start(   # mypy: ignore[override]
-        self, event, run_details, stream_state: StreamEventState
-    ) -> tuple[bool, List[project_models.ResponseStreamEvent]]:
-        if self.started:
-            return False, []
-
-        start_event = project_models.ResponseContentPartAddedEvent(
-            item_id=self.item_id,
-            output_index=self.output_index,
-            content_index=self.content_index,
-            part=self.item_content_helper.create_item_content(),
-            sequence_number=stream_state.sequence_number,
-        )
-        stream_state.sequence_number += 1
-        self.started = True
-
-        return True, [start_event]
-
-    def on_end(self, message, context, stream_state: StreamEventState
-            ) -> List[project_models.ResponseStreamEvent]:   # mypy: ignore[override]
-        aggregated_content = self.item_content_helper.create_item_content()
-        done_event = project_models.ResponseContentPartDoneEvent(
-            item_id=self.item_id,
-            output_index=self.output_index,
-            content_index=self.content_index,
-            part=aggregated_content,
-            sequence_number=stream_state.sequence_number,
-        )
-        stream_state.sequence_number += 1
-        if self.parent:
-            self.parent.aggregate_content(aggregated_content.as_dict())
-        return [done_event]
-
-    def try_create_item_content_helper(self, message):
-        if isinstance(message, langgraph_messages.AIMessage) or isinstance(message, langgraph_messages.ToolMessage):
-            if self.is_text_content(message.content):
-                self.item_content_helper = item_content_helpers.OutputTextItemContentHelper()
-                return True
-        if isinstance(message, langgraph_messages.HumanMessage) or isinstance(
-            message, langgraph_messages.SystemMessage
-        ):
-            if self.is_text_content(message.content):
-                self.item_content_helper = item_content_helpers.InputTextItemContentHelper()
-                return True
-        return False
-
-    def aggregate_content(self, content):
-        return self.item_content_helper.aggregate_content(content)
-
-    def is_text_content(self, content):
-        if isinstance(content, str):
-            return True
-        if isinstance(content, list) and all(isinstance(c, str) for c in content):
-            return True
-        return False
-
-    def create_child_processor(self, message) -> ResponseEventGenerator:
-        if (
-            self.item_content_helper.content_type == project_models.ItemContentType.INPUT_TEXT
-            or self.item_content_helper.content_type == project_models.ItemContentType.OUTPUT_TEXT
-        ):
-            return ResponseOutputTextEventGenerator(
-                logger=self.logger,
-                parent=self,
-                content_index=self.content_index,
-                output_index=self.output_index,
-                item_id=self.item_id,
-                message_id=self.message_id,
-            )
-        raise ValueError(f"Unsupported item content type for child processor: {self.item_content_helper.content_type}")
-
-    def has_finish_reason(self, message) -> bool:
-        if not isinstance(message, langgraph_messages.BaseMessageChunk):
-            return False
-        if message.response_metadata and message.response_metadata.get("finish_reason"):
-            return True
-        return False
-
-    def should_end(self, event) -> bool:
-        # Determine if the event indicates end of the stream for this item
-        if event is None:
-            return True
-        if event.id != self.message_id:
-            return True
-        # if is Message not MessageChunk, should create child and end in the second iteration
-        if not isinstance(event, langgraph_messages.BaseMessageChunk):
-            return self.item_content_helper.has_aggregated_content
-        return False
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/response_event_generator.py b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/response_event_generator.py
deleted file mode 100644
index ee19ca74f4bb..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/response_event_generator.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-# pylint: disable=unused-argument,unnecessary-pass
-# mypy: disable-error-code="valid-type"
-from typing import List
-
-from langchain_core.messages import AnyMessage
-
-from azure.ai.agentserver.core.models import projects as project_models
-from azure.ai.agentserver.core.server.common.agent_run_context import AgentRunContext
-
-
-class StreamEventState:
-    """
-    :meta private:
-    State information for the stream event processing.
-    """
-
-    sequence_number: int = 0
-
-
-class ResponseEventGenerator:
-    """
-    :meta private:
-    Abstract base class for response event generators.
-    """
-
-    started: bool = False
-
-    def __init__(self, logger, parent):
-        self.logger = logger
-        self.parent = parent  # parent generator
-
-    def try_process_message(
-            self,
-            message: AnyMessage,   # mypy: ignore[valid-type]
-            context: AgentRunContext,
-            stream_state: StreamEventState
-        ):    # mypy: ignore[empty-body]
-        """
-        Try to process the incoming message.
-
-        :param message: The incoming message to process.
-        :type message: AnyMessage
-        :param context: The agent run context.
-        :type context: AgentRunContext
-        :param stream_state: The current stream event state.
-        :type stream_state: StreamEventState
-
-        :return: tuple of (is_processed, next_processor, events)
-        :rtype: tuple[bool, ResponseEventGenerator, List[ResponseStreamEvent]]
-        """
-        pass
-
-    def on_start(self) -> tuple[bool, List[project_models.ResponseStreamEvent]]:
-        """
-        Generate the starting events for this layer.
-
-        :return: tuple of (started, events)
-        :rtype: tuple[bool, List[ResponseStreamEvent]]
-        """
-        return False, []
-
-    def on_end(
-            self, message: AnyMessage, context: AgentRunContext, stream_state: StreamEventState
-        ) -> tuple[bool, List[project_models.ResponseStreamEvent]]:
-        """
-        Generate the ending events for this layer.
-        TODO: handle different end conditions, e.g. normal end, error end, etc.
-
-        :param message: The incoming message to process.
-        :type message: AnyMessage
-        :param context: The agent run context.
-        :type context: AgentRunContext
-        :param stream_state: The current stream event state.
-        :type stream_state: StreamEventState
-
-        :return: tuple of (started, events)
-        :rtype: tuple[bool, List[ResponseStreamEvent]]
-        """
-        return False, []
-
-    def aggregate_content(self):
-        """
-        Aggregate the content for this layer.
-        It is called by its child processor to pass up aggregated content.
-
-        :return: content from child processor
-        :rtype: str | dict
-        """
-        pass
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/response_function_call_argument_event_generator.py b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/response_function_call_argument_event_generator.py
deleted file mode 100644
index dbaed3ac9258..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/response_function_call_argument_event_generator.py
+++ /dev/null
@@ -1,126 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-# pylint: disable=unused-argument,name-too-long
-# mypy: ignore-errors
-from typing import List
-
-from langchain_core import messages as langgraph_messages
-from langchain_core.messages import AnyMessage
-
-from azure.ai.agentserver.core.models import projects as project_models
-from azure.ai.agentserver.core.server.common.agent_run_context import AgentRunContext
-
-from ..utils import extract_function_call
-from . import ResponseEventGenerator, StreamEventState
-
-
-class ResponseFunctionCallArgumentEventGenerator(ResponseEventGenerator):
-    def __init__(self, logger, parent: ResponseEventGenerator, item_id, message_id, output_index: int):
-        super().__init__(logger, parent)
-        self.item_id = item_id
-        self.output_index = output_index
-        self.aggregated_content = ""
-        self.message_id = message_id
-
-    def try_process_message(
-        self, message, context: AgentRunContext, stream_state: StreamEventState
-    ) -> tuple[bool, ResponseEventGenerator, List[project_models.ResponseStreamEvent]]:
-        is_processed = False
-        events = []
-        next_processor = self
-        if not self.started:
-            self.started = True  # does not need to do anything special on start
-
-        is_processed, next_processor, processed_events = self.process(message, context, stream_state)
-        if not is_processed:
-            self.logger.warning(f"FunctionCallArgumentEventGenerator did not process message: {message}")
-        events.extend(processed_events)
-
-        if self.should_end(message):
-            has_finish_reason = self.has_finish_reason(message)
-            complete_events = self.on_end(message, context, stream_state)
-            events.extend(complete_events)
-            next_processor = self.parent
-            is_processed = has_finish_reason  # if has finish reason, mark as processed and stop further processing
-
-        return is_processed, next_processor, events
-
-    def on_start(
-        self, event: AnyMessage, run_details, stream_state: StreamEventState
-    ) -> tuple[bool, List[project_models.ResponseStreamEvent]]:
-        if self.started:
-            return True, []
-        self.started = True
-        return True, []
-
-    def process(
-        self, message: AnyMessage, run_details, stream_state: StreamEventState
-    ) -> tuple[bool, ResponseEventGenerator, List[project_models.ResponseStreamEvent]]:
-        tool_call = self.get_tool_call_info(message)
-        if tool_call:
-            _, _, argument = extract_function_call(tool_call)
-            if argument:
-                argument_delta_event = project_models.ResponseFunctionCallArgumentsDeltaEvent(
-                    item_id=self.item_id,
-                    output_index=self.output_index,
-                    delta=argument,
-                    sequence_number=stream_state.sequence_number,
-                )
-                stream_state.sequence_number += 1
-                self.aggregated_content += argument
-                return True, self, [argument_delta_event]
-        return False, self, []
-
-    def has_finish_reason(self, message: AnyMessage) -> bool:
-        if not message or message.id != self.message_id:
-            return False
-        if isinstance(message, langgraph_messages.AIMessageChunk):
-            if not message.tool_call_chunks:
-                # new tool call started, end this argument processing
-                return True
-            if message.response_metadata.get("finish_reason"):
-                # tool call finished
-                return True
-        elif isinstance(message, langgraph_messages.AIMessage):
-            return True
-        return False
-
-    def should_end(self, event: AnyMessage) -> bool:
-        if event is None:
-            return True
-        if event.id != self.message_id:
-            return True
-        return False
-
-    def on_end(
-        self, message: AnyMessage, context: AgentRunContext, stream_state: StreamEventState
-    ) -> tuple[bool, List[project_models.ResponseStreamEvent]]:
-        done_event = project_models.ResponseFunctionCallArgumentsDoneEvent(
-            item_id=self.item_id,
-            output_index=self.output_index,
-            arguments=self.aggregated_content,
-            sequence_number=stream_state.sequence_number,
-        )
-        stream_state.sequence_number += 1
-        self.parent.aggregate_content(self.aggregated_content)  # pass aggregated content to parent
-        return [done_event]
-
-    def get_tool_call_info(self, message: langgraph_messages.AnyMessage):
-        if isinstance(message, langgraph_messages.AIMessageChunk):
-            if message.tool_call_chunks:
-                if len(message.tool_call_chunks) > 1:
-                    self.logger.warning(
-                        f"There are {len(message.tool_call_chunks)} tool calls found. "
-                        + "Only the first one will be processed."
-                    )
-                return message.tool_call_chunks[0]
-        elif isinstance(message, langgraph_messages.AIMessage):
-            if message.tool_calls:
-                if len(message.tool_calls) > 1:
-                    self.logger.warning(
-                        f"There are {len(message.tool_calls)} tool calls found. "
-                        + "Only the first one will be processed."
-                    )
-                return message.tool_calls[0]
-        return None
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/response_output_item_event_generator.py b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/response_output_item_event_generator.py
deleted file mode 100644
index a2606d1541c1..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/response_output_item_event_generator.py
+++ /dev/null
@@ -1,163 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-# pylint: disable=unused-argument
-# mypy: ignore-errors
-from typing import List
-
-from langchain_core import messages as langgraph_messages
-from langchain_core.messages import AnyMessage
-
-from azure.ai.agentserver.core.models import projects as project_models
-from azure.ai.agentserver.core.server.common.agent_run_context import AgentRunContext
-from azure.ai.agentserver.core.server.common.id_generator.id_generator import IdGenerator
-
-from . import ResponseEventGenerator, StreamEventState, item_resource_helpers
-from .response_content_part_event_generator import ResponseContentPartEventGenerator
-from .response_function_call_argument_event_generator import ResponseFunctionCallArgumentEventGenerator
-
-
-class ResponseOutputItemEventGenerator(ResponseEventGenerator):
-    def __init__(self, logger, parent: ResponseEventGenerator, output_index: int, message_id: str = None):
-        super().__init__(logger, parent)
-        self.output_index = output_index
-        self.message_id = message_id
-        self.item_resource_helper = None
-
-    def try_process_message(
-        self, message: AnyMessage, context: AgentRunContext, stream_state: StreamEventState
-    ) -> tuple[bool, ResponseEventGenerator, List[project_models.ResponseStreamEvent]]:
-        is_processed = False
-        next_processor = self
-        events = []
-        if self.item_resource_helper is None:
-            if not self.try_create_item_resource_helper(message, context.id_generator):
-                # cannot create item resource, skip this message
-                self.logger.warning(f"Cannot create item resource helper for message: {message}, skipping.")
-                return True, self, []
-
-        if self.item_resource_helper and not self.started:
-            self.started, start_events = self.on_start(message, context, stream_state)
-            if not self.started:
-                # could not start processing, skip this message
-                self.logger.warning(f"Cannot create start events for message: {message}, skipping.")
-                return True, self, []
-            events.extend(start_events)
-
-        if self.should_end(message):
-            # not the message this processor is handling
-            complete_events = self.on_end(message, context, stream_state)
-            is_processed = self.message_id == message.id if message else False
-            next_processor = self.parent
-            events.extend(complete_events)
-            return is_processed, next_processor, events
-
-        child_processor = self.create_child_processor(message)
-        if child_processor:
-            self.logger.info(f"Created child processor: {child_processor}")
-            return False, child_processor, events
-
-        if message:
-            # no child processor, process the content directly
-            self.aggregate_content(message.content)
-            is_processed = True
-
-        return is_processed, next_processor, events
-
-    def on_start(
-        self, event: AnyMessage, context: AgentRunContext, stream_state: StreamEventState
-    ) -> tuple[bool, List[project_models.ResponseStreamEvent]]:
-        if self.started:
-            return True, []
-
-        item_resource = self.item_resource_helper.create_item_resource(is_done=False)
-        if item_resource is None:
-            # cannot know what item resource to create
-            return False, None
-        item_added_event = project_models.ResponseOutputItemAddedEvent(
-            output_index=self.output_index,
-            sequence_number=stream_state.sequence_number,
-            item=item_resource,
-        )
-        stream_state.sequence_number += 1
-        self.started = True
-        return True, [item_added_event]
-
-    def should_end(self, event: AnyMessage) -> bool:
-        if event is None:
-            self.logger.info("Received None event, ending processor.")
-            return True
-        if event.id != self.message_id:
-            return True
-        return False
-
-    def on_end(
-        self, message: AnyMessage, context: AgentRunContext, stream_state: StreamEventState
-    ) -> tuple[bool, List[project_models.ResponseStreamEvent]]:
-        if not self.started:  # should not happen
-            return []
-
-        item_resource = self.item_resource_helper.create_item_resource(is_done=True)
-        # response item done event
-        done_event = project_models.ResponseOutputItemDoneEvent(
-            output_index=self.output_index,
-            sequence_number=stream_state.sequence_number,
-            item=item_resource,
-        )
-        stream_state.sequence_number += 1
-        self.parent.aggregate_content(item_resource)  # pass aggregated content to parent
-        return [done_event]
-
-    def aggregate_content(self, content):
-        # aggregate content from child processor
-        self.item_resource_helper.add_aggregate_content(content)
-
-    def try_create_item_resource_helper(self, event: AnyMessage, id_generator: IdGenerator):  # pylint: disable=too-many-return-statements
-        if isinstance(event, langgraph_messages.AIMessageChunk) and event.tool_call_chunks:
-            self.item_resource_helper = item_resource_helpers.FunctionCallItemResourceHelper(
-                item_id=id_generator.generate_function_call_id(), tool_call=event.tool_call_chunks[0]
-            )
-            return True
-        if isinstance(event, langgraph_messages.AIMessage) and event.tool_calls:
-            self.item_resource_helper = item_resource_helpers.FunctionCallItemResourceHelper(
-                item_id=id_generator.generate_function_call_id(), tool_call=event.tool_calls[0]
-            )
-            return True
-        if isinstance(event, langgraph_messages.AIMessage) and event.content:
-            self.item_resource_helper = item_resource_helpers.MessageItemResourceHelper(
-                item_id=id_generator.generate_message_id(), role=project_models.ResponsesMessageRole.ASSISTANT
-            )
-            return True
-        if isinstance(event, langgraph_messages.HumanMessage) and event.content:
-            self.item_resource_helper = item_resource_helpers.MessageItemResourceHelper(
-                item_id=id_generator.generate_message_id(), role=project_models.ResponsesMessageRole.USER
-            )
-            return True
-        if isinstance(event, langgraph_messages.SystemMessage) and event.content:
-            self.item_resource_helper = item_resource_helpers.MessageItemResourceHelper(
-                item_id=id_generator.generate_message_id(), role=project_models.ResponsesMessageRole.SYSTEM
-            )
-            return True
-        if isinstance(event, langgraph_messages.ToolMessage):
-            self.item_resource_helper = item_resource_helpers.FunctionCallOutputItemResourceHelper(
-                item_id=id_generator.generate_function_output_id(), call_id=event.tool_call_id
-            )
-            return True
-        return False
-
-    def create_child_processor(self, message: AnyMessage):
-        if self.item_resource_helper is None:
-            return None
-        if self.item_resource_helper.item_type == project_models.ItemType.FUNCTION_CALL:
-            return ResponseFunctionCallArgumentEventGenerator(
-                self.logger,
-                self,
-                item_id=self.item_resource_helper.item_id,
-                message_id=message.id,
-                output_index=self.output_index,
-            )
-        if self.item_resource_helper.item_type == project_models.ItemType.MESSAGE:
-            return ResponseContentPartEventGenerator(
-                self.logger, self, self.item_resource_helper.item_id, message.id, self.output_index, content_index=0
-            )
-        return None
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/response_output_text_event_generator.py b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/response_output_text_event_generator.py
deleted file mode 100644
index b6be81ec7cb2..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/response_output_text_event_generator.py
+++ /dev/null
@@ -1,112 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-# pylint: disable=unused-argument
-# mypy: disable-error-code="return-value,assignment"
-from typing import List
-
-from azure.ai.agentserver.core.models import projects as project_models
-from azure.ai.agentserver.core.server.common.agent_run_context import AgentRunContext
-
-from .response_event_generator import (
-    ResponseEventGenerator,
-    StreamEventState,
-)
-
-
-class ResponseOutputTextEventGenerator(ResponseEventGenerator):
-    def __init__(
-        self,
-        logger,
-        parent: ResponseEventGenerator,
-        content_index: int,
-        output_index: int,
-        item_id: str,
-        message_id: str,
-    ):
-        super().__init__(logger, parent)
-        self.output_index = output_index
-        self.content_index = content_index
-        self.item_id = item_id
-        self.message_id = message_id
-        self.aggregated_content = ""
-
-    def try_process_message(
-        self, message, context, stream_state: StreamEventState
-    ) -> tuple[bool, ResponseEventGenerator, List[project_models.ResponseStreamEvent]]:
-        is_processed = False
-        events = []
-        next_processor = self
-        if not self.started:
-            self.started = True
-
-        if message:
-            is_processed, next_processor, processed_events = self.process(message, context, stream_state)
-            if not is_processed:
-                self.logger.warning(f"OutputTextEventGenerator did not process message: {message}")
-            events.extend(processed_events)
-
-        if self.should_end(message):
-            is_processed, complete_events = self.on_end(message, context, stream_state)
-            events.extend(complete_events)
-            next_processor = self.parent
-
-        return is_processed, next_processor, events
-
-    def process(
-        self, message, run_details, stream_state: StreamEventState
-    ) -> tuple[bool, ResponseEventGenerator, List[project_models.ResponseStreamEvent]]:
-        if message and message.content:
-            content = [message.content] if isinstance(message.content, str) else message.content
-            res = []
-            for item in content:
-                if not isinstance(item, str):
-                    self.logger.warning(f"Skipping non-string content item: {item}")
-                    continue
-                # create an event for each content item
-                chunk_event = project_models.ResponseTextDeltaEvent(
-                    item_id=self.item_id,
-                    output_index=self.output_index,
-                    content_index=self.content_index,
-                    delta=item,
-                    sequence_number=stream_state.sequence_number,
-                )
-                self.aggregated_content += item
-                stream_state.sequence_number += 1
-                res.append(chunk_event)
-            return True, self, res   # mypy: ignore[return-value]
-        return False, self, []
-
-    def has_finish_reason(self, message) -> bool:
-        if not message or message.id != self.message_id:
-            return False
-        if message.response_metadata and message.response_metadata.get("finish_reason"):
-            return True
-        return False
-
-    def should_end(self, message) -> bool:
-        # Determine if the message indicates end of the stream for this item
-        if message is None:
-            return True
-        if message.id != self.message_id:
-            return True
-        return False
-
-    def on_end(   # mypy: ignore[override]
-        self, message, context: AgentRunContext, stream_state: StreamEventState
-    ) -> tuple[bool, List[project_models.ResponseStreamEvent]]:
-        if not self.started:
-            return False, []
-
-        # finalize the item resource
-        done_event = project_models.ResponseTextDoneEvent(
-            item_id=self.item_id,
-            output_index=self.output_index,
-            content_index=self.content_index,
-            text=self.aggregated_content,
-            sequence_number=stream_state.sequence_number,
-        )
-        stream_state.sequence_number += 1
-        self.parent.aggregate_content(self.aggregated_content)
-        has_finish = self.has_finish_reason(message)
-        return has_finish, [done_event]
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/response_stream_event_generator.py b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/response_stream_event_generator.py
deleted file mode 100644
index a6ad1cba7396..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/response_event_generators/response_stream_event_generator.py
+++ /dev/null
@@ -1,138 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-# pylint: disable=unused-argument
-# mypy: ignore-errors
-import time
-from typing import List
-
-from langchain_core import messages as langgraph_messages
-
-from azure.ai.agentserver.core.models import projects as project_models
-from azure.ai.agentserver.core.server.common.agent_run_context import AgentRunContext
-
-from .response_event_generator import (
-    ResponseEventGenerator,
-    StreamEventState,
-)
-from .response_output_item_event_generator import ResponseOutputItemEventGenerator
-
-
-class ResponseStreamEventGenerator(ResponseEventGenerator):
-    """
-    :meta private:
-    Response stream event generator.
-    """
-
-    def __init__(self, logger, parent):
-        super().__init__(logger, parent)
-        self.aggregated_contents: List[project_models.ItemResource] = []
-
-    def on_start(
-        self, context: AgentRunContext, stream_state: StreamEventState
-    ) -> tuple[bool, List[project_models.ResponseStreamEvent]]:
-        if self.started:
-            return True, []
-        agent_id = context.get_agent_id_object()
-        conversation = context.get_conversation_object()
-        # response create event
-        response_dict = {
-            "object": "response",
-            "agent_id": agent_id,
-            "conversation": conversation,
-            "id": context.response_id,
-            "status": "in_progress",
-            "created_at": int(time.time()),
-        }
-        created_event = project_models.ResponseCreatedEvent(
-            response=project_models.Response(response_dict),
-            sequence_number=stream_state.sequence_number,
-        )
-        stream_state.sequence_number += 1
-
-        # response in progress
-        response_dict = {
-            "object": "response",
-            "agent_id": agent_id,
-            "conversation": conversation,
-            "id": context.response_id,
-            "status": "in_progress",
-            "created_at": int(time.time()),
-        }
-        in_progress_event = project_models.ResponseInProgressEvent(
-            response=project_models.Response(response_dict),
-            sequence_number=stream_state.sequence_number,
-        )
-        stream_state.sequence_number += 1
-        self.started = True
-        return True, [created_event, in_progress_event]
-
-    def should_complete(self, event: langgraph_messages.AnyMessage) -> bool:
-        # Determine if the event indicates completion
-        if event is None:
-            return True
-        return False
-
-    def try_process_message(
-        self, message: langgraph_messages.AnyMessage, context: AgentRunContext, stream_state: StreamEventState
-    ) -> tuple[bool, ResponseEventGenerator, List[project_models.ResponseStreamEvent]]:
-        is_processed = False
-        next_processor = self
-        events = []
-
-        if not self.started:
-            self.started, start_events = self.on_start(context, stream_state)
-            events.extend(start_events)
-
-        if message:
-            # create a child processor
-            next_processor = ResponseOutputItemEventGenerator(
-                self.logger, self, len(self.aggregated_contents), message.id
-            )
-            return is_processed, next_processor, events
-
-        if self.should_end(message):
-            # received a None message, indicating end of the stream
-            done_events = self.on_end(message, context, stream_state)
-            events.extend(done_events)
-            is_processed = True
-            next_processor = None
-
-        return is_processed, next_processor, events
-
-    def should_end(self, event: langgraph_messages.AnyMessage) -> bool:
-        # Determine if the event indicates end of the stream
-        if event is None:
-            return True
-        return False
-
-    def on_end(self, message: langgraph_messages.AnyMessage, context: AgentRunContext, stream_state: StreamEventState):
-        agent_id = context.get_agent_id_object()
-        conversation = context.get_conversation_object()
-        response_dict = {
-            "object": "response",
-            "agent_id": agent_id,
-            "conversation": conversation,
-            "id": context.response_id,
-            "status": "completed",
-            "created_at": int(time.time()),
-            "output": self.aggregated_contents,
-        }
-        done_event = project_models.ResponseCompletedEvent(
-            response=project_models.Response(response_dict),
-            sequence_number=stream_state.sequence_number,
-        )
-        stream_state.sequence_number += 1
-        if self.parent:
-            self.parent.aggregate_content(self.aggregated_contents)
-        return [done_event]
-
-    def aggregate_content(self, content):
-        # aggregate content from children
-        if isinstance(content, list):
-            for c in content:
-                self.aggregate_content(c)
-        if isinstance(content, project_models.ItemResource):
-            self.aggregated_contents.append(content)
-        else:
-            raise ValueError(f"Invalid content type: {type(content)}, expected: {project_models.ItemResource}")
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/utils.py b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/utils.py
deleted file mode 100644
index d9517d8b0e8d..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/models/utils.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# ---------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# ---------------------------------------------------------
-import json
-from typing import get_type_hints
-
-
-def extract_function_call(tool_call: dict):
-    """
-    Extract function call details from tool_call dict.
-
-    :param tool_call: The tool call dictionary containing function call details.
-    :type tool_call: dict
-
-    :return: A tuple of (name, call_id, argument).
-    :rtype: tuple[str | None, str | None, str | None]
-    """
-    name = tool_call.get("name")
-    call_id = tool_call.get("id")
-    argument = None
-    arguments_raw = tool_call.get("args")
-    if isinstance(arguments_raw, str):
-        argument = arguments_raw
-    elif isinstance(arguments_raw, dict):
-        argument = json.dumps(arguments_raw)
-    return name, call_id, argument
-
-
-def is_state_schema_valid(state_schema) -> bool:
-    """
-    Validate whether the state schema of a graph contains a field named messages
-
-    :param state_schema: The state schema class from LangGraph
-    :type state_schema: TypedDict
-
-    :return: True if the state schema contains a field named messages, False otherwise.
-    :rtype: bool
-    """
-    fields = get_typeddict_fields(state_schema)
-    return "messages" in fields
-
-
-def get_typeddict_fields(schema_class) -> dict:
-    """
-    Get all fields/attributes from a TypedDict class.
-
-    :param schema_class: The TypedDict class to extract fields from
-    :type schema_class: TypedDict
-
-    :return: Dictionary of field names and their types
-    :rtype: dict
-
-    Example:
-        >>> from typing_extensions import TypedDict
-        >>> class MyState(TypedDict):
-        ...     messages: list[str]
-        ...     user_id: str
-        >>> get_typeddict_fields(MyState)
-        {'messages': list[str], 'user_id': str}
-    """
-    try:
-        return get_type_hints(schema_class)
-    except (TypeError, AttributeError):
-        # Fallback to __annotations__
-        if hasattr(schema_class, "__annotations__"):
-            return schema_class.__annotations__
-
-    return {}
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/py.typed b/sdk/agentserver/azure-ai-agentserver-langgraph/azure/ai/agentserver/langgraph/py.typed
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/cspell.json b/sdk/agentserver/azure-ai-agentserver-langgraph/cspell.json
deleted file mode 100644
index 470408fb66cc..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/cspell.json
+++ /dev/null
@@ -1,16 +0,0 @@
-{
-    "ignoreWords": [
-      "azureai",
-      "fstring",
-      "mslearn",
-      "envtemplate",
-      "ainvoke",
-      "asetup"
-    ],
-    "ignorePaths": [
-      "*.csv",
-      "*.json",
-      "*.rst",
-      "**/samples/**"
-    ]
-  }
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/dev_requirements.txt b/sdk/agentserver/azure-ai-agentserver-langgraph/dev_requirements.txt
deleted file mode 100644
index a3e831d37638..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/dev_requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
--e ../../../eng/tools/azure-sdk-tools
-../azure-ai-agentserver-core
-python-dotenv
-langchain-azure-ai[opentelemetry]>=0.1.4
-opentelemetry-exporter-otlp-proto-http
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/mypy.ini b/sdk/agentserver/azure-ai-agentserver-langgraph/mypy.ini
deleted file mode 100644
index e0bb0b83e2ce..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/mypy.ini
+++ /dev/null
@@ -1,5 +0,0 @@
-[mypy]
-explicit_package_bases = True
-
-[mypy-samples.*]
-ignore_errors = true
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/pyproject.toml b/sdk/agentserver/azure-ai-agentserver-langgraph/pyproject.toml
deleted file mode 100644
index 56eea835f958..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/pyproject.toml
+++ /dev/null
@@ -1,72 +0,0 @@
-[project]
-name = "azure-ai-agentserver-langgraph"
-dynamic = ["version", "readme"]
-description = "LangGraph adapter for Azure AI Agent Server"
-requires-python = ">=3.10"
-authors = [
-  { name = "Microsoft Corporation", email = "azpysdkhelp@microsoft.com" },
-]
-license = "MIT"
-classifiers = [
-    "Development Status :: 7 - Inactive",
-    "Programming Language :: Python",
-    "Programming Language :: Python :: 3 :: Only",
-    "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.10",
-    "Programming Language :: Python :: 3.11",
-    "Programming Language :: Python :: 3.12",
-    "Programming Language :: Python :: 3.13",
-]
-keywords = ["azure", "azure sdk"]
-
-dependencies = [
-    "azure-ai-agentserver-core",
-    "langchain>0.3.5",
-    "langchain-openai>0.3.10",
-    "langgraph>0.5.0",
-]
-
-[project.optional-dependencies]
-tracing = [
-    "langchain-azure-ai[opentelemetry]>=0.1.4",
-    "opentelemetry-exporter-otlp-proto-http",
-]
-
-[build-system]
-requires = ["setuptools>=69", "wheel"]
-build-backend = "setuptools.build_meta"
-
-[tool.setuptools.packages.find]
-exclude = [
-    "tests*",
-    "samples*",
-    "doc*",
-    "azure",
-    "azure.ai",
-]
-
-[tool.setuptools.dynamic]
-version = { attr = "azure.ai.agentserver.langgraph._version.VERSION" }
-readme = { file = ["README.md"], content-type = "text/markdown" }
-
-[tool.setuptools.package-data]
-pytyped = ["py.typed"]
-
-[tool.ruff]
-line-length = 120
-target-version = "py311"
-lint.select = ["E", "F", "B", "I"]   # E=pycodestyle errors, F=Pyflakes, B=bugbear, I=import sort
-lint.ignore = []
-fix = false
-
-[tool.ruff.lint.isort]
-known-first-party = ["azure.ai.agentserver.langgraph"]
-combine-as-imports = true
-
-[tool.azure-sdk-build]
-breaking = false   # incompatible python version
-pyright = false
-verifytypes = false   # incompatible python version for -core
-verify_keywords = false
-mindependency = false  # depends on -core package
-whl_no_aio = false
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/pyrightconfig.json b/sdk/agentserver/azure-ai-agentserver-langgraph/pyrightconfig.json
deleted file mode 100644
index 6bb9ed0f43cf..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/pyrightconfig.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "reportAttributeAccessIssue": "warning",
-    "reportIncompatibleMethodOverride": "warning",
-    "reportReturnType": "warning",
-    "reportArgumentType": "warning",
-    "reportMissingImports": "warning",
-    "reportOptionalMemberAccess": "warning",
-    "reportGeneralTypeIssues": "warning",
-    "reportCallIssue": "warning",
-
-    "exclude": [
-        "**/samples/**",
-        "**/conftest.py"
-    ]
-}
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agent_calculator/.env-template b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agent_calculator/.env-template
deleted file mode 100644
index 92b9c812a686..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agent_calculator/.env-template
+++ /dev/null
@@ -1,4 +0,0 @@
-AZURE_OPENAI_API_KEY=<api-key>
-AZURE_OPENAI_ENDPOINT=https://<endpoint-name>.cognitiveservices.azure.com/
-OPENAI_API_VERSION=2025-03-01-preview
-AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=<deployment-name>
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agent_calculator/README.md b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agent_calculator/README.md
deleted file mode 100644
index dd2821accf15..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agent_calculator/README.md
+++ /dev/null
@@ -1,84 +0,0 @@
-# LangGraph Agent Calculator Sample
-
-This sample demonstrates how to create a calculator agent using LangGraph and using it with Container Agent Adapter. The agent can perform basic arithmetic operations (addition, multiplication, and division) by utilizing tools and making decisions about when to use them.
-
-## Overview
-
-The sample consists of several key components:
-
-- **LangGraph Agent**: A calculator agent that uses tools to perform arithmetic operations
-- **Azure AI Agents Adapter**: Adapters of the LangGraph agents. It hosts the agent as a service on your local machine.
-
-
-## Files Description
-
-- `langgraph_agent_calculator.py` - The main LangGraph agent implementation with calculator tools
-- `main.py` - HTTP server entry point using the agents adapter
-- `.env-template` A template of environment variables for Azure OpenAI configuration
-
-
-
-## Setup
-
-1. **Environment Configuration**
-   Create a `.env` file in this directory with your Azure OpenAI configuration:
-   ```
-   AZURE_OPENAI_API_KEY=your_api_key_here
-   AZURE_OPENAI_ENDPOINT=your_endpoint_here
-   AZURE_OPENAI_API_VERSION=2024-02-15-preview
-   ```
-   And install python-dotenv
-   ```bash
-   cd  container_agents/container_agent_adapter/python
-   pip install python-dotenv
-   ```
-
-2. **Install Dependencies**
-   Required Python packages (install via pip):
-   ```bash
-   cd  container_agents/container_agent_adapter/python
-   pip install -e .[langgraph]
-   ```
-
-## Usage
-
-### Running as HTTP Server
-
-1. Start the agent server:
-   ```bash
-   python main.py
-   ```
-   The server will start on `http://localhost:8088`
-
-2. Test the agent:
-   ```bash
-   curl -X POST http://localhost:8088/responses \
-     -H "Content-Type: application/json" \
-     -d '{
-       "agent": {
-         "name": "local_agent",
-         "type": "agent_reference"
-       },
-       "stream": false,
-       "input": "What is 15 divided by 3?"
-     }'
-   ```
-   
-   or 
-
-   ```bash
-   curl -X POST http://localhost:8088/responses \
-     -H "Content-Type: application/json" \
-     -d '{
-       "agent": {
-         "name": "local_agent",
-         "type": "agent_reference"
-       },
-       "stream": false,
-       "input": [{
-          "type": "message",
-          "role": "user",
-          "content": [{"type": "input_text", "text": "What is 3 add 5?"}]
-        }]
-     }'
-   ```
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agent_calculator/langgraph_agent_calculator.py b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agent_calculator/langgraph_agent_calculator.py
deleted file mode 100644
index ffa8d14b208f..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agent_calculator/langgraph_agent_calculator.py
+++ /dev/null
@@ -1,142 +0,0 @@
-import os
-
-from dotenv import load_dotenv
-from langchain.chat_models import init_chat_model
-from langchain_core.messages import SystemMessage, ToolMessage
-from langchain_core.tools import tool
-from langgraph.graph import (
-    END,
-    START,
-    MessagesState,
-    StateGraph,
-)
-from typing_extensions import Literal
-from azure.identity import DefaultAzureCredential, get_bearer_token_provider
-
-from azure.ai.agentserver.langgraph import from_langgraph
-
-load_dotenv()
-
-deployment_name = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", "gpt-4o")
-api_key = os.getenv("AZURE_OPENAI_API_KEY", "")
-
-if api_key:
-    llm = init_chat_model(f"azure_openai:{deployment_name}")
-else:
-    credential = DefaultAzureCredential()
-    token_provider = get_bearer_token_provider(
-        credential, "https://cognitiveservices.azure.com/.default"
-    )
-    llm = init_chat_model(
-        f"azure_openai:{deployment_name}",
-        azure_ad_token_provider=token_provider,
-    )
-
-
-# Define tools
-@tool
-def multiply(a: int, b: int) -> int:
-    """Multiply a and b.
-
-    Args:
-        a: first int
-        b: second int
-    """
-    return a * b
-
-
-@tool
-def add(a: int, b: int) -> int:
-    """Adds a and b.
-
-    Args:
-        a: first int
-        b: second int
-    """
-    return a + b
-
-
-@tool
-def divide(a: int, b: int) -> float:
-    """Divide a and b.
-
-    Args:
-        a: first int
-        b: second int
-    """
-    return a / b
-
-
-# Augment the LLM with tools
-tools = [add, multiply, divide]
-tools_by_name = {tool.name: tool for tool in tools}
-llm_with_tools = llm.bind_tools(tools)
-
-
-# Nodes
-def llm_call(state: MessagesState):
-    """LLM decides whether to call a tool or not"""
-
-    return {
-        "messages": [
-            llm_with_tools.invoke(
-                [
-                    SystemMessage(
-                        content="You are a helpful assistant tasked with performing arithmetic on a set of inputs."
-                    )
-                ]
-                + state["messages"]
-            )
-        ]
-    }
-
-
-def tool_node(state: dict):
-    """Performs the tool call"""
-
-    result = []
-    for tool_call in state["messages"][-1].tool_calls:
-        tool = tools_by_name[tool_call["name"]]
-        observation = tool.invoke(tool_call["args"])
-        result.append(ToolMessage(content=observation, tool_call_id=tool_call["id"]))
-    return {"messages": result}
-
-
-# Conditional edge function to route to the tool node or end based upon whether the LLM made a tool call
-def should_continue(state: MessagesState) -> Literal["environment", END]:
-    """Decide if we should continue the loop or stop based upon whether the LLM made a tool call"""
-
-    messages = state["messages"]
-    last_message = messages[-1]
-    # If the LLM makes a tool call, then perform an action
-    if last_message.tool_calls:
-        return "Action"
-    # Otherwise, we stop (reply to the user)
-    return END
-
-
-# Build workflow
-agent_builder = StateGraph(MessagesState)
-
-# Add nodes
-agent_builder.add_node("llm_call", llm_call)
-agent_builder.add_node("environment", tool_node)
-
-# Add edges to connect nodes
-agent_builder.add_edge(START, "llm_call")
-agent_builder.add_conditional_edges(
-    "llm_call",
-    should_continue,
-    {
-        "Action": "environment",
-        END: END,
-    },
-)
-agent_builder.add_edge("environment", "llm_call")
-
-# Compile the agent
-agent = agent_builder.compile()
-
-if __name__ == "__main__":
-    adapter = from_langgraph(agent)
-    adapter.run()
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agent_calculator/requirements.txt b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agent_calculator/requirements.txt
deleted file mode 100644
index 8c3bb2198ef1..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agent_calculator/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-python-dotenv>=1.0.0
-azure-ai-agentserver-core
-azure-ai-agentserver-langgraph
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/.env-template b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/.env-template
deleted file mode 100644
index 7f9e5c66c97c..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/.env-template
+++ /dev/null
@@ -1,6 +0,0 @@
-AZURE_OPENAI_API_KEY=<api-key>
-AZURE_OPENAI_ENDPOINT=https://<endpoint-name>.cognitiveservices.azure.com/
-OPENAI_API_VERSION=2025-03-01-preview
-AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=<deployment-name>
-AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME=<deployment-name>
-AZURE_OPENAI_EMBEDDINGS_MODEL_NAME=<model-name>
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/__init__.py b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/__init__.py
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/edges/__init__.py b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/edges/__init__.py
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/edges/grade_documents.py b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/edges/grade_documents.py
deleted file mode 100644
index 1a37eb96103c..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/edges/grade_documents.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import os
-from typing import Literal
-
-from dotenv import load_dotenv
-from langchain.chat_models import init_chat_model
-from langgraph.graph import MessagesState
-from pydantic import BaseModel, Field
-
-GRADE_PROMPT = (
-    "You are a grader assessing relevance of a retrieved document to a user question. \n "
-    "Here is the retrieved document: \n\n {context} \n\n"
-    "Here is the user question: {question} \n"
-    "If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n"
-    "Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."
-)
-
-
-# highlight-next-line
-class GradeDocuments(BaseModel):
-    """Grade documents using a binary score for relevance check."""
-
-    binary_score: str = Field(
-        description="Relevance score: 'yes' if relevant, or 'no' if not relevant"
-    )
-
-
-load_dotenv()
-deployment_name = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", "gpt-4o")
-grader_model = init_chat_model(f"azure_openai:{deployment_name}")
-
-
-def grade_documents(
-    state: MessagesState,
-) -> Literal["generate_answer", "rewrite_question"]:
-    """Determine whether the retrieved documents are relevant to the question."""
-    question = state["messages"][0].content
-    context = state["messages"][-1].content
-
-    prompt = GRADE_PROMPT.format(question=question, context=context)
-    response = (
-        grader_model
-        # highlight-next-line
-        .with_structured_output(GradeDocuments).invoke(
-            [{"role": "user", "content": prompt}]
-        )
-    )
-    score = response.binary_score
-
-    if score == "yes":
-        return "generate_answer"
-    else:
-        return "rewrite_question"
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/nodes/__init__.py b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/nodes/__init__.py
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/nodes/generate_answer.py b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/nodes/generate_answer.py
deleted file mode 100644
index 42c2085d0819..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/nodes/generate_answer.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import os
-
-from dotenv import load_dotenv
-from langchain.chat_models import init_chat_model
-from langgraph.graph import MessagesState
-
-GENERATE_PROMPT = (
-    "You are an assistant for question-answering tasks. "
-    "Use the following pieces of retrieved context to answer the question. "
-    "If you don't know the answer, just say that you don't know. "
-    "Use three sentences maximum and keep the answer concise.\n"
-    "Question: {question} \n"
-    "Context: {context}"
-)
-
-load_dotenv()
-deployment_name = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", "gpt-4o")
-response_model = init_chat_model(f"azure_openai:{deployment_name}")
-
-
-def generate_answer(state: MessagesState):
-    """Generate an answer."""
-    question = state["messages"][0].content
-    context = state["messages"][-1].content
-    prompt = GENERATE_PROMPT.format(question=question, context=context)
-    response = response_model.invoke([{"role": "user", "content": prompt}])
-    return {"messages": [response]}
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/nodes/generate_query_or_respond.py b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/nodes/generate_query_or_respond.py
deleted file mode 100644
index 9e4bd761ba60..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/nodes/generate_query_or_respond.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import os
-
-from dotenv import load_dotenv
-from langchain.chat_models import init_chat_model
-from langgraph.graph import MessagesState
-
-# Add the parent directory to the Python path to allow imports
-# sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
-from tools.retriever_tool import retriever_tool
-
-load_dotenv()
-deployment_name = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", "gpt-4o")
-response_model = init_chat_model(f"azure_openai:{deployment_name}")
-
-
-def generate_query_or_respond(state: MessagesState):
-    """Call the model to generate a response based on the current state. Given
-    the question, it will decide to retrieve using the retriever tool, or simply respond to the user.
-    """
-    response = (
-        response_model
-        # highlight-next-line
-        .bind_tools([retriever_tool]).invoke(state["messages"])
-    )
-    return {"messages": [response]}
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/nodes/rewrite_question.py b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/nodes/rewrite_question.py
deleted file mode 100644
index 6113e1093ed4..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/nodes/rewrite_question.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import os
-
-from dotenv import load_dotenv
-from langchain.chat_models import init_chat_model
-from langgraph.graph import MessagesState
-
-REWRITE_PROMPT = (
-    "Look at the input and try to reason about the underlying semantic intent / meaning.\n"
-    "Here is the initial question:"
-    "\n ------- \n"
-    "{question}"
-    "\n ------- \n"
-    "Formulate an improved question:"
-)
-
-load_dotenv()
-deployment_name = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", "gpt-4o")
-response_model = init_chat_model(f"azure_openai:{deployment_name}")
-
-
-def rewrite_question(state: MessagesState):
-    """Rewrite the original user question."""
-    messages = state["messages"]
-    question = messages[0].content
-    prompt = REWRITE_PROMPT.format(question=question)
-    response = response_model.invoke([{"role": "user", "content": prompt}])
-    return {"messages": [{"role": "user", "content": response.content}]}
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/requirements.txt b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/requirements.txt
deleted file mode 100644
index 18dcb4bfc1b7..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-python-dotenv>=1.0.0
-langchain_community==0.4.0
-beautifulsoup4==4.14.2
-azure-ai-agentserver-core
-azure-ai-agentserver-langgraph
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/tools/__init__.py b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/tools/__init__.py
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/tools/retriever_tool.py b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/tools/retriever_tool.py
deleted file mode 100644
index be586c088ac8..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/tools/retriever_tool.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import os
-
-from dotenv import load_dotenv
-from langchain_core.tools import create_retriever_tool
-from langchain_community.document_loaders import WebBaseLoader
-from langchain_core.vectorstores import InMemoryVectorStore
-from langchain_openai import AzureOpenAIEmbeddings
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-
-load_dotenv()
-deployment_name = os.getenv(
-    "AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME", "text-embedding-3-small"
-)
-model_name = os.getenv("AZURE_OPENAI_EMBEDDINGS_MODEL_NAME", deployment_name)
-aoai_embeddings = AzureOpenAIEmbeddings(
-    model=model_name,
-    azure_deployment=deployment_name,
-)
-
-urls = [
-    "https://lilianweng.github.io/posts/2024-11-28-reward-hacking/",
-    "https://lilianweng.github.io/posts/2024-07-07-hallucination/",
-    "https://lilianweng.github.io/posts/2024-04-12-diffusion-video/",
-]
-
-docs = [WebBaseLoader(url).load() for url in urls]
-docs_list = [item for sublist in docs for item in sublist]
-
-text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
-    chunk_size=100, chunk_overlap=50
-)
-doc_splits = text_splitter.split_documents(docs_list)
-vectorstore = InMemoryVectorStore.from_documents(
-    documents=doc_splits, embedding=aoai_embeddings
-)
-retriever = vectorstore.as_retriever()
-
-retriever_tool = create_retriever_tool(
-    retriever,
-    "retrieve_blog_posts",
-    "Search and return information about Lilian Weng blog posts.",  # cspell:disable-line
-)
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/workflow.py b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/workflow.py
deleted file mode 100644
index 9f7809e888c9..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/agentic_rag/workflow.py
+++ /dev/null
@@ -1,48 +0,0 @@
-from edges.grade_documents import grade_documents
-from langgraph.graph import END, START, MessagesState, StateGraph
-from langgraph.prebuilt import ToolNode, tools_condition
-from nodes.generate_answer import generate_answer
-
-# Try relative imports first (works when imported as module)
-from nodes.generate_query_or_respond import generate_query_or_respond
-from nodes.rewrite_question import rewrite_question
-from tools.retriever_tool import retriever_tool
-
-from azure.ai.agentserver.langgraph import from_langgraph
-
-workflow = StateGraph(MessagesState)
-
-# Define the nodes we will cycle between
-workflow.add_node(generate_query_or_respond)
-workflow.add_node("retrieve", ToolNode([retriever_tool]))
-workflow.add_node(rewrite_question)
-workflow.add_node(generate_answer)
-
-workflow.add_edge(START, "generate_query_or_respond")
-
-# Decide whether to retrieve
-workflow.add_conditional_edges(
-    "generate_query_or_respond",
-    # Assess LLM decision (call `retriever_tool` tool or respond to the user)
-    tools_condition,
-    {
-        # Translate the condition outputs to nodes in our graph
-        "tools": "retrieve",
-        END: END,
-    },
-)
-
-# Edges taken after the `action` node is called.
-workflow.add_conditional_edges(
-    "retrieve",
-    # Assess agent decision
-    grade_documents,
-)
-workflow.add_edge("generate_answer", END)
-workflow.add_edge("rewrite_question", "generate_query_or_respond")
-
-# Compile
-graph = workflow.compile()
-
-if __name__ == "__main__":
-    from_langgraph(graph).run()
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/custom_state/.env-template b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/custom_state/.env-template
deleted file mode 100644
index 6407cd1d9d13..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/custom_state/.env-template
+++ /dev/null
@@ -1,3 +0,0 @@
-AZURE_OPENAI_API_KEY=<api-key>
-AZURE_OPENAI_ENDPOINT=https://<endpoint-name>.cognitiveservices.azure.com/
-AZURE_AI_MODEL_DEPLOYMENT_NAME=<your-deployment-name>
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/custom_state/README.md b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/custom_state/README.md
deleted file mode 100644
index 1455a366a0ad..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/custom_state/README.md
+++ /dev/null
@@ -1,51 +0,0 @@
-# Custom LangGraph State Converter (Mini RAG) Sample
-
-This sample demonstrates how to host a LangGraph agent **with a custom internal state** using the `azure.ai.agentserver` SDK by supplying a custom `LanggraphStateConverter` (`RAGStateConverter`). It shows the minimal pattern required to adapt OpenAI Responses-style requests to a LangGraph state and back to an OpenAI-compatible response.
-
-## What It Shows
-- Defining a custom state (`RAGState`) separate from the wire contract.
-- Implementing `RAGStateConverter.request_to_state` and `state_to_response` to bridge request ↔ graph ↔ response.
-- A simple multi-step graph: intent analysis → optional retrieval → answer generation.
-- Lightweight retrieval (keyword scoring over an in‑memory knowledge base) with citation annotations added to the assistant message.
-- Graceful local fallback answer when Azure OpenAI credentials are absent.
-- Non‑streaming response path only (streaming intentionally not implemented).
-
-## Flow Overview
-```
-CreateResponse request
-  -> RAGStateConverter.request_to_state
-    -> LangGraph executes nodes (analyze → retrieve? → answer)
-      -> Final state
-        -> RAGStateConverter.state_to_response
-          -> OpenAI-style response object
-```
-
-## Running
-```
-python main.py
-```
-Optional environment variables for live model call:
-- AZURE_OPENAI_API_KEY
-- AZURE_OPENAI_ENDPOINT (e.g. https://<your-project>.cognitiveservices.azure.com/)
-- AZURE_AI_MODEL_DEPLOYMENT_NAME (model deployment name)
-
-## Extending
-| Goal | Change |
-|------|--------|
-| Real retrieval | Replace `retrieve_docs` with embedding + vector / search backend. |
-| Richer answers | Introduce prompt templates or additional graph nodes. |
-| Multi‑turn memory | Persist prior messages; include truncated history in `request_to_state`. |
-| Tool / function calls | Add nodes producing tool outputs and incorporate into final response. |
-| Better citations | Store offsets / URLs and expand annotation objects. |
-| Streaming support | (See below) |
-
-### Adding Streaming
-1. Allow `stream=True` in requests and propagate a flag into state.
-2. Implement `get_stream_mode` (return appropriate mode, e.g. `events`).
-3. Implement `state_to_response_stream` to yield `ResponseStreamEvent` objects (lifecycle + deltas) and finalize with a completed event.
-4. Optionally collect incremental model tokens during `generate_answer`.
-
-## Key Takeaway
-A custom `LanggraphStateConverter` is the seam where you map external request contracts to an internal graph-friendly state shape and then format the final (or streamed) result back to the OpenAI Responses schema. Start simple (non‑streaming), then layer retrieval sophistication, memory, tools, and streaming as needed.
-
-Streaming is not supported in this sample out-of-the-box.
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/custom_state/main.py b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/custom_state/main.py
deleted file mode 100644
index 27f5bf0d5ee2..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/custom_state/main.py
+++ /dev/null
@@ -1,293 +0,0 @@
-from __future__ import annotations
-
-import os
-import json
-import time
-from dataclasses import dataclass
-from typing import Any, AsyncGenerator, AsyncIterator, Dict, List, TypedDict
-
-from dotenv import load_dotenv
-from langgraph.graph import StateGraph, START, END
-from openai import OpenAI, OpenAIError
-
-from azure.ai.agentserver.core import AgentRunContext
-from azure.ai.agentserver.core.models import Response, ResponseStreamEvent
-from azure.ai.agentserver.langgraph import from_langgraph
-from azure.ai.agentserver.langgraph.models import (
-    LanggraphStateConverter,
-)
-
-load_dotenv()
-
-API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
-BASE_URL = os.environ.get("AZURE_OPENAI_ENDPOINT") + "openai/v1"
-DEPLOYMENT = os.environ.get("AZURE_AI_MODEL_DEPLOYMENT_NAME")  # optional override
-DEFAULT_MODEL = "gpt-4.1-mini"
-
-
-# ---------------------------------------------------------------------------
-# Simple in-memory knowledge base (replace with real vector DB in production)
-# ---------------------------------------------------------------------------
-@dataclass
-class KBEntry:
-    id: str
-    text: str
-    tags: List[str]
-
-
-KNOWLEDGE_BASE: List[KBEntry] = [
-    KBEntry(
-        id="doc1",
-        text="LangGraph enables stateful AI workflows via graphs of nodes.",
-        tags=["langgraph", "workflow"],
-    ),
-    KBEntry(
-        id="doc2",
-        text="Retrieval augmented generation improves answer grounding by injecting documents.",
-        tags=["rag", "retrieval", "grounding"],
-    ),
-    KBEntry(
-        id="doc3",
-        text="Streaming responses send partial model outputs for lower latency user experience.",
-        tags=["streaming", "latency"],
-    ),
-]
-
-
-# ---------------------------------------------------------------------------
-# LangGraph State definition
-# ---------------------------------------------------------------------------
-class RAGState(TypedDict, total=False):
-    query: str
-    messages: List[Dict[str, Any]]  # simplified message records
-    needs_retrieval: bool
-    retrieved: List[Dict[str, Any]]  # selected documents
-    answer_parts: List[str]  # incremental answer assembly
-    final_answer: str  # final answer text
-    _stream_events: List[Any]  # buffered upstream model delta events (if any)
-    stream: bool  # whether streaming was requested
-
-
-# ---------------------------------------------------------------------------
-# Utility: naive keyword scoring retrieval
-# ---------------------------------------------------------------------------
-KEYWORDS = {
-    "langgraph": ["langgraph", "graph"],
-    "retrieval": ["retrieval", "rag", "ground"],
-    "stream": ["stream", "latency", "partial"],
-}
-
-
-def retrieve_docs(question: str, k: int = 2) -> List[Dict[str, Any]]:
-    scores: List[tuple[float, KBEntry]] = []
-    lower_q = question.lower()
-    for entry in KNOWLEDGE_BASE:
-        score = 0
-        for token in entry.tags:
-            if token in lower_q:
-                score += 2
-        for kw_group in KEYWORDS.values():
-            for kw in kw_group:
-                if kw in lower_q and kw in entry.text.lower():
-                    score += 1
-        if score > 0:
-            scores.append((score, entry))
-    scores.sort(key=lambda t: t[0], reverse=True)
-    return [{"id": e.id, "text": e.text, "score": s} for s, e in scores[:k]]
-
-
-# ---------------------------------------------------------------------------
-# Custom Converter
-# ---------------------------------------------------------------------------
-class RAGStateConverter(LanggraphStateConverter):
-    """Converter implementing mini RAG logic (non‑streaming only)."""
-
-    def get_stream_mode(self, context: AgentRunContext) -> str:  # noqa: D401
-        if context.request.get("stream", False):  # type: ignore[attr-defined]
-            raise NotImplementedError("Streaming not supported in this sample.")
-        return "values"
-
-    def request_to_state(self, context: AgentRunContext) -> Dict[str, Any]:  # noqa: D401
-        req = context.request
-        user_input = req.get("input")
-        if isinstance(user_input, list):
-            for item in user_input:
-                if isinstance(item, dict) and item.get("type") in (
-                    "message",
-                    "input_text",
-                ):
-                    user_input = item.get("content") or user_input
-                    break
-        if isinstance(user_input, list):
-            user_input = " ".join(str(x) for x in user_input)
-        prompt = str(user_input or "")
-        messages = []
-        instructions = req.get("instructions")
-        if instructions and isinstance(instructions, str):
-            messages.append({"role": "system", "content": instructions})
-        messages.append({"role": "user", "content": prompt})
-        res = {
-            "query": prompt,
-            "messages": messages,
-            "needs_retrieval": False,
-            "retrieved": [],
-            "answer_parts": [],
-            "stream": False,
-        }
-        print("initial state:", res)
-        return res
-
-    def state_to_response(
-        self, state: Dict[str, Any], context: AgentRunContext
-    ) -> Response:  # noqa: D401
-        final_answer = state.get("final_answer") or "(no answer generated)"
-        print(f"convert state to response, state: {state}")
-        citations = state.get("retrieved", [])
-        output_item = {
-            "type": "message",
-            "role": "assistant",
-            "content": [
-                {
-                    "type": "output_text",
-                    "text": final_answer,
-                    "annotations": [
-                        {
-                            "type": "citation",
-                            "doc_id": c.get("id"),
-                            "score": c.get("score"),
-                        }
-                        for c in citations
-                    ],
-                }
-            ],
-        }
-        base = {
-            "object": "response",
-            "id": context.response_id,
-            "agent": context.get_agent_id_object(),
-            "conversation": context.get_conversation_object(),
-            "status": "completed",
-            "created_at": int(time.time()),
-            "output": [output_item],
-        }
-        return Response(**base)
-
-    async def state_to_response_stream(  # noqa: D401
-        self,
-        stream_state: AsyncIterator[Dict[str, Any] | Any],
-        context: AgentRunContext,
-    ) -> AsyncGenerator[ResponseStreamEvent, None]:
-        raise NotImplementedError("Streaming not supported in this sample.")
-
-
-# ---------------------------------------------------------------------------
-# Graph Nodes
-# ---------------------------------------------------------------------------
-
-
-def _normalize_query(val: Any) -> str:
-    """Extract a lowercase text query from varied structures.
-
-    Accepts:
-      * str
-      * dict with 'content' or 'text'
-      * list of mixed items (recursively extracts first textual segment)
-    Falls back to JSON stringification for unknown structures.
-    """
-    if isinstance(val, str):
-        return val.strip().lower()
-    if isinstance(val, dict):
-        for k in ("content", "text", "value"):
-            v = val.get(k)
-            if isinstance(v, str) and v.strip():
-                return v.strip().lower()
-        # flatten simple dict string values
-        parts = [str(v) for v in val.values() if isinstance(v, (str, int, float))]
-        if parts:
-            return " ".join(parts).lower()
-    if isinstance(val, list):
-        for item in val:  # take first meaningful piece
-            extracted = _normalize_query(item)
-            if extracted:
-                return extracted
-        return ""
-    try:
-        return str(val).strip().lower()
-    except Exception:  # noqa: BLE001
-        return ""
-
-
-def analyze_intent(state: RAGState) -> RAGState:
-    raw_q = state.get("query", "")
-    q = _normalize_query(raw_q)
-    keywords = ("what", "how", "explain", "retrieval", "langgraph", "stream")
-    needs = any(kw in q for kw in keywords)
-    state["needs_retrieval"] = needs
-    # Also store normalized form for downstream nodes if different
-    if isinstance(raw_q, (dict, list)):
-        state["query"] = q
-    return state
-
-
-def retrieve_if_needed(state: RAGState) -> RAGState:
-    if state.get("needs_retrieval"):
-        state["retrieved"] = retrieve_docs(state.get("query", ""))
-    return state
-
-
-def generate_answer(state: RAGState) -> RAGState:
-    query = state.get("query", "")
-    retrieved = state.get("retrieved", [])
-
-    model_name = DEPLOYMENT or DEFAULT_MODEL
-
-    def synthesize_answer() -> tuple[str, List[str]]:
-        if not retrieved:
-            text = f"Answer: {query}" if query else "No question provided."
-            return text, [text]
-        doc_summaries = "; ".join(r["text"] for r in retrieved)
-        answer = f"Based on docs: {doc_summaries}\n\nAnswer: {query}"[:4000]
-        return answer, [answer]
-
-    if API_KEY and BASE_URL:
-        client = OpenAI(api_key=API_KEY, base_url=BASE_URL)
-        try:
-            resp = client.responses.create(model=model_name, input=query)
-            text = getattr(resp, "output_text", None)
-            if not text:
-                text = json.dumps(resp.model_dump(mode="json", exclude_none=True))[:500]
-            state["final_answer"] = text
-            state["answer_parts"] = [text]
-            return state
-        except OpenAIError:  # fallback
-            state["final_answer"], state["answer_parts"] = synthesize_answer()
-            return state
-    state["final_answer"], state["answer_parts"] = synthesize_answer()
-    return state
-
-
-# ---------------------------------------------------------------------------
-# Build the LangGraph
-# ---------------------------------------------------------------------------
-
-
-def _build_graph():
-    graph = StateGraph(RAGState)
-    graph.add_node("analyze", analyze_intent)
-    graph.add_node("retrieve", retrieve_if_needed)
-    graph.add_node("answer", generate_answer)
-
-    graph.add_edge(START, "analyze")
-    graph.add_edge("analyze", "retrieve")
-    graph.add_edge("retrieve", "answer")
-    graph.add_edge("answer", END)
-    return graph.compile()
-
-
-# ---------------------------------------------------------------------------
-# Entry Point
-# ---------------------------------------------------------------------------
-if __name__ == "__main__":
-    graph = _build_graph()
-    converter = RAGStateConverter()
-    from_langgraph(graph, converter).run()
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/custom_state/requirements.txt b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/custom_state/requirements.txt
deleted file mode 100644
index 2d00898c9143..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/custom_state/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-python-dotenv>=1.0.0
-azure-ai-agentserver-core
-azure-ai-agentserver-langgraph
-openai
-fastapi
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_apikey/.env-template b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_apikey/.env-template
deleted file mode 100644
index 04c14955bc69..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_apikey/.env-template
+++ /dev/null
@@ -1,5 +0,0 @@
-AZURE_OPENAI_API_KEY=<api-key>
-AZURE_OPENAI_ENDPOINT=https://<endpoint-name>.cognitiveservices.azure.com/
-OPENAI_API_VERSION=2025-03-01-preview
-AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=<deployment-name>
-GITHUB_TOKEN=<github_token>
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_apikey/README.md b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_apikey/README.md
deleted file mode 100644
index cd9e88506127..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_apikey/README.md
+++ /dev/null
@@ -1,126 +0,0 @@
-# LangGraph MCP GitHub Token Sample
-
-This sample shows how to wrap a LangGraph ReAct-style agent that is augmented with an MCP (Model Context Protocol) server requiring an API key / personal access token (GitHub) and expose it through the Azure AI Agents Adapter so it can be called via the unified `responses` endpoint.
-
-Compared to `mcp_simple`, this version demonstrates adding authorization headers (Bearer token) for an MCP server (GitHub) that expects a token.
-
-## What It Does
-
-`mcp_apikey.py`:
-1. Loads environment variables from a local `.env` file.
-2. Creates an Azure OpenAI chat model deployment (defaults to `gpt-4o`, override with `AZURE_OPENAI_DEPLOYMENT`).
-3. Reads a GitHub access token (`GITHUB_TOKEN`). This can be a classic or fine‑grained PAT (or an OAuth access token you obtained elsewhere).
-4. Constructs a `MultiServerMCPClient` pointing at the public GitHub MCP endpoint and injects the token as an `Authorization: Bearer ...` header.
-5. Fetches the available MCP tools exposed by the GitHub server.
-6. Builds a LangGraph ReAct agent (`create_react_agent`) with those tools.
-7. Hosts the agent using `from_langgraph(...).run_async()` making it available over HTTP (default: `http://localhost:8088`).
-
-## Folder Contents
-
-- `mcp_apikey.py` – Main script that builds and serves the token-authenticated MCP agent.
-- `.env-template` – Template for required environment variables.
-- `.env` – (User created) Actual secrets/endpoint values. Not committed.
-
-## Prerequisites
-
-Dependencies used by `mcp_apikey.py`:
-- agents_adapter[langgraph]
-- python-dotenv
-- langchain-mcp-adapters
-
-Install:
-```bash
-pip install -e container_agents_adapter/python[langgraph]
-pip install python-dotenv langchain-mcp-adapters
-```
-
-Requires Python 3.11+, Azure OpenAI deployment, and a `GITHUB_TOKEN`.
-
-## Environment Variables
-
-Copy `.env-template` to `.env` and fill in values:
-```
-AZURE_OPENAI_API_KEY=<azure-openai-key>
-AZURE_OPENAI_ENDPOINT=https://<endpoint-name>.cognitiveservices.azure.com/
-OPENAI_API_VERSION=2025-03-01-preview
-# Optional if your deployment name differs from gpt-4o
-AZURE_OPENAI_DEPLOYMENT=<your-deployment-name>
-
-# GitHub MCP auth (required)
-GITHUB_TOKEN=<your-github-token>
-```
-Notes:
-- `AZURE_OPENAI_DEPLOYMENT` defaults to `gpt-4o` if omitted.
-- Do NOT commit `.env`.
-
-## (Dependencies Covered Above)
-
-## Run the Sample
-
-From the `mcp-apikey` folder (or anywhere after install) run:
-```bash
-python mcp_apikey.py
-```
-The adapter starts an HTTP server (default `http://localhost:8088`).
-
-## Test the Agent
-
-Non-streaming example:
-```bash
-curl -X POST http://localhost:8088/responses \
-  -H "Content-Type: application/json" \
-  -d '{
-    "agent": {"name": "local_agent", "type": "agent_reference"},
-    "stream": false,
-    "input": "Use ONLY the Microsoft Learn MCP tools exposed by the connected MCP server (no built-in web search, no cached data).call the \"list tools\" capability and record the exact tool names returned.Use the search tool to query: \"Model Context Protocol\" (limit 3).Pick the top result and use the fetch tool to retrieve details/content for that document."
-  }'
-```
-
-Streaming example (server will stream delta events):
-```bash
-curl -N -X POST http://localhost:8088/responses \
-  -H "Content-Type: application/json" \
-  -d '{
-    "agent": {"name": "local_agent", "type": "agent_reference"},
-    "stream": true,
-    "input": "Use ONLY the Microsoft Learn MCP tools exposed by the connected MCP server (no built-in web search, no cached data).call the \"list tools\" capability and record the exact tool names returned.Use the search tool to query: \"Model Context Protocol\" (limit 3).Pick the top result and use the fetch tool to retrieve details/content for that document."
-  }'
-```
-
-Alternatively, you can send the richer structured message format:
-```bash
-curl -X POST http://localhost:8088/responses \
-  -H "Content-Type: application/json" \
-  -d '{
-    "agent": {"name": "local_agent", "type": "agent_reference"},
-    "stream": false,
-    "input": [{
-      "type": "message",
-      "role": "user",
-      "content": [{"type": "input_text", "text": "Use ONLY the Microsoft Learn MCP tools exposed by the connected MCP server (no built-in web search, no cached data).call the \"list tools\" capability and record the exact tool names returned.Use the search tool to query: \"Model Context Protocol\" (limit 3).Pick the top result and use the fetch tool to retrieve details/content for that document."}]
-    }]
-  }'
-```
-
-## Customization Ideas
-
-- Add additional MCP endpoints (e.g., documentation, search, custom internal tools).
-- Swap `create_react_agent` for a custom LangGraph graph with memory, guardrails, or ranking.
-- Integrate tracing / telemetry (LangSmith, OpenTelemetry) by adding callbacks to the model / agent.
-
-## Troubleshooting
-
-| Issue | Likely Cause | Fix |
-|-------|--------------|-----|
-| 401 from MCP server | Missing/invalid `GITHUB_TOKEN` | Regenerate PAT; ensure env var loaded |
-| 401 / auth from model | Azure key/endpoint incorrect | Re-check `.env` values |
-| Model not found | Deployment name mismatch | Set `AZURE_OPENAI_DEPLOYMENT` correctly |
-| No tools listed | GitHub MCP endpoint changed | Verify endpoint URL & token scopes |
-| Import errors | Extras not installed | Re-run dependency install |
-
-## Related Samples
-
-See `samples/langgraph/mcp_simple` for a no-auth MCP example and `samples/langgraph/agent_calculator` for arithmetic tooling.
-
----
-Extend this pattern to securely integrate additional authenticated MCP servers.
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_apikey/mcp_apikey.py b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_apikey/mcp_apikey.py
deleted file mode 100644
index 12f5c50aadae..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_apikey/mcp_apikey.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright (c) Microsoft. All rights reserved.
-from __future__ import annotations
-
-import asyncio
-import os
-
-from dotenv import load_dotenv
-from importlib.metadata import version
-from langchain_mcp_adapters.client import MultiServerMCPClient
-from langchain_openai import AzureChatOpenAI
-
-from azure.ai.agentserver.langgraph import from_langgraph
-
-load_dotenv()  # Load .env with Azure + GitHub credentials
-
-
-def _get_required_env(name: str) -> str:
-    value = os.getenv(name)
-    if not value:
-        raise RuntimeError(
-            f"Missing required environment variable '{name}'. Please define it in your .env file."
-        )
-    return value
-
-
-def create_agent(model, tools):
-    # for different langgraph versions
-    langgraph_version = version("langgraph")
-    if langgraph_version < "1.0.0":
-        from langgraph.prebuilt import create_react_agent
-
-        return create_react_agent(model, tools)
-    else:
-        from langchain.agents import create_agent
-
-        return create_agent(model, tools)
-
-
-async def build_agent():
-    deployment = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", "gpt-4o")
-    model = AzureChatOpenAI(model=deployment)
-
-    github_token = _get_required_env("GITHUB_TOKEN")
-
-    client = MultiServerMCPClient(
-        {
-            "github": {
-                "url": "https://api.githubcopilot.com/mcp/",
-                "transport": "streamable_http",
-                "headers": {"Authorization": f"Bearer {github_token}"},
-            }
-        }
-    )
-
-    tools = await client.get_tools()
-    agent = create_agent(model, tools)
-    return agent
-
-
-async def _main():
-    agent = await build_agent()
-    await from_langgraph(agent).run_async()
-
-
-if __name__ == "__main__":
-    asyncio.run(_main())
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_apikey/requirements.txt b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_apikey/requirements.txt
deleted file mode 100644
index e2ae1f5f6bf2..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_apikey/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-python-dotenv>=1.0.0
-langchain-mcp-adapters==0.1.11
-azure-ai-agentserver-core
-azure-ai-agentserver-langgraph
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_simple/.env-template b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_simple/.env-template
deleted file mode 100644
index 92b9c812a686..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_simple/.env-template
+++ /dev/null
@@ -1,4 +0,0 @@
-AZURE_OPENAI_API_KEY=<api-key>
-AZURE_OPENAI_ENDPOINT=https://<endpoint-name>.cognitiveservices.azure.com/
-OPENAI_API_VERSION=2025-03-01-preview
-AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=<deployment-name>
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_simple/README.md b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_simple/README.md
deleted file mode 100644
index 4414a64ebcf9..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_simple/README.md
+++ /dev/null
@@ -1,121 +0,0 @@
-# LangGraph MCP Simple Sample
-
-This sample shows how to wrap a LangGraph ReAct-style agent that is augmented with MCP (Model Context Protocol) tools (sourced from Microsoft Learn) and expose it through the Azure AI Agents Adapter so it can be called using the standard responses endpoint.
-
-## What It Does
-
-`mcp_simple.py`:
-1. Loads environment variables from a local `.env` file (see template below).
-2. Creates an Azure OpenAI chat model (`gpt-4o`) via `AzureChatOpenAI`.
-3. Constructs an MCP multi-server client (`MultiServerMCPClient`) pointing at the Microsoft Learn MCP endpoint.
-4. Fetches available MCP tools and builds a LangGraph ReAct agent with those tools (`create_react_agent`).
-5. Hosts the agent using `from_langgraph(...).run_async()` so it is available over HTTP on `http://localhost:8088` (default adapter port).
-
-## Folder Contents
-
-- `mcp_simple.py` – Main script that builds and serves the agent.
-- `.env-template` – Template for required Azure OpenAI environment variables.
-- `.env` – (User created) Actual secrets/endpoint values. Not committed.
-
-## Prerequisites
-
-Dependencies used by `mcp_simple.py`:
-- agents_adapter with langgraph extra (brings langgraph, langchain, langchain-openai)
-- python-dotenv
-- langchain-mcp-adapters
-
-Install (from repo root):
-```bash
-pip install -e container_agents_adapter/python[langgraph]
-pip install python-dotenv langchain-mcp-adapters
-```
-
-Environment needs Azure OpenAI variables (see below). Requires Python 3.11+.
-
-## Environment Variables
-
-Copy `.env-template` to `.env` and fill in real values:
-```
-AZURE_OPENAI_API_KEY=<api-key>
-AZURE_OPENAI_ENDPOINT=https://<endpoint-name>.cognitiveservices.azure.com/
-OPENAI_API_VERSION=2025-03-01-preview
-```
-If you use a deployment name different from `gpt-4o`, adjust the `model="gpt-4o"` parameter in `mcp_simple.py` accordingly (e.g., the model argument must match your Azure OpenAI deployment name, not the base model family if they differ).
-
-## (Dependencies Covered Above)
-
-## Run the Sample
-
-From the `mcp_simple` folder (or anywhere after install) run:
-```bash
-python mcp_simple.py
-```
-The adapter will start an HTTP server (default: `http://localhost:8088`). When ready, you can send a request to the unified responses endpoint.
-
-## Test the Agent
-
-Non-streaming example:
-```bash
-curl -X POST http://localhost:8088/responses \
-  -H "Content-Type: application/json" \
-  -d '{
-    "agent": {"name": "local_agent", "type": "agent_reference"},
-    "stream": false,
-    "input": "Give me a short summary about Azure OpenAI"
-  }'
-```
-
-Streaming example (server will stream delta events):
-```bash
-curl -N -X POST http://localhost:8088/responses \
-  -H "Content-Type: application/json" \
-  -d '{
-    "agent": {"name": "local_agent", "type": "agent_reference"},
-    "stream": true,
-    "input": "List two learning resources about Azure Functions"
-  }'
-```
-
-Alternatively, you can send the richer structured message format:
-```bash
-curl -X POST http://localhost:8088/responses \
-  -H "Content-Type: application/json" \
-  -d '{
-    "agent": {"name": "local_agent", "type": "agent_reference"},
-    "stream": false,
-    "input": [{
-      "type": "message",
-      "role": "user",
-      "content": [{"type": "input_text", "text": "What learning paths cover Azure AI?"}]
-    }]
-  }'
-```
-
-## MCP Tooling Notes
-
-- `MultiServerMCPClient` connects to one or more MCP servers; here we configure a single `mslearn` server.
-- `get_tools()` returns tool schemas that LangGraph incorporates, enabling the agent to decide when to call MCP tools.
-- The Microsoft Learn MCP endpoint can surface search / retrieval style tools (subject to availability) so the agent can ground answers.
-
-## Customization Ideas
-
-- Add more MCP endpoints by extending the dictionary passed to `MultiServerMCPClient`.
-- Swap `create_react_agent` for a custom LangGraph graph if you need more control (e.g., tool prioritization, guardrails, memory).
-- Introduce logging or tracing (e.g., LangSmith) by configuring callbacks on the model or agent.
-
-## Troubleshooting
-
-| Issue | Likely Cause | Fix |
-|-------|--------------|-----|
-| 401 / auth errors from model | Wrong or missing key / endpoint | Re-check `.env` values and Azure OpenAI resource permissions |
-| Model not found | Deployment name mismatch | Use your actual Azure deployment name in `AzureChatOpenAI(model=...)` |
-| No tools available | MCP endpoint change / network issue | Confirm the MCP URL and that it returns tool definitions |
-| Import errors for langgraph or adapter | Extras not installed | Re-run `pip install -e .[langgraph]` |
-
-
-## Related Samples
-
-See `samples/langgraph/agent_calculator` for another LangGraph + adapter example with arithmetic tools.
-
----
-Happy hacking! Modify and extend the MCP tool set to build richer contextual agents.
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_simple/mcp_simple.py b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_simple/mcp_simple.py
deleted file mode 100644
index 1b3c996386f0..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_simple/mcp_simple.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright (c) Microsoft. All rights reserved.
-"""Minimal LangGraph + MCP sample.
-
-Loads an MCP server (Microsoft Learn) and exposes a LangGraph ReAct agent
- through the agents_adapter server.
-"""
-
-import asyncio
-import os
-
-from dotenv import load_dotenv
-from importlib.metadata import version
-from langchain_mcp_adapters.client import MultiServerMCPClient
-from langchain_openai import AzureChatOpenAI
-
-from azure.ai.agentserver.langgraph import from_langgraph
-
-load_dotenv()
-
-
-def create_agent(model, tools):
-    # for different langgraph versions
-    langgraph_version = version("langgraph")
-    if langgraph_version < "1.0.0":
-        from langgraph.prebuilt import create_react_agent
-
-        return create_react_agent(model, tools)
-    else:
-        from langchain.agents import create_agent
-
-        return create_agent(model, tools)
-
-
-async def quickstart():
-    """Build and return a LangGraph agent wired to an MCP client."""
-    deployment = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", "gpt-4o")
-    model = AzureChatOpenAI(model=deployment)
-    client = MultiServerMCPClient(
-        {
-            "mslearn": {
-                "url": "https://learn.microsoft.com/api/mcp",
-                "transport": "streamable_http",
-            }
-        }
-    )
-    tools = await client.get_tools()
-    return create_agent(model, tools)
-
-
-async def main():  # pragma: no cover - sample entrypoint
-    agent = await quickstart()
-    await from_langgraph(agent).run_async()
-
-
-if __name__ == "__main__":
-    asyncio.run(main())
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_simple/requirements.txt b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_simple/requirements.txt
deleted file mode 100644
index ab8d43c36684..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/mcp_simple/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-python-dotenv>=1.0.0
-langchain-mcp-adapters==0.1.11
-azure-ai-agentserver-core
-azure-ai-agentserver-langgraph
-
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/simple_agent_with_redis_checkpointer/.env-template b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/simple_agent_with_redis_checkpointer/.env-template
deleted file mode 100644
index 41799808bfd6..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/simple_agent_with_redis_checkpointer/.env-template
+++ /dev/null
@@ -1,7 +0,0 @@
-AZURE_OPENAI_API_KEY=<api-key>
-AZURE_OPENAI_ENDPOINT=https://<endpoint-name>.cognitiveservices.azure.com/
-OPENAI_API_VERSION=2025-03-01-preview
-AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=<deployment-name>
-CHECKPOINTER_REDIS_URL=<redis-url>
-CHECKPOINTER_REDIS_KEY=<redis-key>
-CHECKPOINTER_REDIS_PORT=10000
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/simple_agent_with_redis_checkpointer/README.md b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/simple_agent_with_redis_checkpointer/README.md
deleted file mode 100644
index 6f9ed5a679fb..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/simple_agent_with_redis_checkpointer/README.md
+++ /dev/null
@@ -1,68 +0,0 @@
-# simple_agent_with_redis_checkpointer
-
-This sample demonstrates a LangGraph-based simple agent that uses an Azure managed Redis instance as a checkpointer. 
-
-# Prerequisites
-Create an Azure Managed Redis instance
-
-1) Install the Redis Enterprise CLI extension (if not already installed)
-    ```
-    az extension add --name redisenterprise
-    ```
-
-2) Create a resource group (example)
-    ```
-    az group create --name myRedisRG --location eastus
-    ```
-
-3) Create a Redis Enterprise instance with RedisJSON and RediSearch modules enabled
-   Create an [Azure Managed Redis instance](https://learn.microsoft.com/azure/redis/quickstart-create-managed-redis). For LangGraph checkpointer, the instance must have RedisJSON and RediSearch enabled. Clustering-policy should be EnterpriseCluster. Those configurations have to be set when creating. Redis sku and capacities can be configured with your needs.
-
-   When your redis instance is ready, add the redis information to environment variables.
-
-# Setup
-
-1. **Environment Configuration**
-   Create a `.env` file in this directory with your Azure OpenAI and Redis configuration:
-   ```
-    AZURE_OPENAI_API_KEY=<api-key>
-    AZURE_OPENAI_ENDPOINT=https://<endpoint-name>.cognitiveservices.azure.com/
-    OPENAI_API_VERSION=2025-03-01-preview
-    CHECKPOINTER_REDIS_URL=<name>.<region>.redis.azure.net
-    CHECKPOINTER_REDIS_KEY=<redis-key>
-    CHECKPOINTER_REDIS_PORT=10000
-   ```
-   And install python-dotenv
-   ```bash
-   pip install python-dotenv langgraph-checkpoint-redis
-   ```
-
-2. **Install Dependencies**
-   Required Python packages (install via pip):
-   ```bash
-   cd  container_agents/container_agent_adapter/python
-   pip install -e .[langgraph]
-   ```
-
-
-# Running as HTTP Server
-
-1. Start the agent server:
-   ```bash
-   python main.py
-   ```
-   The server will start on `http://localhost:8088`
-
-2. Test the agent:
-   ```bash
-   curl -X POST http://localhost:8088/responses \
-     -H "Content-Type: application/json" \
-     -d '{
-       "agent": {
-         "name": "local_agent",
-         "type": "agent_reference"
-       },
-       "stream": false,
-       "input": "What is 15 divided by 3?",
-       "conversation": {"id": "test-conversation-id"}
-     }'
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/simple_agent_with_redis_checkpointer/main.py b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/simple_agent_with_redis_checkpointer/main.py
deleted file mode 100644
index 7b1c2b5e9fb1..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/simple_agent_with_redis_checkpointer/main.py
+++ /dev/null
@@ -1,72 +0,0 @@
-import asyncio
-import os
-
-from importlib.metadata import version
-from dotenv import load_dotenv
-from langchain_core.tools import tool
-from langchain_openai import AzureChatOpenAI
-from langgraph.checkpoint.redis.aio import AsyncRedisSaver
-from redis.asyncio import Redis
-
-from azure.ai.agentserver.langgraph import from_langgraph
-
-load_dotenv()
-
-client = Redis(
-    host=os.getenv("CHECKPOINTER_REDIS_URL"),
-    port=os.getenv("CHECKPOINTER_REDIS_PORT"),
-    password=os.getenv("CHECKPOINTER_REDIS_KEY"),
-    ssl=True,
-    decode_responses=False,  # RedisSaver expects bytes
-)
-
-deployment_name = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", "gpt-4o")
-model = AzureChatOpenAI(model=deployment_name)
-
-
-@tool
-def get_word_length(word: str) -> int:
-    """Returns the length of a word."""
-    return len(word)
-
-
-@tool
-def calculator(expression: str) -> str:
-    """Evaluates mathematical expression"""
-    try:
-        maths_result = eval(expression)
-        return str(maths_result)
-    except Exception as e:
-        return f"Error: {str(e)}"
-
-
-tools = [get_word_length, calculator]
-
-
-def create_agent(model, tools, checkpointer):
-    # for different langgraph versions
-    langgraph_version = version("langgraph")
-    if langgraph_version < "1.0.0":
-        from langgraph.prebuilt import create_react_agent
-
-        return create_react_agent(model, tools, checkpointer=checkpointer)
-    else:
-        from langchain.agents import create_agent
-
-        return create_agent(model, tools, checkpointer=checkpointer)
-
-
-async def run_async():
-    # Pass the configured client to RedisSaver
-    # adapter uses astream/ainvoke so we need async checkpointer
-    saver = AsyncRedisSaver(redis_client=client)
-    await saver.asetup()
-
-    executor = create_agent(model, tools, checkpointer=saver)
-    # start server with async
-    await from_langgraph(executor).run_async()
-
-
-if __name__ == "__main__":
-    # host the langgraph agent
-    asyncio.run(run_async())
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/simple_agent_with_redis_checkpointer/requirements.txt b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/simple_agent_with_redis_checkpointer/requirements.txt
deleted file mode 100644
index 8687d2061ad5..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/simple_agent_with_redis_checkpointer/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-python-dotenv>=1.0.0
-langgraph-checkpoint-redis==0.1.2
-azure-ai-agentserver-core
-azure-ai-agentserver-langgraph
-
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/simple_react_agent/.env-template b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/simple_react_agent/.env-template
deleted file mode 100644
index 92b9c812a686..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/simple_react_agent/.env-template
+++ /dev/null
@@ -1,4 +0,0 @@
-AZURE_OPENAI_API_KEY=<api-key>
-AZURE_OPENAI_ENDPOINT=https://<endpoint-name>.cognitiveservices.azure.com/
-OPENAI_API_VERSION=2025-03-01-preview
-AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=<deployment-name>
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/simple_react_agent/main.py b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/simple_react_agent/main.py
deleted file mode 100644
index b3249ef6ecb1..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/simple_react_agent/main.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import os
-
-from dotenv import load_dotenv
-from importlib.metadata import version
-from langchain_core.tools import tool
-from langchain_openai import AzureChatOpenAI
-from langgraph.checkpoint.memory import MemorySaver
-
-from azure.ai.agentserver.langgraph import from_langgraph
-
-load_dotenv()
-
-memory = MemorySaver()
-deployment_name = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME", "gpt-4o")
-model = AzureChatOpenAI(model=deployment_name)
-
-
-@tool
-def get_word_length(word: str) -> int:
-    """Returns the length of a word."""
-    return len(word)
-
-
-@tool
-def calculator(expression: str) -> str:
-    """Evaluates mathematical expression"""
-    try:
-        maths_result = eval(expression)
-        return str(maths_result)
-    except Exception as e:
-        return f"Error: {str(e)}"
-
-
-def create_agent(model, tools, checkpointer):
-    # for different langgraph versions
-    langgraph_version = version("langgraph")
-    if langgraph_version < "1.0.0":
-        from langgraph.prebuilt import create_react_agent
-
-        return create_react_agent(model, tools, checkpointer=checkpointer)
-    else:
-        from langchain.agents import create_agent
-
-        return create_agent(model, tools, checkpointer=checkpointer)
-
-
-tools = [get_word_length, calculator]
-
-agent_executor = create_agent(model, tools, memory)
-
-if __name__ == "__main__":
-    # host the langgraph agent
-    from_langgraph(agent_executor).run()
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/simple_react_agent/requirements.txt b/sdk/agentserver/azure-ai-agentserver-langgraph/samples/simple_react_agent/requirements.txt
deleted file mode 100644
index 5d7322e06ed8..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/samples/simple_react_agent/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-python-dotenv>=1.0.0
-azure-ai-agentserver-core
-azure-ai-agentserver-langgraph
\ No newline at end of file
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/tests/__init__.py b/sdk/agentserver/azure-ai-agentserver-langgraph/tests/__init__.py
deleted file mode 100644
index 4a5d26360bce..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/tests/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# Unit tests package
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/tests/conftest.py b/sdk/agentserver/azure-ai-agentserver-langgraph/tests/conftest.py
deleted file mode 100644
index 7f055e40010c..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/tests/conftest.py
+++ /dev/null
@@ -1,10 +0,0 @@
-"""
-Pytest configuration and shared fixtures for unit tests.
-"""
-
-import sys
-from pathlib import Path
-
-# Add the src directory to the Python path so we can import modules under test
-src_path = Path(__file__).parent.parent.parent / "src"
-sys.path.insert(0, str(src_path))
diff --git a/sdk/agentserver/azure-ai-agentserver-langgraph/tests/unit_tests/test_langgraph_request_converter.py b/sdk/agentserver/azure-ai-agentserver-langgraph/tests/unit_tests/test_langgraph_request_converter.py
deleted file mode 100644
index 84a8c8784d8b..000000000000
--- a/sdk/agentserver/azure-ai-agentserver-langgraph/tests/unit_tests/test_langgraph_request_converter.py
+++ /dev/null
@@ -1,121 +0,0 @@
-import pytest
-from langchain_core import messages as langgraph_messages
-
-from azure.ai.agentserver.core import models
-from azure.ai.agentserver.core.models import projects as project_models
-from azure.ai.agentserver.langgraph import models as langgraph_models
-
-
-@pytest.mark.unit
-def test_convert_implicit_user_message():
-    """Test conversion of ImplicitUserMessage to HumanMessage."""
-
-    input_data = "input text string"
-    implicit_user_message = {"content": input_data}
-    create_response = models.CreateResponse(
-        input=[implicit_user_message],
-    )
-
-    converter = langgraph_models.LangGraphRequestConverter(create_response)
-    res = converter.convert()
-
-    assert "messages" in res
-    assert len(res["messages"]) == 1
-    assert isinstance(res["messages"][0], langgraph_messages.HumanMessage)
-    assert res["messages"][0].content == input_data
-
-
-@pytest.mark.unit
-def test_convert_implicit_user_message_with_contents():
-    """Test conversion of ImplicitUserMessage with list of contents to HumanMessage."""
-
-    input_data = [
-        {"text": "text content", "type": "input_text"},
-    ]
-    create_response = models.CreateResponse(input=[{"content": input_data}])
-
-    converter = langgraph_models.LangGraphRequestConverter(create_response)
-    res = converter.convert()
-
-    assert "messages" in res
-    assert len(res["messages"]) == 1
-    assert isinstance(res["messages"][0], langgraph_messages.HumanMessage)
-    assert isinstance(res["messages"][0].content, list)
-    assert len(res["messages"][0].content) == len(input_data)
-
-    for item_content, content in zip(input_data, res["messages"][0].content, strict=False):
-        assert isinstance(content, dict)
-        assert content["type"] == "text"
-        assert content["text"] == item_content.get("text")
-
-
-@pytest.mark.unit
-def test_convert_item_param_message():
-    """Test conversion of ItemParam of type MESSAGE to corresponding message."""
-
-    input_data = [
-        {"role": "user", "content": "user message"},
-        {"role": "assistant", "content": "assistant message"},
-        {"role": "system", "content": "system message"},
-    ]
-    create_response = models.CreateResponse(
-        input=input_data,
-    )
-    converter = langgraph_models.LangGraphRequestConverter(create_response)
-    res = converter.convert()
-
-    assert "messages" in res
-    assert len(res["messages"]) == len(input_data)
-
-    for item, message in zip(input_data, res["messages"], strict=False):
-        if item["role"] == project_models.ResponsesMessageRole.USER:
-            assert isinstance(message, langgraph_messages.HumanMessage)
-        elif item["role"] == project_models.ResponsesMessageRole.ASSISTANT:
-            assert isinstance(message, langgraph_messages.AIMessage)
-        elif item["role"] == project_models.ResponsesMessageRole.SYSTEM:
-            assert isinstance(message, langgraph_messages.SystemMessage)
-        else:
-            pytest.fail(f"Unexpected role: {item['role']}")
-
-        assert isinstance(message.content, str)
-        assert message.content == item["content"]
-
-
-@pytest.mark.unit
-def test_convert_item_param_function_call_and_function_call_output():
-    """Test conversion of ItemParam of type FUNCTION_CALL and FUNCTION_CALL_OUTPUT to corresponding message."""
-
-    input_data = [
-        {
-            "type": "function_call",
-            "call_id": "call_001",
-            "name": "get_ticket_status",
-            "arguments": '{"ticket_number": "845732"}',
-            "status": "completed",
-        },
-        {
-            "type": "function_call_output",
-            "call_id": "call_001",
-            "output": ('{"ticket_number": "845732", "status": "in_progress", "last_updated": "2024-07-15T09:42:00Z"}'),
-            "status": "completed",
-        },
-    ]
-    create_response = models.CreateResponse(
-        input=input_data,
-    )
-    converter = langgraph_models.LangGraphRequestConverter(create_response)
-    res = converter.convert()
-    assert "messages" in res
-    assert len(res["messages"]) == len(input_data)
-    assert isinstance(res["messages"][0], langgraph_messages.AIMessage)
-    assert res["messages"][0].tool_calls is not None
-    assert len(res["messages"][0].tool_calls) == 1
-    tool_call_detail = res["messages"][0].tool_calls[0]
-    assert tool_call_detail["id"] == "call_001"
-    assert tool_call_detail["name"] == "get_ticket_status"
-    assert tool_call_detail["args"] == {"ticket_number": "845732"}
-    assert isinstance(res["messages"][1], langgraph_messages.ToolMessage)
-    assert res["messages"][1].tool_call_id == "call_001"
-    assert res["messages"][1].content == (
-        '{"ticket_number": "845732", "status": "in_progress", "last_updated": "2024-07-15T09:42:00Z"}'
-    )