Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/pull-requests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
timeout-minutes: 10
strategy:
matrix:
container: [ "python:3.8", "python:3.9", "python:3.10", "python:3.11" ]
container: [ "python:3.9", "python:3.10", "python:3.11" ]
container:
image: ${{ matrix.container }}

Expand Down Expand Up @@ -64,13 +64,13 @@ jobs:
.

- name: Run black formatter check
run: black --check confidence
run: black --check confidence --exclude="telemetry_pb2.py|_version.py"

- name: Run flake8 formatter check
run: flake8 confidence
run: flake8 confidence --exclude=telemetry_pb2.py,_version.py

- name: Run type linter check
run: mypy confidence
run: mypy confidence --follow-imports=skip --exclude=telemetry_pb2.py

- name: Run tests with pytest
run: pytest
6 changes: 5 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@

We ask you to write well covered unit tests with your changes and please make sure you use `black` and `flake8` to lint your code before making a PR. There are CI checks that will fail otherwise.

Linting and tests will run on python [3.8, 3.9. 3.10 and 3.11](https://github.com/spotify/confidence-sdk-python/blob/nicklasl-patch-1/.github/workflows/pull-requests.yaml#L22).
Linting and tests will run on python [3.9, 3.10 and 3.11](https://github.com/spotify/confidence-sdk-python/blob/nicklasl-patch-1/.github/workflows/pull-requests.yaml#L22).

We require pull request titles to follow the [Conventional Commits specification](https://www.conventionalcommits.org/en/v1.0.0/) and we also encourage individual commits to adhere to that.

We use "squash merge" and any merge PR title will show up in the changelog based on the title.

Run the following if you need to regenerate the telemetry protobuf code:

```
./generate_proto.py
```
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,18 @@ confidence.track("event_name", {
})
```

## Telemetry

The SDK includes telemetry functionality that helps monitor SDK performance and usage. By default, telemetry is enabled and collects metrics (anonymously) such as resolve latency and request status. This data is used by the Confidence team to improve the product, and in certain cases it is also available to the SDK adopters.

You can disable telemetry by setting `disable_telemetry=True` when initializing the Confidence client:

```python
confidence = Confidence("CLIENT_TOKEN",
disable_telemetry=True
)
```

## OpenFeature

The library includes a `Provider` for
Expand Down
71 changes: 66 additions & 5 deletions confidence/confidence.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import requests
import httpx
from typing_extensions import TypeGuard
import time

from confidence import __version__
from confidence.errors import (
Expand All @@ -30,6 +31,7 @@
)
from .flag_types import FlagResolutionDetails, Reason, ErrorCode
from .names import FlagName, VariantName
from .telemetry import Telemetry, ProtoTraceId, ProtoStatus

EU_RESOLVE_API_ENDPOINT = "https://resolver.eu.confidence.dev"
US_RESOLVE_API_ENDPOINT = "https://resolver.us.confidence.dev"
Expand Down Expand Up @@ -101,6 +103,7 @@ def __init__(
timeout_ms: Optional[int] = DEFAULT_TIMEOUT_MS,
logger: logging.Logger = logging.getLogger("confidence_logger"),
async_client: httpx.AsyncClient = httpx.AsyncClient(),
disable_telemetry: bool = False,
):
self._client_secret = client_secret
self._region = region
Expand All @@ -111,6 +114,17 @@ def __init__(
self.async_client = async_client
self._setup_logger(logger)
self._custom_resolve_base_url = custom_resolve_base_url
self._telemetry = Telemetry(__version__, disabled=disable_telemetry)

def _get_resolve_headers(self) -> Dict[str, str]:
    """Build the HTTP headers for a flag-resolve request.

    Always sends JSON content-type/accept headers; additionally attaches
    the X-CONFIDENCE-TELEMETRY header when the telemetry component
    produces a non-empty monitoring payload (it returns "" when disabled).
    """
    base_headers: Dict[str, str] = {
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    monitoring = self._telemetry.get_monitoring_header()
    if not monitoring:
        return base_headers
    return {**base_headers, "X-CONFIDENCE-TELEMETRY": monitoring}

def resolve_boolean_details(
self, flag_key: str, default_value: bool
Expand Down Expand Up @@ -367,7 +381,6 @@ def _send_event_internal(self, event_name: str, data: Dict[str, FieldType]) -> N
)
if response.status_code == 200:
json = response.json()

json_errors = json.get("errors")
if json_errors:
self.logger.warning("events emitted with errors:")
Expand Down Expand Up @@ -407,6 +420,7 @@ def _handle_resolve_response(
def _resolve(
self, flag_name: FlagName, context: Dict[str, FieldType]
) -> ResolveResult:
start_time = time.perf_counter()
request_body = {
"clientSecret": self._client_secret,
"evaluationContext": context,
Expand All @@ -420,24 +434,49 @@ def _resolve(

resolve_url = f"{base_url}/v1/flags:resolve"
timeout_sec = None if self._timeout_ms is None else self._timeout_ms / 1000.0

try:
response = requests.post(
resolve_url, json=request_body, timeout=timeout_sec
resolve_url,
json=request_body,
headers=self._get_resolve_headers(),
timeout=timeout_sec,
)

result = self._handle_resolve_response(response, flag_name)
duration_ms = int((time.perf_counter() - start_time) * 1000)
self._telemetry.add_trace(
ProtoTraceId.PROTO_TRACE_ID_RESOLVE_LATENCY,
duration_ms,
ProtoStatus.PROTO_STATUS_SUCCESS,
)
return self._handle_resolve_response(response, flag_name)
return result
except requests.exceptions.Timeout:
duration_ms = int((time.perf_counter() - start_time) * 1000)
self._telemetry.add_trace(
ProtoTraceId.PROTO_TRACE_ID_RESOLVE_LATENCY,
duration_ms,
ProtoStatus.PROTO_STATUS_TIMEOUT,
)
self.logger.warning(
f"Request timed out after {timeout_sec}s"
f" when resolving flag {flag_name}"
)
raise TimeoutError()
except requests.exceptions.RequestException as e:
duration_ms = int((time.perf_counter() - start_time) * 1000)
self._telemetry.add_trace(
ProtoTraceId.PROTO_TRACE_ID_RESOLVE_LATENCY,
duration_ms,
ProtoStatus.PROTO_STATUS_ERROR,
)
self.logger.warning(f"Error resolving flag {flag_name}: {str(e)}")
raise GeneralError(str(e))

async def _resolve_async(
self, flag_name: FlagName, context: Dict[str, FieldType]
) -> ResolveResult:
start_time = time.perf_counter()
request_body = {
"clientSecret": self._client_secret,
"evaluationContext": context,
Expand All @@ -453,16 +492,38 @@ async def _resolve_async(
timeout_sec = None if self._timeout_ms is None else self._timeout_ms / 1000.0
try:
response = await self.async_client.post(
resolve_url, json=request_body, timeout=timeout_sec
resolve_url,
json=request_body,
headers=self._get_resolve_headers(),
timeout=timeout_sec,
)
result = self._handle_resolve_response(response, flag_name)
duration_ms = int((time.perf_counter() - start_time) * 1000)
self._telemetry.add_trace(
ProtoTraceId.PROTO_TRACE_ID_RESOLVE_LATENCY,
duration_ms,
ProtoStatus.PROTO_STATUS_SUCCESS,
)
return self._handle_resolve_response(response, flag_name)
return result
except httpx.TimeoutException:
duration_ms = int((time.perf_counter() - start_time) * 1000)
self._telemetry.add_trace(
ProtoTraceId.PROTO_TRACE_ID_RESOLVE_LATENCY,
duration_ms,
ProtoStatus.PROTO_STATUS_TIMEOUT,
)
self.logger.warning(
f"Request timed out after {timeout_sec}s"
f" when resolving flag {flag_name}"
)
raise TimeoutError()
except httpx.HTTPError as e:
duration_ms = int((time.perf_counter() - start_time) * 1000)
self._telemetry.add_trace(
ProtoTraceId.PROTO_TRACE_ID_RESOLVE_LATENCY,
duration_ms,
ProtoStatus.PROTO_STATUS_ERROR,
)
self.logger.warning(f"Error resolving flag {flag_name}: {str(e)}")
raise GeneralError(str(e))

Expand Down
2 changes: 1 addition & 1 deletion confidence/openfeature_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def _to_openfeature_error_code(
return openfeature.exception.ErrorCode.PROVIDER_NOT_READY


class ConfidenceOpenFeatureProvider(AbstractProvider):
class ConfidenceOpenFeatureProvider(AbstractProvider): # type: ignore[misc]
def __init__(self, confidence_sdk: confidence.confidence.Confidence):
self.confidence_sdk = confidence_sdk

Expand Down
64 changes: 64 additions & 0 deletions confidence/telemetry.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Telemetry schema for SDK monitoring. A ProtoMonitoring message is
// serialized and base64-encoded by the SDK and attached to resolve
// requests (see confidence/telemetry.py).
syntax = "proto3";

package confidence.telemetry.v1;

// Platform the SDK runs on; this SDK always reports PROTO_PLATFORM_PYTHON.
enum ProtoPlatform {
PROTO_PLATFORM_UNSPECIFIED = 0;
PROTO_PLATFORM_JS_WEB = 4;
PROTO_PLATFORM_JS_SERVER = 5;
PROTO_PLATFORM_PYTHON = 6;
PROTO_PLATFORM_GO = 7;
}

// Top-level telemetry payload: traces grouped per library, plus platform.
message ProtoMonitoring {
repeated ProtoLibraryTraces library_traces = 1;
ProtoPlatform platform = 2;
}

// Traces collected for one library (e.g. the Confidence SDK) at one version.
message ProtoLibraryTraces {
ProtoLibrary library = 1;
string library_version = 2;
repeated ProtoTrace traces = 3;

// One recorded event: either a timed request or a bare counter.
message ProtoTrace {
ProtoTraceId id = 1;

// DEPRECATED
optional uint64 millisecond_duration = 2;

oneof trace {
ProtoRequestTrace request_trace = 3;
ProtoCountTrace count_trace = 4;
}

// Counter-style trace with no payload; its meaning is carried by the id.
message ProtoCountTrace {}

// Timed request trace: duration plus outcome status.
message ProtoRequestTrace {
uint64 millisecond_duration = 1;
ProtoStatus status = 2;

enum ProtoStatus {
PROTO_STATUS_UNSPECIFIED = 0;
PROTO_STATUS_SUCCESS = 1;
PROTO_STATUS_ERROR = 2;
PROTO_STATUS_TIMEOUT = 3;
PROTO_STATUS_CACHED = 4;
}
}
}

// Which library produced the traces.
enum ProtoLibrary {
PROTO_LIBRARY_UNSPECIFIED = 0;
PROTO_LIBRARY_CONFIDENCE = 1;
PROTO_LIBRARY_OPEN_FEATURE = 2;
PROTO_LIBRARY_REACT = 3;
}

// What was measured; this SDK currently emits PROTO_TRACE_ID_RESOLVE_LATENCY.
enum ProtoTraceId {
PROTO_TRACE_ID_UNSPECIFIED = 0;
PROTO_TRACE_ID_RESOLVE_LATENCY = 1;
PROTO_TRACE_ID_STALE_FLAG = 2;
PROTO_TRACE_ID_FLAG_TYPE_MISMATCH = 3;
PROTO_TRACE_ID_WITH_CONTEXT = 4;
}
}
71 changes: 71 additions & 0 deletions confidence/telemetry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import base64
from queue import Empty, Queue
from typing import Optional

from typing_extensions import TypeAlias

from confidence.telemetry_pb2 import (
ProtoMonitoring,
ProtoLibraryTraces,
ProtoPlatform,
)

# Define type aliases for the protobuf classes
ProtoTrace: TypeAlias = ProtoLibraryTraces.ProtoTrace
ProtoLibrary: TypeAlias = ProtoLibraryTraces.ProtoLibrary
ProtoTraceId: TypeAlias = ProtoLibraryTraces.ProtoTraceId
ProtoStatus: TypeAlias = ProtoLibraryTraces.ProtoTrace.ProtoRequestTrace.ProtoStatus


class Telemetry:
    """Process-wide singleton that collects SDK resolve traces.

    Traces are queued by ``add_trace()`` and drained into a base64-encoded
    ``ProtoMonitoring`` payload by ``get_monitoring_header()``, which the
    SDK attaches to resolve requests as the X-CONFIDENCE-TELEMETRY header.
    When ``disabled`` is True both operations are no-ops.
    """

    # Singleton state: the first construction wins; subsequent
    # Telemetry(...) calls return the same instance and their
    # arguments are ignored (guarded by _initialized).
    _instance: Optional["Telemetry"] = None
    _initialized: bool = False
    version: str
    _traces_queue: Queue[ProtoTrace]
    _disabled: bool

    def __new__(cls, version: str, disabled: bool = False) -> "Telemetry":
        if cls._instance is None:
            cls._instance = super(Telemetry, cls).__new__(cls)
            cls._initialized = False
            cls._disabled = disabled
        return cls._instance

    def __init__(self, version: str, disabled: bool = False) -> None:
        # Guard so repeated constructions do not reset the shared queue
        # or flip the disabled flag.
        if not self._initialized:
            self.version = version
            self._traces_queue = Queue()
            self._disabled = disabled
            self._initialized = True

    def add_trace(
        self, trace_id: ProtoTraceId, duration_ms: int, status: ProtoStatus
    ) -> None:
        """Record one resolve request trace (latency in ms plus outcome)."""
        if self._disabled:
            return
        trace = ProtoTrace()
        trace.id = trace_id
        request_trace = ProtoTrace.ProtoRequestTrace()
        request_trace.millisecond_duration = duration_ms
        request_trace.status = status
        trace.request_trace.CopyFrom(request_trace)
        self._traces_queue.put(trace)

    def get_monitoring_header(self) -> str:
        """Drain queued traces and return them as a base64 header value.

        Returns "" when telemetry is disabled. Otherwise a payload is
        always produced (platform and library info), even when no traces
        are currently queued.
        """
        if self._disabled:
            return ""
        current_traces = []
        # Drain until the queue reports empty. Catching queue.Empty
        # (instead of a blanket `except Exception`, which could mask real
        # errors) also avoids the empty()/get_nowait() race of checking
        # emptiness before fetching.
        while True:
            try:
                current_traces.append(self._traces_queue.get_nowait())
            except Empty:
                break

        monitoring = ProtoMonitoring()
        library_traces = monitoring.library_traces.add()
        library_traces.library = ProtoLibrary.PROTO_LIBRARY_CONFIDENCE
        library_traces.library_version = self.version
        library_traces.traces.extend(current_traces)
        monitoring.platform = ProtoPlatform.PROTO_PLATFORM_PYTHON
        serialized = monitoring.SerializeToString()
        return base64.b64encode(serialized).decode()
Loading