Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/pull-requests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
timeout-minutes: 10
strategy:
matrix:
container: [ "python:3.8", "python:3.9", "python:3.10", "python:3.11" ]
container: [ "python:3.9", "python:3.10", "python:3.11" ]
container:
image: ${{ matrix.container }}

Expand Down Expand Up @@ -64,13 +64,13 @@ jobs:
.

- name: Run black formatter check
run: black --check confidence
run: black --check confidence --exclude="telemetry_pb2.py|_version.py"

- name: Run flake8 formatter check
run: flake8 confidence
run: flake8 confidence --exclude=telemetry_pb2.py,_version.py

- name: Run type linter check
run: mypy confidence
run: mypy confidence --follow-imports=skip --exclude=telemetry_pb2.py

- name: Run tests with pytest
run: pytest
6 changes: 5 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@

We ask you to write well covered unit tests with your changes and please make sure you use `black` and `flake8` to lint your code before making a PR. There are CI checks that will fail otherwise.

Linting and tests will run on python [3.8, 3.9. 3.10 and 3.11](https://github.com/spotify/confidence-sdk-python/blob/nicklasl-patch-1/.github/workflows/pull-requests.yaml#L22).
Linting and tests will run on python [3.9, 3.10 and 3.11](https://github.com/spotify/confidence-sdk-python/blob/nicklasl-patch-1/.github/workflows/pull-requests.yaml#L22).

We require pull request titles to follow the [Conventional Commits specification](https://www.conventionalcommits.org/en/v1.0.0/) and we also encourage individual commits to adhere to that.

We use "squash merge" and any merge PR title will show up in the changelog based on the title.

Run the following if you need to regenerate the telemetry protobuf code:

```
./generate_proto.py
```
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,18 @@ confidence.track("event_name", {
})
```

## Telemetry

The SDK includes telemetry functionality that helps monitor SDK performance and usage. By default, telemetry is enabled and collects metrics (anonymously) such as resolve latency and request status. This data is used by the Confidence team to improve the product, and in certain cases it is also available to the SDK adopters.

You can disable telemetry by setting `disable_telemetry=True` when initializing the Confidence client:

```python
confidence = Confidence("CLIENT_TOKEN",
disable_telemetry=True
)
```

## OpenFeature

The library includes a `Provider` for
Expand Down
71 changes: 66 additions & 5 deletions confidence/confidence.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import requests
import httpx
from typing_extensions import TypeGuard
import time

from confidence import __version__
from confidence.errors import (
Expand All @@ -30,6 +31,7 @@
)
from .flag_types import FlagResolutionDetails, Reason, ErrorCode
from .names import FlagName, VariantName
from .telemetry import Telemetry, ProtoTraceId, ProtoStatus

EU_RESOLVE_API_ENDPOINT = "https://resolver.eu.confidence.dev"
US_RESOLVE_API_ENDPOINT = "https://resolver.us.confidence.dev"
Expand Down Expand Up @@ -101,6 +103,7 @@ def __init__(
timeout_ms: Optional[int] = DEFAULT_TIMEOUT_MS,
logger: logging.Logger = logging.getLogger("confidence_logger"),
async_client: httpx.AsyncClient = httpx.AsyncClient(),
disable_telemetry: bool = False,
):
self._client_secret = client_secret
self._region = region
Expand All @@ -111,6 +114,17 @@ def __init__(
self.async_client = async_client
self._setup_logger(logger)
self._custom_resolve_base_url = custom_resolve_base_url
self._telemetry = Telemetry(__version__, disabled=disable_telemetry)

def _get_resolve_headers(self) -> Dict[str, str]:
    """Build the HTTP headers for a flag-resolve request.

    Always sends JSON content-type/accept headers; additionally attaches
    the X-CONFIDENCE-TELEMETRY header when the telemetry component
    produces a non-empty monitoring payload (it returns "" when disabled).
    """
    base_headers: Dict[str, str] = {
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    monitoring = self._telemetry.get_monitoring_header()
    if not monitoring:
        return base_headers
    return {**base_headers, "X-CONFIDENCE-TELEMETRY": monitoring}

def resolve_boolean_details(
self, flag_key: str, default_value: bool
Expand Down Expand Up @@ -367,7 +381,6 @@ def _send_event_internal(self, event_name: str, data: Dict[str, FieldType]) -> N
)
if response.status_code == 200:
json = response.json()

json_errors = json.get("errors")
if json_errors:
self.logger.warning("events emitted with errors:")
Expand Down Expand Up @@ -407,6 +420,7 @@ def _handle_resolve_response(
def _resolve(
self, flag_name: FlagName, context: Dict[str, FieldType]
) -> ResolveResult:
start_time = time.perf_counter()
request_body = {
"clientSecret": self._client_secret,
"evaluationContext": context,
Expand All @@ -420,24 +434,49 @@ def _resolve(

resolve_url = f"{base_url}/v1/flags:resolve"
timeout_sec = None if self._timeout_ms is None else self._timeout_ms / 1000.0

try:
response = requests.post(
resolve_url, json=request_body, timeout=timeout_sec
resolve_url,
json=request_body,
headers=self._get_resolve_headers(),
timeout=timeout_sec,
)

result = self._handle_resolve_response(response, flag_name)
duration_ms = int((time.perf_counter() - start_time) * 1000)
self._telemetry.add_trace(
ProtoTraceId.PROTO_TRACE_ID_RESOLVE_LATENCY,
duration_ms,
ProtoStatus.PROTO_STATUS_SUCCESS,
)
return self._handle_resolve_response(response, flag_name)
return result
except requests.exceptions.Timeout:
duration_ms = int((time.perf_counter() - start_time) * 1000)
self._telemetry.add_trace(
ProtoTraceId.PROTO_TRACE_ID_RESOLVE_LATENCY,
duration_ms,
ProtoStatus.PROTO_STATUS_TIMEOUT,
)
self.logger.warning(
f"Request timed out after {timeout_sec}s"
f" when resolving flag {flag_name}"
)
raise TimeoutError()
except requests.exceptions.RequestException as e:
duration_ms = int((time.perf_counter() - start_time) * 1000)
self._telemetry.add_trace(
ProtoTraceId.PROTO_TRACE_ID_RESOLVE_LATENCY,
duration_ms,
ProtoStatus.PROTO_STATUS_ERROR,
)
self.logger.warning(f"Error resolving flag {flag_name}: {str(e)}")
raise GeneralError(str(e))

async def _resolve_async(
self, flag_name: FlagName, context: Dict[str, FieldType]
) -> ResolveResult:
start_time = time.perf_counter()
request_body = {
"clientSecret": self._client_secret,
"evaluationContext": context,
Expand All @@ -453,16 +492,38 @@ async def _resolve_async(
timeout_sec = None if self._timeout_ms is None else self._timeout_ms / 1000.0
try:
response = await self.async_client.post(
resolve_url, json=request_body, timeout=timeout_sec
resolve_url,
json=request_body,
headers=self._get_resolve_headers(),
timeout=timeout_sec,
)
result = self._handle_resolve_response(response, flag_name)
duration_ms = int((time.perf_counter() - start_time) * 1000)
self._telemetry.add_trace(
ProtoTraceId.PROTO_TRACE_ID_RESOLVE_LATENCY,
duration_ms,
ProtoStatus.PROTO_STATUS_SUCCESS,
)
return self._handle_resolve_response(response, flag_name)
return result
except httpx.TimeoutException:
duration_ms = int((time.perf_counter() - start_time) * 1000)
self._telemetry.add_trace(
ProtoTraceId.PROTO_TRACE_ID_RESOLVE_LATENCY,
duration_ms,
ProtoStatus.PROTO_STATUS_TIMEOUT,
)
self.logger.warning(
f"Request timed out after {timeout_sec}s"
f" when resolving flag {flag_name}"
)
raise TimeoutError()
except httpx.HTTPError as e:
duration_ms = int((time.perf_counter() - start_time) * 1000)
self._telemetry.add_trace(
ProtoTraceId.PROTO_TRACE_ID_RESOLVE_LATENCY,
duration_ms,
ProtoStatus.PROTO_STATUS_ERROR,
)
self.logger.warning(f"Error resolving flag {flag_name}: {str(e)}")
raise GeneralError(str(e))

Expand Down
2 changes: 1 addition & 1 deletion confidence/openfeature_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def _to_openfeature_error_code(
return openfeature.exception.ErrorCode.PROVIDER_NOT_READY


class ConfidenceOpenFeatureProvider(AbstractProvider):
class ConfidenceOpenFeatureProvider(AbstractProvider): # type: ignore[misc]
def __init__(self, confidence_sdk: confidence.confidence.Confidence):
self.confidence_sdk = confidence_sdk

Expand Down
64 changes: 64 additions & 0 deletions confidence/telemetry.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Telemetry schema for SDK monitoring. A ProtoMonitoring message is
// serialized and base64-encoded by the SDK and attached to resolve
// requests (see confidence/telemetry.py).
syntax = "proto3";

package confidence.telemetry.v1;

// Platform the SDK runs on; this SDK always reports PROTO_PLATFORM_PYTHON.
enum ProtoPlatform {
PROTO_PLATFORM_UNSPECIFIED = 0;
PROTO_PLATFORM_JS_WEB = 4;
PROTO_PLATFORM_JS_SERVER = 5;
PROTO_PLATFORM_PYTHON = 6;
PROTO_PLATFORM_GO = 7;
}

// Top-level telemetry payload: traces grouped per library, plus platform.
message ProtoMonitoring {
repeated ProtoLibraryTraces library_traces = 1;
ProtoPlatform platform = 2;
}

// Traces collected for one library (e.g. the Confidence SDK) at one version.
message ProtoLibraryTraces {
ProtoLibrary library = 1;
string library_version = 2;
repeated ProtoTrace traces = 3;

// One recorded event: either a timed request or a bare counter.
message ProtoTrace {
ProtoTraceId id = 1;

// DEPRECATED
optional uint64 millisecond_duration = 2;

oneof trace {
ProtoRequestTrace request_trace = 3;
ProtoCountTrace count_trace = 4;
}

// Counter-style trace with no payload; its meaning is carried by the id.
message ProtoCountTrace {}

// Timed request trace: duration plus outcome status.
message ProtoRequestTrace {
uint64 millisecond_duration = 1;
ProtoStatus status = 2;

enum ProtoStatus {
PROTO_STATUS_UNSPECIFIED = 0;
PROTO_STATUS_SUCCESS = 1;
PROTO_STATUS_ERROR = 2;
PROTO_STATUS_TIMEOUT = 3;
PROTO_STATUS_CACHED = 4;
}
}
}

// Which library produced the traces.
enum ProtoLibrary {
PROTO_LIBRARY_UNSPECIFIED = 0;
PROTO_LIBRARY_CONFIDENCE = 1;
PROTO_LIBRARY_OPEN_FEATURE = 2;
PROTO_LIBRARY_REACT = 3;
}

// What was measured; this SDK currently emits PROTO_TRACE_ID_RESOLVE_LATENCY.
enum ProtoTraceId {
PROTO_TRACE_ID_UNSPECIFIED = 0;
PROTO_TRACE_ID_RESOLVE_LATENCY = 1;
PROTO_TRACE_ID_STALE_FLAG = 2;
PROTO_TRACE_ID_FLAG_TYPE_MISMATCH = 3;
PROTO_TRACE_ID_WITH_CONTEXT = 4;
}
}
71 changes: 71 additions & 0 deletions confidence/telemetry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import base64
from queue import Empty, Queue
from typing import Optional

from typing_extensions import TypeAlias

from confidence.telemetry_pb2 import (
ProtoMonitoring,
ProtoLibraryTraces,
ProtoPlatform,
)

# Define type aliases for the protobuf classes
ProtoTrace: TypeAlias = ProtoLibraryTraces.ProtoTrace
ProtoLibrary: TypeAlias = ProtoLibraryTraces.ProtoLibrary
ProtoTraceId: TypeAlias = ProtoLibraryTraces.ProtoTraceId
ProtoStatus: TypeAlias = ProtoLibraryTraces.ProtoTrace.ProtoRequestTrace.ProtoStatus


class Telemetry:
    """Process-wide singleton that collects SDK resolve traces.

    Traces are queued by ``add_trace()`` and drained into a base64-encoded
    ``ProtoMonitoring`` payload by ``get_monitoring_header()``, which the
    SDK attaches to resolve requests as the X-CONFIDENCE-TELEMETRY header.
    When ``disabled`` is True both operations are no-ops.
    """

    # Singleton state: the first construction wins; subsequent
    # Telemetry(...) calls return the same instance and their
    # arguments are ignored (guarded by _initialized).
    _instance: Optional["Telemetry"] = None
    _initialized: bool = False
    version: str
    _traces_queue: Queue[ProtoTrace]
    _disabled: bool

    def __new__(cls, version: str, disabled: bool = False) -> "Telemetry":
        if cls._instance is None:
            cls._instance = super(Telemetry, cls).__new__(cls)
            cls._initialized = False
            cls._disabled = disabled
        return cls._instance

    def __init__(self, version: str, disabled: bool = False) -> None:
        # Guard so repeated constructions do not reset the shared queue
        # or flip the disabled flag.
        if not self._initialized:
            self.version = version
            self._traces_queue = Queue()
            self._disabled = disabled
            self._initialized = True

    def add_trace(
        self, trace_id: ProtoTraceId, duration_ms: int, status: ProtoStatus
    ) -> None:
        """Record one resolve request trace (latency in ms plus outcome)."""
        if self._disabled:
            return
        trace = ProtoTrace()
        trace.id = trace_id
        request_trace = ProtoTrace.ProtoRequestTrace()
        request_trace.millisecond_duration = duration_ms
        request_trace.status = status
        trace.request_trace.CopyFrom(request_trace)
        self._traces_queue.put(trace)

    def get_monitoring_header(self) -> str:
        """Drain queued traces and return them as a base64 header value.

        Returns "" when telemetry is disabled. Otherwise a payload is
        always produced (platform and library info), even when no traces
        are currently queued.
        """
        if self._disabled:
            return ""
        current_traces = []
        # Drain until the queue reports empty. Catching queue.Empty
        # (instead of a blanket `except Exception`, which could mask real
        # errors) also avoids the empty()/get_nowait() race of checking
        # emptiness before fetching.
        while True:
            try:
                current_traces.append(self._traces_queue.get_nowait())
            except Empty:
                break

        monitoring = ProtoMonitoring()
        library_traces = monitoring.library_traces.add()
        library_traces.library = ProtoLibrary.PROTO_LIBRARY_CONFIDENCE
        library_traces.library_version = self.version
        library_traces.traces.extend(current_traces)
        monitoring.platform = ProtoPlatform.PROTO_PLATFORM_PYTHON
        serialized = monitoring.SerializeToString()
        return base64.b64encode(serialized).decode()
Loading