Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
224 changes: 221 additions & 3 deletions sentry_sdk/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
import uuid
import random
import socket
from collections.abc import Mapping
from collections.abc import Mapping, Iterable
from datetime import datetime, timezone
from importlib import import_module
from typing import TYPE_CHECKING, List, Dict, cast, overload
import warnings
import json

from sentry_sdk._compat import check_uwsgi_thread_support
from sentry_sdk._metrics_batcher import MetricsBatcher
Expand All @@ -30,6 +31,7 @@
)
from sentry_sdk.serializer import serialize
from sentry_sdk.tracing import trace
from sentry_sdk.traces import SpanStatus
from sentry_sdk.tracing_utils import has_span_streaming_enabled
from sentry_sdk.transport import (
HttpTransportCore,
Expand All @@ -38,6 +40,7 @@
)
from sentry_sdk.consts import (
SPANDATA,
SPANSTATUS,
DEFAULT_MAX_VALUE_LENGTH,
DEFAULT_OPTIONS,
INSTRUMENTER,
Expand All @@ -56,6 +59,8 @@
)
from sentry_sdk.scrubber import EventScrubber
from sentry_sdk.monitor import Monitor
from sentry_sdk.envelope import Item, PayloadRef
from sentry_sdk.utils import datetime_from_isoformat

if TYPE_CHECKING:
from typing import Any
Expand All @@ -66,7 +71,15 @@
from typing import Union
from typing import TypeVar

from sentry_sdk._types import Event, Hint, SDKInfo, Log, Metric, EventDataCategory
from sentry_sdk._types import (
Event,
Hint,
SDKInfo,
Log,
Metric,
EventDataCategory,
SerializedAttributeValue,
)
from sentry_sdk.integrations import Integration
from sentry_sdk.scope import Scope
from sentry_sdk.session import Session
Expand All @@ -89,6 +102,181 @@
}


def _serialized_v1_attribute_to_serialized_v2_attribute(
attribute_value: "Any",
) -> "Optional[SerializedAttributeValue]":
if isinstance(attribute_value, bool):
return {
"value": attribute_value,
"type": "boolean",
}

if isinstance(attribute_value, int):
return {
"value": attribute_value,
"type": "integer",
}

if isinstance(attribute_value, float):
return {
"value": attribute_value,
"type": "double",
}

Check failure on line 125 in sentry_sdk/client.py

View check run for this annotation

@sentry/warden / warden: find-bugs

[M5W-3GS] Wrong variable `event` used instead of `event_opt` causes missing span attributes (additional location)

The `_serialized_v1_span_to_serialized_v2_span` function is called with `event` (the raw input) instead of `event_opt` (the prepared/processed event). The conversion function extracts user info, release, environment, transaction name, trace context, and SDK metadata from the event parameter. Since `event_opt` is populated by `_prepare_event` which applies scope data, using the raw `event` will result in GenAI spans missing important attributes like user.id, sentry.release, sentry.environment, sentry.segment.name, and sentry.sdk.* when these values come from the scope rather than the original event.
if isinstance(attribute_value, str):
return {
"value": attribute_value,
"type": "string",
}

if isinstance(attribute_value, list):
if not attribute_value:
return {"value": [], "type": "array"}

ty = type(attribute_value[0])
if ty in (int, str, bool, float) and all(
type(v) is ty for v in attribute_value
):
return {
"value": attribute_value,
"type": "array",
}

# Types returned when the serializer for V1 span attributes recurses into some container types.
if isinstance(attribute_value, (dict, list)):
return {
"value": json.dumps(attribute_value),
"type": "string",
}

return None


def _serialized_v1_span_to_serialized_v2_span(
    span: "dict[str, Any]", event: "Event"
) -> "dict[str, Any]":
    """
    Build a V2 span dict from a serialized V1 span and the event it belongs to.

    Top-level V2 fields are copied from ``span``. The ``"attributes"`` entry is
    assembled from the span's ``op``, ``origin``, ``data`` and ``tags`` plus
    user, release, environment, transaction, trace and SDK information read
    from ``event``.

    :param span: A serialized V1 span.
    :param event: The event whose metadata is attached as span attributes.
    :returns: The V2 span. ``"attributes"`` is omitted when there is nothing
        representable to send.
    """
    # See SpanBatcher._to_transport_format() for analogous population of all entries except "attributes".
    res: "dict[str, Any]" = {
        "status": SpanStatus.OK.value,
        "is_segment": False,
    }

    # Copied as-is; only "description" changes its key (to "name").
    for v1_key, v2_key in (
        ("trace_id", "trace_id"),
        ("span_id", "span_id"),
        ("description", "name"),
    ):
        if v1_key in span:
            res[v2_key] = span[v1_key]

    # V1 timestamps are parsed with datetime_from_isoformat and written out
    # via datetime.timestamp().
    for v1_key, v2_key in (
        ("start_timestamp", "start_timestamp"),
        ("timestamp", "end_timestamp"),
    ):
        if v1_key not in span:
            continue

        try:
            parsed = datetime_from_isoformat(span[v1_key])
        except Exception:
            # Best effort: an unparsable timestamp is left out rather than
            # failing the conversion of the whole span.
            continue

        if parsed is not None:
            res[v2_key] = parsed.timestamp()

    if "parent_span_id" in span:
        res["parent_span_id"] = span["parent_span_id"]

    # Any V1 status other than OK is reported as "error".
    if "status" in span and span["status"] != SPANSTATUS.OK:
        res["status"] = "error"

    attributes: "Dict[str, Any]" = {}

    if "op" in span:
        attributes["sentry.op"] = span["op"]
    if "origin" in span:
        attributes["sentry.origin"] = span["origin"]

    # Later updates win on key collisions: tags override data, and the
    # event-derived attributes below override both.
    span_data = span.get("data")
    if isinstance(span_data, dict):
        attributes.update(span_data)

    span_tags = span.get("tags")
    if isinstance(span_tags, dict):
        attributes.update(span_tags)

    # See Scope._apply_user_attributes_to_telemetry() for user attributes.
    user = event.get("user")
    if isinstance(user, dict):
        if "id" in user:
            attributes["user.id"] = user["id"]
        if "username" in user:
            attributes["user.name"] = user["username"]
        if "email" in user:
            attributes["user.email"] = user["email"]

    # See Scope.set_global_attributes() for release, environment, and SDK metadata.
    if "release" in event:
        attributes["sentry.release"] = event["release"]
    if "environment" in event:
        attributes["sentry.environment"] = event["environment"]
    if "transaction" in event:
        attributes["sentry.segment.name"] = event["transaction"]

    # "contexts" or its "trace" entry may be present but not a dict; check
    # both before the membership test so that case does not raise.
    contexts = event.get("contexts")
    trace_context = contexts.get("trace") if isinstance(contexts, dict) else None
    if isinstance(trace_context, dict) and "span_id" in trace_context:
        attributes["sentry.segment.id"] = trace_context["span_id"]

    sdk_info = event.get("sdk")
    if isinstance(sdk_info, dict):
        if "name" in sdk_info:
            attributes["sentry.sdk.name"] = sdk_info["name"]
        if "version" in sdk_info:
            attributes["sentry.sdk.version"] = sdk_info["version"]

    converted: "Dict[str, Any]" = {}
    for key, value in attributes.items():
        v2_value = _serialized_v1_attribute_to_serialized_v2_attribute(value)
        # None means the value has no V2 representation; skip it instead of
        # emitting a null attribute.
        if v2_value is not None:
            converted[key] = v2_value

    if converted:
        res["attributes"] = converted

    return res


def _split_gen_ai_spans(
event_opt: "Event",
) -> "Optional[tuple[List[Dict[str, object]], List[Dict[str, object]]]]":
if "spans" not in event_opt:
return None

spans: "Any" = event_opt["spans"]
if isinstance(spans, AnnotatedValue):
spans = spans.value

if not isinstance(spans, Iterable):
return None

non_gen_ai_spans = []
gen_ai_spans = []
for span in spans:
span_op = span.get("op")

Check warning on line 271 in sentry_sdk/client.py

View check run for this annotation

@sentry/warden / warden: code-review

AttributeError if span is not a dict in _split_gen_ai_spans

`span.get("op")` is called at line 271 without checking that `span` is a dict first. The function only checks that `spans` is Iterable, but individual spans could be non-dict values. This inconsistency with the defensive check at line 1127 (`if isinstance(span, dict)`) means the function may raise an `AttributeError` before returning.
Comment thread
sentry-warden[bot] marked this conversation as resolved.
if isinstance(span_op, str) and span_op.startswith("gen_ai."):
gen_ai_spans.append(span)
else:
non_gen_ai_spans.append(span)

return non_gen_ai_spans, gen_ai_spans


def _get_options(*args: "Optional[str]", **kwargs: "Any") -> "Dict[str, Any]":
if args and (isinstance(args[0], (bytes, str)) or args[0] is None):
dsn: "Optional[str]" = args[0]
Expand Down Expand Up @@ -912,7 +1100,37 @@
if is_transaction:
if isinstance(profile, Profile):
envelope.add_profile(profile.to_json(event_opt, self.options))
envelope.add_transaction(event_opt)

split_spans = _split_gen_ai_spans(event_opt)
if split_spans is None or not split_spans[1]:
envelope.add_transaction(event_opt)
else:
non_gen_ai_spans, gen_ai_spans = split_spans

event_opt["spans"] = non_gen_ai_spans
envelope.add_transaction(event_opt)

envelope.add_item(
Item(
type=SpanBatcher.TYPE,
content_type=SpanBatcher.CONTENT_TYPE,
headers={
"item_count": len(gen_ai_spans),
},
payload=PayloadRef(
json={
"items": [
_serialized_v1_span_to_serialized_v2_span(
span, event

Check warning on line 1124 in sentry_sdk/client.py

View check run for this annotation

@sentry/warden / warden: code-review

Wrong event variable passed to span converter - uses original event instead of processed event_opt

The code passes `event` (the original, unprocessed event) to `_serialized_v1_span_to_serialized_v2_span` instead of `event_opt` (the prepared/enriched event). The `_serialized_v1_span_to_serialized_v2_span` function extracts metadata like user, release, environment, transaction name, trace context, and SDK info from the event parameter. Since `event` has not been processed by `_prepare_event()`, these fields may be missing or incomplete, resulting in GenAI spans with incomplete attributes in the V2 format.
)

Check failure on line 1125 in sentry_sdk/client.py

View check run for this annotation

@sentry/warden / warden: find-bugs

Wrong variable `event` used instead of `event_opt` causes missing span attributes

The `_serialized_v1_span_to_serialized_v2_span` function is called with `event` (the raw input) instead of `event_opt` (the prepared/processed event). The conversion function extracts user info, release, environment, transaction name, trace context, and SDK metadata from the event parameter. Since `event_opt` is populated by `_prepare_event` which applies scope data, using the raw `event` will result in GenAI spans missing important attributes like user.id, sentry.release, sentry.environment, sentry.segment.name, and sentry.sdk.* when these values come from the scope rather than the original event.
for span in gen_ai_spans
if isinstance(span, dict)
]

Check warning on line 1128 in sentry_sdk/client.py

View check run for this annotation

@sentry/warden / warden: find-bugs

item_count header may not match actual items due to isinstance filtering

The `item_count` header is set to `len(gen_ai_spans)` but the actual items list filters spans with `if isinstance(span, dict)`. If any span in `gen_ai_spans` is not a dict (e.g., an AnnotatedValue or other type), the header will report more items than are actually present in the payload. This could cause issues with downstream processing that relies on the item_count header being accurate.
Comment on lines +1118 to +1128
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

item_count header may not match actual items due to isinstance filtering

The item_count header is set to len(gen_ai_spans) but the actual items list filters spans with if isinstance(span, dict). If any span in gen_ai_spans is not a dict (e.g., an AnnotatedValue or other type), the header will report more items than are actually present in the payload. This could cause issues with downstream processing that relies on the item_count header being accurate.

Verification

Verified by examining: (1) _split_gen_ai_spans at lines 255-277, which iterates spans and appends to gen_ai_spans list without type checking (it only calls .get() on spans, assuming dict-like behavior); (2) The spans in event_opt["spans"] could potentially contain non-dict types as seen in other parts of the codebase that check for AnnotatedValue. The filtering on line 1127 guards against this but creates a mismatch with the count on line 1118.

Suggested fix: Either filter gen_ai_spans before calculating item_count, or compute item_count from the filtered list.

Suggested change
"item_count": len(gen_ai_spans),
},
payload=PayloadRef(
json={
"items": [
_serialized_v1_span_to_serialized_v2_span(
span, event
)
for span in gen_ai_spans
if isinstance(span, dict)
]
gen_ai_spans = [span for span in gen_ai_spans if isinstance(span, dict)]
span, event_opt

Identified by Warden find-bugs · BKJ-YKB

},
),
)
)

elif is_checkin:
envelope.add_checkin(event_opt)
else:
Expand Down
Loading