Skip to content

Commit 203b322

Browse files
Bartek Ogryczak, lobsterkatie, getsantry[bot]
authored and committed
ref(utils): SDK name tag normalizer (getsentry#59504)
getsentry#59501 Normalizes SDK tags to reduce their cardinality. Related to getsentry#59075 and getsentry#59379. - non-Sentry SDK tags are ignored (collapsed into `"other"`) - official Sentry SDK tags are normalized and shortened: - `sentry.javascript.*` are mostly kept as-is - `sentry.native.*` are collapsed to 3 levels - all other `sentry.*` are collapsed to 2 levels --------- Co-authored-by: Katie Byers <[email protected]> Co-authored-by: getsantry[bot] <66042841+getsantry[bot]@users.noreply.github.com>
1 parent 0ebcb04 commit 203b322

File tree

2 files changed

+166
-0
lines changed

2 files changed

+166
-0
lines changed

src/sentry/utils/tag_normalization.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
import functools
2+
import re
3+
4+
# Canonical SDK tag values that `normalize_sdk_tag` is allowed to return.
# Anything that does not end up as one of these is reported as "other".
_KNOWN_TAGS = {
    # SDKs tracked at two levels (`sentry.<platform>`)
    "sentry.cocoa",
    "sentry.dart",
    "sentry.dotnet",
    "sentry.elixir",
    "sentry.go",
    "sentry.java",
    "sentry.kubernetes",
    "sentry.objc",
    "sentry.perl",
    "sentry.php",
    "sentry.python",
    "sentry.ruby",
    "sentry.rust",
    "sentry.swift",
    # JavaScript SDKs keep their framework-specific suffix
    "sentry.javascript.angular",
    "sentry.javascript.browser",
    "sentry.javascript.capacitor",
    "sentry.javascript.cordova",
    "sentry.javascript.deno",
    "sentry.javascript.electron",
    "sentry.javascript.ember",
    "sentry.javascript.gatsby",
    "sentry.javascript.nextjs",
    "sentry.javascript.node",
    "sentry.javascript.react",
    "sentry.javascript.react.native",
    "sentry.javascript.remix",
    "sentry.javascript.serverless",
    "sentry.javascript.svelte",
    "sentry.javascript.sveltekit",
    "sentry.javascript.vue",
    # Native SDKs are tracked at three levels (`sentry.native.<platform>`)
    "sentry.native.android",
    "sentry.native.dotnet",
    "sentry.native.unity",
    "sentry.native.unreal",
}
41+
42+
43+
# Alternative spellings of SDK tags, mapped to the name they should be treated
# as. `normalize_sdk_tag` looks tags up here *after* separators have been
# rewritten to dots, so keys must be written in dotted form. The mapped value
# still goes through the usual collapsing afterwards (for example
# `sentry.php.laravel` ends up as `sentry.php`).
_SYNONYMOUS_TAGS = {
    # Fixed typo: this previously pointed at "sentery.javascript.cordova",
    # which failed the `sentry.` prefix check and was reported as "other".
    "sentry.cordova": "sentry.javascript.cordova",
    "sentry.electron": "sentry.javascript.electron",
    "sentry.javascript.angular.ivy": "sentry.javascript.angular",
    "sentry.javascript.node.experimental": "sentry.javascript.node",
    "sentry.javascript.react.expo": "sentry.javascript.react",
    "sentry.javascript.react.native.expo": "sentry.javascript.react.native",
    "sentry.laravel": "sentry.php.laravel",
    "sentry.react": "sentry.javascript.react",
    "sentry.symfony": "sentry.php.symfony",
    "sentry.unity": "sentry.native.unity",
}
55+
56+
# TODO: Should we be grouping by origin SDK instead? (For example, should we be
57+
# combining all flutter events rather than all native events?)
58+
# See https://github.com/getsentry/sentry/pull/59504#discussion_r1385483963
59+
60+
61+
@functools.lru_cache(maxsize=300)
def normalize_sdk_tag(tag: str) -> str:
    """
    Reduce an SDK name tag to a small set of canonical values.

    The tag is processed in this order:

    1. Every run of non-word characters or underscores becomes a single dot
       (`sentry-foo` and `sentry_foo` -> `sentry.foo`).
    2. Known synonyms are replaced (`sentry.react` -> `sentry.javascript.react`).
    3. Tags that do not start with `sentry.` are reported as `other`.
    4. `sentry.javascript.*` tags are kept at full length, `sentry.native.*`
       tags are cut to three segments, and every other `sentry.*` tag is cut
       to two segments (`sentry.python.django` -> `sentry.python`).
    5. If the result is not one of the known tags, it is reported as `other`.

    Results are memoized for up to 300 distinct inputs.
    """
    # Step 1: unify separators so that later comparisons only deal with dots.
    dotted = re.sub(r"[\W_]+", ".", tag)

    # Step 2: swap in the preferred name when this spelling is a known synonym.
    canonical = _SYNONYMOUS_TAGS.get(dotted, dotted)

    # Step 3: anything that is not an official `sentry.*` tag is lumped together.
    if not canonical.startswith("sentry."):
        return "other"

    # Step 4: the prefix check above guarantees at least two segments.
    segments = canonical.split(".")
    family = segments[1]
    if family == "javascript":
        depth = None  # keep every segment
    elif family == "native":
        depth = 3
    else:
        depth = 2
    shortened = ".".join(segments[:depth])

    # Step 5: only recognised tags are reported under their own name.
    return shortened if shortened in _KNOWN_TAGS else "other"
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import pytest
2+
3+
from sentry.utils.tag_normalization import normalize_sdk_tag
4+
5+
6+
@pytest.mark.parametrize(
    "tag,expected",
    [
        ("sentry-javascript-angular", "sentry.javascript.angular"),
        ("sentry_python", "sentry.python"),
    ],
)
def test_normalizes_to_dots(tag, expected):
    """Dashes and underscores in a tag are treated as dots."""
    normalized = normalize_sdk_tag(tag)
    assert normalized == expected
15+
16+
17+
@pytest.mark.parametrize(
    "tag,expected",
    [
        # JavaScript tags are left at full length
        ("sentry.javascript.angular", "sentry.javascript.angular"),
        ("sentry.javascript.react.native", "sentry.javascript.react.native"),
        # other SDKs are cut to two segments
        ("sentry.python.django", "sentry.python"),
        # native SDKs are cut to three segments
        ("sentry.native.android.flutter", "sentry.native.android"),
    ],
)
def test_shortens_non_js(tag, expected):
    """Only non-JavaScript tags lose their trailing segments."""
    normalized = normalize_sdk_tag(tag)
    assert normalized == expected
34+
35+
36+
@pytest.mark.parametrize(
    "tag,expected",
    [
        ("sentry.javascript.angular", "sentry.javascript.angular"),
        ("sentry.javascript.angular.ivy", "sentry.javascript.angular"),
        ("sentry.symfony", "sentry.php"),
        ("sentry.unity", "sentry.native.unity"),
        ("sentry.javascript.react.native.expo", "sentry.javascript.react.native"),
    ],
)
def test_uses_synonyms(tag, expected):
    """Synonymous tags are mapped to their preferred name before shortening."""
    normalized = normalize_sdk_tag(tag)
    assert normalized == expected
48+
49+
50+
@pytest.mark.parametrize(
    "tag,expected",
    [
        ("foo.baz.bar", "other"),
        ("sentryfoo", "other"),
        ("raven", "other"),
    ],
)
def test_non_sentry_to_other(tag, expected):
    """Tags without the `sentry.` prefix are reported as `other`."""
    normalized = normalize_sdk_tag(tag)
    assert normalized == expected
56+
57+
58+
@pytest.mark.parametrize(
    "tag,expected",
    [
        ("sentry.sparql", "other"),
        ("sentry.terraform.hcl", "other"),
        ("sentry-native", "other"),
    ],
)
def test_unknown_sentry_to_other(tag, expected):
    """`sentry.*` tags that are not in the known list are reported as `other`."""
    normalized = normalize_sdk_tag(tag)
    assert normalized == expected
64+
65+
66+
def test_responses_cached():
    """A repeated lookup is served from the cache instead of being recomputed."""
    # Start from an empty cache so the counters below are deterministic.
    normalize_sdk_tag.cache_clear()

    for _ in range(2):
        assert normalize_sdk_tag("sentry.javascript.react") == "sentry.javascript.react"

    # First call is a miss, second call is a hit.
    stats = normalize_sdk_tag.cache_info()
    assert stats.hits == 1
    assert stats.misses == 1

0 commit comments

Comments
 (0)