Skip to content

Commit 3d8cb6d

Browse files
committed
formatting
1 parent dac1167 commit 3d8cb6d

14 files changed

+93
-120
lines changed

sql/2024/privacy/most_common_client_hints.sql

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,8 @@ WITH response_headers AS (
3535
SELECT
3636
client,
3737
IF(header_name = 'accept-ch', header_value, tag_value) AS value,
38-
COUNT(DISTINCT page) AS number_of_websites,
39-
ANY_VALUE(total_websites) AS total_websites,
40-
COUNT(DISTINCT page) / ANY_VALUE(total_websites) AS pct_websites
38+
COUNT(DISTINCT page) / ANY_VALUE(total_websites) AS pct_pages,
39+
COUNT(DISTINCT page) AS number_of_pages
4140
FROM response_headers
4241
FULL OUTER JOIN meta_tags
4342
USING (client, page)
@@ -47,6 +46,5 @@ WHERE
4746
GROUP BY
4847
client,
4948
value
50-
ORDER BY
51-
number_of_websites DESC
49+
ORDER BY pct_pages DESC
5250
LIMIT 200
Lines changed: 7 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,26 @@
1-
#standardSQL
21
# Counts of CMPs using IAB Transparency & Consent Framework
32
# cf. https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/TCFv2/IAB%20Tech%20Lab%20-%20CMP%20API%20v2.md#tcdata
43

5-
WITH totals AS (
4+
WITH cmps AS (
65
SELECT
76
client,
8-
COUNT(0) AS total_websites
7+
page,
8+
JSON_VALUE(custom_metrics, '$.privacy.iab_tcf_v2.data.cmpId') AS cmpId,
9+
COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages
910
FROM `httparchive.all.pages`
1011
WHERE
1112
date = '2024-06-01' AND
1213
is_root_page = TRUE
13-
GROUP BY client
14-
), cmps AS (
15-
SELECT
16-
client,
17-
JSON_VALUE(custom_metrics, '$.privacy.iab_tcf_v2.data.cmpId') AS cmpId
18-
FROM `httparchive.all.pages`
19-
WHERE
20-
date = '2024-06-01' AND
21-
is_root_page = TRUE AND
22-
JSON_VALUE(custom_metrics, '$.privacy.iab_tcf_v2.data.cmpId') IS NOT NULL
2314
)
2415

2516
SELECT
2617
client,
2718
cmpId,
28-
COUNT(0) AS number_of_websites,
29-
total_websites,
30-
COUNT(0) / total_websites AS pct_websites
19+
COUNT(0) / ANY_VALUE(total_pages) AS pct_pages,
20+
COUNT(0) AS number_of_pages
3121
FROM cmps
32-
JOIN totals
33-
USING (client)
3422
GROUP BY
3523
client,
36-
total_websites,
3724
cmpId
3825
ORDER BY
39-
pct_websites DESC,
40-
client,
41-
cmpId
26+
pct_pages DESC

sql/2024/privacy/most_common_cname_domains.sql

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ SELECT
3535
client,
3636
NET.REG_DOMAIN(cnames.cname) AS cname,
3737
NET.REG_DOMAIN(adguard_trackers.domain) AS adguard_cname,
38-
COUNT(DISTINCT NET.REG_DOMAIN(cnames.origin)) AS request_domain_count,
39-
COUNT(DISTINCT page) AS page_count
38+
COUNT(DISTINCT NET.REG_DOMAIN(cnames.origin)) AS number_of_request_domains,
39+
COUNT(DISTINCT page) AS number_of_pages
4040
--ARRAY_AGG(DISTINCT cnames.origin LIMIT 2) AS request_domain_examples,
4141
--ARRAY_AGG(DISTINCT page LIMIT 2) AS page_examples,
4242
FROM cnames
@@ -49,5 +49,5 @@ GROUP BY
4949
HAVING number_of_request_domains > 100
5050
ORDER BY
5151
client,
52-
request_domain_count DESC
52+
number_of_request_domains DESC
5353
LIMIT 500

sql/2024/privacy/most_common_countries_for_iab_tcf_v2.sql

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
#standardSQL
21
# Counts of countries for publishers using IAB Transparency & Consent Framework
32
# cf. https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/TCFv2/IAB%20Tech%20Lab%20-%20CMP%20API%20v2.md#tcdata
43
# "Country code of the country that determines the legislation of
@@ -28,16 +27,14 @@ WITH totals AS (
2827
SELECT
2928
client,
3029
publisherCC,
31-
COUNT(0) AS number_of_websites,
32-
total_websites,
33-
COUNT(0) / total_websites AS pct_websites
30+
COUNT(0) / ANY_VALUE(total_websites) AS pct_pages,
31+
COUNT(0) AS number_of_pages
3432
FROM cmps
3533
JOIN totals
3634
USING (client)
3735
GROUP BY
3836
client,
39-
total_websites,
4037
publisherCC
4138
ORDER BY
4239
client,
43-
number_of_websites DESC
40+
pct_pages DESC
Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,27 @@
1-
#standardSQL
21
# Counts of US Privacy String values for websites using IAB US Privacy Framework
32
# cf. https://github.com/InteractiveAdvertisingBureau/USPrivacy/blob/master/CCPA/US%20Privacy%20String.md
43

5-
WITH totals AS (
6-
SELECT
7-
client,
8-
COUNT(DISTINCT page) AS pages_total
9-
FROM `httparchive.all.pages`
10-
WHERE
11-
date = '2024-06-01'
12-
GROUP BY client
13-
), usp_data AS (
4+
WITH usp_data AS (
145
SELECT
156
client,
167
page,
17-
JSON_VALUE(custom_metrics, '$.privacy.iab_usp.privacy_string.uspString') AS uspString
8+
JSON_VALUE(custom_metrics, '$.privacy.iab_usp.privacy_string.uspString') AS uspString,
9+
COUNT(DISTINCT page) OVER (PARTITION BY client) AS pages_total
1810
FROM `httparchive.all.pages`
1911
WHERE
2012
date = '2024-06-01' AND
21-
JSON_VALUE(custom_metrics, '$.privacy.iab_usp.privacy_string.uspString') IS NOT NULL
13+
is_root_page = TRUE
2214
)
2315

2416
SELECT
2517
client,
2618
uspString,
27-
COUNT(DISTINCT page) AS pages_with_usp,
28-
ANY_VALUE(pages_total) AS pages_total,
29-
COUNT(DISTINCT page) / ANY_VALUE(pages_total) AS pages_pct
19+
COUNT(DISTINCT page) / ANY_VALUE(pages_total) AS pct_pages,
20+
COUNT(DISTINCT page) AS number_of_pages
3021
FROM usp_data
31-
JOIN totals
32-
USING (client)
3322
GROUP BY
3423
client,
3524
uspString
3625
ORDER BY
37-
client,
38-
pages_pct DESC
26+
pct_pages DESC
3927
LIMIT 100

sql/2024/privacy/number_of_websites_per_technology.sql

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,9 @@ WITH technologies AS (
1616
SELECT
1717
client,
1818
technology,
19-
COUNT(DISTINCT page) / ANY_VALUE(total_websites) AS percent_of_websites,
20-
COUNT(DISTINCT page) AS number_of_websites,
21-
ARRAY_AGG(DISTINCT category) AS categories,
22-
ANY_VALUE(total_websites) AS total_websites
19+
COUNT(DISTINCT page) / ANY_VALUE(total_websites) AS pct_pages,
20+
COUNT(DISTINCT page) AS number_of_pages,
21+
ARRAY_AGG(DISTINCT category) AS categories
2322
FROM technologies
2423
WHERE
2524
category IN (
@@ -32,5 +31,4 @@ GROUP BY
3231
client,
3332
technology
3433
ORDER BY
35-
client,
36-
number_of_websites DESC
34+
pct_pages DESC

sql/2024/privacy/number_of_websites_per_technology_category.sql

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
SELECT
22
client,
33
category,
4-
COUNT(DISTINCT IF(category = tech_category, page, NULL)) / COUNT(DISTINCT page) AS pct_websites_in_category,
5-
COUNT(DISTINCT IF(category = tech_category, page, NULL)) AS number_of_websites_in_category,
6-
COUNT(DISTINCT page) AS total_pages
4+
COUNT(DISTINCT IF(category = tech_category, page, NULL)) / COUNT(DISTINCT page) AS pct_pages,
5+
COUNT(DISTINCT IF(category = tech_category, page, NULL)) AS number_of_pages
76
FROM `httparchive.all.pages`,
87
UNNEST(technologies) AS tech,
98
UNNEST(categories) AS tech_category,
@@ -20,5 +19,4 @@ GROUP BY
2019
client,
2120
category
2221
ORDER BY
23-
client,
24-
number_of_websites_in_category DESC
22+
pct_pages DESC

sql/2024/privacy/number_of_websites_with_bounce_tracking.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,9 @@ WITH redirect_requests AS (
7272
-- Count the number of websites with bounce tracking per bounce hostname
7373
SELECT
7474
NET.HOST(navigation_redirect_location) AS bounce_hostname,
75-
COUNT(DISTINCT page) AS pages_count
75+
COUNT(DISTINCT page) AS number_of_pages
7676
--ARRAY_AGG(page LIMIT 2) AS page_examples
7777
FROM bounce_sequences
7878
GROUP BY bounce_hostname
79-
ORDER BY pages_count DESC
79+
ORDER BY number_of_pages DESC
8080
LIMIT 100

sql/2024/privacy/number_of_websites_with_client_hints.sql

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,13 @@ WITH response_headers AS (
3030

3131
SELECT
3232
client,
33-
ANY_VALUE(total_websites) AS total_websites,
34-
COUNT(DISTINCT page) / ANY_VALUE(total_websites) AS pct_websites
33+
COUNT(DISTINCT page) / ANY_VALUE(total_websites) AS pct_pages,
34+
COUNT(DISTINCT page) AS number_of_pages
3535
FROM response_headers
3636
FULL OUTER JOIN meta_tags
3737
USING (client, page)
3838
WHERE
3939
header_name = 'accept-ch' OR
4040
tag_name = 'accept-ch'
4141
GROUP BY client
42-
ORDER BY client
42+
ORDER BY pct_pages DESC

sql/2024/privacy/number_of_websites_with_dnt.sql

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
#standardSQL
21
# Pages that request DNT status
32

43
WITH blink AS (
@@ -26,10 +25,10 @@ pages AS (
2625

2726
SELECT
2827
COALESCE(blink.client, pages.client) AS client,
29-
blink.num_urls AS number_of_websites_usage_per_blink,
30-
blink.pct_urls AS percentage_of_websites_usage_per_blink,
31-
pages.num_urls AS number_of_websites_usage_per_custom_metric,
32-
pages.pct_urls AS percentage_of_websites_usage_per_custom_metric
28+
blink.num_urls AS number_of_pages_usage_per_blink,
29+
blink.pct_urls AS pct_pages_usage_per_blink,
29+
pages.num_urls AS number_of_pages_usage_per_custom_metric,
30+
pages.pct_urls AS pct_pages_usage_per_custom_metric
3332
FROM blink
3433
FULL OUTER JOIN pages
3534
ON blink.client = pages.client

sql/2024/privacy/number_of_websites_with_gpc.sql

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
1-
#standardSQL
21
# Pages that provide `/.well-known/gpc.json` for Global Privacy Control
32

43
WITH pages AS (
54
SELECT
65
client,
7-
COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.well-known."/.well-known/gpc.json".found') = 'true', page, NULL)) AS well_known_pages_count,
8-
COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.well-known."/.well-known/gpc.json".found') = 'true', page, NULL)) / COUNT(DISTINCT page) AS well_known_pages_pct,
9-
COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.privacy.navigator_globalPrivacyControl') = 'true', page, NULL)) AS js_api_pages_count,
10-
COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.privacy.navigator_globalPrivacyControl') = 'true', page, NULL)) / COUNT(DISTINCT page) AS js_api_pages_pct
6+
COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.well-known."/.well-known/gpc.json".found') = 'true', page, NULL)) / COUNT(DISTINCT page) AS pct_pages_well_known,
7+
COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.well-known."/.well-known/gpc.json".found') = 'true', page, NULL)) AS number_of_pages_well_known,
8+
COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.privacy.navigator_globalPrivacyControl') = 'true', page, NULL)) / COUNT(DISTINCT page) AS pct_pages_js_api,
9+
COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.privacy.navigator_globalPrivacyControl') = 'true', page, NULL)) AS number_of_pages_js_api
1110
FROM `httparchive.all.pages`
1211
WHERE
1312
date = '2024-06-01' AND
@@ -18,8 +17,8 @@ WITH pages AS (
1817
headers AS (
1918
SELECT
2019
client,
21-
COUNT(DISTINCT IF(headers.name = 'sec-gpc' AND headers.value = '1', page, NULL)) AS headers_pages_count,
22-
COUNT(DISTINCT IF(headers.name = 'sec-gpc' AND headers.value = '1', page, NULL)) / COUNT(DISTINCT page) AS headers_pages_pct
20+
COUNT(DISTINCT IF(headers.name = 'sec-gpc' AND headers.value = '1', page, NULL)) / COUNT(DISTINCT page) AS pct_pages_headers,
21+
COUNT(DISTINCT IF(headers.name = 'sec-gpc' AND headers.value = '1', page, NULL)) AS number_of_pages_headers
2322
FROM `httparchive.all.requests`,
2423
UNNEST(response_headers) headers
2524
WHERE
Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
#standardSQL
21
# Counts of pages with IAB Frameworks (Transparency & Consent / US Privacy)
32

43
WITH privacy_custom_metrics_data AS (
@@ -12,38 +11,45 @@ WITH privacy_custom_metrics_data AS (
1211
)
1312

1413
SELECT
15-
*,
16-
number_of_websites_with_iab_tcf_v1 / number_of_websites AS pct_websites_with_iab_tcf_v1,
17-
number_of_websites_with_iab_tcf_v2 / number_of_websites AS pct_websites_with_iab_tcf_v2,
18-
number_of_websites_with_iab_usp / number_of_websites AS pct_websites_with_iab_usp,
19-
number_of_websites_with_iab_tcf_any / number_of_websites AS pct_websites_with_iab_tcf_any,
20-
number_of_websites_with_iab_any / number_of_websites AS pct_websites_with_iab_any,
21-
number_of_websites_with_iab_tcf_v1_compliant / number_of_websites_with_iab_tcf_v1 AS pct_websites_with_iab_tcf_v1_compliant,
22-
number_of_websites_with_iab_tcf_v2_compliant / number_of_websites_with_iab_tcf_v2 AS pct_websites_with_iab_tcf_v2_compliant
14+
client,
15+
number_of_pages_with_iab_tcf_v1 / number_of_pages AS pct_pages_with_iab_tcf_v1,
16+
number_of_pages_with_iab_tcf_v1,
17+
number_of_pages_with_iab_tcf_v2 / number_of_pages AS pct_pages_with_iab_tcf_v2,
18+
number_of_pages_with_iab_tcf_v2,
19+
number_of_pages_with_iab_usp / number_of_pages AS pct_pages_with_iab_usp,
20+
number_of_pages_with_iab_usp,
21+
number_of_pages_with_iab_tcf_any / number_of_pages AS pct_pages_with_iab_tcf_any,
22+
number_of_pages_with_iab_tcf_any,
23+
number_of_pages_with_iab_any / number_of_pages AS pct_pages_with_iab_any,
24+
number_of_pages_with_iab_any,
25+
number_of_pages_with_iab_tcf_v1_compliant / number_of_pages_with_iab_tcf_v1 AS pct_pages_with_iab_tcf_v1_compliant,
26+
number_of_pages_with_iab_tcf_v1_compliant,
27+
number_of_pages_with_iab_tcf_v2_compliant / number_of_pages_with_iab_tcf_v2 AS pct_pages_with_iab_tcf_v2_compliant,
28+
number_of_pages_with_iab_tcf_v2_compliant
2329
FROM (
2430
SELECT
2531
client,
26-
COUNT(0) AS number_of_websites,
27-
COUNTIF(JSON_VALUE(metrics, '$.iab_tcf_v1.present') = 'true') AS number_of_websites_with_iab_tcf_v1,
28-
COUNTIF(JSON_VALUE(metrics, '$.iab_tcf_v2.present') = 'true') AS number_of_websites_with_iab_tcf_v2,
29-
COUNTIF(JSON_VALUE(metrics, '$.iab_usp.present') = 'true') AS number_of_websites_with_iab_usp,
32+
COUNT(0) AS number_of_pages,
33+
COUNTIF(JSON_VALUE(metrics, '$.iab_tcf_v1.present') = 'true') AS number_of_pages_with_iab_tcf_v1,
34+
COUNTIF(JSON_VALUE(metrics, '$.iab_tcf_v2.present') = 'true') AS number_of_pages_with_iab_tcf_v2,
35+
COUNTIF(JSON_VALUE(metrics, '$.iab_usp.present') = 'true') AS number_of_pages_with_iab_usp,
3036
COUNTIF(
3137
JSON_VALUE(metrics, '$.iab_tcf_v1.present') = 'true' OR
3238
JSON_VALUE(metrics, '$.iab_tcf_v2.present') = 'true'
33-
) AS number_of_websites_with_iab_tcf_any,
39+
) AS number_of_pages_with_iab_tcf_any,
3440
COUNTIF(
3541
JSON_VALUE(metrics, '$.iab_tcf_v1.present') = 'true' OR
3642
JSON_VALUE(metrics, '$.iab_tcf_v2.present') = 'true' OR
3743
JSON_VALUE(metrics, '$.iab_usp.present') = 'true'
38-
) AS number_of_websites_with_iab_any,
44+
) AS number_of_pages_with_iab_any,
3945
COUNTIF(
4046
JSON_VALUE(metrics, '$.iab_tcf_v1.present') = 'true' AND
4147
JSON_VALUE(metrics, '$.iab_tcf_v1.compliant_setup') = 'true'
42-
) AS number_of_websites_with_iab_tcf_v1_compliant,
48+
) AS number_of_pages_with_iab_tcf_v1_compliant,
4349
COUNTIF(
4450
JSON_VALUE(metrics, '$.iab_tcf_v2.present') = 'true' AND
4551
JSON_VALUE(metrics, '$.iab_tcf_v2.compliant_setup') = 'true'
46-
) AS number_of_websites_with_iab_tcf_v2_compliant
52+
) AS number_of_pages_with_iab_tcf_v2_compliant
4753
FROM privacy_custom_metrics_data
4854
GROUP BY client
4955
)

0 commit comments

Comments
 (0)