Skip to content

Commit 4504d2d

Browse files
JannisBush and GJFR authored
Fix incorrect "total" numbers in Security chapter (2024, 2022, ?) (#3912)
* Update iframe_attributes_usage description
* Fix total_iframes in iframe_attributes_usage.sql
* Fix total pages in meta_csp_disallowed_directives.sql
* Clarify in 3 queries that the total is not global
* Note clarification
* Update contributor details
* Add comments to 2022, 2021, 2020 queries
* Fix linting issues
* Adapt text with updated query results
* Query for 2020 and 2021 (using crawl.pages)
* Fix linting
* Adapt articles of 2022, 2021 and 2020
* Apply number fixes
* Apply fixes to translated chapters

---------

Co-authored-by: Gertjan Franken <[email protected]>
1 parent aaa187a commit 4504d2d

21 files changed

+164
-41
lines changed

sql/2020/security/iframe_attributes_usage.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# usage of allow and sandbox attribute of iframe elements, per page and over all iframe elements
33
SELECT
44
client,
5-
COUNT(0) AS total_iframes,
5+
COUNT(0) AS total_iframes, # Note: These are not the total number of iframes but only the number of iframes with allow/sandbox + 1 for each website without such iframes
66
COUNTIF(allow IS NOT NULL) AS freq_allow,
77
COUNTIF(allow IS NOT NULL) / COUNT(0) AS pct_allow_frames,
88
COUNTIF(sandbox IS NOT NULL) AS freq_sandbox,

sql/2021/security/iframe_attributes_usage.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# usage of allow and sandbox attribute of iframe elements, per page and over all iframe elements
33
SELECT
44
client,
5-
COUNT(0) AS total_iframes,
5+
COUNT(0) AS total_iframes, # Note: These are not the total number of iframes but only the number of iframes with allow/sandbox + 1 for each website without such iframes
66
COUNTIF(allow IS NOT NULL) AS freq_allow,
77
COUNTIF(allow IS NOT NULL) / COUNT(0) AS pct_allow_frames,
88
COUNTIF(sandbox IS NOT NULL) AS freq_sandbox,

sql/2022/security/iframe_attributes_usage.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# usage of allow and sandbox attribute of iframe elements, per page and over all iframe elements
33
SELECT
44
client,
5-
COUNT(0) AS total_iframes,
5+
COUNT(0) AS total_iframes, # Note: These are not the total number of iframes but only the number of iframes with allow/sandbox + 1 for each website without such iframes
66
COUNTIF(allow IS NOT NULL) AS freq_allow,
77
COUNTIF(allow IS NOT NULL) / COUNT(0) AS pct_allow_frames,
88
COUNTIF(sandbox IS NOT NULL) AS freq_sandbox,

sql/2024/security/coep_header_prevalence.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#standardSQL
22
# Section: Attack Preventions - Preventing attacks using Cross-Origin policies
33
# Question: Which are the most common COEP values?
4-
# Note: Considers headers of main document responses
4+
# Note: Considers headers of main document responses only
55
SELECT
66
client,
77
coep_header,

sql/2024/security/coop_header_prevalence.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#standardSQL
22
# Section: Attack Preventions - Preventing attacks using Cross-Origin policies
33
# Question: Which are the most common COOP values?
4-
# Note: Considers headers of main document responses
4+
# Note: Considers headers of main document responses only
55
SELECT
66
client,
77
coop_header,

sql/2024/security/csp_number_of_allowed_hosts.sql

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
#standardSQL
22
# Section: Attack Preventions - Preventing attacks using CSP
33
# Question: CSP on home pages: number of unique headers, header length and number of allowed HTTP(S) hosts in all directives
4+
# Note: for CSP we checked whether the header value is NULL (empty?) (99.65% of CSP headers are not NULL on desktop), we did not do this for other headers?
45
CREATE TEMP FUNCTION getNumUniqueHosts(str STRING) AS (
56
(SELECT COUNT(DISTINCT x) FROM UNNEST(REGEXP_EXTRACT_ALL(str, r'(?i)(https*://[^\s;]+)[\s;]')) AS x)
67
);
78

89
SELECT
910
client,
1011
percentile,
11-
COUNT(0) AS total_requests,
12-
COUNTIF(csp_header IS NOT NULL) AS total_csp_headers,
12+
COUNT(0) AS total_csp_headers,
13+
COUNTIF(csp_header IS NOT NULL) AS total_non_null_csp_headers,
1314
COUNTIF(csp_header IS NOT NULL) / COUNT(0) AS pct_csp_headers,
1415
COUNT(DISTINCT csp_header) AS num_unique_csp_headers,
1516
APPROX_QUANTILES(LENGTH(csp_header), 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS csp_header_length,

sql/2024/security/csp_script_source_list_keywords.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Question: usage of default/script-src, and within the directive usage of strict-dynamic, nonce values, unsafe-inline and unsafe-eval
44
SELECT
55
client,
6-
total_pages,
6+
total_pages_with_csp,
77
freq_csp,
88
freq_default_script_src,
99
SAFE_DIVIDE(freq_default_script_src, freq_csp) AS pct_default_script_src_over_csp,
@@ -22,7 +22,7 @@ SELECT
2222
FROM (
2323
SELECT
2424
client,
25-
COUNT(0) AS total_pages,
25+
COUNT(0) AS total_pages_with_csp,
2626
COUNTIF(csp_header IS NOT NULL) AS freq_csp,
2727
COUNTIF(REGEXP_CONTAINS(csp_header, '(?i)(default|script)-src')) AS freq_default_script_src,
2828
COUNTIF(REGEXP_CONTAINS(csp_header, '(?i)(default|script)-src[^;]+strict-dynamic')) AS freq_strict_dynamic,

sql/2024/security/iframe_attribute_popular_hosts.sql

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ SELECT
1212
client,
1313
policy_type,
1414
hostname,
15-
total_iframes,
15+
total_iframes_with_allow_or_sandbox,
1616
COUNTIF(has_policy) AS freq,
17-
COUNTIF(has_policy) / total_iframes AS pct
17+
COUNTIF(has_policy) / total_iframes_with_allow_or_sandbox AS pct
1818
FROM (
1919
SELECT
2020
client,
@@ -37,7 +37,7 @@ FROM (
3737
JOIN (
3838
SELECT
3939
client,
40-
SUM(ARRAY_LENGTH(JSON_EXTRACT_ARRAY(JSON_EXTRACT_SCALAR(payload, '$._security'), '$.iframe-allow-sandbox'))) AS total_iframes
40+
SUM(ARRAY_LENGTH(JSON_EXTRACT_ARRAY(JSON_EXTRACT_SCALAR(payload, '$._security'), '$.iframe-allow-sandbox'))) AS total_iframes_with_allow_or_sandbox
4141
FROM
4242
`httparchive.all.pages`
4343
WHERE
@@ -49,7 +49,7 @@ USING
4949
(client)
5050
GROUP BY
5151
client,
52-
total_iframes,
52+
total_iframes_with_allow_or_sandbox,
5353
policy_type,
5454
hostname
5555
HAVING

sql/2024/security/iframe_attributes_usage.sql

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,29 @@
11
#standardSQL
22
# Section: Content Inclusion - Iframe Sandbox/Permissions Policy
3-
# Question: How often are the allow and sandbox attributes used on iframes? Both per page and over all iframe elements
3+
# Question: How often are the allow and sandbox attributes used on iframes? Both per page (used in at least one iframe on a page) and over all iframe elements
4+
WITH total_iframe_count AS (
5+
SELECT
6+
client,
7+
date,
8+
SUM(SAFE_CAST(JSON_EXTRACT(custom_metrics, '$.num_iframes') AS INT64)) AS total_iframes
9+
FROM
10+
`httparchive.all.pages`
11+
WHERE
12+
(date = '2022-06-01' OR date = '2023-06-01' OR date = '2023-12-01' OR date = '2024-03-01' OR date = '2024-04-01' OR date = '2024-05-01' OR date = '2024-06-01') AND
13+
is_root_page
14+
GROUP BY client, date
15+
)
16+
417
SELECT
518
client,
619
date,
7-
COUNT(0) AS total_iframes,
20+
total_iframes,
821
COUNTIF(allow IS NOT NULL) AS freq_allow,
9-
COUNTIF(allow IS NOT NULL) / COUNT(0) AS pct_allow_frames,
22+
COUNTIF(allow IS NOT NULL) / total_iframes AS pct_allow_frames,
1023
COUNTIF(sandbox IS NOT NULL) AS freq_sandbox,
11-
COUNTIF(sandbox IS NOT NULL) / COUNT(0) AS pct_sandbox_frames,
24+
COUNTIF(sandbox IS NOT NULL) / total_iframes AS pct_sandbox_frames,
1225
COUNTIF(allow IS NOT NULL AND sandbox IS NOT NULL) AS freq_both_frames,
13-
COUNTIF(allow IS NOT NULL AND sandbox IS NOT NULL) / COUNT(0) AS pct_both_frames,
26+
COUNTIF(allow IS NOT NULL AND sandbox IS NOT NULL) / total_iframes AS pct_both_frames,
1427
COUNT(DISTINCT url) AS total_urls,
1528
COUNT(DISTINCT IF(allow IS NOT NULL, url, NULL)) AS allow_freq_urls,
1629
COUNT(DISTINCT IF(allow IS NOT NULL, url, NULL)) / COUNT(DISTINCT url) AS allow_pct_urls,
@@ -36,8 +49,9 @@ FROM (
3649
is_root_page
3750
)
3851
LEFT JOIN UNNEST(iframeAttrs) AS iframeAttr
39-
)
52+
) JOIN total_iframe_count USING (client, date)
4053
GROUP BY
54+
total_iframes,
4155
client,
4256
date
4357
ORDER BY
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#standardSQL
2+
# Section: Content Inclusion - Iframe Sandbox/Permissions Policy
3+
# Question: How often are the allow and sandbox attributes used on iframes? Both per page (used in at least one iframe on a page) and over all iframe elements
4+
WITH total_iframe_count AS (
5+
SELECT
6+
client,
7+
date,
8+
SUM(SAFE.INT64(custom_metrics.other.num_iframes)) AS total_iframes
9+
FROM
10+
`httparchive.crawl.pages`
11+
WHERE
12+
(date = '2020-08-01' OR date = '2021-07-01' OR date = '2022-06-01') AND
13+
is_root_page
14+
GROUP BY client, date
15+
)
16+
17+
SELECT
18+
client,
19+
date,
20+
total_iframes,
21+
COUNTIF(allow IS NOT NULL) AS freq_allow,
22+
COUNTIF(allow IS NOT NULL) / total_iframes AS pct_allow_frames,
23+
COUNTIF(sandbox IS NOT NULL) AS freq_sandbox,
24+
COUNTIF(sandbox IS NOT NULL) / total_iframes AS pct_sandbox_frames,
25+
COUNTIF(allow IS NOT NULL AND sandbox IS NOT NULL) AS freq_both_frames,
26+
COUNTIF(allow IS NOT NULL AND sandbox IS NOT NULL) / total_iframes AS pct_both_frames,
27+
COUNT(DISTINCT url) AS total_urls,
28+
COUNT(DISTINCT IF(allow IS NOT NULL, url, NULL)) AS allow_freq_urls,
29+
COUNT(DISTINCT IF(allow IS NOT NULL, url, NULL)) / COUNT(DISTINCT url) AS allow_pct_urls,
30+
COUNT(DISTINCT IF(sandbox IS NOT NULL, url, NULL)) AS sandbox_freq_urls,
31+
COUNT(DISTINCT IF(sandbox IS NOT NULL, url, NULL)) / COUNT(DISTINCT url) AS sandbox_pct_urls
32+
FROM (
33+
SELECT
34+
client,
35+
date,
36+
url,
37+
SAFE.STRING(iframeAttr.allow) AS allow,
38+
SAFE.STRING(iframeAttr.sandbox) AS sandbox
39+
FROM (
40+
SELECT
41+
client,
42+
date,
43+
page AS url,
44+
JSON_EXTRACT_ARRAY(custom_metrics.security.`iframe-allow-sandbox`) AS iframeAttrs
45+
FROM
46+
`httparchive.crawl.pages`
47+
WHERE
48+
(date = '2020-08-01' OR date = '2021-07-01' OR date = '2022-06-01') AND
49+
is_root_page
50+
) LEFT JOIN UNNEST(iframeAttrs) AS iframeAttr
51+
) JOIN total_iframe_count USING (client, date)
52+
GROUP BY
53+
total_iframes,
54+
client,
55+
date
56+
ORDER BY
57+
date,
58+
client

0 commit comments

Comments
 (0)