Skip to content

Commit 94800e1

Browse files
wardpeet authored and brendankenny committed
report: add table filter for third-party urls (#6351)
1 parent 0e513a6 commit 94800e1

14 files changed

+311
-83
lines changed

lighthouse-core/audits/seo/canonical.js

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -35,19 +35,6 @@ const UIStrings = {
3535

3636
const str_ = i18n.createMessageInstanceIdFn(__filename, UIStrings);
3737

38-
/**
39-
* Returns a primary domain for provided URL (e.g. http://www.example.com -> example.com).
40-
* Note that it does not take second-level domains into account (.co.uk).
41-
* @param {URL} url
42-
* @returns {string}
43-
*/
44-
function getPrimaryDomain(url) {
45-
return url.hostname
46-
.split('.')
47-
.slice(-2)
48-
.join('.');
49-
}
50-
5138
/**
5239
* @typedef CanonicalURLData
5340
* @property {Set<string>} uniqueCanonicalURLs
@@ -173,7 +160,7 @@ class Canonical extends Audit {
173160

174161
// bing and yahoo don't allow canonical URLs pointing to different domains, it's also
175162
// a common mistake to publish a page with canonical pointing to e.g. a test domain or localhost
176-
if (getPrimaryDomain(canonicalURL) !== getPrimaryDomain(baseURL)) {
163+
if (!URL.rootDomainsMatch(canonicalURL, baseURL)) {
177164
return {
178165
score: 0,
179166
explanation: str_(UIStrings.explanationDifferentDomain, {url: canonicalURL}),

lighthouse-core/lib/i18n/en-US.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1343,6 +1343,10 @@
13431343
"message": "Expand snippet",
13441344
"description": "Label for button that shows all lines of the snippet when clicked"
13451345
},
1346+
"lighthouse-core/report/html/renderer/util.js | thirdPartyResourcesLabel": {
1347+
"message": "Show 3rd-party resources",
1348+
"description": "This label is for a checkbox above a table of items loaded by a web page. The checkbox is used to show or hide third-party (or \"3rd-party\") resources in the table, where \"third-party resources\" refers to items loaded by a web page from URLs that aren't controlled by the owner of the web page."
1349+
},
13461350
"lighthouse-core/report/html/renderer/util.js | toplevelWarningsMessage": {
13471351
"message": "There were issues affecting this run of Lighthouse:",
13481352
"description": "Label shown preceding any important warnings that may have invalidated the entire report. For example, if the user has Chrome extensions installed, they may add enough performance overhead that Lighthouse's performance metrics are unreliable. If shown, this will be displayed at the top of the report UI."

lighthouse-core/lib/url-shim.js

Lines changed: 7 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,10 @@
99
* URL shim so we keep our code DRY
1010
*/
1111

12-
/* global self */
12+
/* global URL */
1313

1414
const Util = require('../report/html/renderer/util.js');
1515

16-
// Type cast so tsc sees window.URL and require('url').URL as sufficiently equivalent.
17-
const URL = /** @type {!Window["URL"]} */ (typeof self !== 'undefined' && self.URL) ||
18-
require('url').URL;
19-
20-
// 25 most used tld plus one domains (aka public suffixes) from http archive.
21-
// @see https://github.com/GoogleChrome/lighthouse/pull/5065#discussion_r191926212
22-
// The canonical list is https://publicsuffix.org/learn/ but we're only using subset to conserve bytes
23-
const listOfTlds = [
24-
'com', 'co', 'gov', 'edu', 'ac', 'org', 'go', 'gob', 'or', 'net', 'in', 'ne', 'nic', 'gouv',
25-
'web', 'spb', 'blog', 'jus', 'kiev', 'mil', 'wi', 'qc', 'ca', 'bel', 'on',
26-
];
27-
2816
const allowedProtocols = [
2917
'https:', 'http:', 'chrome:', 'chrome-extension:',
3018
];
@@ -99,34 +87,18 @@ class URLShim extends URL {
9987
}
10088
}
10189

102-
/**
103-
* Gets the tld of a domain
104-
*
105-
* @param {string} hostname
106-
* @return {string} tld
107-
*/
108-
static getTld(hostname) {
109-
const tlds = hostname.split('.').slice(-2);
110-
111-
if (!listOfTlds.includes(tlds[0])) {
112-
return `.${tlds[tlds.length - 1]}`;
113-
}
114-
115-
return `.${tlds.join('.')}`;
116-
}
117-
11890
/**
11991
* Check if the root domains of two URLs match
12092
*
121-
* @param {string} urlA
122-
* @param {string} urlB
93+
* @param {string|URL} urlA
94+
* @param {string|URL} urlB
12395
*/
12496
static rootDomainsMatch(urlA, urlB) {
12597
let urlAInfo;
12698
let urlBInfo;
12799
try {
128-
urlAInfo = new URL(urlA);
129-
urlBInfo = new URL(urlB);
100+
urlAInfo = Util.createOrReturnURL(urlA);
101+
urlBInfo = Util.createOrReturnURL(urlB);
130102
} catch (err) {
131103
return false;
132104
}
@@ -135,14 +107,9 @@ class URLShim extends URL {
135107
return false;
136108
}
137109

138-
const tldA = URLShim.getTld(urlAInfo.hostname);
139-
const tldB = URLShim.getTld(urlBInfo.hostname);
140-
141110
// get the string before the tld
142-
const urlARootDomain = urlAInfo.hostname.replace(new RegExp(`${tldA}$`), '')
143-
.split('.').splice(-1)[0];
144-
const urlBRootDomain = urlBInfo.hostname.replace(new RegExp(`${tldB}$`), '')
145-
.split('.').splice(-1)[0];
111+
const urlARootDomain = Util.getRootDomain(urlAInfo);
112+
const urlBRootDomain = Util.getRootDomain(urlBInfo);
146113

147114
return urlARootDomain === urlBRootDomain;
148115
}

lighthouse-core/report/html/renderer/details-renderer.js

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,11 @@ class DetailsRenderer {
124124
element.appendChild(hostElem);
125125
}
126126

127-
if (title) element.title = url;
127+
if (title) {
128+
element.title = url;
129+
// set the url on the element's dataset which we use to check 3rd party origins
130+
element.dataset.url = url;
131+
}
128132
return element;
129133
}
130134

lighthouse-core/report/html/renderer/report-ui-features.js

Lines changed: 98 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,25 @@
1616
*/
1717
'use strict';
1818

19+
/* eslint-env browser */
20+
1921
/**
2022
* @fileoverview Adds export button, print, and other dynamic functionality to
2123
* the report.
2224
*/
2325

24-
/* globals self URL Blob CustomEvent getFilenamePrefix window */
26+
/* globals getFilenamePrefix Util */
2527

2628
/** @typedef {import('./dom.js')} DOM */
2729

30+
/**
31+
* @param {HTMLTableElement} tableEl
32+
* @return {Array<HTMLTableRowElement>}
33+
*/
34+
function getTableRows(tableEl) {
35+
return Array.from(tableEl.tBodies[0].rows);
36+
}
37+
2838
class ReportUIFeatures {
2939
/**
3040
* @param {DOM} dom
@@ -74,6 +84,7 @@ class ReportUIFeatures {
7484
this._setupMediaQueryListeners();
7585
this._setupSmoothScroll();
7686
this._setupExportButton();
87+
this._setupThirdPartyFilter();
7788
this._setupStickyHeaderElements();
7889
this._setUpCollapseDetailsAfterPrinting();
7990
this._resetUIState();
@@ -131,6 +142,92 @@ class ReportUIFeatures {
131142
dropdown.addEventListener('click', this.onExport);
132143
}
133144

145+
_setupThirdPartyFilter() {
146+
// Some audits should not display the third party filter option.
147+
const thirdPartyFilterAuditExclusions = [
148+
// This audit deals explicitly with third party resources.
149+
'uses-rel-preconnect',
150+
];
151+
152+
// Get all tables with a text url column.
153+
/** @type {Array<HTMLTableElement>} */
154+
const tables = Array.from(this._document.querySelectorAll('.lh-table'));
155+
const tablesWithUrls = tables
156+
.filter(el => el.querySelector('td.lh-table-column--url'))
157+
.filter(el => {
158+
const containingAudit = el.closest('.lh-audit');
159+
if (!containingAudit) throw new Error('.lh-table not within audit');
160+
return !thirdPartyFilterAuditExclusions.includes(containingAudit.id);
161+
});
162+
163+
tablesWithUrls.forEach((tableEl, index) => {
164+
const thirdPartyRows = this._getThirdPartyRows(tableEl, this.json.finalUrl);
165+
// No 3rd parties, no checkbox!
166+
if (!thirdPartyRows.size) return;
167+
168+
// create input box
169+
const filterTemplate = this._dom.cloneTemplate('#tmpl-lh-3p-filter', this._document);
170+
const filterInput = this._dom.find('input', filterTemplate);
171+
const id = `lh-3p-filter-label--${index}`;
172+
173+
filterInput.id = id;
174+
filterInput.addEventListener('change', e => {
175+
// Remove rows from the dom and keep track of them to readd on uncheck.
176+
// Why removing instead of hiding? To keep nth-child(even) background-colors working.
177+
if (e.target instanceof HTMLInputElement && !e.target.checked) {
178+
for (const row of thirdPartyRows.values()) {
179+
row.remove();
180+
}
181+
} else {
182+
// Add row elements back to original positions.
183+
for (const [position, row] of thirdPartyRows.entries()) {
184+
const childrenArr = getTableRows(tableEl);
185+
tableEl.tBodies[0].insertBefore(row, childrenArr[position]);
186+
}
187+
}
188+
});
189+
190+
this._dom.find('label', filterTemplate).setAttribute('for', id);
191+
this._dom.find('.lh-3p-filter-count', filterTemplate).textContent =
192+
`${thirdPartyRows.size}`;
193+
this._dom.find('.lh-3p-ui-string', filterTemplate).textContent =
194+
Util.UIStrings.thirdPartyResourcesLabel;
195+
196+
// Finally, add checkbox to the DOM.
197+
if (!tableEl.parentNode) return; // Keep tsc happy.
198+
tableEl.parentNode.insertBefore(filterTemplate, tableEl);
199+
});
200+
}
201+
202+
/**
203+
* From a table with URL entries, finds the rows containing third-party URLs
204+
* and returns a Map of those rows, mapping from row index to row Element.
205+
* @param {HTMLTableElement} el
206+
* @param {string} finalUrl
207+
* @return {Map<number, HTMLTableRowElement>}
208+
*/
209+
_getThirdPartyRows(el, finalUrl) {
210+
const urlItems = this._dom.findAll('.lh-text__url', el);
211+
const finalUrlRootDomain = Util.getRootDomain(finalUrl);
212+
213+
/** @type {Map<number, HTMLTableRowElement>} */
214+
const thirdPartyRows = new Map();
215+
for (const urlItem of urlItems) {
216+
const datasetUrl = urlItem.dataset.url;
217+
if (!datasetUrl) continue;
218+
const isThirdParty = Util.getRootDomain(datasetUrl) !== finalUrlRootDomain;
219+
if (!isThirdParty) continue;
220+
221+
const urlRowEl = urlItem.closest('tr');
222+
if (urlRowEl) {
223+
const rowPosition = getTableRows(el).indexOf(urlRowEl);
224+
thirdPartyRows.set(rowPosition, urlRowEl);
225+
}
226+
}
227+
228+
return thirdPartyRows;
229+
}
230+
134231
_setupStickyHeaderElements() {
135232
this.topbarEl = this._dom.find('.lh-topbar', this._document);
136233
this.scoreScaleEl = this._dom.find('.lh-scorescale', this._document);

lighthouse-core/report/html/renderer/util.js

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
*/
1717
'use strict';
1818

19-
/* globals self URL */
19+
/* globals self, URL */
2020

2121
const ELLIPSIS = '\u2026';
2222
const NBSP = '\xa0';
@@ -29,6 +29,14 @@ const RATINGS = {
2929
ERROR: {label: 'error'},
3030
};
3131

32+
// 25 most used tld plus one domains (aka public suffixes) from http archive.
33+
// @see https://github.com/GoogleChrome/lighthouse/pull/5065#discussion_r191926212
34+
// The canonical list is https://publicsuffix.org/learn/ but we're only using subset to conserve bytes
35+
const listOfTlds = [
36+
'com', 'co', 'gov', 'edu', 'ac', 'org', 'go', 'gob', 'or', 'net', 'in', 'ne', 'nic', 'gouv',
37+
'web', 'spb', 'blog', 'jus', 'kiev', 'mil', 'wi', 'qc', 'ca', 'bel', 'on',
38+
];
39+
3240
class Util {
3341
static get PASS_THRESHOLD() {
3442
return PASS_THRESHOLD;
@@ -336,6 +344,51 @@ class Util {
336344
};
337345
}
338346

347+
/**
348+
* @param {string|URL} value
349+
* @return {URL}
350+
*/
351+
static createOrReturnURL(value) {
352+
if (value instanceof URL) {
353+
return value;
354+
}
355+
356+
return new URL(value);
357+
}
358+
359+
/**
360+
* Gets the tld of a domain
361+
*
362+
* @param {string} hostname
363+
* @return {string} tld
364+
*/
365+
static getTld(hostname) {
366+
const tlds = hostname.split('.').slice(-2);
367+
368+
if (!listOfTlds.includes(tlds[0])) {
369+
return `.${tlds[tlds.length - 1]}`;
370+
}
371+
372+
return `.${tlds.join('.')}`;
373+
}
374+
375+
/**
376+
* Returns the root domain for the provided URL (e.g. http://www.example.com -> example.com).
377+
* @param {string|URL} url absolute URL string (a bare hostname will not parse) or URL object
378+
* @returns {string}
379+
*/
380+
static getRootDomain(url) {
381+
const hostname = Util.createOrReturnURL(url).hostname;
382+
const tld = Util.getTld(hostname);
383+
384+
// tld is .com or .co.uk; splitting on '.' also yields a leading empty string, so the length is 1 too big
385+
// .com => 2 & .co.uk => 3
386+
const splitTld = tld.split('.');
387+
388+
// get TLD + root domain
389+
return hostname.split('.').slice(-splitTld.length).join('.');
390+
}
391+
339392
/**
340393
* @param {LH.Config.Settings} settings
341394
* @return {Array<{name: string, description: string}>}
@@ -524,6 +577,9 @@ Util.UIStrings = {
524577
lsPerformanceCategoryDescription: '[Lighthouse](https://developers.google.com/web/tools/lighthouse/) analysis of the current page on an emulated mobile network. Values are estimated and may vary.',
525578
/** Title of the lab data section of the Performance category. Within this section are various speed metrics which quantify the pageload performance into values presented in seconds and milliseconds. "Lab" is an abbreviated form of "laboratory", and refers to the fact that the data is from a controlled test of a website, not measurements from real users visiting that site. */
526579
labDataTitle: 'Lab Data',
580+
581+
/** This label is for a checkbox above a table of items loaded by a web page. The checkbox is used to show or hide third-party (or "3rd-party") resources in the table, where "third-party resources" refers to items loaded by a web page from URLs that aren't controlled by the owner of the web page. */
582+
thirdPartyResourcesLabel: 'Show 3rd-party resources',
527583
};
528584

529585
if (typeof module !== 'undefined' && module.exports) {

0 commit comments

Comments
 (0)