Skip to content

Commit cd34e35

Browse files
feat: Add screenshot storage support to broken link checker (#123)
* get-cloud-region (#100) Add functionality to synthetics-sdk-api to extract cloud region during GCF execution * stoage proto api (#101) * expose resolveProjectId (#104) * update to capture_condition (#109) * chore(deps): bump ip from 1.1.8 to 1.1.9 (#105) * chore(deps): bump ip from 1.1.8 to 1.1.9 Bumps [ip](https://github.com/indutny/node-ip) from 1.1.8 to 1.1.9. - [Commits](indutny/node-ip@v1.1.8...v1.1.9) --- updated-dependencies: - dependency-name: ip dependency-type: indirect ... Signed-off-by: dependabot[bot] <[email protected]> * Empty-Commit --------- Signed-off-by: dependabot[bot] <[email protected]> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Adam Weidman <[email protected]> * add samples tags (#108) * blc-api-integration-def (#102) * resolveProjectId present (#106) * take-screenshots (#107) * rebase-capture-condition (#110) * refactor-integrations (#112) * sanitize strings (#113) * Take and populate screenshot (#114) * screenshots-prop * broken_links.spec working * fix naming * pass-args * response to comments * change default (#118) * update synthetics-sdk-api to point to new npm pkg --------- Signed-off-by: dependabot[bot] <[email protected]> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
1 parent 6c7fedb commit cd34e35

17 files changed

+2210
-581
lines changed

package-lock.json

Lines changed: 714 additions & 89 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/synthetics-sdk-broken-links/package.json

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,20 +27,24 @@
2727
"@types/chai": "^4.3.4",
2828
"@types/express": "^4.17.17",
2929
"@types/node": "^18.15.10",
30+
"@types/proxyquire": "^1.3.31",
3031
"@types/sinon": "^10.0.16",
3132
"@types/supertest": "^2.0.12",
3233
"chai": "^4.3.7",
3334
"chai-exclude": "^2.1.0",
3435
"express": "^4.18.2",
35-
"sinon": "^15.2.0",
36+
"proxyquire": "^2.1.3",
37+
"node-mocks-http": "^1.13.0",
38+
"sinon": "^16.1.1",
3639
"supertest": "^6.3.3",
3740
"synthetics-sdk-broken-links": "file:./"
3841
},
3942
"engines": {
4043
"node": ">=18"
4144
},
4245
"dependencies": {
43-
"@google-cloud/synthetics-sdk-api": "^0.5.1",
46+
"@google-cloud/storage": "^7.7.0",
47+
"@google-cloud/synthetics-sdk-api": "^0.6.0",
4448
"puppeteer": "21.3.6"
4549
}
4650
}

packages/synthetics-sdk-broken-links/src/broken_links.ts

Lines changed: 65 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,13 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
import puppeteer, { Browser, Page } from 'puppeteer';
15+
// Internal Project Files
1616
import {
17+
BaseError,
1718
BrokenLinksResultV1_BrokenLinkCheckerOptions,
1819
BrokenLinksResultV1_SyntheticLinkResult,
19-
instantiateMetadata,
2020
getRuntimeMetadata,
21+
instantiateMetadata,
2122
SyntheticResult,
2223
} from '@google-cloud/synthetics-sdk-api';
2324
import {
@@ -32,10 +33,19 @@ import {
3233
checkLinks,
3334
closeBrowser,
3435
closePagePool,
35-
retrieveLinksFromPage,
3636
openNewPage,
37+
retrieveLinksFromPage,
3738
} from './navigation_func';
38-
import { setDefaultOptions, validateInputOptions } from './options_func';
39+
import { processOptions } from './options_func';
40+
import {
41+
createStorageClientIfStorageSelected,
42+
getOrCreateStorageBucket,
43+
StorageParameters,
44+
} from './storage_func';
45+
46+
// External Dependencies
47+
import { Bucket } from '@google-cloud/storage';
48+
import puppeteer, { Browser, Page } from 'puppeteer';
3949

4050
export interface BrokenLinkCheckerOptions {
4151
origin_uri: string;
@@ -48,6 +58,7 @@ export interface BrokenLinkCheckerOptions {
4858
wait_for_selector?: string;
4959
per_link_options?: { [key: string]: PerLinkOption };
5060
total_synthetic_timeout_millis?: number;
61+
screenshot_options?: ScreenshotOptions;
5162
}
5263

5364
export interface PerLinkOption {
@@ -70,6 +81,17 @@ export enum StatusClass {
7081
STATUS_CLASS_ANY = 'STATUS_CLASS_ANY',
7182
}
7283

84+
/**
 * Screenshot settings for the broken link checker, supplied through the
 * optional `screenshot_options` field of `BrokenLinkCheckerOptions`.
 *
 * - `storage_location`: where screenshots are stored.
 *   NOTE(review): appears to have the form `<bucket>[/<folder>]` — confirm
 *   against `storage_func`.
 * - `capture_condition`: when screenshots are captured; see `CaptureCondition`.
 */
export interface ScreenshotOptions {
85+
storage_location?: string;
86+
capture_condition?: CaptureCondition;
87+
}
88+
89+
/**
 * Conditions under which screenshots are captured; used by
 * `ScreenshotOptions.capture_condition`.
 *
 * NOTE(review): member semantics below are inferred from
 * `shouldTakeScreenshot`, which checks the API's equivalent enum — confirm.
 * - `NONE`: presumably never capture a screenshot.
 * - `FAILING`: capture only for links that did not pass.
 * - `ALL`: capture for every link.
 */
export enum CaptureCondition {
90+
NONE = 'NONE',
91+
FAILING = 'FAILING',
92+
ALL = 'ALL',
93+
}
94+
7395
let synthetics_sdk_broken_links_package;
7496
try {
7597
synthetics_sdk_broken_links_package = require('../package.json');
@@ -79,7 +101,11 @@ try {
79101
instantiateMetadata(synthetics_sdk_broken_links_package);
80102

81103
export async function runBrokenLinks(
82-
inputOptions: BrokenLinkCheckerOptions
104+
inputOptions: BrokenLinkCheckerOptions,
105+
args: {
106+
executionId: string | undefined;
107+
checkId: string | undefined;
108+
}
83109
): Promise<SyntheticResult> {
84110
// init
85111
const startTime = new Date().toISOString();
@@ -96,6 +122,30 @@ export async function runBrokenLinks(
96122
const [timeLimitPromise, timeLimitTimeout, timeLimitresolver] =
97123
getTimeLimitPromise(startTime, adjusted_synthetic_timeout_millis);
98124

125+
const errors: BaseError[] = [];
126+
127+
// Initialize Storage Client with Error Handling. Set to `null` if
128+
// capture_condition is 'None'
129+
const storageClient = createStorageClientIfStorageSelected(
130+
errors,
131+
options.screenshot_options!.capture_condition
132+
);
133+
134+
// Bucket validation
135+
const bucket: Bucket | null = await getOrCreateStorageBucket(
136+
storageClient,
137+
options.screenshot_options!.storage_location,
138+
errors
139+
);
140+
141+
const storageParams: StorageParameters = {
142+
storageClient: storageClient,
143+
bucket: bucket,
144+
checkId: args.checkId || '_',
145+
executionId: args.executionId || '_',
146+
screenshotNumber: 1,
147+
};
148+
99149
const followed_links: BrokenLinksResultV1_SyntheticLinkResult[] = [];
100150

101151
const checkLinksPromise = async () => {
@@ -109,7 +159,8 @@ export async function runBrokenLinks(
109159
originPage,
110160
options,
111161
startTime,
112-
adjusted_synthetic_timeout_millis
162+
adjusted_synthetic_timeout_millis,
163+
storageParams
113164
)
114165
);
115166

@@ -131,7 +182,8 @@ export async function runBrokenLinks(
131182
linksToFollow,
132183
options,
133184
startTime,
134-
adjusted_synthetic_timeout_millis
185+
adjusted_synthetic_timeout_millis,
186+
storageParams
135187
))
136188
);
137189
return true;
@@ -149,7 +201,9 @@ export async function runBrokenLinks(
149201
startTime,
150202
runtime_metadata,
151203
options,
152-
followed_links
204+
followed_links,
205+
storageParams,
206+
errors
153207
);
154208
} catch (err) {
155209
const errorMessage =
@@ -176,7 +230,8 @@ async function checkOriginLink(
176230
originPage: Page,
177231
options: BrokenLinksResultV1_BrokenLinkCheckerOptions,
178232
startTime: string,
179-
adjusted_synthetic_timeout_millis: number
233+
adjusted_synthetic_timeout_millis: number,
234+
storageParams: StorageParameters
180235
): Promise<BrokenLinksResultV1_SyntheticLinkResult> {
181236
let originLinkResult: BrokenLinksResultV1_SyntheticLinkResult;
182237

@@ -193,6 +248,7 @@ async function checkOriginLink(
193248
originPage,
194249
{ target_uri: options.origin_uri, anchor_text: '', html_element: '' },
195250
options,
251+
storageParams,
196252
true
197253
);
198254

@@ -263,16 +319,3 @@ async function scrapeLinks(
263319
options.link_order
264320
);
265321
}
266-
267-
/**
268-
* Validates input options and sets defaults in `options`.
269-
*
270-
* @param inputOptions - The input options for the broken link checker.
271-
* @returns The processed broken link checker options.
272-
*/
273-
function processOptions(
274-
inputOptions: BrokenLinkCheckerOptions
275-
): BrokenLinksResultV1_BrokenLinkCheckerOptions {
276-
const validOptions = validateInputOptions(inputOptions);
277-
return setDefaultOptions(validOptions);
278-
}

packages/synthetics-sdk-broken-links/src/handlers.ts

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,15 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
import { runBrokenLinks, BrokenLinkCheckerOptions } from './broken_links';
15+
// Standard Libraries
1616
import { Request, Response } from 'express';
1717

18+
// Internal Project Files
19+
import { runBrokenLinks, BrokenLinkCheckerOptions } from './broken_links';
20+
21+
const syntheticExecutionIdHeader = 'Synthetic-Execution-Id';
22+
const checkIdHeader = 'Check-Id';
23+
1824
/**
1925
* Middleware for easy invocation of the Synthetics SDK broken link checker. It may be used to
2026
* register a Google Cloud Function HTTP function or an Express.js-compatible handler.
@@ -26,5 +32,10 @@ import { Request, Response } from 'express';
2632
export function runBrokenLinksHandler(options: BrokenLinkCheckerOptions) {
2733
// eslint-disable-next-line @typescript-eslint/no-explicit-any
2834
return async (req: Request, res: Response): Promise<any> =>
29-
res.send(await runBrokenLinks(options));
35+
res.send(
36+
await runBrokenLinks(options, {
37+
executionId: req.get(syntheticExecutionIdHeader),
38+
checkId: req.get(checkIdHeader),
39+
})
40+
);
3041
}

packages/synthetics-sdk-broken-links/src/index.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@
1313
// limitations under the License.
1414

1515
export {
16-
runBrokenLinks,
1716
BrokenLinkCheckerOptions,
17+
LinkOrder,
1818
PerLinkOption,
19+
runBrokenLinks,
1920
StatusClass,
20-
LinkOrder,
2121
} from './broken_links';
2222
export * from './handlers';
2323
export * from '@google-cloud/synthetics-sdk-api';

packages/synthetics-sdk-broken-links/src/link_utils.ts

Lines changed: 105 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,16 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
import { HTTPResponse } from 'puppeteer';
15+
// Standard Libraries
16+
import * as path from 'path';
17+
18+
// Internal Project Files
1619
import {
20+
BaseError,
1721
BrokenLinksResultV1,
1822
BrokenLinksResultV1_BrokenLinkCheckerOptions,
1923
BrokenLinksResultV1_BrokenLinkCheckerOptions_LinkOrder,
24+
BrokenLinksResultV1_BrokenLinkCheckerOptions_ScreenshotOptions_CaptureCondition as ApiCaptureCondition,
2025
BrokenLinksResultV1_SyntheticLinkResult,
2126
GenericResultV1,
2227
getRuntimeMetadata,
@@ -25,6 +30,10 @@ import {
2530
SyntheticResult,
2631
} from '@google-cloud/synthetics-sdk-api';
2732

33+
// External Dependencies
34+
import { HTTPResponse } from 'puppeteer';
35+
import { StorageParameters } from './storage_func';
36+
2837
/**
2938
* Represents an intermediate link with its properties.
3039
*/
@@ -153,6 +162,8 @@ function parseFollowedLinks(
153162
options: {} as BrokenLinksResultV1_BrokenLinkCheckerOptions,
154163
origin_link_result: {} as BrokenLinksResultV1_SyntheticLinkResult,
155164
followed_link_results: [],
165+
execution_data_storage_path: '',
166+
errors: [],
156167
};
157168

158169
for (const link of followed_links) {
@@ -216,12 +227,21 @@ export function createSyntheticResult(
216227
start_time: string,
217228
runtime_metadata: { [key: string]: string },
218229
options: BrokenLinksResultV1_BrokenLinkCheckerOptions,
219-
followed_links: BrokenLinksResultV1_SyntheticLinkResult[]
230+
followed_links: BrokenLinksResultV1_SyntheticLinkResult[],
231+
storageParams: StorageParameters,
232+
errors: BaseError[]
220233
): SyntheticResult {
221234
// Create BrokenLinksResultV1 by parsing followed links and setting options
222235
const broken_links_result: BrokenLinksResultV1 =
223236
parseFollowedLinks(followed_links);
224237
broken_links_result.options = options;
238+
broken_links_result.errors = errors;
239+
broken_links_result.execution_data_storage_path = storageParams.bucket
240+
? 'gs://' +
241+
storageParams.bucket.name +
242+
'/' +
243+
getStoragePathToExecution(storageParams, options)
244+
: '';
225245

226246
// Create SyntheticResult object
227247
const synthetic_result: SyntheticResult = {
@@ -264,6 +284,89 @@ export function shuffleAndTruncate(
264284
return linksToFollow.slice(0, link_limit! - 1);
265285
}
266286

287+
/**
 * Determines whether a screenshot should be taken for a link, based on the
 * configured capture condition and the outcome of navigating to the link.
 *
 * @param options - Broken link checker options; only
 *   `screenshot_options.capture_condition` is read.
 * @param passed - Whether the link navigation succeeded.
 * @returns `true` when the capture condition is `ALL`, or when it is
 *   `FAILING` and the link did not pass; `false` otherwise, including when no
 *   capture condition is configured.
 */
export function shouldTakeScreenshot(
  options: BrokenLinksResultV1_BrokenLinkCheckerOptions,
  passed: boolean
): boolean {
  // Read the condition once through optional chaining so that options without
  // `screenshot_options` mean "no screenshot" instead of throwing a TypeError.
  const captureCondition = options.screenshot_options?.capture_condition;
  if (captureCondition === undefined || captureCondition === null) {
    return false;
  }

  if (captureCondition === ApiCaptureCondition.ALL) return true;
  return captureCondition === ApiCaptureCondition.FAILING && !passed;
}
305+
306+
/**
 * Sanitizes a string so it can be used safely as a single storage object name
 * segment.
 *
 * Sanitization rules, applied in order:
 * 1. `null`, `undefined` and the empty string become `_`.
 * 2. A leading `.well-known/acme-challenge/` prefix is replaced with `_`.
 * 3. Each carriage return, line feed, control character in the range
 *    U+007F to U+009F, and each of `# [ ] * ? : " < > | /` is replaced
 *    with `_`.
 * 4. Leading and trailing whitespace is trimmed, and every remaining run of
 *    whitespace is collapsed into a single `_`.
 * 5. If the result is empty, `.` or `..`, `_` is returned instead.
 *
 * @param inputString - The original object name; may be `null` or `undefined`.
 * @returns The sanitized object name; never an empty string.
 */
export function sanitizeObjectName(
  inputString: string | null | undefined
): string {
  if (!inputString) return '_';

  // Carriage return, line feed, control characters, special characters and
  // the path separator.
  // eslint-disable-next-line no-useless-escape
  const invalidCharactersRegex = /[\r\n\u007F-\u009F#\[\]*?:"<>|/]/g;
  const wellKnownPrefixRegex = /^\.well-known\/acme-challenge\//;

  const sanitized = inputString
    .replace(wellKnownPrefixRegex, '_') // Replace forbidden prefix
    .replace(invalidCharactersRegex, '_') // Replace invalid characters
    .trim() // Drop leading/trailing whitespace
    .replace(/\s+/g, '_'); // Collapse whitespace runs into underscores

  // Check reserved names AFTER sanitizing: trimming can turn inputs such as
  // ' . ' into '.', and whitespace-only input into an empty string.
  return sanitized === '' || sanitized === '.' || sanitized === '..'
    ? '_'
    : sanitized;
}
337+
338+
export function getStoragePathToExecution(
339+
storageParams: StorageParameters,
340+
options: BrokenLinksResultV1_BrokenLinkCheckerOptions
341+
) {
342+
try {
343+
const storageLocation = options.screenshot_options!.storage_location;
344+
let writeDestination = '';
345+
346+
// extract folder name for a given storage location. If there is no '/'
347+
// present then the storageLocation is just a folder
348+
const firstSlashIndex = storageLocation.indexOf('/');
349+
if (firstSlashIndex !== -1) {
350+
writeDestination = storageLocation.substring(firstSlashIndex + 1);
351+
}
352+
353+
// Ensure writeDestination ends with a slash for proper path joining
354+
if (writeDestination && !writeDestination.endsWith('/')) {
355+
writeDestination += '/';
356+
}
357+
358+
writeDestination = path.join(
359+
writeDestination,
360+
storageParams.checkId,
361+
storageParams.executionId
362+
);
363+
364+
return writeDestination;
365+
} catch (err) {
366+
return '';
367+
}
368+
}
369+
267370
export function getTimeLimitPromise(
268371
startTime: string,
269372
totalTimeoutMillis: number,

0 commit comments

Comments
 (0)