
Commit 99ef08f

Merge pull request #2601 from ably/chore/compress-assets
[WEB-4399] Compress static assets post-build
2 parents 16de4fa + 86f6333

8 files changed: +355 −111 lines changed

.circleci/config.yml

Lines changed: 6 additions & 1 deletion
@@ -51,9 +51,11 @@ jobs:
           path: coverage
 
   build:
+    environment:
+      COMPRESS_MAX_THREADS: 8
     executor:
       name: default
-    resource_class: large
+    resource_class: xlarge
     steps:
       - checkout
       - attach_workspace:

@@ -102,6 +104,9 @@ jobs:
       - run:
          name: Require redirects file to be generated
          command: test -f config/nginx-redirects.conf
+      - run:
+          name: Verify all files are compressed
+          command: ./bin/assert-compressed.sh
       - run:
          name: Test nginx configuration
          command: |
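The new COMPRESS_MAX_THREADS: 8 pairs with the bump from large to xlarge: CircleCI's xlarge Docker resource class provides 8 vCPUs, so the compression pool in data/onPostBuild/compressAssets.ts (below) is presumably capped at one worker thread per vCPU instead of its default of 12.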

bin/assert-compressed.sh

Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+#
+# A utility script to assert that all CSS, JS, JSON, and SVG files have corresponding .gz compressed versions
+#
+# Usage: assert-compressed.sh
+#
+
+# Find all files that should be compressed
+FILES=$(find public -type f \( -name "*.css" -o -name "*.js" -o -name "*.json" -o -name "*.svg" \))
+ORIGINAL_COUNT=$(echo "$FILES" | wc -l)
+
+# Check each file for a corresponding .gz version
+MISSING_FILES=()
+for file in $FILES; do
+  if [ ! -f "${file}.gz" ]; then
+    MISSING_FILES+=("$file")
+  fi
+done
+
+MISSING_COUNT=${#MISSING_FILES[@]}
+
+if [ $MISSING_COUNT -gt 0 ]; then
+  echo "Error: Found ${MISSING_COUNT} files without corresponding .gz versions"
+  echo "Missing compressed versions for:"
+  for file in "${MISSING_FILES[@]}"; do
+    echo "  $file"
+  done
+  exit 1
+fi
+
+echo "OK: All ${ORIGINAL_COUNT} files have corresponding .gz compressed versions"

config/nginx.conf.erb

Lines changed: 4 additions & 2 deletions
@@ -83,9 +83,12 @@ http {
     # Removes trailing slashes everywhere (by redirecting)
     rewrite ^/(.*)/$ <%= ENV['SKIP_HTTPS'] == 'true' ? '$scheme' : 'https' %>://$host/$1 permanent;
 
+    # Serve pre-gzipped versions of assets
+    gzip_static on;
+
     <% unless ENV['SKIP_HTTPS'] == 'true' %>
     # Enforce HTTPS
-    if ($http_x_forwarded_proto != "https") {
+    if ($http_x_forwarded_proto != "https") {
       return 301 https://$host$request_uri;
     }
     <% end %>

@@ -103,7 +106,6 @@ http {
     location ~* \.(js|css|jpg|jpeg|gif|svg|png|woff|woff2)$ {
       # expires 1y;
       more_set_headers 'Cache-Control: public';
-      gzip_static on; # to serve pre-gzipped version
 
       # Some browsers still send conditional-GET requests if there's a
       # Last-Modified header or an ETag header even if they haven't
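Note that gzip_static on; moves from the static-assets location block up to server level, so nginx now checks for a sibling .gz file on any matching request rather than only for the listed asset extensions. A quick smoke test, assuming a modern Node runtime with fetch (the asset URL is illustrative):

// Hypothetical check that a pre-compressed asset is served; the URL is made up.
const res = await fetch('https://ably.com/docs/example.css', {
  headers: { 'Accept-Encoding': 'gzip' },
});
// Expect "gzip" when the .gz sibling exists and gzip_static is active.
console.log(res.headers.get('content-encoding'));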

data/onPostBuild/compressAssets.ts

Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
+import { GatsbyNode } from 'gatsby';
+import fastGlob from 'fast-glob';
+import path from 'path';
+import Piscina from 'piscina';
+import { isMainThread } from 'worker_threads';
+import fs from 'fs/promises';
+import { gzipAsync } from '@gfx/zopfli';
+
+/**
+ * This file is inspired by gatsby-plugin-zopfli and is essentially a smaller,
+ * inlined version of it.
+ *
+ * It comes in two parts. The first is the onPostBuild hook for Gatsby, which
+ * finds all the assets we want to compress and then uses Piscina to perform
+ * the compression tasks in parallel.
+ *
+ * The second part is the worker code executed by the worker threads: a simple
+ * function that takes the file path and the output path, and compresses the
+ * file using the gzipAsync function.
+ *
+ * It all happens in this single file.
+ */
+
+export const onPostBuild: GatsbyNode['onPostBuild'] = async ({ reporter }) => {
+  const cwd = path.join(process.cwd(), 'public');
+  const globResult = await fastGlob('**/*.{css,js,json,svg}', { cwd });
+
+  const files = globResult.map((file) => {
+    return {
+      from: path.join(cwd, file),
+      to: path.join(cwd, `${file}.gz`),
+    };
+  });
+
+  const maxThreads = parseInt(process.env.COMPRESS_MAX_THREADS || '12', 10);
+
+  reporter.info(`Compressing ${files.length} files with ${maxThreads} threads`);
+
+  const pool = new Piscina({
+    filename: __filename,
+    execArgv: ['-r', 'ts-node/register'], // Needed for Piscina to work with TypeScript
+    maxThreads,
+  });
+  const compress = files.map((file) => pool.run(file));
+
+  await Promise.all(compress);
+
+  reporter.info(`Compressed ${pool.completed} files - ${(pool.duration / 1000).toFixed(3)}s`);
+};
+
+/**
+ * From here on down is the worker code that is executed by the worker threads
+ * in Piscina to perform the actual compression.
+ */
+
+const options = {
+  numiterations: 15,
+};
+
+interface CompressInputs {
+  from: string;
+  to: string;
+}
+
+const compress = async ({ from, to }: CompressInputs) => {
+  const fileContent = await fs.readFile(from, 'utf8');
+  const compressedContent = await gzipAsync(fileContent, options);
+  await fs.writeFile(to, compressedContent);
+};
+
+// This strange bit of code is to ensure we export a default function
+// when we're being called by the Piscina worker
+if (!isMainThread) {
+  module.exports = async ({ from, to }: CompressInputs) => {
+    await compress({ from, to });
+  };
+}
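Zopfli produces ordinary gzip streams that every browser can decode, but spends far more CPU than zlib searching for a smaller encoding, which is why the work is fanned out to a thread pool and the CI resource class grew. A rough comparison sketch, using Node's built-in zlib alongside the same @gfx/zopfli call (the file path is a placeholder):

import { readFile } from 'fs/promises';
import { gzipSync } from 'zlib';
import { gzipAsync } from '@gfx/zopfli';

// Compare zlib's best compression level against zopfli for a single file.
const main = async () => {
  const input = await readFile('public/example.js');
  const zlibBytes = gzipSync(input, { level: 9 }).length;
  const zopfliBytes = (await gzipAsync(input, { numiterations: 15 })).length;
  console.log({ original: input.length, zlibBytes, zopfliBytes });
};

main();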

data/onPostBuild/index.ts

Lines changed: 6 additions & 106 deletions
@@ -1,109 +1,9 @@
 import { GatsbyNode } from 'gatsby';
-import * as path from 'path';
-import * as fs from 'fs';
+import { onPostBuild as llmstxt } from './llmstxt';
+import { onPostBuild as compressAssets } from './compressAssets';
 
-/* … 104 deleted lines: the llms.txt generation code, moved (with one
-   unnecessary regex escape dropped) into data/onPostBuild/llmstxt.ts
-   and reproduced in full below … */
+export const onPostBuild: GatsbyNode['onPostBuild'] = async (args) => {
+  // Run all onPostBuild functions in sequence
+  await llmstxt(args);
+  await compressAssets(args);
 };
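The composed hook runs the two steps one after the other. Since they write disjoint outputs (public/llms.txt versus the .gz files), they could in principle run concurrently; a hypothetical variant, not what this commit does:

export const onPostBuild: GatsbyNode['onPostBuild'] = async (args) => {
  // Hypothetical alternative: run both post-build steps concurrently.
  await Promise.all([llmstxt(args), compressAssets(args)]);
};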

data/onPostBuild/llmstxt.ts

Lines changed: 109 additions & 0 deletions
@@ -0,0 +1,109 @@
+import { GatsbyNode } from 'gatsby';
+import * as path from 'path';
+import * as fs from 'fs';
+
+/**
+ * This script is used to create a file called llms.txt that contains a list of all the pages in the site.
+ * It is heavily inspired by the gatsby-plugin-sitemap plugin, and stripped down to only what we need.
+ */
+
+const LLMS_TXT_PREAMBLE = `# https://ably.com/docs llms.txt\n`;
+
+const REPORTER_PREFIX = 'onPostBuild:';
+
+interface DocumentQueryResult {
+  site: {
+    siteMetadata: {
+      siteUrl: string;
+    };
+  };
+  allFileHtml: {
+    nodes: {
+      slug: string;
+      meta: {
+        title: string;
+        meta_description: string;
+      };
+    }[];
+  };
+}
+
+const withoutTrailingSlash = (path: string) => (path === `/` ? path : path.replace(/\/$/, ``));
+
+const prefixPath = ({ url, siteUrl, pathPrefix = `` }: { url: string; siteUrl: string; pathPrefix?: string }) => {
+  return new URL(pathPrefix + withoutTrailingSlash(url), siteUrl).toString();
+};
+
+const escapeMarkdown = (text: string) => {
+  // backslash-escape Markdown special chars: \ ` * _ { } [ ] ( ) # + !
+  return text.replace(/([\\`*_{}[\]()#+!])/g, '\\$1');
+};
+
+export const onPostBuild: GatsbyNode['onPostBuild'] = async ({ graphql, reporter, basePath }) => {
+  const query = `
+    query {
+      site {
+        siteMetadata {
+          siteUrl
+        }
+      }
+
+      allFileHtml(filter: { articleType: { in: ["document", "apiReference"] } }) {
+        nodes {
+          slug
+          meta {
+            title
+            meta_description
+          }
+        }
+      }
+    }
+  `;
+  const { data: queryRecords, errors } = await graphql<DocumentQueryResult>(query);
+
+  if (errors) {
+    reporter.panicOnBuild(`Error while running GraphQL query.`);
+    throw errors;
+  }
+
+  if (!queryRecords) {
+    reporter.panicOnBuild(`No documents found.`);
+    throw new Error('No documents found.');
+  }
+
+  const siteUrl = queryRecords.site.siteMetadata.siteUrl;
+
+  if (!siteUrl) {
+    reporter.panicOnBuild(`${REPORTER_PREFIX} Site URL not found.`);
+    throw new Error('Site URL not found.');
+  }
+
+  const allPages = queryRecords.allFileHtml.nodes;
+
+  reporter.info(`${REPORTER_PREFIX} Found ${allPages.length} pages to place into llms.txt`);
+
+  const serializedPages = [LLMS_TXT_PREAMBLE];
+
+  for (const page of allPages) {
+    const { slug, meta } = page;
+    const { title, meta_description } = meta;
+
+    try {
+      const url = prefixPath({ url: slug, siteUrl, pathPrefix: basePath });
+      const safeTitle = escapeMarkdown(title);
+      const link = `[${safeTitle}](${url})`;
+      const line = `- ${[link, meta_description].join(': ')}`;
+      serializedPages.push(line);
+    } catch (err) {
+      reporter.panic(`${REPORTER_PREFIX} Error serializing pages`, err as Error);
+    }
+  }
+
+  const llmsTxtPath = path.join(process.cwd(), 'public', 'llms.txt');
+  try {
+    fs.writeFileSync(llmsTxtPath, serializedPages.join('\n'));
+    reporter.info(`${REPORTER_PREFIX} Successfully wrote llms.txt with ${serializedPages.length} pages`);
+  } catch (err) {
+    reporter.panic(`${REPORTER_PREFIX} Error writing llms.txt file`, err as Error);
+  }
+};
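For a sense of the generated output, each page becomes a single Markdown bullet: an escaped title linking to the page URL, then the meta description after a colon. An illustrative example with invented inputs:

// All values here are made up, purely to show the line shape.
const link = `[${escapeMarkdown('Channels [beta]')}](https://ably.com/docs/channels)`;
const line = `- ${[link, 'Publish and subscribe to messages.'].join(': ')}`;
// line === '- [Channels \\[beta\\]](https://ably.com/docs/channels): Publish and subscribe to messages.'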
