
Commit 99ef08f

Merge pull request #2601 from ably/chore/compress-assets
[WEB-4399] Compress static assets post-build
2 parents 16de4fa + 86f6333

8 files changed: +355 −111 lines changed

.circleci/config.yml

Lines changed: 6 additions & 1 deletion
@@ -51,9 +51,11 @@ jobs:
           path: coverage
 
   build:
+    environment:
+      COMPRESS_MAX_THREADS: 8
     executor:
       name: default
-    resource_class: large
+    resource_class: xlarge
     steps:
       - checkout
       - attach_workspace:

@@ -102,6 +104,9 @@ jobs:
       - run:
          name: Require redirects file to be generated
          command: test -f config/nginx-redirects.conf
+      - run:
+          name: Verify all files are compressed
+          command: ./bin/assert-compressed.sh
       - run:
          name: Test nginx configuration
          command: |
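The new COMPRESS_MAX_THREADS: 8 pairs with the bump from large to xlarge: CircleCI's xlarge Docker resource class provides 8 vCPUs, so the compression pool in data/onPostBuild/compressAssets.ts (below) is presumably capped at one worker thread per vCPU instead of its default of 12.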

bin/assert-compressed.sh

Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+#
+# A utility script to assert that all CSS, JS, JSON, and SVG files have corresponding .gz compressed versions
+#
+# Usage: assert-compressed.sh
+#
+
+# Find all files that should be compressed
+FILES=$(find public -type f \( -name "*.css" -o -name "*.js" -o -name "*.json" -o -name "*.svg" \))
+ORIGINAL_COUNT=$(echo "$FILES" | wc -l)
+
+# Check each file for a corresponding .gz version
+MISSING_FILES=()
+for file in $FILES; do
+  if [ ! -f "${file}.gz" ]; then
+    MISSING_FILES+=("$file")
+  fi
+done
+
+MISSING_COUNT=${#MISSING_FILES[@]}
+
+if [ $MISSING_COUNT -gt 0 ]; then
+  echo "Error: Found ${MISSING_COUNT} files without corresponding .gz versions"
+  echo "Missing compressed versions for:"
+  for file in "${MISSING_FILES[@]}"; do
+    echo "  $file"
+  done
+  exit 1
+fi
+
+echo "OK: All ${ORIGINAL_COUNT} files have corresponding .gz compressed versions"

config/nginx.conf.erb

Lines changed: 4 additions & 2 deletions
@@ -83,9 +83,12 @@ http {
     # Removes trailing slashes everywhere (by redirecting)
     rewrite ^/(.*)/$ <%= ENV['SKIP_HTTPS'] == 'true' ? '$scheme' : 'https' %>://$host/$1 permanent;
 
+    # Serve pre-gzipped versions of assets
+    gzip_static on;
+
     <% unless ENV['SKIP_HTTPS'] == 'true' %>
     # Enforce HTTPS
-    if ($http_x_forwarded_proto != "https") {
+    if ($http_x_forwarded_proto != "https") {
       return 301 https://$host$request_uri;
     }
     <% end %>

@@ -103,7 +106,6 @@ http {
     location ~* \.(js|css|jpg|jpeg|gif|svg|png|woff|woff2)$ {
       # expires 1y;
       more_set_headers 'Cache-Control: public';
-      gzip_static on; # to serve pre-gzipped version
 
       # Some browsers still send conditional-GET requests if there's a
       # Last-Modified header or an ETag header even if they haven't
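Note that gzip_static on; moves from the static-assets location block up to server level, so nginx now checks for a sibling .gz file on any matching request rather than only for the listed asset extensions. A quick smoke test, assuming a modern Node runtime with fetch (the asset URL is illustrative):

// Hypothetical check that a pre-compressed asset is served; the URL is made up.
const res = await fetch('https://ably.com/docs/example.css', {
  headers: { 'Accept-Encoding': 'gzip' },
});
// Expect "gzip" when the .gz sibling exists and gzip_static is active.
console.log(res.headers.get('content-encoding'));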

data/onPostBuild/compressAssets.ts

Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
+import { GatsbyNode } from 'gatsby';
+import fastGlob from 'fast-glob';
+import path from 'path';
+import Piscina from 'piscina';
+import { isMainThread } from 'worker_threads';
+import fs from 'fs/promises';
+import { gzipAsync } from '@gfx/zopfli';
+
+/**
+ * This file is inspired by gatsby-plugin-zopfli and is essentially a smaller,
+ * inlined version of it.
+ *
+ * It comes in two parts. The first is the onPostBuild hook for Gatsby, which
+ * finds all the assets we want to compress and then uses Piscina to perform
+ * the compression tasks in parallel.
+ *
+ * The second part is the worker code executed by the worker threads: a simple
+ * function that takes the file path and the output path, and compresses the
+ * file using the gzipAsync function.
+ *
+ * It all happens in this single file.
+ */
+
+export const onPostBuild: GatsbyNode['onPostBuild'] = async ({ reporter }) => {
+  const cwd = path.join(process.cwd(), 'public');
+  const globResult = await fastGlob('**/*.{css,js,json,svg}', { cwd });
+
+  const files = globResult.map((file) => {
+    return {
+      from: path.join(cwd, file),
+      to: path.join(cwd, `${file}.gz`),
+    };
+  });
+
+  const maxThreads = parseInt(process.env.COMPRESS_MAX_THREADS || '12', 10);
+
+  reporter.info(`Compressing ${files.length} files with ${maxThreads} threads`);
+
+  const pool = new Piscina({
+    filename: __filename,
+    execArgv: ['-r', 'ts-node/register'], // Needed for Piscina to work with TypeScript
+    maxThreads,
+  });
+  const compress = files.map((file) => pool.run(file));
+
+  await Promise.all(compress);
+
+  reporter.info(`Compressed ${pool.completed} files - ${(pool.duration / 1000).toFixed(3)}s`);
+};
+
+/**
+ * From here on down is the worker code that is executed by the worker threads
+ * in Piscina to perform the actual compression.
+ */
+
+const options = {
+  numiterations: 15,
+};
+
+interface CompressInputs {
+  from: string;
+  to: string;
+}
+
+const compress = async ({ from, to }: CompressInputs) => {
+  const fileContent = await fs.readFile(from, 'utf8');
+  const compressedContent = await gzipAsync(fileContent, options);
+  await fs.writeFile(to, compressedContent);
+};
+
+// This strange bit of code is to ensure we export a default function
+// when we're being called by the Piscina worker
+if (!isMainThread) {
+  module.exports = async ({ from, to }: CompressInputs) => {
+    await compress({ from, to });
+  };
+}
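Zopfli produces ordinary gzip streams that every browser can decode, but spends far more CPU than zlib searching for a smaller encoding, which is why the work is fanned out to a thread pool and the CI resource class grew. A rough comparison sketch, using Node's built-in zlib alongside the same @gfx/zopfli call (the file path is a placeholder):

import { readFile } from 'fs/promises';
import { gzipSync } from 'zlib';
import { gzipAsync } from '@gfx/zopfli';

// Compare zlib's best compression level against zopfli for a single file.
const main = async () => {
  const input = await readFile('public/example.js');
  const zlibBytes = gzipSync(input, { level: 9 }).length;
  const zopfliBytes = (await gzipAsync(input, { numiterations: 15 })).length;
  console.log({ original: input.length, zlibBytes, zopfliBytes });
};

main();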

data/onPostBuild/index.ts

Lines changed: 6 additions & 106 deletions
@@ -1,109 +1,9 @@
 import { GatsbyNode } from 'gatsby';
-import * as path from 'path';
-import * as fs from 'fs';
+import { onPostBuild as llmstxt } from './llmstxt';
+import { onPostBuild as compressAssets } from './compressAssets';
 
-/* … 104 deleted lines: the llms.txt generation code, moved (with one
-   unnecessary regex escape dropped) into data/onPostBuild/llmstxt.ts
-   and reproduced in full below … */
+export const onPostBuild: GatsbyNode['onPostBuild'] = async (args) => {
+  // Run all onPostBuild functions in sequence
+  await llmstxt(args);
+  await compressAssets(args);
 };
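The composed hook runs the two steps one after the other. Since they write disjoint outputs (public/llms.txt versus the .gz files), they could in principle run concurrently; a hypothetical variant, not what this commit does:

export const onPostBuild: GatsbyNode['onPostBuild'] = async (args) => {
  // Hypothetical alternative: run both post-build steps concurrently.
  await Promise.all([llmstxt(args), compressAssets(args)]);
};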

data/onPostBuild/llmstxt.ts

Lines changed: 109 additions & 0 deletions
@@ -0,0 +1,109 @@
+import { GatsbyNode } from 'gatsby';
+import * as path from 'path';
+import * as fs from 'fs';
+
+/**
+ * This script is used to create a file called llms.txt that contains a list of all the pages in the site.
+ * It is heavily inspired by the gatsby-plugin-sitemap plugin, and stripped down to only what we need.
+ */
+
+const LLMS_TXT_PREAMBLE = `# https://ably.com/docs llms.txt\n`;
+
+const REPORTER_PREFIX = 'onPostBuild:';
+
+interface DocumentQueryResult {
+  site: {
+    siteMetadata: {
+      siteUrl: string;
+    };
+  };
+  allFileHtml: {
+    nodes: {
+      slug: string;
+      meta: {
+        title: string;
+        meta_description: string;
+      };
+    }[];
+  };
+}
+
+const withoutTrailingSlash = (path: string) => (path === `/` ? path : path.replace(/\/$/, ``));
+
+const prefixPath = ({ url, siteUrl, pathPrefix = `` }: { url: string; siteUrl: string; pathPrefix?: string }) => {
+  return new URL(pathPrefix + withoutTrailingSlash(url), siteUrl).toString();
+};
+
+const escapeMarkdown = (text: string) => {
+  // backslash-escape Markdown special chars: \ ` * _ { } [ ] ( ) # + !
+  return text.replace(/([\\`*_{}[\]()#+!])/g, '\\$1');
+};
+
+export const onPostBuild: GatsbyNode['onPostBuild'] = async ({ graphql, reporter, basePath }) => {
+  const query = `
+    query {
+      site {
+        siteMetadata {
+          siteUrl
+        }
+      }
+
+      allFileHtml(filter: { articleType: { in: ["document", "apiReference"] } }) {
+        nodes {
+          slug
+          meta {
+            title
+            meta_description
+          }
+        }
+      }
+    }
+  `;
+  const { data: queryRecords, errors } = await graphql<DocumentQueryResult>(query);
+
+  if (errors) {
+    reporter.panicOnBuild(`Error while running GraphQL query.`);
+    throw errors;
+  }
+
+  if (!queryRecords) {
+    reporter.panicOnBuild(`No documents found.`);
+    throw new Error('No documents found.');
+  }
+
+  const siteUrl = queryRecords.site.siteMetadata.siteUrl;
+
+  if (!siteUrl) {
+    reporter.panicOnBuild(`${REPORTER_PREFIX} Site URL not found.`);
+    throw new Error('Site URL not found.');
+  }
+
+  const allPages = queryRecords.allFileHtml.nodes;
+
+  reporter.info(`${REPORTER_PREFIX} Found ${allPages.length} pages to place into llms.txt`);
+
+  const serializedPages = [LLMS_TXT_PREAMBLE];
+
+  for (const page of allPages) {
+    const { slug, meta } = page;
+    const { title, meta_description } = meta;
+
+    try {
+      const url = prefixPath({ url: slug, siteUrl, pathPrefix: basePath });
+      const safeTitle = escapeMarkdown(title);
+      const link = `[${safeTitle}](${url})`;
+      const line = `- ${[link, meta_description].join(': ')}`;
+      serializedPages.push(line);
+    } catch (err) {
+      reporter.panic(`${REPORTER_PREFIX} Error serializing pages`, err as Error);
+    }
+  }
+
+  const llmsTxtPath = path.join(process.cwd(), 'public', 'llms.txt');
+  try {
+    fs.writeFileSync(llmsTxtPath, serializedPages.join('\n'));
+    reporter.info(`${REPORTER_PREFIX} Successfully wrote llms.txt with ${serializedPages.length} pages`);
+  } catch (err) {
+    reporter.panic(`${REPORTER_PREFIX} Error writing llms.txt file`, err as Error);
+  }
+};
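For a sense of the generated output, each page becomes a single Markdown bullet: an escaped title linking to the page URL, then the meta description after a colon. An illustrative example with invented inputs:

// All values here are made up, purely to show the line shape.
const link = `[${escapeMarkdown('Channels [beta]')}](https://ably.com/docs/channels)`;
const line = `- ${[link, 'Publish and subscribe to messages.'].join(': ')}`;
// line === '- [Channels \\[beta\\]](https://ably.com/docs/channels): Publish and subscribe to messages.'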
