Inline PDFImage.createRawMask in the PDFImage.createMask method

Snuffleupagus · Snuffleupagus · commit dc3e24a76a3f · 2025-04-08T12:01:50.000+02:00
After the introduction of `OffscreenCanvas` support, we now have *two separate* mask-methods in the `PDFImage` class; they were most likely never combined because we need the "raw" bytes when parsing Type3-glyph image masks.
However, that case is easy to support simply by disabling `OffscreenCanvas` usage when parsing Type3-glyphs, which allows us to reduce some code duplication.

Another slightly strange property of the `PDFImage.createMask` method is that it needs various image-dictionary parameters *manually* provided, which is probably because this is very old code.
That feels slightly unwieldy, so we instead change the method to accept the image-stream directly and do the necessary data-lookup internally.

A side-effect of this re-factoring is that we now support using the custom `isSingleOpaquePixel` operator in Type3-glyphs, which shouldn't hurt even though it seems extremely unlikely for that to ever happen in Type3-glyphs.
diff --git a/src/core/evaluator.js b/src/core/evaluator.js
@@ -72,7 +72,6 @@ import { BaseStream } from "./base_stream.js";
 import { bidi } from "./bidi.js";
 import { ColorSpace } from "./colorspace.js";
 import { ColorSpaceUtils } from "./colorspace_utils.js";
-import { DecodeStream } from "./decode_stream.js";
 import { getFontSubstitution } from "./font_substitutions.js";
 import { getGlyphsUnicode } from "./glyphlist.js";
 import { getMetrics } from "./metrics.js";
@@ -571,7 +570,10 @@ class PartialEvaluator {
     localImageCache,
     localColorSpaceCache,
   }) {
-    const dict = image.dict;
+    const { maxImageSize, ignoreErrors, isOffscreenCanvasSupported } =
+      this.options;
+
+    const { dict } = image;
     const imageRef = dict.objId;
     const w = dict.get("W", "Width");
     const h = dict.get("H", "Height");
@@ -580,15 +582,14 @@ class PartialEvaluator {
       warn("Image dimensions are missing, or not numbers.");
       return;
     }
-    const maxImageSize = this.options.maxImageSize;
     if (maxImageSize !== -1 && w * h > maxImageSize) {
       const msg = "Image exceeded maximum allowed size and was removed.";
 
-      if (this.options.ignoreErrors) {
-        warn(msg);
-        return;
+      if (!ignoreErrors) {
+        throw new Error(msg);
       }
-      throw new Error(msg);
+      warn(msg);
+      return;
     }
 
     let optionalContent;
@@ -607,52 +608,10 @@ class PartialEvaluator {
       // data can't be done here. Instead of creating a
       // complete PDFImage, only read the information needed
       // for later.
-      const interpolate = dict.get("I", "Interpolate");
-      const bitStrideLength = (w + 7) >> 3;
-      const imgArray = image.getBytes(bitStrideLength * h);
-      const decode = dict.getArray("D", "Decode");
-
-      if (this.parsingType3Font) {
-        // NOTE: Compared to other image resources we don't bother caching
-        // Type3-glyph image masks, since we've not come across any cases
-        // where that actually helps.
-        // In Type3-glyphs image masks are "always" inline resources,
-        // they're usually fairly small and aren't being re-used either.
-
-        imgData = PDFImage.createRawMask({
-          imgArray,
-          width: w,
-          height: h,
-          imageIsFromDecodeStream: image instanceof DecodeStream,
-          inverseDecode: decode?.[0] > 0,
-          interpolate,
-        });
-        args = compileType3Glyph(imgData);
-
-        if (args) {
-          operatorList.addImageOps(OPS.constructPath, args, optionalContent);
-          return;
-        }
-        warn("Cannot compile Type3 glyph.");
-
-        // If compilation failed, or was disabled, fallback to using an inline
-        // image mask; this case should be extremely rare.
-        operatorList.addImageOps(
-          OPS.paintImageMaskXObject,
-          [imgData],
-          optionalContent
-        );
-        return;
-      }
-
       imgData = await PDFImage.createMask({
-        imgArray,
-        width: w,
-        height: h,
-        imageIsFromDecodeStream: image instanceof DecodeStream,
-        inverseDecode: decode?.[0] > 0,
-        interpolate,
-        isOffscreenCanvasSupported: this.options.isOffscreenCanvasSupported,
+        image,
+        isOffscreenCanvasSupported:
+          isOffscreenCanvasSupported && !this.parsingType3Font,
       });
 
       if (imgData.isSingleOpaquePixel) {
@@ -677,6 +636,36 @@ class PartialEvaluator {
         return;
       }
 
+      if (this.parsingType3Font) {
+        // NOTE: Compared to other image resources we don't bother caching
+        // Type3-glyph image masks, since we've not come across any cases
+        // where that actually helps.
+        // In Type3-glyphs image masks are "always" inline resources,
+        // they're usually fairly small and aren't being re-used either.
+        if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
+          assert(
+            imgData.data instanceof Uint8Array,
+            "Type3 glyph image mask must be a TypedArray."
+          );
+        }
+        args = compileType3Glyph(imgData);
+
+        if (args) {
+          operatorList.addImageOps(OPS.constructPath, args, optionalContent);
+          return;
+        }
+        warn("Cannot compile Type3 glyph.");
+
+        // If compilation failed, or was disabled, fallback to using an inline
+        // image mask; this case should be extremely rare.
+        operatorList.addImageOps(
+          OPS.paintImageMaskXObject,
+          [imgData],
+          optionalContent
+        );
+        return;
+      }
+
       const objId = `mask_${this.idFactory.createObjId()}`;
       operatorList.addDependency(objId);
 
@@ -736,7 +725,7 @@ class PartialEvaluator {
       } catch (reason) {
         const msg = `Unable to decode inline image: "${reason}".`;
 
-        if (!this.options.ignoreErrors) {
+        if (!ignoreErrors) {
           throw new Error(msg);
         }
         warn(msg);
@@ -819,8 +808,7 @@ class PartialEvaluator {
       .then(async imageObj => {
         imgData = await imageObj.createImageData(
           /* forceRGBA = */ false,
-          /* isOffscreenCanvasSupported = */ this.options
-            .isOffscreenCanvasSupported
+          isOffscreenCanvasSupported
         );
         imgData.dataLen = imgData.bitmap
           ? imgData.width * imgData.height * 4
diff --git a/src/core/image.js b/src/core/image.js
@@ -348,58 +348,18 @@ class PDFImage {
     });
   }
 
-  static createRawMask({
-    imgArray,
-    width,
-    height,
-    imageIsFromDecodeStream,
-    inverseDecode,
-    interpolate,
-  }) {
-    // |imgArray| might not contain full data for every pixel of the mask, so
-    // we need to distinguish between |computedLength| and |actualLength|.
-    // In particular, if inverseDecode is true, then the array we return must
-    // have a length of |computedLength|.
+  static async createMask({ image, isOffscreenCanvasSupported = false }) {
+    const { dict } = image;
+    const width = dict.get("W", "Width");
+    const height = dict.get("H", "Height");
 
-    const computedLength = ((width + 7) >> 3) * height;
-    const actualLength = imgArray.byteLength;
-    const haveFullData = computedLength === actualLength;
-    let data, i;
+    const interpolate = dict.get("I", "Interpolate");
+    const decode = dict.getArray("D", "Decode");
+    const inverseDecode = decode?.[0] > 0;
 
-    if (imageIsFromDecodeStream && (!inverseDecode || haveFullData)) {
-      // imgArray came from a DecodeStream and its data is in an appropriate
-      // form, so we can just transfer it.
-      data = imgArray;
-    } else if (!inverseDecode) {
-      data = new Uint8Array(imgArray);
-    } else {
-      data = new Uint8Array(computedLength);
-      data.set(imgArray);
-      data.fill(0xff, actualLength);
-    }
-
-    // If necessary, invert the original mask data (but not any extra we might
-    // have added above). It's safe to modify the array -- whether it's the
-    // original or a copy, we're about to transfer it anyway, so nothing else
-    // in this thread can be relying on its contents.
-    if (inverseDecode) {
-      for (i = 0; i < actualLength; i++) {
-        data[i] ^= 0xff;
-      }
-    }
-
-    return { data, width, height, interpolate };
-  }
+    const computedLength = ((width + 7) >> 3) * height;
+    const imgArray = image.getBytes(computedLength);
 
-  static async createMask({
-    imgArray,
-    width,
-    height,
-    imageIsFromDecodeStream,
-    inverseDecode,
-    interpolate,
-    isOffscreenCanvasSupported = false,
-  }) {
     const isSingleOpaquePixel =
       width === 1 &&
       height === 1 &&
@@ -452,17 +412,40 @@ class PDFImage {
         bitmap,
       };
     }
-
-    // Get the data almost as they're and they'll be decoded
+    // Fallback to get the data almost as they're and they'll be decoded
     // just before being drawn.
-    return this.createRawMask({
-      imgArray,
-      width,
-      height,
-      inverseDecode,
-      imageIsFromDecodeStream,
-      interpolate,
-    });
+
+    // |imgArray| might not contain full data for every pixel of the mask, so
+    // we need to distinguish between |computedLength| and |actualLength|.
+    // In particular, if inverseDecode is true, then the array we return must
+    // have a length of |computedLength|.
+    const actualLength = imgArray.byteLength;
+    const haveFullData = computedLength === actualLength;
+    let data;
+
+    if (image instanceof DecodeStream && (!inverseDecode || haveFullData)) {
+      // imgArray came from a DecodeStream and its data is in an appropriate
+      // form, so we can just transfer it.
+      data = imgArray;
+    } else if (!inverseDecode) {
+      data = new Uint8Array(imgArray);
+    } else {
+      data = new Uint8Array(computedLength);
+      data.set(imgArray);
+      data.fill(0xff, actualLength);
+    }
+
+    // If necessary, invert the original mask data (but not any extra we might
+    // have added above). It's safe to modify the array -- whether it's the
+    // original or a copy, we're about to transfer it anyway, so nothing else
+    // in this thread can be relying on its contents.
+    if (inverseDecode) {
+      for (let i = 0; i < actualLength; i++) {
+        data[i] ^= 0xff;
+      }
+    }
+
+    return { data, width, height, interpolate };
   }
 
   get drawWidth() {