diff --git a/CHANGELOG.md b/CHANGELOG.md index 874aebec..d27cb0c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ### Unreleased - Fix null values in table cells rendering as `[object Object]` +- Fix further LineWrapper precision issues ### [v0.17.0] - 2025-04-12 diff --git a/lib/line_wrapper.js b/lib/line_wrapper.js index e455640d..7702dac3 100644 --- a/lib/line_wrapper.js +++ b/lib/line_wrapper.js @@ -85,10 +85,10 @@ class LineWrapper extends EventEmitter { } wordWidth(word) { - return ( + return PDFNumber( this.document.widthOfString(word, this) + - this.characterSpacing + - this.wordSpacing + this.characterSpacing + + this.wordSpacing, ); } diff --git a/lib/utils.js b/lib/utils.js index 17b0d538..6cab97f7 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -1,8 +1,25 @@ +const fArray = new Float32Array(1); +const uArray = new Uint32Array(fArray.buffer); + export function PDFNumber(n) { // PDF numbers are strictly 32bit - // so convert this number to the nearest 32bit number + // so convert this number to a 32bit number // @see ISO 32000-1 Annex C.2 (real numbers) - return Math.fround(n); + const rounded = Math.fround(n); + if (rounded <= n) return rounded; + + // Will have to perform 32bit float truncation + fArray[0] = n; + + // Get the 32-bit representation as integer and shift bits + if (n <= 0) { + uArray[0] += 1; + } else { + uArray[0] -= 1; + } + + // Return the float value + return fArray[0]; } /** diff --git a/tests/unit/helpers.js b/tests/unit/helpers.js index f7671e39..0ea3f9f8 100644 --- a/tests/unit/helpers.js +++ b/tests/unit/helpers.js @@ -1,3 +1,24 @@ +/** + * @import PDFDocument from '../../lib/document'; + */ + +/** + * @typedef {object} TextStream + * @property {string} text + * @property {string} font + * @property {number} fontSize + * + * @typedef {string | Buffer} PDFDataItem + * @typedef {Array} PDFData + * + * @typedef {object} PDFDataObject + * @property {PDFDataItem[]} items + */ + +/** + * @param {PDFDocument} doc + * @return {PDFData} + */ function logData(doc) { const loggedData = []; const originalMethod = doc._write; @@ -18,4 +39,83 @@ function joinTokens(...args) { return r; } -export { logData, joinTokens }; +/** + * @description + * Returns an array of objects from the PDF data. Object items are surrounded by /\d 0 obj/ and 'endobj'. + * @param {PDFData} data + * @return {Array} + */ +function getObjects(data) { + const objects = []; + let currentObject = null; + for (const item of data) { + if (item instanceof Buffer) { + if (currentObject) { + currentObject.items.push(item); + } + } else if (typeof item === 'string') { + if (/^\d+\s0\sobj/.test(item)) { + currentObject = { items: [] }; + objects.push(currentObject); + } else if (item === 'endobj') { + currentObject = null; + } else if (currentObject) { + currentObject.items.push(item); + } + } + } + return objects; +} + +/** + * @param {Buffer} textStream + * @return {TextStream | undefined} + */ +function parseTextStream(textStream) { + const decodedStream = textStream.toString('utf8'); + + // Extract font and font size + const fontMatch = decodedStream.match(/\/([A-Za-z0-9]+)\s+(\d+)\s+Tf/); + + if (!fontMatch) { + return undefined; + } + + const font = fontMatch[1]; + const fontSize = parseInt(fontMatch[2], 10); + + // Extract hex strings inside TJ array + const tjMatch = decodedStream.match(/\[([^\]]+)\]\s+TJ/); + if (!tjMatch) { + return undefined; + } + let text = ''; + + // this is a simplified version + // the correct way is to retrieve the encoding from /Resources /Font dictionary and decode using it + // https://stackoverflow.com/a/29468049/5724645 + + // Match all hex strings like <...> + const hexMatches = [...tjMatch[1].matchAll(/<([0-9a-fA-F]+)>/g)]; + for (const m of hexMatches) { + // Convert hex to string + const hex = m[1]; + for (let i = 0; i < hex.length; i += 2) { + const code = parseInt(hex.substr(i, 2), 16); + let char = String.fromCharCode(code); + // Handle special cases + if (code === 0x0a) { + char = '\n'; // Newline + } else if (code === 0x0d) { + char = '\r'; // Carriage return + } else if (code === 0x85) { + char = '...'; + } + text += char; + } + } + + return { text, font, fontSize }; +} + +export { logData, joinTokens, parseTextStream, getObjects }; diff --git a/tests/unit/setupTests.js b/tests/unit/setupTests.js index a2825382..521b61ff 100644 --- a/tests/unit/setupTests.js +++ b/tests/unit/setupTests.js @@ -1,5 +1,7 @@ -import matcher from './toContainChunk'; +import toContainChunk from './toContainChunk'; +import toContainText from './toContainText'; import { toMatchImageSnapshot } from 'jest-image-snapshot'; -expect.extend(matcher); +expect.extend(toContainChunk); +expect.extend(toContainText); expect.extend({ toMatchImageSnapshot }); diff --git a/tests/unit/text.spec.js b/tests/unit/text.spec.js index bc9be0bc..6a1153cc 100644 --- a/tests/unit/text.spec.js +++ b/tests/unit/text.spec.js @@ -15,33 +15,15 @@ describe('Text', () => { test('with simple content', () => { const docData = logData(document); - const textStream = Buffer.from( - `1 0 0 -1 0 792 cm -q -1 0 0 -1 0 792 cm -BT -1 0 0 1 72 711.384 Tm -/F1 12 Tf -[<73696d706c65207465> 30 <7874> 0] TJ -ET -Q -`, - 'binary', - ); - document.text('simple text'); document.end(); - expect(docData).toContainChunk([ - `5 0 obj`, - `<< -/Length 116 ->>`, - `stream`, - textStream, - `\nendstream`, - `endobj`, - ]); + expect(docData).toContainText({ text: 'simple text' }); + }); + + test('with destination', () => { + // just check that there is no exception + document.text('simple text', { destination: 'anchor' }); }); test('with content ending after page right margin', () => { @@ -194,5 +176,21 @@ Q `endobj`, ]); }); + + test('bounded text precision - issue #1611', () => { + const docData = logData(document); + const text = 'New york'; + const bounds = document.boundsOfString(text); + // Draw text which is constrained to the bounds + document.text(text, { + ellipsis: true, + width: bounds.width, + height: bounds.height, + }); + + document.end(); + + expect(docData).toContainText({ text }); + }); }); }); diff --git a/tests/unit/toContainText/index.js b/tests/unit/toContainText/index.js new file mode 100644 index 00000000..1eabd982 --- /dev/null +++ b/tests/unit/toContainText/index.js @@ -0,0 +1,117 @@ +import { getObjects, parseTextStream } from '../helpers.js'; + +/** + * @import { TextStream, PDFDataObject } from '../helpers.js'; + * @import JestMatchedUtils from 'jest-matcher-utils'; + */ + +/** + * @param {JestMatchedUtils} utils + * @param {TextStream} argument + * @return {string} + */ +const passMessage = (utils, argument) => () => { + return ( + utils.matcherHint('.not.toContainText', 'data', 'textStream') + + '\n\n' + + `Expected data not to contain text:\n\n${utils.printExpected(argument)}` + ); +}; + +/** + * @param {JestMatchedUtils} utils + * @param {TextStream[]} received + * @param {TextStream} argument + * @return {string} + */ +const failMessage = (utils, received, argument) => () => { + return ( + utils.matcherHint('.toContainText', 'data', 'textStream') + + '\n\n' + + `Expected data to contain text:\n\n${utils.printExpected(argument)}\n\nFound:\n\n${utils.printReceived(received)}` + ); +}; + +function textStreamMatches(expected, actual) { + if (expected.text !== actual.text) { + return false; + } + + if (expected.font && expected.font !== actual.font) { + return false; + } + + if (expected.fontSize && expected.fontSize !== actual.fontSize) { + return false; + } + + return true; +} + +/** + * @param {PDFDataObject} object + * @return {TextStream | undefined} + */ +function getTextStream(object) { + // text stream objects have 4 items + // first item is a string containing the Length of the stream + // second item 'stream' + // third item is the stream content Buffer + // fourth item is 'endstream' + + if (object.items.length !== 4) { + return; + } + if (typeof object.items[0] !== 'string') { + return; + } + if (object.items[1] !== 'stream') { + return; + } + if (!(object.items[2] instanceof Buffer)) { + return; + } + if (!/endstream/.test(object.items[3])) { + return; + } + + return parseTextStream(object.items[2]); +} + +export default { + /** + * + * @param {(string | Buffer)[]} data + * @param {Partial} textStream + * @returns + */ + toContainText(data, textStream) { + const objects = getObjects(data); + const foundTextStreams = []; + let pass = false; + + for (const object of objects) { + const objectTextStream = getTextStream(object, textStream); + if (!objectTextStream) { + continue; + } + foundTextStreams.push(objectTextStream); + if (textStreamMatches(textStream, objectTextStream)) { + pass = true; + break; + } + } + + if (pass) { + return { + pass: true, + message: passMessage(this.utils, textStream), + }; + } + + return { + pass: false, + message: failMessage(this.utils, foundTextStreams, textStream), + }; + }, +}; diff --git a/tests/unit/utils.spec.js b/tests/unit/utils.spec.js index e1181cd6..d6d1e81a 100644 --- a/tests/unit/utils.spec.js +++ b/tests/unit/utils.spec.js @@ -1,4 +1,4 @@ -import { normalizeSides } from '../../lib/utils'; +import { normalizeSides, PDFNumber } from '../../lib/utils'; describe('normalizeSides', () => { test.each([ @@ -54,3 +54,19 @@ describe('normalizeSides', () => { }); }); }); + +describe('PDFNumber', () => { + test.each([ + [0, 0], + [0.04999999701976776], //float32 rounded down + [0.05], + [0.05000000074505806], //float32 rounded up + [1], + [-1], + [-5.05], + [5.05], + ])('PDFNumber(%f) -> %f', (n) => { + expect(PDFNumber(n)).toBeLessThanOrEqual(n); + expect(PDFNumber(n, false)).toBeLessThanOrEqual(n); + }); +}); diff --git a/tests/visual/__image_snapshots__/table-spec-js-table-multi-page-table-1-snap.png b/tests/visual/__image_snapshots__/table-spec-js-table-multi-page-table-1-snap.png new file mode 100644 index 00000000..ab1e3ff2 Binary files /dev/null and b/tests/visual/__image_snapshots__/table-spec-js-table-multi-page-table-1-snap.png differ diff --git a/tests/visual/__image_snapshots__/table-spec-js-table-multi-page-table-2-snap.png b/tests/visual/__image_snapshots__/table-spec-js-table-multi-page-table-2-snap.png new file mode 100644 index 00000000..84743748 Binary files /dev/null and b/tests/visual/__image_snapshots__/table-spec-js-table-multi-page-table-2-snap.png differ diff --git a/tests/visual/helpers.js b/tests/visual/helpers.js index 883c6b2d..a5351fcc 100644 --- a/tests/visual/helpers.js +++ b/tests/visual/helpers.js @@ -26,7 +26,7 @@ function runDocTest(options, fn) { const { systemFonts = false } = options; const images = await pdf2png(pdfData, { systemFonts }); for (let image of images) { - expect(image).toMatchImageSnapshot(); + expect(image).toMatchImageSnapshot(options); } resolve(); } catch (err) { diff --git a/tests/visual/table.spec.js b/tests/visual/table.spec.js index 7fd3aff1..90d6fa2c 100644 --- a/tests/visual/table.spec.js +++ b/tests/visual/table.spec.js @@ -405,4 +405,24 @@ describe('table', function () { }, ); }); + + test('multi page table', function () { + return runDocTest({ size: [500, 300] }, function (doc) { + doc.font('tests/fonts/Roboto-Italic.ttf'); + doc.table({ + debug: true, + data: [ + [ + { + rowSpan: 3, + text: 'LEFT_COLUMN', + }, + 'RIGHT_COLUM_ROW_1\n'.repeat(5), + ], + ['RIGHT_COLUM_ROW_2\n'.repeat(5)], + ['RIGHT_COLUM_ROW_3'], + ], + }); + }); + }); }); diff --git a/tests/visual/text.spec.js b/tests/visual/text.spec.js index 84646ea8..8fb3150e 100644 --- a/tests/visual/text.spec.js +++ b/tests/visual/text.spec.js @@ -127,6 +127,7 @@ describe('text', function () { '#ecf157', '#acfa70', ]; + function randColor() { return cols[i++ % cols.length]; }