diff --git a/src/annotator/anchoring/pdf.ts b/src/annotator/anchoring/pdf.ts index f2e33ababa7..07b80a1ea41 100644 --- a/src/annotator/anchoring/pdf.ts +++ b/src/annotator/anchoring/pdf.ts @@ -17,6 +17,7 @@ import type { import { translateOffsets } from '../util/normalize'; import { matchQuote } from './match-quote'; import { createPlaceholder } from './placeholder'; +import { textInDOMRect } from './text-in-rect'; import { TextPosition, TextRange } from './text-range'; import { TextQuoteAnchor } from './types'; @@ -128,6 +129,12 @@ async function getPageView(pageIndex: number): Promise { return pageView!; } +function getTextLayerFromPoint(x: number, y: number): HTMLElement | undefined { + return document + .elementsFromPoint(x, y) + .find(el => el.classList.contains('textLayer')) as HTMLElement | undefined; +} + /** * Return true if the document has selectable text. */ @@ -848,6 +855,13 @@ export async function describeShape(shape: Shape): Promise { }; }; + const textFromRect = (textLayer: HTMLElement, rect: DOMRect) => { + // Set a limit on how much text is included in thumbnails, to avoid shape + // selector objects becoming too large. + const maxTextLen = 256; + return textInDOMRect(textLayer, rect).slice(0, maxTextLen); + }; + switch (shape.type) { case 'rect': { const [topLeft, bottomRight] = await Promise.all([ @@ -866,20 +880,34 @@ export async function describeShape(shape: Shape): Promise { } const pageView = await getPageView(topLeft.pageIndex); + const pdfRect = { + type: 'rect', + left: topLeft.x, + top: topLeft.y, + right: bottomRight.x, + bottom: bottomRight.y, + } as const; + + const textLayer = getTextLayerFromPoint(shape.left, shape.top); + let text; + if (textLayer) { + const rect = new DOMRect( + shape.left, + shape.top, + shape.right - shape.left, + shape.bottom - shape.top, + ); + text = textFromRect(textLayer, rect); + } return [ createPageSelector(pageView, topLeft.pageIndex), { type: 'ShapeSelector', anchor: 'page', - shape: { - type: 'rect', - left: topLeft.x, - top: topLeft.y, - right: bottomRight.x, - bottom: bottomRight.y, - }, + shape: pdfRect, view: pageBoundingBox(pageView.pdfPage), + text, }, ]; } @@ -889,8 +917,14 @@ export async function describeShape(shape: Shape): Promise { throw new Error('Point is not in a page'); } - const pageView = await getPageView(point.pageIndex); + const textLayer = getTextLayerFromPoint(shape.x, shape.y); + let text; + if (textLayer) { + const rect = new DOMRect(shape.x, shape.y, 1, 1); + text = textFromRect(textLayer, rect); + } + const pageView = await getPageView(point.pageIndex); return [ createPageSelector(pageView, point.pageIndex), { @@ -901,6 +935,7 @@ export async function describeShape(shape: Shape): Promise { x: point.x, y: point.y, }, + text, view: pageBoundingBox(pageView.pdfPage), }, ]; diff --git a/src/annotator/anchoring/test/pdf-test.js b/src/annotator/anchoring/test/pdf-test.js index 40de88776b0..948edc0178c 100644 --- a/src/annotator/anchoring/test/pdf-test.js +++ b/src/annotator/anchoring/test/pdf-test.js @@ -964,10 +964,26 @@ describe('annotator/anchoring/pdf', () => { describe('describeShape', () => { let elementsFromPoint; + let textLayer; + let fakeTextInDOMRect; const borderLeft = 5; const borderTop = 8; + // Create a matcher for a `DOMRect`. + // + // Note that if you pass a `DOMRect` directly to eg. `assert.calledWith`, + // the match will always succeed, whether the values are equal or not. + const matchRect = expected => + sinon.match( + rect => + rect.x === expected.x && + rect.y === expected.y && + rect.width === expected.width && + rect.height === expected.height, + `DOMRect { x=${expected.x}, y=${expected.y} width=${expected.width} height=${expected.height} }`, + ); + beforeEach(() => { for (let i = 0; i < viewer.pdfViewer.pagesCount; i++) { const pageDiv = viewer.pdfViewer.getPageView(i).div; @@ -979,6 +995,9 @@ describe('annotator/anchoring/pdf', () => { // which are not a PDF page container, are ignored. const dummy = document.createElement('div'); + textLayer = document.createElement('div'); + textLayer.className = 'textLayer'; + // Override `elementsFromPoint` to control how viewport coordinates are // mapped to pages. elementsFromPoint = sinon.stub(document, 'elementsFromPoint'); @@ -994,7 +1013,13 @@ describe('annotator/anchoring/pdf', () => { } const pageDiv = viewer.pdfViewer.getPageView(pageIndex).div; - return [dummy, pageDiv]; + return [dummy, textLayer, pageDiv]; + }); + + fakeTextInDOMRect = sinon.stub().returns('text-in-shape'); + + pdfAnchoring.$imports.$mock({ + './text-in-rect': { textInDOMRect: fakeTextInDOMRect }, }); }); @@ -1024,6 +1049,11 @@ describe('annotator/anchoring/pdf', () => { y: 10 + borderTop, }); + assert.calledWith( + fakeTextInDOMRect, + textLayer, + matchRect(new DOMRect(10 + borderLeft, 10 + borderTop, 1, 1)), + ); assert.deepEqual(selectors, [ { type: 'PageSelector', @@ -1044,9 +1074,21 @@ describe('annotator/anchoring/pdf', () => { right: 100, top: 200, }, + text: 'text-in-shape', }, ]); }); + + it('does not extract text if there is no text layer', async () => { + textLayer.className = 'notTheTextLayer'; + const selectors = await describeShape({ + type: 'point', + x: 10 + borderLeft, + y: 10 + borderTop, + }); + const shapeSelector = selectors.find(s => s.type === 'ShapeSelector'); + assert.isUndefined(shapeSelector.text); + }); }); context('when shape is a rect', () => { @@ -1090,14 +1132,29 @@ describe('annotator/anchoring/pdf', () => { const [expectedLeft, expectedTop] = pageView.getPagePoint(10, 10); const [expectedRight, expectedBottom] = pageView.getPagePoint(30, 50); - const selectors = await describeShape({ - type: 'rect', + const rect = { left: 10 + borderLeft, top: 10 + borderTop, right: 30 + borderLeft, bottom: 50 + borderTop, + }; + const selectors = await describeShape({ + type: 'rect', + ...rect, }); + assert.calledWith( + fakeTextInDOMRect, + textLayer, + matchRect( + new DOMRect( + rect.left, + rect.top, + rect.right - rect.left, + rect.bottom - rect.top, + ), + ), + ); assert.deepEqual(selectors, [ { type: 'PageSelector', @@ -1120,11 +1177,38 @@ describe('annotator/anchoring/pdf', () => { right: 100, top: 200, }, + text: 'text-in-shape', }, ]); }); }); + it('does not extract text if there is no text layer', async () => { + textLayer.className = 'notTheTextLayer'; + const selectors = await describeShape({ + type: 'rect', + left: 10 + borderLeft, + top: 10 + borderTop, + right: 30 + borderLeft, + bottom: 50 + borderTop, + }); + const shapeSelector = selectors.find(s => s.type === 'ShapeSelector'); + assert.isUndefined(shapeSelector.text); + }); + + it('truncates extracted text', async () => { + fakeTextInDOMRect.returns('a'.repeat(300)); + const selectors = await describeShape({ + type: 'rect', + left: 10 + borderLeft, + top: 10 + borderTop, + right: 100, + bottom: 100, + }); + const shapeSelector = selectors.find(s => s.type === 'ShapeSelector'); + assert.equal(shapeSelector.text, 'a'.repeat(256)); + }); + it('throws if shape is unsupported', async () => { let err; try { diff --git a/src/annotator/anchoring/test/text-in-rect-test.js b/src/annotator/anchoring/test/text-in-rect-test.js new file mode 100644 index 00000000000..943260b2ec7 --- /dev/null +++ b/src/annotator/anchoring/test/text-in-rect-test.js @@ -0,0 +1,69 @@ +import { textInDOMRect } from '../text-in-rect'; + +describe('textInDOMRect', () => { + let container; + + beforeEach(() => { + container = document.createElement('div'); + container.style.position = 'fixed'; + + const leftColumn = document.createElement('p'); + leftColumn.className = 'left-column'; + Object.assign(leftColumn.style, { + position: 'absolute', + width: '200px', + }); + leftColumn.append('Line one', document.createElement('br'), 'Line two'); + + const rightColumn = document.createElement('p'); + rightColumn.className = 'right-column'; + Object.assign(rightColumn.style, { + position: 'absolute', + width: '200px', + left: '200px', + }); + rightColumn.append('Line three', document.createElement('br'), 'Line four'); + + container.append(leftColumn, rightColumn); + + document.body.append(container); + }); + + afterEach(() => { + container.remove(); + }); + + [ + // Rect covering whole left column + { + rect: new DOMRect(0, 0, 200, 200), + expected: 'Line one Line two', + }, + // Rect covering whole right column + { + rect: new DOMRect(200, 0, 200, 200), + expected: 'Line three Line four', + }, + // Tiny rect touching first word in left column + { + rect: new DOMRect(10, 10, 1, 1), + expected: 'Line', + }, + // Zero-sized rect touching first word in left column + { + rect: new DOMRect(10, 10, 0, 0), + expected: '', + }, + ].forEach(({ rect, expected }) => { + it('returns text in rect', () => { + const text = textInDOMRect(container, rect); + assert.equal(text, expected); + }); + }); + + it('only returns text from root container', () => { + const leftColumn = container.querySelector('.left-column'); + const text = textInDOMRect(leftColumn, new DOMRect(0, 0, 500, 500)); + assert.equal(text, 'Line one Line two'); + }); +}); diff --git a/src/annotator/anchoring/text-in-rect.ts b/src/annotator/anchoring/text-in-rect.ts new file mode 100644 index 00000000000..fe33a2d8407 --- /dev/null +++ b/src/annotator/anchoring/text-in-rect.ts @@ -0,0 +1,59 @@ +import { rectIntersects, rectsOverlapVertically } from '../util/geometry'; + +/** + * Return the DOM text that intersects a given rect. + * + * The text nodes under {@link root} are split into words and the bounding + * rectangle of each word is intersected with {@link rect}. If the intersection + * is non-empty, the text of that word is added to the output string. + * + * @param root - Root element of the DOM tree to search + * @param rect - Client coordinates of the region + */ +export function textInDOMRect(root: Element, rect: DOMRect): string { + const iter = root.ownerDocument!.createNodeIterator( + root, + NodeFilter.SHOW_TEXT, + ); + + // Pieces of text that intersect the rect. + const textChunks = []; + + // Rect for previous text chunk which was included in the output. + let prevChunkRect; + + let currentNode; + while ((currentNode = iter.nextNode())) { + const textNode = currentNode as Text; + + // We split on word boundaries here rather than spaces, so inter-word spaces + // are included in the "words". + const words = textNode.data.split(/\b/); + let offset = 0; + + for (const word of words) { + const range = new Range(); + range.setStart(textNode, offset); + const endOffset = offset + word.length; + range.setEnd(textNode, endOffset); + const wordRect = range.getBoundingClientRect(); + + if (rectIntersects(wordRect, rect)) { + // We assume that spaces are included in the text between words on a + // line, but not between lines. + const newLine = + prevChunkRect && !rectsOverlapVertically(prevChunkRect, wordRect); + if (newLine) { + textChunks.push(' '); + } + + textChunks.push(word); + prevChunkRect = wordRect; + } + + offset = endOffset; + } + } + + return textChunks.join(''); +} diff --git a/src/sidebar/components/Annotation/Annotation.tsx b/src/sidebar/components/Annotation/Annotation.tsx index 231f8d82634..9173c2d607a 100644 --- a/src/sidebar/components/Annotation/Annotation.tsx +++ b/src/sidebar/components/Annotation/Annotation.tsx @@ -124,7 +124,12 @@ function Annotation({ replyCount={replyCount} threadIsCollapsed={threadIsCollapsed} /> - {targetShape && } + {targetShape && ( + + )} {annotationQuote && ( ` maps to aria-label. We might want to + // split this into a separate concise label and longer description in + // future. aria-label={alt} + // Set the title attribute to make it easy to inspect the alt text on + // desktop. Screen readers will only read `aria-label` since it has the + // same value. + title={alt} className={classes} style={{ width: `${bitmap.width / scale}px`, @@ -44,9 +51,17 @@ function BitmapImage({ alt, bitmap, classes, scale = 1.0 }: BitmapImageProps) { export type AnnotationThumbnailProps = { tag: string; thumbnailService: ThumbnailService; + + /** + * Text contained in the thumbnail. + * + * This is used when generating alt text for the thumbnail. + */ + textInImage?: string; }; function AnnotationThumbnail({ + textInImage, tag, thumbnailService, }: AnnotationThumbnailProps) { @@ -69,6 +84,13 @@ function AnnotationThumbnail({ } }, [error, devicePixelRatio, tag, thumbnail, thumbnailService]); + let altText; + if (textInImage) { + altText = `Thumbnail. Contains text: ${textInImage}`; + } else { + altText = 'Thumbnail'; + } + return (
{thumbnail && ( { right: 10, bottom: 0, }, + text: 'Some text', }, ]; const wrapper = createComponent({ annotation }); const thumbnail = wrapper.find('AnnotationThumbnail'); assert.isTrue(thumbnail.exists()); assert.equal(thumbnail.prop('tag'), annotation.$tag); + assert.equal(thumbnail.prop('textInImage'), 'Some text'); }); }); diff --git a/src/sidebar/components/Annotation/test/AnnotationThumbnail-test.js b/src/sidebar/components/Annotation/test/AnnotationThumbnail-test.js index 6a10c13435b..21106986b09 100644 --- a/src/sidebar/components/Annotation/test/AnnotationThumbnail-test.js +++ b/src/sidebar/components/Annotation/test/AnnotationThumbnail-test.js @@ -38,6 +38,24 @@ describe('AnnotationThumbnail', () => { assert.isTrue(wrapper.exists('BitmapImage')); }); + [ + { + textInImage: undefined, + expectedAlt: 'Thumbnail', + }, + { + textInImage: 'Foo bar', + expectedAlt: 'Thumbnail. Contains text: Foo bar', + }, + ].forEach(({ textInImage, expectedAlt }) => { + it('sets alt text for thumbnail', () => { + fakeThumbnailService.get.returns(fakeThumbnail); + const wrapper = createComponent({ textInImage }); + const image = wrapper.find('canvas'); + assert.equal(image.prop('aria-label'), expectedAlt); + }); + }); + it('requests thumbnail and then renders it if not cached', async () => { const wrapper = createComponent(); assert.calledOnce(fakeThumbnailService.fetch); diff --git a/src/sidebar/helpers/annotation-metadata.ts b/src/sidebar/helpers/annotation-metadata.ts index 45e462d23f5..c001e34ce64 100644 --- a/src/sidebar/helpers/annotation-metadata.ts +++ b/src/sidebar/helpers/annotation-metadata.ts @@ -4,7 +4,6 @@ import type { EPUBContentSelector, PageSelector, SavedAnnotation, - Shape, ShapeSelector, TextQuoteSelector, } from '../../types/api'; @@ -401,11 +400,11 @@ export function quote(annotation: APIAnnotationData): string | null { * This will return `null` if the annotation is associated with a text * selection instead of a shape. */ -export function shape(annotation: APIAnnotationData): Shape | null { +export function shape(annotation: APIAnnotationData): ShapeSelector | null { const shapeSelector = annotation.target[0]?.selector?.find( s => s.type === 'ShapeSelector', ) as ShapeSelector | undefined; - return shapeSelector?.shape ?? null; + return shapeSelector ?? null; } /** diff --git a/src/sidebar/helpers/test/annotation-metadata-test.js b/src/sidebar/helpers/test/annotation-metadata-test.js index 1bad3ab8cdb..4afa198449a 100644 --- a/src/sidebar/helpers/test/annotation-metadata-test.js +++ b/src/sidebar/helpers/test/annotation-metadata-test.js @@ -726,15 +726,18 @@ describe('sidebar/helpers/annotation-metadata', () => { }, ], expected: { - type: 'rect', - left: 0, - top: 10, - right: 10, - bottom: 0, + type: 'ShapeSelector', + shape: { + type: 'rect', + left: 0, + top: 10, + right: 10, + bottom: 0, + }, }, }, ].forEach(({ selectors, expected }) => { - it('returns shape from shape selector', () => { + it('returns shape selector', () => { const annotation = { target: [ { diff --git a/src/types/api.ts b/src/types/api.ts index 1202fe77c61..909db0c8f91 100644 --- a/src/types/api.ts +++ b/src/types/api.ts @@ -212,6 +212,9 @@ export type ShapeSelector = { right: number; bottom: number; }; + + /** The text contained inside this shape. */ + text?: string; }; /**