@@ -73,6 +73,10 @@ describe("api", function () {
73
73
} , WAIT_TIMEOUT ) ;
74
74
}
75
75
76
/**
 * Concatenate the text of a list of text-content items into one string.
 *
 * Each item contributes its `str`, followed by a newline when its `hasEOL`
 * flag is truthy. An item that has no string `str` contributes no text of
 * its own, so the literal "undefined" can never end up in the result.
 *
 * @param {Array<Object>} items - Items that may carry `str` and `hasEOL`.
 * @returns {string} The merged text; an empty string for an empty list.
 */
function mergeText(items) {
  return items
    .map(chunk => {
      // Guard against entries without text instead of coercing `undefined`.
      const text = typeof chunk.str === "string" ? chunk.str : "";
      return text + (chunk.hasEOL ? "\n" : "");
    })
    .join("");
}
79
+
76
80
describe ( "getDocument" , function ( ) {
77
81
it ( "creates pdf doc from URL-string" , async function ( ) {
78
82
const urlStr = TEST_PDFS_PATH + basicApiFileName ;
@@ -1604,11 +1608,17 @@ describe("api", function () {
1604
1608
const data = await Promise . all ( [ defaultPromise , parametersPromise ] ) ;
1605
1609
1606
1610
expect ( ! ! data [ 0 ] . items ) . toEqual ( true ) ;
1607
- expect ( data [ 0 ] . items . length ) . toEqual ( 12 ) ;
1611
+ expect ( data [ 0 ] . items . length ) . toEqual ( 11 ) ;
1608
1612
expect ( ! ! data [ 0 ] . styles ) . toEqual ( true ) ;
1609
1613
1614
+ const page1 = mergeText ( data [ 0 ] . items ) ;
1615
+ expect ( page1 ) . toEqual ( `Table Of Content
1616
+ Chapter 1 .......................................................... 2
1617
+ Paragraph 1.1 ...................................................... 3
1618
+ page 1 / 3` ) ;
1619
+
1610
1620
expect ( ! ! data [ 1 ] . items ) . toEqual ( true ) ;
1611
- expect ( data [ 1 ] . items . length ) . toEqual ( 7 ) ;
1621
+ expect ( data [ 1 ] . items . length ) . toEqual ( 6 ) ;
1612
1622
expect ( ! ! data [ 1 ] . styles ) . toEqual ( true ) ;
1613
1623
} ) ;
1614
1624
@@ -1632,6 +1642,7 @@ describe("api", function () {
1632
1642
transform : [ 18 , 0 , 0 , 18 , 441.81 , 708.4499999999999 ] ,
1633
1643
width : 77.49 ,
1634
1644
hasEOL : false ,
1645
+ isOneSpace : false ,
1635
1646
} ) ;
1636
1647
expect ( styles [ fontName ] ) . toEqual ( {
1637
1648
fontFamily : "serif" ,
@@ -1643,6 +1654,107 @@ describe("api", function () {
1643
1654
await loadingTask . destroy ( ) ;
1644
1655
} ) ;
1645
1656
1657
// Checks that the merged text of page 1 of issue13226.pdf is exactly the
// expected sentence, i.e. that no additional spaces were inserted.
it("gets text content, with no extra spaces (issue 13226)", async function () {
  const loadingTask = getDocument(buildGetDocumentParams("issue13226.pdf"));
  try {
    const pdfDoc = await loadingTask.promise;
    const pdfPage = await pdfDoc.getPage(1);
    const { items } = await pdfPage.getTextContent();
    const text = mergeText(items);

    expect(text).toEqual(
      "Mitarbeiterinnen und Mitarbeiter arbeiten in über 100 Ländern engagiert im Dienste"
    );
  } finally {
    // Always destroy the loading task, even when one of the awaited steps
    // above rejects.
    await loadingTask.destroy();
  }
});
1670
+
1671
// Checks that the merged text of page 1 of issue13201.pdf contains each of
// the expected lines verbatim.
it("gets text content, with merged spaces (issue 13201)", async function () {
  const loadingTask = getDocument(buildGetDocumentParams("issue13201.pdf"));
  try {
    const pdfDoc = await loadingTask.promise;
    const pdfPage = await pdfDoc.getPage(1);
    const { items } = await pdfPage.getTextContent();
    const text = mergeText(items);

    const expectedLines = [
      "Abstract. A purely peer-to-peer version of electronic cash would allow online",
      "avoid mediating disputes. The cost of mediation increases transaction costs, limiting the",
      "system is secure as long as honest nodes collectively control more CPU power than any",
    ];
    for (const line of expectedLines) {
      // `toContain` reports the actual text on failure, unlike comparing a
      // boolean against `true`.
      expect(text).toContain(line);
    }
  } finally {
    // Always destroy the loading task, even when one of the awaited steps
    // above rejects.
    await loadingTask.destroy();
  }
});
1696
+
1697
// Checks that the merged text of page 1 of issue11913.pdf contains the
// expected sentences with normal word spacing.
it("gets text content, with no spaces between letters of words (issue 11913)", async function () {
  const loadingTask = getDocument(buildGetDocumentParams("issue11913.pdf"));
  try {
    const pdfDoc = await loadingTask.promise;
    const pdfPage = await pdfDoc.getPage(1);
    const { items } = await pdfPage.getTextContent();
    const text = mergeText(items);

    const expectedLines = [
      "1. The first of these cases arises from the tragic handicap which has blighted the life of the Plaintiff, and from the response of the",
      "argued in this Court the appeal raises narrower, but important, issues which may be summarised as follows:-",
    ];
    for (const line of expectedLines) {
      // `toContain` reports the actual text on failure, unlike comparing a
      // boolean against `true`.
      expect(text).toContain(line);
    }
  } finally {
    // Always destroy the loading task, even when one of the awaited steps
    // above rejects.
    await loadingTask.destroy();
  }
});
1716
+
1717
// Checks that the merged text of page 1 of issue10900.pdf contains the
// expected block of table rows, one row per line.
it("gets text content, with merged spaces (issue 10900)", async function () {
  const loadingTask = getDocument(buildGetDocumentParams("issue10900.pdf"));
  try {
    const pdfDoc = await loadingTask.promise;
    const pdfPage = await pdfDoc.getPage(1);
    const { items } = await pdfPage.getTextContent();
    const text = mergeText(items);

    // Rows are joined with "\n" explicitly so the expected value does not
    // depend on how this source file is indented.
    const expectedRows = [
      "3 3 3 3",
      "851.5 854.9 839.3 837.5",
      "633.6 727.8 789.9 796.2",
      "1,485.1 1,582.7 1,629.2 1,633.7",
      "114.2 121.7 125.3 130.7",
      "13.0x 13.0x 13.0x 12.5x",
    ].join("\n");

    // `toContain` reports the actual text on failure, unlike comparing a
    // boolean against `true`.
    expect(text).toContain(expectedRows);
  } finally {
    // Always destroy the loading task, even when one of the awaited steps
    // above rejects.
    await loadingTask.destroy();
  }
});
1735
+
1736
// Checks that the merged text of page 1 of issue10640.pdf contains the
// expected paragraph, with its line breaks and word spacing intact.
it("gets text content, with spaces (issue 10640)", async function () {
  const loadingTask = getDocument(buildGetDocumentParams("issue10640.pdf"));
  try {
    const pdfDoc = await loadingTask.promise;
    const pdfPage = await pdfDoc.getPage(1);
    const { items } = await pdfPage.getTextContent();
    const text = mergeText(items);

    // Lines are joined with "\n" explicitly so the expected value does not
    // depend on how this source file is indented. The \x80-\x82 escapes are
    // part of the expected text and are kept as-is.
    const expectedParagraph = [
      "Open Sans is a humanist sans serif typeface designed by Steve Matteson.",
      "Open Sans was designed with an upright stress, open forms and a neu-",
      "tral, yet friendly appearance. It was optimized for print, web, and mobile",
      "interfaces, and has excellent legibility characteristics in its letterforms (see",
      "figure \x81 on the following page). This font is available from the Google Font",
      "Directory [\x81] as TrueType files licensed under the Apache License version \x82.\x80.",
      "This package provides support for this font in LATEX. It includes Type \x81",
      "versions of the fonts, converted for this package using FontForge from its",
      "sources, for full support with Dvips.",
    ].join("\n");

    // `toContain` reports the actual text on failure, unlike comparing a
    // boolean against `true`.
    expect(text).toContain(expectedParagraph);
  } finally {
    // Always destroy the loading task, even when one of the awaited steps
    // above rejects.
    await loadingTask.destroy();
  }
});
1757
+
1646
1758
it ( "gets empty structure tree" , async function ( ) {
1647
1759
const tree = await page . getStructTree ( ) ;
1648
1760
0 commit comments