@@ -2187,17 +2187,50 @@ class PartialEvaluator {
2187
2187
spaceInFlowMax : 0 ,
2188
2188
trackingSpaceMin : Infinity ,
2189
2189
negativeSpaceMax : - Infinity ,
2190
+ notASpace : - Infinity ,
2190
2191
transform : null ,
2191
2192
fontName : null ,
2192
2193
hasEOL : false ,
2193
2194
} ;
2194
2195
2196
// Two-slot cyclic buffer remembering the last two chars seen in the text
// stream. It is consulted to decide when a whitespace has to be added in
// the text chunk.
// `twoLastCharsPos` is the index of the slot that will be overwritten next,
// i.e. the slot currently holding the older of the two saved chars.
const twoLastChars = [" ", " "];
let twoLastCharsPos = 0;

/**
 * Record a char in the two-slot buffer, overwriting the older entry.
 * @param {string} char
 * @returns {boolean} true when the two chars stored before this call were
 *   a non-whitespace followed by a whitespace.
 */
function saveLastChar(char) {
  const olderSlot = twoLastCharsPos;
  // The position is always 0 or 1, so the other slot is simply `1 - pos`.
  const newerSlot = 1 - olderSlot;

  // Read both entries before overwriting anything.
  const olderChar = twoLastChars[olderSlot];
  const newerChar = twoLastChars[newerSlot];

  twoLastChars[olderSlot] = char;
  twoLastCharsPos = newerSlot;

  return olderChar !== " " && newerChar === " ";
}

/**
 * Put the buffer back in its initial state: both slots hold a whitespace
 * and the write position is 0.
 */
function resetLastChars() {
  twoLastChars.fill(" ");
  twoLastCharsPos = 0;
}
2223
+
2195
2224
// Used in addFakeSpaces.
2196
2225
2197
2226
// A gap <= fontSize * TRACKING_SPACE_FACTOR is a tracking space,
2198
2227
// so it doesn't count as a space.
2199
2228
const TRACKING_SPACE_FACTOR = 0.1 ;
2200
2229
2230
+ // When a gap is <= fontSize * NOT_A_SPACE_FACTOR, it is not treated as a
2231
+ // space, even if a whitespace is present in the text stream.
2232
+ const NOT_A_SPACE_FACTOR = 0.03 ;
2233
+
2201
2234
// A negative white < fontSize * NEGATIVE_SPACE_FACTOR induces
2202
2235
// a break (a new chunk of text is created).
2203
2236
// It doesn't change anything when the text is copied but
@@ -2299,6 +2332,7 @@ class PartialEvaluator {
2299
2332
2300
2333
textContentItem . trackingSpaceMin =
2301
2334
textState . fontSize * TRACKING_SPACE_FACTOR ;
2335
+ textContentItem . notASpace = textState . fontSize * NOT_A_SPACE_FACTOR ;
2302
2336
textContentItem . negativeSpaceMax =
2303
2337
textState . fontSize * NEGATIVE_SPACE_FACTOR ;
2304
2338
textContentItem . spaceInFlowMin =
@@ -2483,6 +2517,7 @@ class PartialEvaluator {
2483
2517
return true ;
2484
2518
}
2485
2519
2520
+ resetLastChars ( ) ;
2486
2521
flushTextContentItem ( ) ;
2487
2522
return true ;
2488
2523
}
@@ -2491,7 +2526,17 @@ class PartialEvaluator {
2491
2526
appendEOL ( ) ;
2492
2527
return true ;
2493
2528
}
2529
+
2530
+ if ( advanceY <= textOrientation * textContentItem . notASpace ) {
2531
+ // The real spacing between 2 consecutive chars is thin enough to be
2532
+ // considered a non-space.
2533
+ resetLastChars ( ) ;
2534
+ }
2535
+
2494
2536
if ( advanceY <= textOrientation * textContentItem . trackingSpaceMin ) {
2537
+ if ( advanceY <= textContentItem . notASpace ) {
2538
+ resetLastChars ( ) ;
2539
+ }
2495
2540
textContentItem . height += advanceY ;
2496
2541
} else if (
2497
2542
! addFakeSpaces (
@@ -2501,6 +2546,7 @@ class PartialEvaluator {
2501
2546
)
2502
2547
) {
2503
2548
if ( textContentItem . str . length === 0 ) {
2549
+ resetLastChars ( ) ;
2504
2550
textContent . items . push ( {
2505
2551
str : " " ,
2506
2552
dir : "ltr" ,
@@ -2532,6 +2578,10 @@ class PartialEvaluator {
2532
2578
appendEOL ( ) ;
2533
2579
return true ;
2534
2580
}
2581
+
2582
+ // We're moving back so in case the last char was a whitespace
2583
+ // we cancel it: it doesn't make sense to insert it.
2584
+ resetLastChars ( ) ;
2535
2585
flushTextContentItem ( ) ;
2536
2586
return true ;
2537
2587
}
@@ -2541,12 +2591,19 @@ class PartialEvaluator {
2541
2591
return true ;
2542
2592
}
2543
2593
2594
+ if ( advanceX <= textOrientation * textContentItem . notASpace ) {
2595
+ // The real spacing between 2 consecutive chars is thin enough to be
2596
+ // considered a non-space.
2597
+ resetLastChars ( ) ;
2598
+ }
2599
+
2544
2600
if ( advanceX <= textOrientation * textContentItem . trackingSpaceMin ) {
2545
2601
textContentItem . width += advanceX ;
2546
2602
} else if (
2547
2603
! addFakeSpaces ( advanceX , textContentItem . prevTransform , textOrientation )
2548
2604
) {
2549
2605
if ( textContentItem . str . length === 0 ) {
2606
+ resetLastChars ( ) ;
2550
2607
textContent . items . push ( {
2551
2608
str : " " ,
2552
2609
dir : "ltr" ,
@@ -2600,14 +2657,7 @@ class PartialEvaluator {
2600
2657
}
2601
2658
let scaledDim = glyphWidth * scale ;
2602
2659
2603
- if (
2604
- glyph . isWhitespace &&
2605
- ( i === 0 ||
2606
- i + 1 === ii ||
2607
- glyphs [ i - 1 ] . isWhitespace ||
2608
- glyphs [ i + 1 ] . isWhitespace ||
2609
- extraSpacing )
2610
- ) {
2660
+ if ( glyph . isWhitespace ) {
2611
2661
// Don't push a " " in the textContentItem
2612
2662
// (except when it's between two non-spaces chars),
2613
2663
// it will be done (if required) in next call to
@@ -2623,6 +2673,7 @@ class PartialEvaluator {
2623
2673
charSpacing += - scaledDim + textState . wordSpacing ;
2624
2674
textState . translateTextMatrix ( 0 , - charSpacing ) ;
2625
2675
}
2676
+ saveLastChar ( " " ) ;
2626
2677
continue ;
2627
2678
}
2628
2679
@@ -2653,17 +2704,18 @@ class PartialEvaluator {
2653
2704
textChunk . prevTransform = getCurrentTextTransform ( ) ;
2654
2705
}
2655
2706
2656
- if ( glyph . isWhitespace ) {
2707
+ let glyphUnicode = glyph . unicode ;
2708
+ glyphUnicode = NormalizedUnicodes [ glyphUnicode ] || glyphUnicode ;
2709
+ glyphUnicode = reverseIfRtl ( glyphUnicode ) ;
2710
+ if ( saveLastChar ( glyphUnicode ) ) {
2711
+ // The two last chars are a non-whitespace followed by a whitespace
2712
+ // and then this non-whitespace, so we insert a whitespace here.
2657
2713
// Replaces all whitespaces with standard spaces (0x20), to avoid
2658
2714
// alignment issues between the textLayer and the canvas if the text
2659
2715
// contains e.g. tabs (fixes issue6612.pdf).
2660
2716
textChunk . str . push ( " " ) ;
2661
- } else {
2662
- let glyphUnicode = glyph . unicode ;
2663
- glyphUnicode = NormalizedUnicodes [ glyphUnicode ] || glyphUnicode ;
2664
- glyphUnicode = reverseIfRtl ( glyphUnicode ) ;
2665
- textChunk . str . push ( glyphUnicode ) ;
2666
2717
}
2718
+ textChunk . str . push ( glyphUnicode ) ;
2667
2719
2668
2720
if ( charSpacing ) {
2669
2721
if ( ! font . vertical ) {
@@ -2679,6 +2731,7 @@ class PartialEvaluator {
2679
2731
}
2680
2732
2681
2733
function appendEOL ( ) {
2734
+ resetLastChars ( ) ;
2682
2735
if ( textContentItem . initialized ) {
2683
2736
textContentItem . hasEOL = true ;
2684
2737
flushTextContentItem ( ) ;
@@ -2701,6 +2754,7 @@ class PartialEvaluator {
2701
2754
width <= textOrientation * textContentItem . spaceInFlowMax
2702
2755
) {
2703
2756
if ( textContentItem . initialized ) {
2757
+ resetLastChars ( ) ;
2704
2758
textContentItem . str . push ( " " ) ;
2705
2759
}
2706
2760
return false ;
@@ -2715,6 +2769,7 @@ class PartialEvaluator {
2715
2769
}
2716
2770
2717
2771
flushTextContentItem ( ) ;
2772
+ resetLastChars ( ) ;
2718
2773
textContent . items . push ( {
2719
2774
str : " " ,
2720
2775
// TODO: check if using the orientation from last chunk is
0 commit comments