|
| 1 | +----------------------------------------------------------------------- |
| 2 | +-- FILE: luaotfload-arabic.lua |
| 3 | +-- DESCRIPTION: part of luaotfload / arabic specific support |
| 4 | +----------------------------------------------------------------------- |
| 5 | + |
-- Declare this submodule to luaotfload. The assert raises the message below
-- when the global `luaotfload_module` is not set; otherwise the value it
-- returns is called with the module description table.
local declare_module = assert(
  luaotfload_module,
  "This is a part of luaotfload and should not be loaded independently"
)
declare_module {
  name = "luaotfload-arabic",
  version = "3.27-dev", --TAGVERSION
  date = "2023-08-31", --TAGDATE
  description = "luaotfload submodule / features",
  license = "GPL v2.0",
  author = "Marcel Krüger",
  copyright = "The LaTeX Project",
}
| 15 | + |
| 16 | +local unicode = require'luaotfload-unicode' |
| 17 | +local ccc = unicode.ccc |
| 18 | + |
| 19 | +local node_new = node.direct.new |
| 20 | +local setlink = node.direct.setlink |
| 21 | +local is_char = node.direct.is_char |
| 22 | +local getnext = node.direct.getnext |
| 23 | + |
-- Modifier combining marks (MCM): set of code points consulted by the AMTRA
-- reordering below. Lookup is `mcm[codepoint] == true`.
local mcm = {}
for _, codepoint in ipairs {
  0x0654, -- ARABIC HAMZA ABOVE
  0x0655, -- ARABIC HAMZA BELOW
  0x0658, -- ARABIC MARK NOON GHUNNA
  0x06DC, -- ARABIC SMALL HIGH SEEN
  0x06E3, -- ARABIC SMALL LOW SEEN
  0x06E7, -- ARABIC SMALL HIGH YEH
  0x06E8, -- ARABIC SMALL HIGH NOON
  0x08CA, -- ARABIC SMALL HIGH FARSI YEH
  0x08CB, -- ARABIC SMALL HIGH YEH BARREE WITH TWO DOTS BELOW
  0x08CD, -- ARABIC SMALL HIGH ZAH
  0x08CE, -- ARABIC LARGE ROUND DOT ABOVE
  0x08CF, -- ARABIC LARGE ROUND DOT BELOW
  0x08D3, -- ARABIC SMALL LOW WAW
  0x08F3, -- ARABIC SMALL HIGH WAW
} do
  mcm[codepoint] = true
end
| 41 | + |
| 42 | +-- Implement AMTRA from UTR #53. |
| 43 | +-- This assumes that the text is already normalized according to NFD. For most |
| 44 | +-- fonts, normalizing to NFC should be good enough. |
-- Scan the run of consecutive glyphs of font `f` that starts at `first` and
-- whose combining class equals `class`.
-- Returns: the last node of the run, the node following the run (may be nil),
-- and whether any node inspected while scanning is listed in `mcm`.
-- NOTE(review): the nodes inspected are the ones *after* `first`, including the
-- node that terminates the run; `first` itself is never looked up in `mcm`.
-- This mirrors the original logic - confirm it matches the intent of UTR #53.
local function scan_ccc_run(first, f, class)
  local last, after, found = nil, first, false
  repeat
    last = after
    after = getnext(last)
    local char = is_char(after, f) -- nil/false when `after` is not a glyph of `f`
    if mcm[char] then found = true end
  until ccc[char] ~= class
  return last, after, found
end

-- Unlink the run `first`..`last` (currently preceded by `prev` and followed by
-- `after`) and re-insert it directly behind `base`.
-- Returns the node that now precedes `after`.
-- The statement order matters: when `prev == base` the first setlink already
-- changes getnext(base), which makes the whole operation a no-op relink.
-- NOTE(review): when the run ends the list (`after == nil`), confirm that
-- setlink(prev, nil) really terminates the list at `prev`.
local function move_run_behind_base(base, prev, first, last, after)
  setlink(prev, after)
  setlink(last, getnext(base))
  setlink(base, first)
  if prev == base then
    return last
  end
  return prev
end

-- Reorder Arabic combining marks following AMTRA (UTR #53).
-- This assumes that the text is already normalized according to NFD. For most
-- fonts, normalizing to NFC should be good enough.
--
-- head: first node of a direct-node list (accessed through node.direct).
-- f:    font id; only glyphs for which is_char(n, f) yields a character are
--       considered.
-- Returns `head`. The first node is never moved, so the list head is unchanged.
local function reorder_amtra(head, f)
  local n = head
  while n do
    -- `base` is the node the marks get inserted behind, `prev` the node
    -- directly in front of `n`.
    local base, prev = n, n
    n = getnext(n)
    while true do
      local char = is_char(n, f) -- is_char(nil, f) == is_char(0, f) == nil
      local this_ccc = ccc[char]
      if not this_ccc then break end -- ! This `break` is the hot path
      if this_ccc == 33 then
        -- Runs of class 33 always move directly behind the base.
        local last, after = scan_ccc_run(n, f, 33)
        prev = move_run_behind_base(base, prev, n, last, after)
        n = after
      elseif this_ccc == 220 then
        local last, after, found = scan_ccc_run(n, f, 220)
        if found then
          prev = move_run_behind_base(base, prev, n, last, after)
          base = last -- Because ccc230 should get inserted after this
        else
          prev = last
        end
        n = after
      elseif this_ccc == 230 then
        local last, after, found = scan_ccc_run(n, f, 230)
        if found then
          prev = move_run_behind_base(base, prev, n, last, after)
        else
          -- Fixed: this used to assign the out-of-scope `tail_220, after_220`
          -- (both nil), which aborted the whole traversal at this point.
          prev = last
        end
        n = after
      else
        prev, n = n, getnext(n)
      end
    end
  end
  -- Fixed: `n` is always nil here; hand back the (unchanged) list head instead.
  return head
end
| 117 | + |
-- The amtra processor has to run after 'normalize', ideally right behind it.
-- Look up where 'normalize' currently sits among the registered node
-- processors; 0 means it was not found, so that we end up at the start of the
-- list and normalize can still insert itself before us later.
local normalize_index = 0
do
  local node_processors = fonts.constructors.features.otf.processors.node
  for position, entry in ipairs(node_processors) do
    if entry.name == 'normalize' then
      normalize_index = position
    end
  end
end
-- Register the `amtra` OpenType feature with the font loader.
-- The feature defaults to 'auto'; its node processor reorders combining marks
-- with reorder_amtra and is positioned directly behind 'normalize'.
fonts.constructors.features.otf.register {
  name = 'amtra',
  default = 'auto',
  description = 'Apply Unicode Arabic Mark Rendering',
  initializers = {
    -- Resolve 'auto' into a boolean: enabled exactly when the font's script
    -- property is 'arab'. Any other value is left untouched.
    -- Fixed: the condition used to test the undefined global `values`, so the
    -- 'auto' case was never resolved.
    node = function(fonttable, value, features)
      if value == 'auto' then
        features.amtra = fonttable.properties.script == 'arab'
      end
    end,
  },
  processors = {
    -- One slot behind 'normalize', or first when normalize is not registered.
    position = normalize_index + 1,
    node = function(head, f)
      return reorder_amtra(head, f)
    end,
  },
}
0 commit comments