----------------------------------------------------------------------- -- FILE: luaotfload-notdef.lua -- DESCRIPTION: part of luaotfload / notdef ----------------------------------------------------------------------- assert(luaotfload_module, "This is a part of luaotfload and should not be loaded independently") { name = "luaotfload-notdef", version = "3.28", --TAGVERSION date = "2024-02-14", --TAGDATE description = "luaotfload submodule / notdef", license = "GPL v2.0", author = "Marcel Krüger" } local harfbuzz = luaotfload.harfbuzz local flush_node = node.direct.flush_node local getfont = font.getfont local getnext = node.direct.getnext local getwhd = node.direct.getwhd local insert = table.insert local insert_after = node.direct.insert_after local kern_id = node.id'kern' local disc_id = node.id'disc' local nodenew = node.direct.new local nodecopy = node.direct.copy local otfregister = fonts.constructors.features.otf.register local protect_glyph = node.direct.protect_glyph local remove = node.direct.remove local setfont = node.direct.setfont local traverse_char = node.direct.traverse_char local traverse_id = node.direct.traverse_id local setchar = node.direct.setchar local setdisc = node.direct.setdisc local getdisc = node.direct.getdisc local getwidth = node.direct.getwidth local setkern = node.direct.setkern local setattributelist = node.direct.setattributelist local getattributelist = node.direct.getattributelist local setmove = luaotfload.fontloader.nodes.injections.setmove -- According to DerivedCoreProperties.txt, Default_Ignorable_Code_Point -- is generated from: -- Other_Default_Ignorable_Code_Point -- + Cf (Format characters) -- + Variation_Selector -- - White_Space -- - FFF9..FFFB (Interlinear annotation format characters) -- - 13430..13438 (Egyptian hieroglyph format characters) -- - Prepended_Concatenation_Mark (Exceptional format characters that should be visible) -- Based on HarfBuzz, we add the exclusion -- - Lo (Letter, Other) -- This affects Hangul fillers. local ignorable_codepoints do local sep = lpeg.P' '^0 * ';' * lpeg.P' '^0 local codepoint = lpeg.S'0123456789ABCDEF'^4/function(c)return tonumber(c, 16)end local codepoint_range = codepoint * ('..' * codepoint + lpeg.Cc(false)) local function multirawset(table, key1, key2, value) for key = key1,(key2 or key1) do rawset(table, key, value) end return table end local entry = lpeg.Cg(codepoint * ';' * (1-lpeg.P';')^0 * ';Cf;' * lpeg.Cc(true))^-1 * (1-lpeg.P'\n')^0 * '\n' local file = lpeg.Cf( lpeg.Ct'' * entry^0 , rawset) local f = io.open(kpse.find_file"UnicodeData.txt") ignorable_codepoints = file:match(f:read'*a') f:close() entry = lpeg.Cg(codepoint_range * sep * ('Other_Default_Ignorable_Code_Point' * lpeg.Cc(true) + 'Variation_Selector' * lpeg.Cc(true) + 'White_Space' * lpeg.Cc(nil) + 'Prepended_Concatenation_Mark' * lpeg.Cc(nil) ) * ' # ' * (1-lpeg.P'Lo'))^-1 * (1-lpeg.P'\n')^0 * '\n' file = lpeg.Cf( lpeg.Carg(1) * entry^0 , multirawset) f = io.open(kpse.find_file"PropList.txt") ignorable_codepoints = file:match(f:read'*a', 1, ignorable_codepoints) f:close() for i = 0xFFF9,0xFFFB do ignorable_codepoints[i] = nil end for i = 0x13430,0x13438 do ignorable_codepoints[i] = nil end end ignorable_codepoints[0xAD] = nil local function setnotdef(tfmdata, factor) local desc = tfmdata.shared.rawdata.descriptions -- So we have to find the .notdef glyph. We only know that it has GID -- 0, but we need it's Unicode mapping. Normally it isn't mapped in -- the font, so we auto-assigned the first private slot: local guess = desc[0xF0000] if guess and guess.index == 0 then tfmdata.notdefcode = 0xF0000 return end -- If this didn't happen, it might be mapped to one of the -- replacement characters: for code = 0xFFFC,0xFFFF do guess = desc[code] if guess and guess.index == 0 then tfmdata.notdefcode = code return end end -- Oh no, we couldn't find it. Maybe we can find it by name? local code = tfmdata.resources.unicodes[".notdef"] -- Better safe than sorry guess = code and desc[code] if guess and guess.index == 0 then tfmdata.notdefcode = code return end -- So the font didn't do the obvious things and then it lied to us. -- At this point we should think about sending an automated complain -- to the font author, but we probably can't trust the contact -- information either. -- We will fall back to brute force now: for code, char in pairs(desc) do if char.index == 0 then tfmdata.notdefcode = code return end end -- If we ever reach this point, something odd happened. Either there -- are no glyphs at all (then LuaTeX will complain anyway, so let's -- ignore that case) or someone tried to use this with a legacy font. -- In that case there most likely isn't a `.notdef` glyph anyway and -- inserting glyph 0 would insert a random character, so `notdefcode` -- better stays `nil`. end local glyph_id = node.id'glyph' local function donotdef(head, font, _, _, _) local tfmdata = getfont(font) local notdef, chars = tfmdata.unscaled.notdefcode, tfmdata.characters if not notdef then return end for cur, cid, fid in traverse_char(head) do if fid == font then local w, h, d = getwhd(cur) if w == 0 and h == 0 and d == 0 and not chars[cid] and not ignorable_codepoints[cid] then local notdefnode = nodenew(glyph_id, 256) setfont(notdefnode, font, notdef) insert_after(cur, cur, notdefnode) protect_glyph(cur) end end end end otfregister { name = "notdef", description = "Add notdef glyphs", default = 1, initializers = { node = setnotdef, }, processors = { node = donotdef, } } local font_invisible_replacement = setmetatable({}, {__index = function(t, fid) local fontdata = font.getfont(fid) local replacement = fontdata.shared.features.invisible if replacement == "remove" then t[fid] = false return false end replacement = tonumber(replacement) or 32 local char = fontdata.characters[replacement] if char then t[fid] = {replacement, -char.width} return t[fid] else t[fid] = false return false end end}) local push, pop do local function checkprop(n) local p = node.direct.getproperty(n) return p and p.zwnj end local list = {} function push(head) head = node.direct.todirect(head) local l = {} list[#list+1] = l for n, id in node.direct.traverse(head) do if checkprop(n) then head = node.direct.remove(head, n) l[#l+1] = n elseif id == disc_id then local pre, post, replace = getdisc(n) for nn in node.direct.traverse(pre) do if checkprop(nn) then local after pre, after = node.direct.remove(pre, nn) l[#l+1] = {nn, n, 'pre'} end end for nn in node.direct.traverse(post) do if checkprop(nn) then post = node.direct.remove(post, nn) l[#l+1] = {nn, n, 'post'} end end for nn in node.direct.traverse(replace) do if checkprop(nn) then replace = node.direct.remove(replace, nn) l[#l+1] = {nn, n, 'replace'} end end setdisc(n, pre, post, replace) end end return head end local getfield, setfield = node.direct.getfield, node.direct.setfield local function pop(head) head = node.direct.todirect(head) local l = list[#list] list[#list] = nil for i = #l,1,-1 do local e = l[i] local n = tonumber(e) local disc, thishead if n then thishead = head else disc, n = e[2], e[1] thishead = getfield(disc, e[3]) end local prev, next = node.direct.getboth(n) if prev or not next then thishead = node.direct.insert_after(thishead, prev, n) else thishead = node.direct.insert_before(thishead, next, n) end if disc then setfield(disc, e[3], thishead) else head = thishead end end return head end fonts.handlers.otf.handlers.marked_push = push fonts.handlers.otf.handlers.marked_pop = pop end local sequence1 = { features = {["semiignored-node"] = {["*"] = {["*"] = true}}}, flags = {false, false, false, false}, name = "semiignored-node", order = {"semiignored-node"}, type = "marked_push", } local sequence2 = { features = {["semiignored-node"] = {["*"] = {["*"] = true}}}, flags = {false, false, false, false}, name = "semiignored-node", order = {"semiignored-node"}, type = "marked_pop", } local function pushpopinitialiser(tfmdata, value, features) local resources = tfmdata.resources local sequences = resources and resources.sequences local first_gpos, last_gpos if sequences then local alreadydone for i=1,#sequences do local sequence = sequences[i] if sequence1 == sequence then return elseif sequence.type:sub(1,5) == "gpos_" then if not first_gpos then first_gpos = i end last_gpos = i end end if first_gpos then insert(sequences, last_gpos+1, sequence2) insert(sequences, first_gpos, sequence1) end end end otfregister { name = 'semiignored-node', description = 'Allow adding nodes which break ligatures but do not affect kerning', default = true, -- Should basically never be disabled manually initializers = { node = pushpopinitialiser, -- plug = ? -- TODO: Manually handle in luaotfload-harf-plug.lua }, } local delayed_remove do local delayed function delayed_remove(n) flush_node(delayed) delayed = n end end local function ignorablehandler(head, fid, ...) -- FIXME: The arguments are probably wrong local fontparam = font_invisible_replacement[fid] local replacement = fontparam and fontparam[1] local font_kern = fontparam and fontparam[2] for n, c, f in traverse_char(head) do if f == fid then local lookup = ignorable_codepoints[c] if lookup then if replacement then setchar(n, replacement) if font_kern then local k = nodenew(kern_id) setkern(k, font_kern) setattributelist(k, getattributelist(n)) head = insert_after(head, n, k) end else local after head, after = remove(head, n) delayed_remove(n) end end end end delayed_remove() for n in traverse_id(disc_id, head) do local a, b, c = getdisc(n) setdisc(ignorablehandler(a, fid), ignorablehandler(b, fid), ignorablehandler(c, fid)) end return head end if harfbuzz then local harf_settings = luaotfload.harf local preserve_flag = harfbuzz.Buffer.FLAG_PRESERVE_DEFAULT_IGNORABLES or 0 local remove_flag = harfbuzz.Buffer.FLAG_REMOVE_DEFAULT_IGNORABLES or 0 local dotted_circle_flag = harfbuzz.Buffer.FLAG_DO_NOT_INSERT_DOTTED_CIRCLE or 0 harf_settings.default_buf_flags = (harf_settings.default_buf_flags & ~remove_flag) | preserve_flag | dotted_circle_flag local function dottedcircleinitialize(tfmdata, value) if not tfmdata.hb then return end local hb = tfmdata.hb hb.buf_flags = hb.buf_flags & ~dotted_circle_flag end otfregister { name = 'dottedcircle', description = 'Insert dotted circle to fix invalid clusters', default = true, initializers = { plug = dottedcircleinitialize, }, } end otfregister { name = 'invisible', description = 'Remove invisible control characters', default = true, processors = { node = ignorablehandler, plug = ignorablehandler, }, } --- vim:sw=2:ts=2:expandtab:tw=71