2021-04-24 17:47:36 (UTC-03:00)
Marcel Rodrigues <marcelgmr@gmail.com>
add basic TrueType font parsing
diff --git a/ttf.lua b/ttf.lua new file mode 100644 index 0000000..c1eefd1 --- /dev/null +++ b/ttf.lua @@ -0,0 +1,542 @@ +local bit = require "bit" + +local bnot = bit.bnot +local bor, band = bit.bor, bit.band +local lshift, rshift = bit.lshift, bit.rshift + +local function log(s) + io.stderr:write(s .. "\n") +end + +local function utf8to32(utf8str) + assert(type(utf8str) == "string") + local res, seq, val = {}, 0, nil + for i = 1, #utf8str do + local c = string.byte(utf8str, i) + if seq == 0 then + table.insert(res, val) + seq = c < 0x80 and 1 or c < 0xE0 and 2 or c < 0xF0 and 3 or + c < 0xF8 and 4 or --c < 0xFC and 5 or c < 0xFE and 6 or + error("invalid UTF-8 character sequence") + val = band(c, 2^(8-seq) - 1) + else + val = bor(lshift(val, 6), band(c, 0x3F)) + end + seq = seq - 1 + end + table.insert(res, val) + return res +end + +-- Note: TrueType uses big endian for everything. + +local function uint(s) + local x = 0 + for i = 1, #s do + x = x * 256 + s:byte(i) + end + return x +end + +local function int(s) + local x = uint(s) + local p = 2^(#s*8) + if x >= p/2 then x = x - p end + return x +end + +local function str(x) + local s = "" + while x > 0 do + s = string.char(x % 256) .. s + x = math.floor(x / 256) + end + return s +end + +local Face = {} +Face.__index = Face + +function Face:str(n) return self.fp:read(n) end +function Face:uint8() return uint(self.fp:read(1)) end +function Face:uint16() return uint(self.fp:read(2)) end +function Face:uint32() return uint(self.fp:read(4)) end +function Face:uint64() return uint(self.fp:read(8)) end +function Face:int8() return int(self.fp:read(1)) end +function Face:int16() return int(self.fp:read(2)) end +function Face:int32() return int(self.fp:read(4)) end +function Face:int64() return int(self.fp:read(8)) end + +function Face:goto(tag, offset) + self.fp:seek("set", self.dir[tag].offset + (offset or 0)) +end + +function Face:getpos() + return self.fp:seek() +end + +function Face:setpos(pos) + self.fp:seek("set", pos) +end + +function Face:offset() + local scaler_type = self:uint32() + assert(scaler_type == 0x74727565 or scaler_type == 0x00010000, + ("invalid scaler type for TrueType: 0x%08X"):format(scaler_type)) + local num_tables = self:uint16() + -- The entries for search_range, entry_selector and range_shift are used to + -- facilitate quick binary searches of the table directory. Unless a font + -- has a large number of tables, a sequential search will be fast enough. + local search_range = self:uint16() + local entry_selector = self:uint16() + local range_shift = self:uint16() + self.num_tables = num_tables +end + +function Face:directory() + local dir = {} + for i = 1, self.num_tables do + local tag = self:str(4) + local checksum = self:uint32() + local offset = self:uint32() + local length = self:uint32() + -- TODO: verify checksums + dir[tag] = {checksum=checksum, offset=offset, length=length} + end + self.dir = dir +end + +function Face:head() + self:goto("head") + local version = self:uint32() + assert(version == 0x00010000, ("invalid version: 0x%08X"):format(version)) + local revision = self:uint32() + local checksum_adj = self:uint32() + local magic = self:uint32() + assert(magic == 0x5F0F3CF5, ("invalid magic: 0x%08X"):format(magic)) + local flags = self:uint16() + self.units_per_em = self:uint16() + local created = self:int64() + local modified = self:int64() + local xmin = self:int16() + local ymin = self:int16() + local xmax = self:int16() + local ymax = self:int16() + local mac_style = self:uint16() + local lowest_rec_ppem = self:uint16() + local direction_hint = self:int16() + self.index_to_loc_fmt = self:int16() + local glyph_data_fmt = self:int16() +end + +function Face:maxp() + self:goto("maxp") + local version = self:uint32() + if version == 0x00005000 then + self.num_glyphs = self:uint16() + elseif version == 0x00010000 then + self.num_glyphs = self:uint16() + local max_points = self:uint16() + local max_contours = self:uint16() + local max_composite_points = self:uint16() + local max_composite_contours = self:uint16() + local max_zones = self:uint16() + local max_twilight_points = self:uint16() + local max_storage = self:uint16() + local max_function_defs = self:uint16() + local max_instruction_defs = self:uint16() + local max_stack_elements = self:uint16() + local max_size_of_instructions = self:uint16() + local max_component_elements = self:uint16() + local max_component_depth = self:uint16() + else + error(("invalid maxp version: 0x%08X"):format(version)) + end +end + +function Face:cmap() + self:goto("cmap") + local version = self:uint16() + assert(version == 0, ("invalid cmap version: %d"):format(version)) + local num_subtables = self:uint16() + local encoding, suboffset + local ok = false + for i = 1, num_subtables do + local platform_id = self:uint16() + encoding = self:uint16() + suboffset = self:uint32() + if platform_id == 0 then -- platform == Unicode + ok = true + break + elseif platform_id == 3 then -- platform == Microsoft + if encoding == 10 or encoding == 1 then + ok = true + break + end + end + end + if not ok then + error(("could not find Unicode cmap in %d subtables"):format(num_subtables)) + end + self:goto("cmap", suboffset) + self:subcmap() +end + +function Face:subcmap() + local format = self:uint16() + local segs = {} + local gia = {} + if format == 4 then + local length = self:uint16() + local language = self:uint16() + assert(language == 0, ("invalid subcmap language: %d"):format(language)) + local seg_count = self:uint16() / 2 + local search_range = self:uint16() + local entry_selector = self:uint16() + local range_shift = self:uint16() + for i = 1, seg_count do + segs[i] = {end_code=self:uint16()} + end + local last = segs[seg_count].end_code + assert(last == 0xFFFF, ("invalid subcmap last end code: %d"):format(last)) + local pad = self:uint16() + assert(pad == 0, ("invalid subcmap reserved pad: %d"):format(pad)) + for i = 1, seg_count do + segs[i].start_code = self:uint16() + end + for i = 1, seg_count do + segs[i].id_delta = self:uint16() + end + for i = 1, seg_count do + segs[i].id_range_offset = self:uint16() + end + local gia_len = (length - (16+8*seg_count)) / 2 + for i = 1, gia_len do + gia[i] = self:uint16() + end + -- TODO: support other formats, specially 6 and 12 + else + error(("unsupported subcmap format: %d"):format(format)) + end + self.segs, self.gia = segs, gia +end + +function Face:hhea() + self:goto("hhea") + local versionH = self:uint16() + local versionL = self:uint16() + assert(versionH == 1 and versionL == 0, + ("invalid hhea version: %d.%d"):format(versionH, versionL)) + local ascent = self:int16() + local descent = self:int16() + local line_gap = self:int16() + local advance_width_max = self:uint16() + local min_left_side_bearing = self:int16() + local min_right_side_bearing = self:int16() + local x_max_extent = self:int16() + local caret_slope_rise = self:int16() + local caret_slope_run = self:int16() + local caret_offset = self:int16() + for i = 1, 4 do + local reserved = self:uint16() + assert(reserved == 0, "nonzero reserved field in hhea") + end + local metric_data_format = self:int16() + assert(metric_data_format == 0, + ("invalid metric data format: %d"):format(metric_data_format)) + self.nlong_hor_metrics = self:uint16() +end + +function Face:hmetrics(id) + local n = self.nlong_hor_metrics -- for readability of expressions below + local advance, bearing + if id < n then + self:goto("hmtx", 4*id) + advance = self:uint16() + bearing = self:int16() + else + self:goto("hmtx", 4*(n-1)) + advance = self:uint16() + self:goto("hmtx", 4*n+2*(id-n)) + bearing = self:int16() + end + return advance, bearing +end + +function Face:kern() + self:goto("kern") + local version = self:uint16() + assert(version == 0, ("invalid kern table version: %d"):format(version)) + local ntables = self:uint16() + for i = 1, ntables do + local version = self:uint16() + local length = self:uint16() + local format = self:uint8() -- usually 0 + local coverage = self:uint8() + local horizontal = band(coverage, 2^0) > 0 -- usually true + local minimum = band(coverage, 2^1) > 0 -- usually false (kerning) + local cross_stream = band(coverage, 2^2) > 0 -- usually false (regular) + local override = band(coverage, 2^3) > 0 -- usually false (accumulate) + assert(band(coverage, 0xF0) == 0, "invalid coverage bits set") + if format == 0 then + self.num_kernings = self:uint16() + local search_range = self:uint16() + local entry_selector = self:uint16() + local range_shift = self:uint16() + local kerning = {} + for j = 1, self.num_kernings do + -- glyph indices (left * 2^16 + right) + local key = self:uint32() + -- kerning value + local value = self:int16() + kerning[key] = value + end + self.kerning = kerning + else + log(("unsupported kerning table format: %d"):format(format)) + end + end +end + +function Face:get_kerning(left_id, right_id) + return self.kerning[left_id * 2^16 + right_id] or 0 +end + +-- Convert a character code to its glyph id. +function Face:char_index(code) + local i = 1 + while code > self.segs[i].end_code do i = i + 1 end + if self.segs[i].start_code > code then return 0 end + local iro = self.segs[i].id_range_offset + if iro == 0 then + return (code + self.segs[i].id_delta) % 0x10000 + else + local idx = iro + 2 * (code - self.segs[i].start_code) + idx = idx - (#self.segs - i + 1) * 2 + local id = self.gia[idx/2+1] + if id > 0 then + id = (id + self.segs[i].id_delta) % 0x10000 + end + return id + end +end + +-- used in Face:glyph(); resolution in DPI +function Face:set_size(resolution, point_size) + self.scale = point_size * resolution / (72 * self.units_per_em) +end + +-- helper for Face:glyph() +function Face:pack_outline(points, end_points) + local outline = {} + local j = 1 + local p, q + for i = 1, #end_points do + local contour = {} + while j <= end_points[i] do + p = points[j] + q = {p.x*self.scale, p.y*self.scale, p.on_curve} + table.insert(contour, q) + j = j + 1 + end + table.insert(contour, contour[1]) -- close contour + table.insert(outline, contour) + end + return outline +end + +function Face:glyph(id) + local suboffset + if self.index_to_loc_fmt == 0 then -- short offsets + self:goto("loca", 2*id) + suboffset = self:uint16() * 2 + else -- long offsets + self:goto("loca", 4*id) + suboffset = self:uint16() + end + self:goto("glyf", suboffset) + local num_contours = self:int16() + local xmin = self:int16() + local ymin = self:int16() + local xmax = self:int16() + local ymax = self:int16() + local points, end_points = {}, {} + if num_contours > 0 then -- simple glyph + for i = 1, num_contours do + end_points[i] = self:uint16() + 1 + end + local num_points = end_points[#end_points] + local instruction_length = self:uint16() + local instructions = self:str(instruction_length) + local i = 0 + while i < num_points do + i = i + 1 + local flags = self:uint8() + assert(flags < 64, "point flag with higher bits set") + local point = { + on_curve = band(flags, 2^0) > 0, + x_short = band(flags, 2^1) > 0, + y_short = band(flags, 2^2) > 0, + repeated = band(flags, 2^3) > 0, + x_sign_same = band(flags, 2^4) > 0, + y_sign_same = band(flags, 2^5) > 0 + } + points[i] = point + if point.repeated then + local repeats = self:uint8() + for j = 1, repeats do + i = i + 1 + points[i] = { + on_curve = point.on_curve, + x_short = point.x_short, + y_short = point.y_short, + x_sign_same = point.x_sign_same, + y_sign_same = point.y_sign_same + } + end + end + end + local last_x, last_y = 0, 0 + for i = 1, #points do + if points[i].x_short then + local x = self:uint8() + if not points[i].x_sign_same then x = -x end + points[i].x = last_x + x + else + if not points[i].x_sign_same then + points[i].x = last_x + self:int16() + else + points[i].x = last_x + end + end + last_x = points[i].x + end + for i = 1, #points do + if points[i].y_short then + local y = self:uint8() + if not points[i].y_sign_same then y = -y end + points[i].y = last_y + y + else + if not points[i].y_sign_same then + points[i].y = last_y + self:int16() + else + points[i].y = last_y + end + end + last_y = points[i].y + end + elseif num_contours < 0 then -- compound glyph + local more = true + while more do + local flags = self:uint16() + local args_are_words = band(flags, 2^0x0) > 0 + local args_are_xy = band(flags, 2^0x1) > 0 + local round_xy_to_grid = band(flags, 2^0x2) > 0 + local regular_scale = band(flags, 2^0x3) > 0 + local obsolete = band(flags, 2^0x4) > 0 + more = band(flags, 2^0x5) > 0 + local irregular_scale = band(flags, 2^0x6) > 0 + local two_by_two = band(flags, 2^0x7) > 0 + local instructions = band(flags, 2^0x8) > 0 + local use_my_metrics = band(flags, 2^0x9) > 0 + local overlap = band(flags, 2^0xA) > 0 + local scaled_offset = band(flags, 2^0xB) > 0 + local unscaled_offset = band(flags, 2^0xC) > 0 + if obsolete then + log("warning: glyph component using obsolete flag") + end + local gid = self:uint16() + local pos = self:getpos() + local sub_points, sub_end_points = self:glyph(gid) + self:setpos(pos) + local e, f + if args_are_xy then + if args_are_words then + e = self:int16() + f = self:int16() + else + e = self:int8() + f = self:int8() + end + if round_xy_to_grid then + log("warning: ignoring request to round component offset") + end + else + local i, j + if args_are_words then + i = self:uint16() + j = self:uint16() + else + i = self:uint8() + j = self:uint8() + end + e = points[i].x - sub_points[j].x + f = points[i].y - sub_points[j].y + end + local a, b, c, d = 1, 0, 0, 1 + if regular_scale then + log("regular scale") + a = self:int16() / 0x4000 + d = a + elseif irregular_scale then + log("irregular scale") + a = self:int16() / 0x4000 + d = self:int16() / 0x4000 + elseif two_by_two then + log("2x2 transformation") + a = self:int16() / 0x4000 + b = self:int16() / 0x4000 + c = self:int16() / 0x4000 + d = self:int16() / 0x4000 + end + local m = math.max(math.abs(a), math.abs(b)) + local n = math.max(math.abs(c), math.abs(d)) + if math.abs(math.abs(a)-math.abs(c)) <= 0x21/0x10000 then + m = m * 2 + end + if math.abs(math.abs(c)-math.abs(d)) <= 0x21/0x10000 then + n = n * 2 + end + for i, p in ipairs(sub_points) do + points[#points+1] = { + x=m*(a*p.x/m + c*p.y/m + e), + y=n*(b*p.x/n + d*p.y/n + f), + on_curve=p.on_curve + } + end + local offset = end_points[#end_points] or 0 + for i, e in ipairs(sub_end_points) do + end_points[#end_points+1] = offset + e + end + end + -- TODO: read instructions for composite character + end + return self:pack_outline(points, end_points) +end + +local function load_face(f) + if type(f) == "string" then f = io.open(f, "rb") end + local self = setmetatable({fp=f}, Face) + self:offset() + self:directory() + self:head() + self:maxp() + self:cmap() + if self.dir["hhea"] then + self:hhea() + else + log("no horizontal metrics (hhea+hmtx)") + end + if self.dir["kern"] then + self:kern() + else + self.num_kernings = 0 + log("no kerning table (kern)") + end + return self +end + +return { + utf8to32=utf8to32, + load_face=load_face +}