#!/usr/bin/python3 # -*- coding: utf-8 -*- # Parse open-type font file and convert to `clm` format file import sys from functools import reduce, partial import fontforge import xml.dom.minidom as dom import tempfile import struct def chain(*fs): def _chain(f, g): return lambda x: g(f(x)) return reduce(_chain, fs, lambda x: x) def fmap(f, it): return [j for i in it for j in f(i)] def do(f, x): f(x) return x def do_loop(f, xs): for x in xs: f(x) return xs def find_first(xs, predicate): for x in xs: if predicate(x): return x return None def read_math_consts(font): m = font.math return [ m.ScriptPercentScaleDown, m.ScriptScriptPercentScaleDown, m.DelimitedSubFormulaMinHeight, m.DisplayOperatorMinHeight, m.MathLeading, m.AxisHeight, m.AccentBaseHeight, m.FlattenedAccentBaseHeight, m.SubscriptShiftDown, m.SubscriptTopMax, m.SubscriptBaselineDropMin, m.SuperscriptShiftUp, m.SuperscriptShiftUpCramped, m.SuperscriptBottomMin, m.SuperscriptBaselineDropMax, m.SubSuperscriptGapMin, m.SuperscriptBottomMaxWithSubscript, m.SpaceAfterScript, m.UpperLimitGapMin, m.UpperLimitBaselineRiseMin, m.LowerLimitGapMin, m.LowerLimitBaselineDropMin, m.StackTopShiftUp, m.StackTopDisplayStyleShiftUp, m.StackBottomShiftDown, m.StackBottomDisplayStyleShiftDown, m.StackGapMin, m.StackDisplayStyleGapMin, m.StretchStackTopShiftUp, m.StretchStackBottomShiftDown, m.StretchStackGapAboveMin, m.StretchStackGapBelowMin, m.FractionNumeratorShiftUp, m.FractionNumeratorDisplayStyleShiftUp, m.FractionDenominatorShiftDown, m.FractionDenominatorDisplayStyleShiftDown, m.FractionNumeratorGapMin, m.FractionNumeratorDisplayStyleGapMin, m.FractionRuleThickness, m.FractionDenominatorGapMin, m.FractionDenominatorDisplayStyleGapMin, m.SkewedFractionHorizontalGap, m.SkewedFractionVerticalGap, m.OverbarVerticalGap, m.OverbarRuleThickness, m.OverbarExtraAscender, m.UnderbarVerticalGap, m.UnderbarRuleThickness, m.UnderbarExtraDescender, m.RadicalVerticalGap, m.RadicalDisplayStyleVerticalGap, m.RadicalRuleThickness, m.RadicalExtraAscender, m.RadicalKernBeforeDegree, m.RadicalKernAfterDegree, m.RadicalDegreeBottomRaisePercent, m.MinConnectorOverlap ] def read_metrics(glyph): ''' Return a tuple to represents metrics, in (width, height, depth) order ''' # [xmin, ymin, xmax, ymax] # 0 is the baseline bounding_box = glyph.boundingBox() return ( # width glyph.width, # height = max(0, ymax) max(0, bounding_box[3]), # depth = -min(0, ymin) -min(0, bounding_box[1]) ) def read_variants(get_variants): ''' Return an array of glyph names to represents variants ''' variants = get_variants() if not variants: return [] return variants.split(' ') def read_scripts(glyph, subtable_names): ''' Return an array of glyph names to represents scripts variants ''' scripts = chain( partial(fmap, lambda subtable_name: glyph.getPosSub(subtable_name)), partial(fmap, lambda infos: infos[2:]) )(subtable_names) return list(scripts) def read_glyph_assembly(get_assembly): ''' Return a tuple ( italics_correction, ( (glyph_name, flag, start_connector_length, end_connector_length, full_advance), ... ) ) ''' return get_assembly() def read_math_kern_record(glyph): ''' Return a tuple contains 4 elements in clockwise direction, None means not represent ( ((correction_height, kerning), ...), None, ... ) ''' return ( glyph.mathKern.topLeft, glyph.mathKern.topRight, glyph.mathKern.bottomLeft, glyph.mathKern.bottomRight, ) def read_math(glyph, scripts_subtable_names): return ( glyph.italicCorrection, # if no top accent attachment, the value == 32767 (undefined math value) glyph.topaccent, read_variants(lambda: glyph.horizontalVariants), read_variants(lambda: glyph.verticalVariants), read_scripts(glyph, scripts_subtable_names), read_glyph_assembly(lambda: ( glyph.horizontalComponentItalicCorrection, glyph.horizontalComponents, )), read_glyph_assembly(lambda: ( glyph.verticalComponentItalicCorrection, glyph.verticalComponents, )), read_math_kern_record(glyph), ) def read_kern(glyph, kern_subtable_names): ''' Return array of tuples represents kerning [ (glyph_name, kerning), ... ] ''' ks = [] for sn in kern_subtable_names: kerns = glyph.getPosSub(sn) for k in kerns: if k[5] != 0: ks.append((k[2], k[5],)) return ks def path_cmd_arg_cnt(x): if x == 'M' or x == 'm' or x == 'L' or x == 'l' or x == 'T' or x == 't': return 2 if x == 'H' or x == 'h' or x == 'V' or x == 'v': return 1 if x == 'C' or x == 'c': return 6 if x == 'S' or x == 's' or x == 'Q' or x == 'q': return 4 if x == 'Z' or x == 'z': return -1 return 0 def is_valid_digit(c): return c == '.' or c == '-' or c.isdigit() def mirror_cmd(t): x = t[0] if x == 'M' or x == 'm' or x == 'L' or x == 'l' or x == 'T' or x == 't': t[2] = -t[2] elif x == 'V' or x == 'v': t[1] = -t[1] elif x == 'C' or x == 'c': t[2] = -t[2] t[4] = -t[4] t[6] = -t[6] elif x == 'S' or x == 's' or x == 'Q' or x == 'q': t[2] = -t[2] t[4] = -t[4] def mirror(data): for t in data: mirror_cmd(t) def read_glyph_path(glyph): tf = tempfile.NamedTemporaryFile(suffix='.svg', mode='w+') # keep font coordinate-system, thus increase y from bottom to up glyph.export(tf.name, usetransform=True) string = tf.read() tf.close() svg = dom.parseString(string).documentElement g = svg.getElementsByTagName('g') paths = g[0].getElementsByTagName('path') path = None if paths: path = paths[0] if not path: return [] v = path.attributes.get('d').value data = [] def digits(i): j = i while is_valid_digit(v[j]): j += 1 return (v[i:j], j,) i = 0 while i < len(v): x = v[i] cnt = path_cmd_arg_cnt(x) if not cnt: i += 1 continue i += 1 cmd = [x] for j in range(0, cnt): while not is_valid_digit(v[i]): i += 1 r = digits(i) cmd.append(int(r[0])) i = r[1] data.append(cmd) mirror(data) return data def read_glyph(glyph, is_math_font, parse_path, kern_subtable_names, scripts_subtable_names): ''' Return a tuple ( metrics, kerning, math, path_cmds, glyph_name ) ''' return ( read_metrics(glyph), read_kern(glyph, kern_subtable_names), None if not is_math_font else read_math(glyph, scripts_subtable_names), None if not parse_path else read_glyph_path(glyph), glyph.glyphname, ) def _read_lookup_subtables( font, is_target_lookup=lambda x: True, is_target_subtable=lambda x: True): ''' Return a function to get the subtable names ''' def _is_target_lookup(lookup_name): lookup_info = font.getLookupInfo(lookup_name) return lookup_info and is_target_lookup(lookup_info) return chain( partial(filter, _is_target_lookup), partial(fmap, lambda lookup_name: font.getLookupSubtables(lookup_name)), partial(filter, is_target_subtable) ) def read_scripts_subtables(font): ''' Return array of scripts variants subtable name The feature tag is 'ssty' ''' subtables = _read_lookup_subtables( font, lambda info: info[0] == 'gsub_alternate' and any( t[0] == 'ssty' for t in info[2]) )(font.gsub_lookups) return list(subtables) def read_kern_subtables(font): ''' Return array of kerning subtable name ''' tables = [] lookups = font.gpos_lookups for lookup_name in lookups: lookup_info = font.getLookupInfo(lookup_name) if lookup_info and lookup_info[0] == 'gpos_pair': subtable_names = font.getLookupSubtables(lookup_name) for sn in subtable_names: if not font.isKerningClass(sn): tables.append(sn) return tables def read_ligature_subtables(font): ''' Return array of ligture subtable name ''' tables = [] lookups = font.gsub_lookups for lookup_name in lookups: lookup_info = font.getLookupInfo(lookup_name) if lookup_info and lookup_info[0] == 'gsub_ligature': subtable_names = font.getLookupSubtables(lookup_name) for sn in subtable_names: tables.append(sn) return tables def read_ligatures(glyph, liga_subtable_names): ''' Return array of ligatures info represents by this glyph, the glyph id is the ligature [ ((glyph_name, glyph_name, ...), glyph_id), ... ] ''' ligs = [] for sn in liga_subtable_names: liga_infos = glyph.getPosSub(sn) for info in liga_infos: ligs.append((info[2:], glyph.originalgid,)) return ligs def read_kerning_class(font): ''' Return array of kerning class table: ( # glyphs on left ( None, # first is None (glyph_name, glyph_name, ...), ... ), # glyphs on right ( None, # first is None (glyph_name, glyph_name, ...), ... ), # kerning value ( kerning_value, ... ) ) ''' kerns = [] lookups = font.gpos_lookups for ln in lookups: li = font.getLookupInfo(ln) if li and li[0] == 'gpos_pair': subtables = font.getLookupSubtables(ln) for sn in subtables: if font.isKerningClass(sn): kerns.append(font.getKerningClass(sn)) return kerns def write_clm_unicode_glyph_map(f, unicode_glyph_map): length = len(unicode_glyph_map) f.write(struct.pack('!H', length)) sort_map = sorted(unicode_glyph_map, key=lambda x: x[0]) for (codepoint, glyph_id,) in sort_map: f.write(struct.pack('!IH', codepoint, glyph_id)) def write_clm_kerning_class(f, kerning_classes, glyph_name_id_map): length = len(list(kerning_classes)) f.write(struct.pack('!H', length)) def write_group(g): """ row format: (index, (glyph_name, glyph_name, ...)) convert to: [{index, glyph_id}, ...] """ # write the group length f.write(struct.pack('!H', len(g))) m = [] for (index, glyph_names,) in g: for name in glyph_names: m.append({'index': index, 'glyph_id': glyph_name_id_map[name]}) # sort the array by glyph id m = sorted(m, key=lambda item: item['glyph_id']) # write the count of the glyphs f.write(struct.pack('!H', len(m))) # write glyph id and its index in this group for item in m: f.write(struct.pack('!HH', item['glyph_id'], item['index'])) for (left, right, value,) in kerning_classes: write_group(list(enumerate(left[1:]))) write_group(list(enumerate(right[1:]))) column_count = len(right) for i, v in enumerate(value): if i < column_count or i % column_count == 0: continue f.write(struct.pack('!h', v)) def write_ligas(f, ligas, glyph_name_id_map): forest = [] def add_node(glyphs, liga): children = forest child = None for (glyph, char,) in glyphs: child = find_first(children, lambda x: x['glyph'] == glyph) if not child: child = {'glyph': glyph, 'char': char, 'liga': -1, 'children': []} children.append(child) children = child['children'] child['liga'] = liga for (chars, liga,) in ligas: add_node( map(lambda char: (glyph_name_id_map[char], char,), chars), liga) def sort_children(children): for child in children: child['children'] = sort_children(child['children']) return sorted(children, key=lambda x: x['glyph']) forest = sort_children(forest) def write_node(node): f.write(struct.pack('!H', node['glyph'])) f.write(struct.pack('!i', node['liga'])) f.write(struct.pack('!H', len(node['children']))) for child in node['children']: write_node(child) root = {'glyph': 0, 'char': 0, 'liga': -1, 'children': forest} write_node(root) def write_math_consts(f, consts): for v in consts: f.write(struct.pack('!h', v)) def write_glyphs(f, glyphs, glyph_name_id_map, is_math_font, have_glyph_path): f.write(struct.pack('!H', len(glyphs))) def write_metrics(metrics): for v in metrics: f.write(struct.pack('!h', int(v))) def write_kerns(g, kerns): if not kerns: f.write(struct.pack('!H', 0)) return f.write(struct.pack('!H', len(list(kerns)))) sorted_kerns = chain( partial(map, lambda x: (glyph_name_id_map[x[0]], x[1],)), partial(sorted, key=lambda x: x[0]) )(kerns) for (glyph, value,) in sorted_kerns: f.write(struct.pack('!Hh', glyph, value)) def write_variants(variants): length = 0 if not variants else len(variants) f.write(struct.pack('!H', length)) if length == 0: return for v in variants: gid = glyph_name_id_map[v] f.write(struct.pack('!H', gid)) def write_glyph_assembly(assembly): if not assembly or not assembly[1]: f.write(struct.pack('?', False)) return f.write(struct.pack('?', True)) f.write(struct.pack('!H', len(assembly[1]))) f.write(struct.pack('!h', assembly[0])) # italics correction for part in assembly[1]: f.write(struct.pack('!H', glyph_name_id_map[part[0]])) for v in part[1:]: f.write(struct.pack('!H', v)) def write_math_kern(math_kerns): for i in math_kerns: if not i: f.write(struct.pack('!H', 0)) continue f.write(struct.pack('!H', len(i))) for (correction_height, value,) in i: f.write(struct.pack('!hh', correction_height, value)) def write_math(math): f.write(struct.pack('!h', math[0])) # italics correction f.write(struct.pack('!h', math[1])) # topaccent attachment write_variants(math[2]) # horizontal variants write_variants(math[3]) # vertical variants write_variants(math[4]) # scripts variants write_glyph_assembly(math[5]) # horizontal assembly write_glyph_assembly(math[6]) # vertical assembly write_math_kern(math[7]) # math kern def write_path(path): f.write(struct.pack('!H', len(path))) if not path: return for cmd in path: f.write(struct.pack('c', bytes(cmd[0], 'ascii'))) for param in cmd[1:]: f.write(struct.pack('!h', param)) for glyph in glyphs: write_metrics(glyph[0]) write_kerns(glyph[4], glyph[1]) if is_math_font: write_math(glyph[2]) if have_glyph_path: write_path(glyph[3]) def parse_fontstyle(font, userstyle): NONE = 0b00000000 RM = 0b00000001 BF = 0b00000010 IT = 0b00000100 CAL = 0b00001000 FRAK = 0b00010000 BB = 0b00100000 SF = 0b01000000 TT = 0b10000000 style = NONE if userstyle is None or userstyle == "": if font.os2_weight >= 700: style |= BF if font.os2_stylemap == 0x40: style |= RM if font.os2_stylemap == 0x21: style |= (BF | IT) if font.os2_stylemap == 0x20: style |= BF if font.os2_stylemap == 0x01: style |= IT if font.macstyle != -1: if font.macstyle & 0b01 == 0b01: style |= BF if font.macstyle & 0b10 == 0b10: style |= IT if font.fullname.upper().__contains__("MONO"): style |= TT else: userstyles = userstyle.split(",") if "rm" in userstyles: style |= RM if "bf" in userstyles: style |= BF if "it" in userstyles: style |= IT if "cal" in userstyles: style |= CAL if "frak" in userstyles: style |= FRAK if "bb" in userstyles: style |= BB if "sf" in userstyles: style |= SF if "tt" in userstyles: style |= TT return style def parse_otf(file_path, have_glyph_path, output_file_path, userstyle=""): print("parsing font " + file_path + ", please wait...") font = fontforge.open(file_path) is_math_font = font.math.exists() # read math constants math_consts = [] if is_math_font: math_consts = read_math_consts(font) # read kern subtables kern_subtable_names = read_kern_subtables(font) # read ligature subtables liga_subtable_names = read_ligature_subtables(font) # read kern class tables kern_class_tables = read_kerning_class(font) # read scripts subtables scripts_subtable_names = read_scripts_subtables(font) unicode_glyph_map = [] glyph_name_id_map = {} glyphs = [] ligas = [] # read glyphs in GID order for glyph_name in font: glyph = font[glyph_name] # unicode-glyph map if glyph.unicode != -1: unicode_glyph_map.append((glyph.unicode, glyph.originalgid,)) glyph_name_id_map[glyph_name] = glyph.originalgid # glyph info glyphs.append(read_glyph( glyph, is_math_font, have_glyph_path, kern_subtable_names, scripts_subtable_names )) # read ligature liga_info = read_ligatures(glyph, liga_subtable_names) for l in liga_info: ligas.append(l) name = font.fullname family = font.familyname style = parse_fontstyle(font, userstyle) em = font.em xheight = font.xHeight ascent = font.ascent descent = font.descent with open(output_file_path, 'wb') as f: f.write(struct.pack('c', bytes('c', 'ascii'))) f.write(struct.pack('c', bytes('l', 'ascii'))) f.write(struct.pack('c', bytes('m', 'ascii'))) # current major version 5 f.write(struct.pack('!H', 5)) # minor version, if support glyph path f.write(struct.pack('B', 2 if have_glyph_path else 1)) f.write(struct.pack(str(len(name) + 1) + 's', bytes(name, 'utf-8'))) f.write(struct.pack(str(len(family) + 1) + 's', bytes(family, 'utf-8'))) f.write(struct.pack('?', is_math_font)) f.write(struct.pack('!H', style)) f.write(struct.pack('!H', em)) f.write(struct.pack('!H', int(xheight))) f.write(struct.pack('!H', ascent)) f.write(struct.pack('!H', descent)) write_clm_unicode_glyph_map(f, unicode_glyph_map) write_clm_kerning_class(f, kern_class_tables, glyph_name_id_map) write_ligas(f, ligas, glyph_name_id_map) if is_math_font: write_math_consts(f, math_consts) write_glyphs(f, glyphs, glyph_name_id_map, is_math_font, have_glyph_path) font.close() def batch_parse(input_dir, have_glyph_path, output_dir): import os fs = os.listdir(input_dir) if not os.path.isdir(output_dir): os.makedirs(output_dir) for f in fs: if f.endswith('.otf'): name = os.path.basename(f)[0:-4] save_name = name + ".clm2" if have_glyph_path else name + ".clm1" save_path = os.path.join(output_dir, save_name) input_file = os.path.join(input_dir, f) parse_otf(input_file, have_glyph_path, save_path) print("The generated clm data files were saved into directory: " + output_dir) def single_parse(input_file, have_glyph_path, output_dir, user_style): import os name = os.path.basename(input_file)[0:-4] save_name = name + ".clm2" if have_glyph_path else name + ".clm1" save_path = os.path.join(output_dir, save_name) parse_otf(input_file, have_glyph_path, save_path, user_style) print("The generated clm data file was saved into directory: " + output_dir); usage = """ Generate clm data from OTF font file. Usage: with single mode: --single \\ \\ \\ \\ or with batch mode: --batch \\ \\ \\ The batch mode will not run in recursive, allowed font styles are: rm: roman bf: bold it: italic cal: calligraphic frak: fraktur bb: double struck sf: sans-serif tt: type-writer If no font styles are given, trying to read it from font, but only the following styles will be taken into account: rm, bf, it, tt """ def main(): if len(sys.argv) < 4: print(usage) return if sys.argv[1] == '--batch': batch_parse( sys.argv[2], sys.argv[3] == 'true', sys.argv[4] ) else: userstyle = "" if (len(sys.argv) > 5): userstyle = sys.argv[5] single_parse( sys.argv[2], sys.argv[3] == 'true', sys.argv[4], userstyle ) if __name__ == "__main__": main()