cmark

My personal build of CMark ✏️

make_entities_inc.py (957B)

 1 # Creates C data structures for binary lookup table of entities,
 2 # using python's html5 entity data.
 3 # Usage: python3 tools/make_entities_inc.py > src/entities.inc
 4 
 5 import html
 6 
 7 entities5 = html.entities.html5
 8 
 9 # remove keys without semicolons.  For some reason the list
10 # has duplicates of a few things, like auml, one with and one
11 # without a semicolon.
12 entities = sorted([(k[:-1], entities5[k].encode('utf-8')) for k in entities5.keys() if k[-1] == ';'])
13 
14 # Print out the header:
15 print("""/* Autogenerated by tools/make_headers_inc.py */
16 
17 struct cmark_entity_node {
18 	unsigned char *entity;
19         unsigned char bytes[8];
20 };
21 
22 #define CMARK_ENTITY_MIN_LENGTH 2
23 #define CMARK_ENTITY_MAX_LENGTH 32""")
24 
25 print("#define CMARK_NUM_ENTITIES " + str(len(entities)));
26 
27 print("\nstatic const struct cmark_entity_node cmark_entities[] = {");
28 
29 for (ent, bs) in entities:
30   print('{(unsigned char*)"' + ent + '", {' + ', '.join(map(str, bs)) + ', 0}},')
31 
32 print("};")