cmark

My personal build of CMark ✏️

wrapper.rkt (8896B)

  1 #lang racket/base
  2 
  3 ;; requires racket >= 5.3 because of submodules
  4 
  5 ;; Lowlevel interface
  6 
  7 (module low-level racket/base
  8 
  9   (require ffi/unsafe ffi/unsafe/define)
 10 
 11   (provide (all-defined-out))
 12 
 13   (define-ffi-definer defcmark (ffi-lib "libcmark"))
 14 
 15   (define _cmark_node_type
 16     (_enum '(;; Error status
 17              none
 18              ;; Block
 19              document block-quote list item code-block
 20              html-block custom-block
 21              paragraph heading thematic-break
 22              ;; ?? first-block = document
 23              ;; ?? last-block = thematic-break
 24              ;; Inline
 25              text softbreak linebreak code html-inline custom-inline
 26              emph strong link image
 27              ;; ?? first-inline = text
 28              ;; ?? last-inline = image
 29              )))
 30   (define _cmark_list_type
 31     (_enum '(no_list bullet_list ordered_list)))
 32   (define _cmark_delim_type
 33     (_enum '(no_delim period_delim paren_delim)))
 34   (define _cmark_opts
 35     (let ([opts '([sourcepos  1] ; include sourcepos attribute on block elements
 36                   [hardbreaks 2] ; render `softbreak` elements as hard line breaks
 37                   [safe       3] ; defined here for API compatibility (on by default)
 38                   [unsafe    17] ; render raw HTML and unsafe links
 39                   [nobreaks   4] ; render `softbreak` elements as spaces
 40                   [normalize  8] ; legacy (no effect)
 41                   [validate-utf8 9] ; validate UTF-8 in the input
 42                   [smart     10] ; straight quotes to curly, ---/-- to em/en dashes
 43                   )])
 44       (_bitmask (apply append (map (λ(o) `(,(car o) = ,(expt 2 (cadr o))))
 45                                    opts)))))
 46 
 47   (define-cpointer-type _node)
 48 
 49   (defcmark cmark_markdown_to_html
 50     (_fun [bs : _bytes] [_int = (bytes-length bs)] _cmark_opts
 51           -> [r : _bytes] -> (begin0 (bytes->string/utf-8 r) (free r))))
 52 
 53   (defcmark cmark_parse_document
 54     (_fun [bs : _bytes] [_int = (bytes-length bs)] _cmark_opts
 55           -> _node))
 56 
 57   (defcmark cmark_render_html
 58     (_fun _node _cmark_opts
 59           -> [r : _bytes] -> (begin0 (bytes->string/utf-8 r) (free r))))
 60 
 61   (defcmark cmark_node_new              (_fun _cmark_node_type -> _node))
 62   (defcmark cmark_node_free             (_fun _node -> _void))
 63 
 64   (defcmark cmark_node_next             (_fun _node -> _node/null))
 65   (defcmark cmark_node_previous         (_fun _node -> _node/null))
 66   (defcmark cmark_node_parent           (_fun _node -> _node/null))
 67   (defcmark cmark_node_first_child      (_fun _node -> _node/null))
 68   (defcmark cmark_node_last_child       (_fun _node -> _node/null))
 69 
 70   (defcmark cmark_node_get_user_data    (_fun _node -> _racket))
 71   (defcmark cmark_node_set_user_data    (_fun _node _racket -> _bool))
 72   (defcmark cmark_node_get_type         (_fun _node -> _cmark_node_type))
 73   (defcmark cmark_node_get_type_string  (_fun _node -> _bytes))
 74   (defcmark cmark_node_get_literal      (_fun _node -> _string))
 75   (defcmark cmark_node_set_literal      (_fun _node _string -> _bool))
 76   (defcmark cmark_node_get_heading_level (_fun _node -> _int))
 77   (defcmark cmark_node_set_heading_level (_fun _node _int -> _bool))
 78   (defcmark cmark_node_get_list_type    (_fun _node -> _cmark_list_type))
 79   (defcmark cmark_node_set_list_type    (_fun _node _cmark_list_type -> _bool))
 80   (defcmark cmark_node_get_list_delim   (_fun _node -> _cmark_delim_type))
 81   (defcmark cmark_node_set_list_delim   (_fun _node _cmark_delim_type -> _bool))
 82   (defcmark cmark_node_get_list_start   (_fun _node -> _int))
 83   (defcmark cmark_node_set_list_start   (_fun _node _int -> _bool))
 84   (defcmark cmark_node_get_list_tight   (_fun _node -> _bool))
 85   (defcmark cmark_node_set_list_tight   (_fun _node _bool -> _bool))
 86   (defcmark cmark_node_get_fence_info   (_fun _node -> _string))
 87   (defcmark cmark_node_set_fence_info   (_fun _node _string -> _bool))
 88   (defcmark cmark_node_get_url          (_fun _node -> _string))
 89   (defcmark cmark_node_set_url          (_fun _node _string -> _bool))
 90   (defcmark cmark_node_get_title        (_fun _node -> _string))
 91   (defcmark cmark_node_set_title        (_fun _node _string -> _bool))
 92   (defcmark cmark_node_get_start_line   (_fun _node -> _int))
 93   (defcmark cmark_node_get_start_column (_fun _node -> _int))
 94   (defcmark cmark_node_get_end_line     (_fun _node -> _int))
 95   (defcmark cmark_node_get_end_column   (_fun _node -> _int))
 96 
 97   (defcmark cmark_node_unlink           (_fun _node -> _void))
 98   (defcmark cmark_node_insert_before    (_fun _node _node -> _bool))
 99   (defcmark cmark_node_insert_after     (_fun _node _node -> _bool))
100   (defcmark cmark_node_prepend_child    (_fun _node _node -> _bool))
101   (defcmark cmark_node_append_child     (_fun _node _node -> _bool))
102   (defcmark cmark_consolidate_text_nodes (_fun _node -> _void))
103 
104   (defcmark cmark_version               (_fun -> _int))
105   (defcmark cmark_version_string        (_fun -> _string))
106 
107   )
108 
109 ;; Rackety interface
110 
111 (module high-level racket/base
112 
113   (require (submod ".." low-level) ffi/unsafe)
114 
115   (provide cmark-markdown-to-html)
116   (define (cmark-markdown-to-html str [options '(normalize smart)])
117     (cmark_markdown_to_html (if (bytes? str) str (string->bytes/utf-8 str))
118                             options))
119 
120   (require (for-syntax racket/base racket/syntax))
121   (define-syntax (make-getter+setter stx)
122     (syntax-case stx ()
123       [(_ name) (with-syntax ([(getter setter)
124                                (map (λ(op) (format-id #'name "cmark_node_~a_~a"
125                                                       op #'name))
126                                     '(get set))])
127                   #'(cons getter setter))]))
128   (define-syntax-rule (define-getters+setters name [type field ...] ...)
129     (define name (list (list 'type (make-getter+setter field) ...) ...)))
130   (define-getters+setters getters+setters
131     [heading heading_level] [code-block fence_info]
132     [link url title] [image url title]
133     [list list_type list_delim list_start list_tight])
134 
135   (provide cmark->sexpr)
136   (define (cmark->sexpr node)
137     (define text (cmark_node_get_literal node))
138     (define type (cmark_node_get_type node))
139     (define children
140       (let loop ([node (cmark_node_first_child node)])
141         (if (not node) '()
142             (cons (cmark->sexpr node) (loop (cmark_node_next node))))))
143     (define info
144       (cond [(assq type getters+setters)
145              => (λ(gss) (map (λ(gs) ((car gs) node)) (cdr gss)))]
146             [else '()]))
147     (define (assert-no what-not b)
148       (when b (error 'cmark->sexpr "unexpected ~a in ~s" what-not type)))
149     (cond [(memq type '(document paragraph heading block-quote list item
150                         emph strong link image))
151            (assert-no 'text text)
152            (list type info children)]
153           [(memq type '(text code code-block html-block html-inline
154                         softbreak linebreak thematic-break))
155            (assert-no 'children (pair? children))
156            (list type info text)]
157           [else (error 'cmark->sexpr "unknown type: ~s" type)]))
158 
159   (provide sexpr->cmark)
160   (define (sexpr->cmark sexpr) ; assumes valid input, as generated by the above
161     (define (loop sexpr)
162       (define type (car sexpr))
163       (define info (cadr sexpr))
164       (define data (caddr sexpr))
165       (define node (cmark_node_new type))
166       (let ([gss (assq type getters+setters)])
167         (when gss
168           (unless (= (length (cdr gss)) (length info))
169             (error 'sexpr->cmark "bad number of info values in ~s" sexpr))
170           (for-each (λ(gs x) ((cdr gs) node x)) (cdr gss) info)))
171       (cond [(string? data) (cmark_node_set_literal node data)]
172             [(not data) (void)]
173             [(list? data)
174              (for ([child (in-list data)])
175                (cmark_node_append_child node (sexpr->cmark child)))]
176             [else (error 'sexpr->cmark "bad data in ~s" sexpr)])
177       node)
178     (define root (loop sexpr))
179     (register-finalizer root cmark_node_free)
180     root)
181 
182   ;; Registers a `cmark_node_free` finalizer
183   (provide cmark-parse-document)
184   (define (cmark-parse-document str [options '(normalize smart)])
185     (define root (cmark_parse_document
186                   (if (bytes? str) str (string->bytes/utf-8 str))
187                   options))
188     (register-finalizer root cmark_node_free)
189     root)
190 
191   (provide cmark-render-html)
192   (define (cmark-render-html root [options '(normalize smart)])
193     (cmark_render_html root options)))
194 
195 #; ;; sample use
196 (begin
197   (require 'high-level racket/string)
198   (cmark-render-html
199    (cmark-parse-document
200     (string-join '("foo"
201                    "==="
202                    ""
203                    "> blah"
204                    ">"
205                    "> blah *blah* `bar()` blah:"
206                    ">"
207                    ">     function foo() {"
208                    ">       bar();"
209                    ">     }")
210                  "\n"))))