libMultiMarkdown7 7.0.0-alpha.1
Lightweight markup processor to produce HTML, LaTeX, and more.
Loading...
Searching...
No Matches
libMultiMarkdown.h
Go to the documentation of this file.
1
14
15/*
16
17 MIT License
18
19 Copyright (c) 2024-2025 Fletcher T. Penney
20
21 Permission is hereby granted, free of charge, to any person obtaining a copy
22 of this software and associated documentation files (the "Software"), to deal
23 in the Software without restriction, including without limitation the rights
24 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25 copies of the Software, and to permit persons to whom the Software is
26 furnished to do so, subject to the following conditions:
27
28 The above copyright notice and this permission notice shall be included in all
29 copies or substantial portions of the Software.
30
31 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37 SOFTWARE.
38
39*/
40
41
42#ifndef libMultiMarkdown7_H
43#define libMultiMarkdown7_H
44
45#include <stdint.h>
46#include <stdio.h>
47
48
49// Advance declarations
50typedef struct mmd_node mmd_node;
51typedef struct read_ctx read_ctx;
52typedef struct stack stack;
53
54
// Process input and send the result to the `out` stream. The four variants
// differ only in how the input is supplied: an open FILE, a file name, a
// string, or a string with an explicit length (`in_len`).
// NOTE(review): semantics are inferred from the signatures only. `options`
// presumably packs the output format, smart quote language, language and
// MMD_OPTION_* flags (see the *_FROM_OPTS macros); `search_path` and
// `source_path` presumably control where transcluded files are looked up --
// confirm against the implementation.
58void mmd_process_file(FILE * in, FILE * out, uint32_t options, const char * search_path, const char * source_path);
59void mmd_process_filename(const char * fname, FILE * out, uint32_t options, const char * search_path);
60void mmd_process_str(const char * text, FILE * out, uint32_t options, const char * search_path, const char * source_path);
61void mmd_process_str_len(const char * text, size_t in_len, FILE * out, uint32_t options, const char * search_path, const char * source_path);
62
63
// Same input variants as the mmd_process_* functions, but each returns a
// `char *` and takes an `out_len` pointer instead of writing to a stream.
// NOTE(review): presumably the return value is a newly allocated buffer that
// the caller must free, and `*out_len` receives its byte length -- ownership
// and NULL handling for `out_len` are not visible here; confirm.
66char * mmd_process_file_to_str(FILE * in, size_t * out_len, uint32_t options, const char * search_path, const char * source_path);
67char * mmd_process_filename_to_str(const char * fname, size_t * out_len, uint32_t options, const char * search_path);
68char * mmd_process_str_to_str(const char * text, size_t * out_len, uint32_t options, const char * search_path, const char * source_path);
69char * mmd_process_str_len_to_str(const char * text, size_t in_len, size_t * out_len, uint32_t options, const char * search_path, const char * source_path);
70
71
// Parse input using the read_ctx `c` and return an `mmd_node *`. Input is
// supplied as an open FILE, a file name, a string, or a string with an
// explicit length (`in_len`).
// NOTE(review): presumably the returned node is the root of the AST and is
// released with mmd_node_tree_free(); whether `c` may be NULL is not visible
// here -- confirm.
78mmd_node * mmd_parse_file(FILE * in, read_ctx * c, uint32_t options);
79mmd_node * mmd_parse_filename(const char * filename, read_ctx * c, uint32_t options);
80mmd_node * mmd_parse_str(const char * text, read_ctx * c, uint32_t options);
81mmd_node * mmd_parse_str_len(const char * text, size_t in_len, read_ctx * c, uint32_t options);
82
83
// AST output variants: take input as an open FILE, a file name, a string, or
// a string with an explicit length, and write to the `out` stream.
// NOTE(review): presumably these parse the input and print a description of
// the resulting AST to `out` -- the output format is not visible here; confirm.
87void mmd_ast_file(FILE * in, FILE * out, uint32_t options);
88void mmd_ast_filename(const char * fname, FILE * out, uint32_t options);
89void mmd_ast_str(const char * text, FILE * out, uint32_t options);
90void mmd_ast_str_len(const char * text, size_t in_len, FILE * out, uint32_t options);
91
92
// Metadata variants: take input as a file name, an open FILE, a string, or a
// string with an explicit length, and return a `read_ctx *`.
// NOTE(review): presumably the returned context holds the document metadata
// and must be released with read_ctx_free() -- confirm.
96read_ctx * mmd_metadata_filename(const char * fname, uint32_t options);
97read_ctx * mmd_metadata_file(FILE * in, uint32_t options);
98read_ctx * mmd_metadata_str(const char * text, uint32_t options);
99read_ctx * mmd_metadata_str_len(const char * text, size_t in_len, uint32_t options);
100
101
103
// Utility function taking the node `n`.
// NOTE(review): presumably frees `n` together with every node reachable from
// it (next/child/content) -- confirm, including whether NULL is accepted.
105void mmd_node_tree_free(mmd_node * n);
106
111
114
// read_ctx management: create a context for the given options, reset an
// existing context with new options, and free a context.
// NOTE(review): presumably every context from read_ctx_new() must be
// released with read_ctx_free() -- confirm.
116read_ctx * read_ctx_new(uint32_t options);
117void read_ctx_reset(read_ctx * c, uint32_t options);
118void read_ctx_free(read_ctx * c);
119
// NOTE(review): presumably seeds the random number generator used for the
// MMD_OPTION_RANDOM_NOTE_ID / MMD_OPTION_RANDOM_HEADER_ID ids -- confirm.
120void custom_seed_rand(void);
121
122
125 FORMAT_EPUB,
127 FORMAT_BEAMER,
128 FORMAT_MEMOIR,
129 FORMAT_FODT,
130 FORMAT_ODT,
131 FORMAT_TEXTBUNDLE,
132 FORMAT_TEXTBUNDLE_COMPRESSED,
133 FORMAT_OPML,
134 FORMAT_ITMZ,
136 FORMAT_HTML_WITH_ASSETS,
137};
138
139
149
150
160
161
163 // First 5 bits are for output_format (32 max)
164 // Next 4 bits are for smart_quote_language (16 max)
165 // Next 4 bits are for language (16 max)
175};
176
177
// Bit masks for the fields packed into the low bits of the options word:
// bits 0-4 hold the output_format, bits 5-8 the smart_quote_language and
// bits 9-12 the language.
#define MMD_OUT_FORMAT_MASK 0x1f
#define MMD_SMART_QUOTE_MASK 0x01e0
#define MMD_LANGUAGE_MASK 0x1E00

// Extract the output_format value from an options word.
// The argument is parenthesized so that expressions such as `flags | fmt`
// are masked as a whole rather than only their last operand.
#define MMD_OUT_FORMAT_FROM_OPTS(x) (((x) & MMD_OUT_FORMAT_MASK) >> 0)

// Extract the smart_quote_language value from an options word.
#define MMD_SMART_QUOTE_FROM_OPTS(x) (((x) & MMD_SMART_QUOTE_MASK) >> 5)

// Extract the language value from an options word.
#define MMD_LANGUAGE_FROM_OPTS(x) (((x) & MMD_LANGUAGE_MASK) >> 9)
191
192
// Nodes are used to build the AST during parsing.
194struct mmd_node {
 // Type for this node: line types are 1-63, block types 64-127 and
 // token types 128-255.
195 unsigned char type;
 // Hash for the node, useful when comparing two parse trees for similar
 // branches.
196 uint32_t hash;
197
 // Starting offset (in bytes) in the source text for this node.
198 size_t start;
 // Byte length in the source text for this node.
199 size_t len;
200
 // Pointer to next node in the AST.
201 struct mmd_node * next;
 // Pointer to first child node in the AST.
202 struct mmd_node * child;
 // Pointer to last sibling node in the AST.
203 struct mmd_node * tail;
204
 // If the node was parsed into span-level content, it is placed here.
205 struct mmd_node * content;
206};
207
208
212
213 size_t c_start;
214 size_t c_len;
215};
216
217typedef struct mmd_line_node mmd_line_node;
218
219
// The two high bits of a node's `type` select its class: 00 for line types
// (1-63), 01 for block types (64-127); any value with the top bit set is a
// token type (128-255).
#define MMD_TYPE_MASK 0xc0
#define MMD_TOKEN_MASK 0x80

// Class tests for a pointer to an mmd_node (or to a struct that starts with
// one). The argument is parenthesized before the cast so that pointer
// expressions such as `p + 1` are evaluated in their own type first.
#define MMD_NODE_IS_LINE(x) ((((mmd_node *)(x))->type & MMD_TYPE_MASK) == 0x00)
#define MMD_NODE_IS_BLOCK(x) ((((mmd_node *)(x))->type & MMD_TYPE_MASK) == 0x40)
#define MMD_NODE_IS_TOKEN(x) ((((mmd_node *)(x))->type & MMD_TOKEN_MASK) == 0x80)
227
228
231 // Line types (1-63)
232 LINE_ATX_1 = 1,
233 LINE_ATX_2,
234 LINE_ATX_3,
235 LINE_ATX_4,
236 LINE_ATX_5,
237 LINE_ATX_6,
238 LINE_BACKTICK,
239 LINE_BLOCKQUOTE,
240 LINE_CONTINUATION,
241 LINE_DEF_ABBREVIATION,
242 LINE_DEF_CITATION,
243 LINE_DEF_FOOTNOTE,
244 LINE_DEF_GLOSSARY,
245 LINE_DEF_LINK,
246 LINE_DEFINITION,
247 LINE_EMPTY,
248 LINE_FALLBACK,
249 LINE_FENCE_BACKTICK_3,
250 LINE_FENCE_BACKTICK_4,
251 LINE_FENCE_BACKTICK_5,
252 LINE_FENCE_BACKTICK_START_3,
253 LINE_FENCE_BACKTICK_START_4,
254 LINE_FENCE_BACKTICK_START_5,
255 LINE_HR,
256 LINE_HTML,
257 LINE_HTML_BLOCK,
258 LINE_HTML_BLOCKISH,
259 LINE_HTML_BlOCKISH,
260 LINE_INDENTED_SPACE,
261 LINE_INDENTED_TAB,
262 LINE_LIST_BULLETED,
263 LINE_LIST_ENUMERATED,
264 LINE_META,
265 LINE_PLAIN,
266 LINE_SETEXT_1,
267 LINE_SETEXT_2,
268 LINE_START_COMMENT,
269 LINE_STOP_COMMENT,
270 LINE_TABLE,
271 LINE_TABLE_SEPARATOR,
272 LINE_TOC,
273 LINE_YAML,
274
275 CODE_FENCE_LINE = 63, // TODO: Do I really need to use this?
276
277 // Block types (64-127)
278 BLOCK_BLOCKQUOTE = 64, // 64
279 BLOCK_CODE_FENCED, // 65
280 BLOCK_CODE_INDENTED, // 66
281 BLOCK_DEF_ABBREVIATION, // 67
282 BLOCK_DEF_CITATION, // 68
283 BLOCK_DEF_FOOTNOTE, // 69
284 BLOCK_DEF_GLOSSARY, // 70
285 BLOCK_DEF_LINK, // 71
286 BLOCK_DEFINITION, // 72
287 BLOCK_DEFLIST, // 73
288 BLOCK_EMPTY, // 74
289 BLOCK_GENERAL, // 75
290 BLOCK_H1,
291 BLOCK_H2,
292 BLOCK_H3,
293 BLOCK_H4,
294 BLOCK_H5,
295 BLOCK_H6, // 81
296 BLOCK_HEADING, // 82
297 BLOCK_HR, // 83
298 BLOCK_HTML, // 84
299 BLOCK_LIST_BULLETED, // 85
300 BLOCK_LIST_BULLETED_LOOSE, // 86
301 BLOCK_LIST_ENUMERATED, // 87
302 BLOCK_LIST_ENUMERATED_LOOSE, // 88
303 BLOCK_LIST_ITEM, // 89
304 BLOCK_LIST_ITEM_TIGHT, // 90
305 BLOCK_META, // 91
306 BLOCK_PARA, // 92
307 BLOCK_SETEXT_1, // 93
308 BLOCK_SETEXT_2, // 94
309 BLOCK_TABLE, // 95
310 BLOCK_TABLE_HEADER, // 96
311 BLOCK_TABLE_SECTION, // 97
312 BLOCK_TERM, // 98
313 BLOCK_TOC, // 99
314 BLOCK_FIGURE, // 100
315
316
317 // Token types (128-255)
318 TOKEN_EOF = 128,
319 TOKEN_NL,
320 TOKEN_LINEBREAK,
321 TOKEN_TEXT,
322 TOKEN_TEXT_ABBREVIATION,
323 TOKEN_TEXT_GLOSSARY,
324 TOKEN_TEXT_WHITESPACE,
325
326 TOKEN_AMPERSAND,
327 TOKEN_AMPERSAND_LONG,
328 TOKEN_HTML_ENTITY,
329
330 TOKEN_HASH,
331
332 TOKEN_STAR,
333 TOKEN_PLUS,
334 TOKEN_MINUS,
335
336 TEXT_NUMBER_POSS_LIST,
337
338 TOKEN_UL,
339 TOKEN_COLON,
340
341 TOKEN_ATX_MARKER,
342 TOKEN_BLOCKQUOTE_MARKER,
343 TOKEN_DEFLIST_COLON,
344 TOKEN_LIST_MARKER,
345 TOKEN_ABBREVIATION_MARKER,
346 TOKEN_FOOTNOTE_MARKER,
347 TOKEN_GLOSSARY_MARKER,
348 TOKEN_CITATION_MARKER,
349 TOKEN_VARIABLE_MARKER,
350
351 TOKEN_BACKTICK,
352 TOKEN_APOSTROPHE,
353 TOKEN_QUOTE_SINGLE,
354 TOKEN_QUOTE_DOUBLE,
355 TOKEN_QUOTE_DOUBLE_ALT,
356 TOKEN_ELLIPSIS,
357 TOKEN_DASH_M,
358 TOKEN_DASH_N,
359 TOKEN_DASH_N_RANGE,
360
361 TOKEN_PAREN_LEFT,
362 TOKEN_PAREN_RIGHT,
363 TOKEN_BRACKET_LEFT,
364 TOKEN_BRACKET_RIGHT,
365 TOKEN_ANGLE_LEFT,
366 TOKEN_ANGLE_RIGHT,
367 TOKEN_BRACE_LEFT,
368 TOKEN_BRACE_RIGHT,
369
370 TOKEN_PAIR_ANGLE,
371 TOKEN_PAIR_BACKTICK,
372 TOKEN_PAIR_BRACE,
373 TOKEN_PAIR_BRACKET,
374 TOKEN_PAIR_BRACKET_EMPTY,
375 TOKEN_PAIR_BRACKET_NOT_CITED,
376 TOKEN_PAIR_BRACKET_ABBREVIATION,
377 TOKEN_PAIR_BRACKET_FOOTNOTE,
378 TOKEN_PAIR_BRACKET_GLOSSARY,
379 TOKEN_PAIR_BRACKET_CITATION,
380 TOKEN_PAIR_BRACKET_IMAGE,
381 TOKEN_PAIR_BRACKET_LINK,
382 TOKEN_PAIR_BRACKET_VARIABLE,
383 TOKEN_PAIR_PAREN,
384 TOKEN_PAIR_QUOTE_DOUBLE,
385 TOKEN_PAIR_QUOTE_SINGLE,
386 TOKEN_PAIR_STAR,
387 TOKEN_PAIR_STAR_USED, // Must immediately follow TOKEN_PAIR_STAR
388 TOKEN_PAIR_UL,
389 TOKEN_PAIR_UL_USED, // Must immediately follow TOKEN_PAIR_UL
390 TOKEN_SPECIAL_CHARACTER,
391
392 TOKEN_PAIR_EMPH,
393 TOKEN_PAIR_STRONG,
394
395 TOKEN_ESCAPED_CHARACTER,
396 TOKEN_NBSP,
397 TOKEN_PIPE,
398
399 TOKEN_CM_ADD_OPEN,
400 TOKEN_CM_ADD_CLOSE,
401 TOKEN_CM_DEL_OPEN,
402 TOKEN_CM_DEL_CLOSE,
403 TOKEN_CM_SUB_OPEN,
404 TOKEN_CM_SUB_DIV,
405 TOKEN_CM_SUB_CLOSE,
406 TOKEN_CM_COM_OPEN,
407 TOKEN_CM_COM_CLOSE,
408 TOKEN_CM_HI_OPEN,
409 TOKEN_CM_HI_CLOSE,
410
411 TOKEN_PAIR_CM_ADD,
412 TOKEN_PAIR_CM_DEL,
413 TOKEN_PAIR_CM_SUB_DEL,
414 TOKEN_PAIR_CM_SUB_ADD,
415 TOKEN_PAIR_CM_COM,
416 TOKEN_PAIR_CM_HI,
417
418 TOKEN_SUPERSCRIPT,
419 TOKEN_SUBSCRIPT,
420
421 TOKEN_MATH_PAREN_OPEN,
422 TOKEN_MATH_PAREN_CLOSE,
423 TOKEN_MATH_BRACKET_OPEN,
424 TOKEN_MATH_BRACKET_CLOSE,
425 TOKEN_MATH_DOLLAR_SINGLE,
426 TOKEN_MATH_DOLLAR_DOUBLE,
427
428 TOKEN_PAIR_MATH_PAREN,
429 TOKEN_PAIR_MATH_BRACKET,
430 TOKEN_PAIR_MATH_DOLLAR_SINGLE,
431 TOKEN_PAIR_MATH_DOLLAR_DOUBLE,
432
433 TOKEN_TABLE_CELL,
434 TOKEN_TABLE_DIVIDER,
435
436 TOKEN_MANUAL_LABEL,
437
438 OBJECT_REPLACEMENT_CHARACTER,
439};
440
441
442#endif
char * mmd_process_file_to_str(FILE *in, size_t *out_len, uint32_t options, const char *search_path, const char *source_path)
void mmd_process_file(FILE *in, FILE *out, uint32_t options, const char *search_path, const char *source_path)
mmd_options
@ MMD_OPTION_CRITIC_REJECT
Reject all proposed changes.
@ MMD_OPTION_RANDOM_NOTE_ID
Use random footnote id # to avoid collisions.
@ MMD_OPTION_TRANSCLUDE
Enable file transclusion.
@ MMD_OPTION_MMD_HEADER
Enable use of mmd header and mmd footer metadata.
@ MMD_OPTION_COMPATIBILITY
Limit functionality to core Markdown features.
@ MMD_OPTION_BLOCKS_ONLY
Process block-level tokens only; do not parse inside the blocks.
@ MMD_OPTION_RANDOM_HEADER_ID
Use random header id # to avoid collisions.
@ MMD_OPTION_STATS
Display performance stats on stderr.
@ MMD_OPTION_CRITIC_ACCEPT
Accept all proposed changes.
void mmd_node_free(mmd_node *n)
Utility functions.
void mmd_ast_file(FILE *in, FILE *out, uint32_t options)
mmd_node * mmd_parse_file(FILE *in, read_ctx *c, uint32_t options)
void mmd_node_tree_describe_hash(mmd_node *n, FILE *out)
Print node tree hash values to designated file stream.
read_ctx * read_ctx_new(uint32_t options)
read_ctx management
@ LANGUAGE_SV
Swedish language markup.
@ LANGUAGE_DE
German language markup.
@ LANGUAGE_NL
Dutch language markup.
@ LANGUAGE_ES
Spanish language markup.
@ LANGUAGE_EN
English language markup.
@ LANGUAGE_HE
Hebrew language markup.
@ LANGUAGE_FR
French language markup.
uint32_t mmd_hash_node(mmd_node *n)
Calculate hash value for individual node (and its children)
node_types
AST node types.
uint32_t mmd_hash_node_tree(mmd_node *n)
Calculate hash values for AST (and return overall hash value)
smart_quote_language
@ QUOTES_DUTCH
Dutch smart quotes.
@ QUOTES_ENGLISH
English smart quotes.
@ QUOTES_GERMAN_GUILLEMETS
German guillemets smart quotes.
@ QUOTES_FRENCH
French smart quotes.
@ QUOTES_GERMAN
German smart quotes.
@ QUOTES_SWEDISH
Swedish smart quotes.
@ QUOTES_SPANISH
Spanish smart quotes.
output_format
@ FORMAT_LATEX
LaTeX to generate PDF.
@ FORMAT_MMD
Raw MultiMarkdown source text.
@ FORMAT_HTML
Plain HTML.
read_ctx * mmd_metadata_filename(const char *fname, uint32_t options)
Line nodes are used specifically for parsing individual lines of text into the block structure.
mmd_node general
mmd_line_node starts with regular mmd_node
size_t c_start
Starting offset (in bytes) for line content (excluding line level markup)
size_t c_len
Byte length for content of the line (excluding line level markup)
Nodes are used to build the AST during parsing.
size_t start
Starting offset (in bytes) in the source text for this node.
struct mmd_node * content
If node was parsed into span-level content, place it here.
struct mmd_node * next
Pointer to next node in the AST.
struct mmd_node * tail
Pointer to last sibling node in the AST.
size_t len
Byte length in the source text for this node.
unsigned char type
type for this node
struct mmd_node * child
Pointer to first child node in the AST.
uint32_t hash
hash for the node, useful when comparing two parse trees for similar branches