libMultiMarkdown7 7.0.0-beta.1
Lightweight markup processor to produce HTML, LaTeX, and more.
Loading...
Searching...
No Matches
libMultiMarkdown.h
Go to the documentation of this file.
1
14
15/*
16
17 MIT License
18
19 Copyright (c) 2024-2026 Fletcher T. Penney
20
21 Permission is hereby granted, free of charge, to any person obtaining a copy
22 of this software and associated documentation files (the "Software"), to deal
23 in the Software without restriction, including without limitation the rights
24 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25 copies of the Software, and to permit persons to whom the Software is
26 furnished to do so, subject to the following conditions:
27
28 The above copyright notice and this permission notice shall be included in all
29 copies or substantial portions of the Software.
30
31 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37 SOFTWARE.
38
39
40
41 MultiMarkdown 7 makes use of:
42
43 uthash for hash tables
44 https://troydhanson.github.io/uthash/
45
46 miniz for zip archive handling
47 https://github.com/richgel999/miniz
48
49 yxml for XML parsing
50 https://dev.yorhel.nl/yxml
51
52 wingetopt for options parsing on Windows
53 https://github.com/alex85k/wingetopt
54
55 base64 for encoding binary data in HTML
56 https://github.com/zhicheng/base64
57
58 re2c is used to generate mmd_token_scanner.c and mmd_line_scanner.c
59*/
60
61
62#ifndef libMultiMarkdown7_H
63#define libMultiMarkdown7_H
64
65#include <stdint.h>
66#include <stdio.h>
67
68
69// Advance declarations
70typedef struct mmd_node mmd_node;
71typedef struct read_ctx read_ctx;
72typedef struct stack stack;
73
74
78void mmd_process_file(FILE * in, FILE * out, uint32_t options, const char * search_path, const char * source_path);
79void mmd_process_filename(const char * fname, FILE * out, uint32_t options, const char * search_path);
80void mmd_process_str(const char * text, FILE * out, uint32_t options, const char * search_path, const char * source_path);
81void mmd_process_str_len(const char * text, size_t in_len, FILE * out, uint32_t options, const char * search_path, const char * source_path);
82
83
86char * mmd_process_file_to_str(FILE * in, size_t * out_len, uint32_t options, const char * search_path, const char * source_path);
87char * mmd_process_filename_to_str(const char * fname, size_t * out_len, uint32_t options, const char * search_path);
88char * mmd_process_str_to_str(const char * text, size_t * out_len, uint32_t options, const char * search_path, const char * source_path);
89char * mmd_process_str_len_to_str(const char * text, size_t in_len, size_t * out_len, uint32_t options, const char * search_path, const char * source_path);
90
91
98mmd_node * mmd_parse_file(FILE * in, read_ctx * c, uint32_t options);
99mmd_node * mmd_parse_filename(const char * filename, read_ctx * c, uint32_t options);
100mmd_node * mmd_parse_str(const char * text, read_ctx * c, uint32_t options);
101mmd_node * mmd_parse_str_len(const char * text, size_t in_len, read_ctx * c, uint32_t options);
102
103
106void mmd_ast_file(FILE * in, FILE * out, uint32_t options);
107void mmd_ast_filename(const char * fname, FILE * out, uint32_t options);
108void mmd_ast_str(const char * text, FILE * out, uint32_t options);
109void mmd_ast_str_len(const char * text, size_t in_len, FILE * out, uint32_t options);
110
111
115void mmd_hash_file(FILE * in, FILE * out, uint32_t options);
116void mmd_hash_filename(const char * fname, FILE * out, uint32_t options);
117void mmd_hash_str(const char * text, FILE * out, uint32_t options);
118void mmd_hash_str_len(const char * text, size_t in_len, FILE * out, uint32_t options);
119
120
124read_ctx * mmd_metadata_filename(const char * fname, uint32_t options);
125read_ctx * mmd_metadata_file(FILE * in, uint32_t options);
126read_ctx * mmd_metadata_str(const char * text, uint32_t options);
127read_ctx * mmd_metadata_str_len(const char * text, size_t in_len, uint32_t options);
128
129
131
133void mmd_node_tree_free(mmd_node * n);
134
139
142
// read_ctx management (new / reset / free). Both read_ctx_new() and
// read_ctx_reset() take the same 32-bit options word used by the mmd_* calls.
144read_ctx * read_ctx_new(uint32_t options);
145void read_ctx_reset(read_ctx * c, uint32_t options);
146void read_ctx_free(read_ctx * c);
147
148void custom_seed_rand(void);
149
150
162
163 FORMAT_DOCX,
164 FORMAT_BEAMER,
165 FORMAT_MEMOIR,
166 FORMAT_FODT,
167 FORMAT_ODT,
168};
169
170
180
181
191
192
194 // First 5 bits are for output_format (32 max)
195 // Next 4 bits are for smart_quote_language (16 max)
196 // Next 4 bits are for language (16 max)
213};
214
215
// Bit masks for the fields packed into the low bits of the options word:
//   bits 0-4  (0x001f)  output format
//   bits 5-8  (0x01e0)  smart quote language
//   bits 9-12 (0x1e00)  language
217#define MMD_OUT_FORMAT_MASK 0x1f
218#define MMD_SMART_QUOTE_MASK 0x01e0
219#define MMD_LANGUAGE_MASK 0x1E00
220
// Field extractors: mask the options word, then shift the field down to bit 0.
// NOTE(review): `x` is not parenthesized in these expansions, so an argument
// such as `a | b` expands to `a | b & MASK`, i.e. `a | (b & MASK)`. Pass a
// single identifier or an already-parenthesized expression.
222#define MMD_OUT_FORMAT_FROM_OPTS(x) ((x & MMD_OUT_FORMAT_MASK) >> 0)
223
225#define MMD_SMART_QUOTE_FROM_OPTS(x) ((x & MMD_SMART_QUOTE_MASK) >> 5)
226
228#define MMD_LANGUAGE_FROM_OPTS(x) ((x & MMD_LANGUAGE_MASK) >> 9)
229
230
232struct mmd_node { // Nodes are used to build the AST during parsing.
233 unsigned char type; // type for this node
234 uint32_t hash; // hash for the node, useful when comparing two parse trees for similar branches
235
236 size_t start; // Starting offset (in bytes) in the source text for this node.
237 size_t len; // Byte length in the source text for this node.
238
239 struct mmd_node * next; // Pointer to next node in the AST.
240 struct mmd_node * child; // Pointer to first child node in the AST.
241 struct mmd_node * tail; // Pointer to last sibling node in the AST.
242
243 struct mmd_node * content; // If node was parsed into span-level content, place it here.
244};
245
246
250
251 size_t c_start;
252 size_t c_len;
253};
254
255typedef struct mmd_line_node mmd_line_node;
256
257
// The top two bits of mmd_node.type select the node class:
//   00 -> line types  (1-63)
//   01 -> block types (64-127)
//   1x -> token types (128-255); only the top bit is tested for tokens.
259#define MMD_TYPE_MASK 0xc0
260#define MMD_TOKEN_MASK 0x80
261
// Class predicates. The argument is cast to mmd_node * before `type` is read.
// NOTE(review): `x` is not parenthesized, so the cast binds only to the first
// operand of a compound argument (e.g. `p + 1` becomes `(mmd_node*)p + 1`).
// Pass a plain pointer expression.
262#define MMD_NODE_IS_LINE(x) ((((mmd_node*)x)->type & MMD_TYPE_MASK) == 0x00)
263#define MMD_NODE_IS_BLOCK(x) ((((mmd_node*)x)->type & MMD_TYPE_MASK) == 0x40)
264#define MMD_NODE_IS_TOKEN(x) ((((mmd_node*)x)->type & MMD_TOKEN_MASK) == 0x80)
265
266
269 // Line types (1-63)
270 LINE_ATX_1 = 1,
271 LINE_ATX_2,
272 LINE_ATX_3,
273 LINE_ATX_4,
274 LINE_ATX_5,
275 LINE_ATX_6,
276 LINE_BACKTICK,
277 LINE_BLOCKQUOTE,
278 LINE_CONTINUATION,
279 LINE_DEF_ABBREVIATION,
280 LINE_DEF_CITATION,
281 LINE_DEF_FOOTNOTE,
282 LINE_DEF_GLOSSARY,
283 LINE_DEF_LINK,
284 LINE_DEFINITION,
285 LINE_EMPTY,
286 LINE_FALLBACK,
287 LINE_FENCE_BACKTICK_3,
288 LINE_FENCE_BACKTICK_4,
289 LINE_FENCE_BACKTICK_5,
290 LINE_FENCE_BACKTICK_START_3,
291 LINE_FENCE_BACKTICK_START_4,
292 LINE_FENCE_BACKTICK_START_5,
293 LINE_HR,
294 LINE_HTML,
295 LINE_HTML_BLOCK,
296 LINE_HTML_BLOCKISH,
297 LINE_INDENTED_SPACE,
298 LINE_INDENTED_TAB,
299 LINE_LIST_BULLETED,
300 LINE_LIST_ENUMERATED,
301 LINE_META,
302 LINE_PLAIN,
303 LINE_SETEXT_1,
304 LINE_SETEXT_2,
305 LINE_START_COMMENT,
306 LINE_STOP_COMMENT,
307 LINE_TABLE,
308 LINE_TABLE_SEPARATOR,
309 LINE_TOC,
310 LINE_YAML,
311
312 CODE_FENCE_LINE = 63, // TODO: Do I really need to use this?
313
314
315 // Block types (64-127)
316 BLOCK_BLOCKQUOTE = 64,
317 BLOCK_CODE_FENCED,
318 BLOCK_CODE_INDENTED,
319 BLOCK_DEF_ABBREVIATION,
320 BLOCK_DEF_CITATION,
321 BLOCK_DEF_FOOTNOTE,
322 BLOCK_DEF_GLOSSARY,
323 BLOCK_DEF_LINK,
324 BLOCK_DEFINITION,
325 BLOCK_DEFLIST,
326 BLOCK_EMPTY,
327 BLOCK_GENERAL,
328 BLOCK_H1,
329 BLOCK_H2,
330 BLOCK_H3,
331 BLOCK_H4,
332 BLOCK_H5,
333 BLOCK_H6,
334 BLOCK_HEADING,
335 BLOCK_HR,
336 BLOCK_HTML,
337 BLOCK_LIST_BULLETED,
338 BLOCK_LIST_BULLETED_LOOSE,
339 BLOCK_LIST_ENUMERATED,
340 BLOCK_LIST_ENUMERATED_LOOSE,
341 BLOCK_LIST_ITEM,
342 BLOCK_LIST_ITEM_TIGHT,
343 BLOCK_META,
344 BLOCK_PARA,
345 BLOCK_SETEXT_1,
346 BLOCK_SETEXT_2,
347 BLOCK_TABLE,
348 BLOCK_TABLE_HEADER,
349 BLOCK_TABLE_SECTION,
350 BLOCK_TABLE_ROW,
351 BLOCK_TABLE_SEPARATOR,
352 BLOCK_TERM,
353 BLOCK_TOC,
354 BLOCK_FIGURE,
355
356
357 // Token types (128-255)
358 TOKEN_EOF = 128,
359 TOKEN_NL,
360 TOKEN_LINEBREAK,
361 TOKEN_TEXT,
362 TOKEN_TEXT_ABBREVIATION,
363 TOKEN_TEXT_GLOSSARY,
364 TOKEN_TEXT_WHITESPACE,
365
366 TOKEN_AMPERSAND,
367 TOKEN_AMPERSAND_LONG,
368 TOKEN_HTML_ENTITY,
369
370 TOKEN_HASH,
371
372 TOKEN_STAR,
373 TOKEN_PLUS,
374 TOKEN_MINUS,
375
376 TEXT_NUMBER_POSS_LIST,
377
378 TOKEN_UL,
379 TOKEN_COLON,
380
381 TOKEN_ATX_MARKER,
382 TOKEN_BLOCKQUOTE_MARKER,
383 TOKEN_DEFLIST_COLON,
384 TOKEN_LIST_MARKER,
385 TOKEN_ABBREVIATION_MARKER,
386 TOKEN_FOOTNOTE_MARKER,
387 TOKEN_GLOSSARY_MARKER,
388 TOKEN_CITATION_MARKER,
389 TOKEN_VARIABLE_MARKER,
390
391 TOKEN_BACKTICK,
392 TOKEN_APOSTROPHE,
393 TOKEN_QUOTE_SINGLE,
394 TOKEN_QUOTE_DOUBLE,
395 TOKEN_QUOTE_DOUBLE_ALT,
396 TOKEN_ELLIPSIS,
397 TOKEN_DASH_M,
398 TOKEN_DASH_N,
399 TOKEN_DASH_N_RANGE,
400
401 TOKEN_PAREN_LEFT,
402 TOKEN_PAREN_RIGHT,
403 TOKEN_BRACKET_LEFT,
404 TOKEN_BRACKET_RIGHT,
405 TOKEN_ANGLE_LEFT,
406 TOKEN_ANGLE_RIGHT,
407 TOKEN_BRACE_LEFT,
408 TOKEN_BRACE_RIGHT,
409
410 TOKEN_PAIR_ANGLE,
411 TOKEN_PAIR_BACKTICK,
412 TOKEN_PAIR_BRACE,
413 TOKEN_PAIR_BRACKET,
414 TOKEN_PAIR_BRACKET_EMPTY,
415 TOKEN_PAIR_BRACKET_NOT_CITED,
416 TOKEN_PAIR_BRACKET_ABBREVIATION,
417 TOKEN_PAIR_BRACKET_FOOTNOTE,
418 TOKEN_PAIR_BRACKET_GLOSSARY,
419 TOKEN_PAIR_BRACKET_CITATION,
420 TOKEN_PAIR_BRACKET_IMAGE,
421 TOKEN_PAIR_BRACKET_LINK,
422 TOKEN_PAIR_BRACKET_VARIABLE,
423 TOKEN_PAIR_PAREN,
424 TOKEN_PAIR_QUOTE_DOUBLE,
425 TOKEN_PAIR_QUOTE_SINGLE,
426 TOKEN_PAIR_STAR,
428 TOKEN_PAIR_UL,
430 TOKEN_SPECIAL_CHARACTER,
431
432 TOKEN_PAIR_EMPH,
433 TOKEN_PAIR_STRONG,
434
435 TOKEN_ESCAPED_CHARACTER,
436 TOKEN_NBSP,
437 TOKEN_PIPE,
438
439 TOKEN_CM_ADD_OPEN,
440 TOKEN_CM_ADD_CLOSE,
441 TOKEN_CM_DEL_OPEN,
442 TOKEN_CM_DEL_CLOSE,
443 TOKEN_CM_SUB_OPEN,
444 TOKEN_CM_SUB_DIV,
445 TOKEN_CM_SUB_CLOSE,
446 TOKEN_CM_COM_OPEN,
447 TOKEN_CM_COM_CLOSE,
448 TOKEN_CM_HI_OPEN,
449 TOKEN_CM_HI_CLOSE,
450
451 TOKEN_PAIR_CM_ADD,
452 TOKEN_PAIR_CM_DEL,
453 TOKEN_PAIR_CM_SUB_DEL,
454 TOKEN_PAIR_CM_SUB_ADD,
455 TOKEN_PAIR_CM_COM,
456 TOKEN_PAIR_CM_HI,
457
458 TOKEN_SUPERSCRIPT,
459 TOKEN_SUBSCRIPT,
460
461 TOKEN_PAIR_SUPERSCRIPT,
462 TOKEN_PAIR_SUBSCRIPT,
463
464 TOKEN_MATH_PAREN_OPEN,
465 TOKEN_MATH_PAREN_CLOSE,
466 TOKEN_MATH_BRACKET_OPEN,
467 TOKEN_MATH_BRACKET_CLOSE,
468 TOKEN_MATH_DOLLAR_SINGLE,
469 TOKEN_MATH_DOLLAR_DOUBLE,
470
471 TOKEN_PAIR_MATH_PAREN,
472 TOKEN_PAIR_MATH_BRACKET,
473 TOKEN_PAIR_MATH_DOLLAR_SINGLE,
474 TOKEN_PAIR_MATH_DOLLAR_DOUBLE,
475
476 TOKEN_TABLE_CELL,
477 TOKEN_TABLE_DIVIDER,
478
479 TOKEN_MANUAL_LABEL,
480
481 OBJECT_REPLACEMENT_CHARACTER, // This must be the last type
482};
483
484
485void read_ctx_dump_headers(read_ctx * c);
486
487#endif
char * mmd_process_file_to_str(FILE *in, size_t *out_len, uint32_t options, const char *search_path, const char *source_path)
void mmd_process_file(FILE *in, FILE *out, uint32_t options, const char *search_path, const char *source_path)
mmd_options
@ MMD_OPTION_CRITIC_REJECT
Reject all proposed changes.
@ MMD_OPTION_RANDOM_NOTE_ID
Use random footnote id # to avoid collisions.
@ MMD_OPTION_PARSE_OPML
Convert from OPML to MMD text before parsing.
@ MMD_OPTION_EMBED_ASSETS
Embed assets (images, CSS) within the output file itself (e.g. HTML).
@ MMD_OPTION_TRANSCLUDE
Enable file transclusion.
@ MMD_OPTION_MMD_HEADER
Enable use of mmd header and mmd footer metadata.
@ MMD_OPTION_COMPATIBILITY
Limit functionality to core Markdown features.
@ MMD_OPTION_STORE_ASSETS
Store assets (images, CSS) within archive file formats.
@ MMD_OPTION_DOWNLOAD_ASSETS
Attempt to download assets from the internet for storage.
@ MMD_OPTION_COMPLETE
Force creation of complete document.
@ MMD_OPTION_BLOCKS_ONLY
Process block-level tokens only; do not parse inside the blocks.
@ MMD_OPTION_RANDOM_HEADER_ID
Use random header id # to avoid collisions.
@ MMD_OPTION_STATS
Display performance stats on stderr.
@ MMD_OPTION_SNIPPET
Force creation of snippet instead of complete document.
@ MMD_OPTION_CRITIC_ACCEPT
Accept all proposed changes.
@ MMD_OPTION_PARSE_ITMZ
Convert from ITMZ to MMD text before parsing.
void mmd_node_free(mmd_node *n)
Utility functions.
void mmd_ast_file(FILE *in, FILE *out, uint32_t options)
void mmd_hash_file(FILE *in, FILE *out, uint32_t options)
mmd_node * mmd_parse_file(FILE *in, read_ctx *c, uint32_t options)
void mmd_node_tree_describe_hash(mmd_node *n, FILE *out)
Print node tree hash values to designated file stream.
read_ctx * read_ctx_new(uint32_t options)
read_ctx management
@ LANGUAGE_SV
Swedish language markup.
@ LANGUAGE_DE
German language markup.
@ LANGUAGE_NL
Dutch language markup.
@ LANGUAGE_ES
Spanish language markup.
@ LANGUAGE_EN
English language markup.
@ LANGUAGE_HE
Hebrew language markup.
@ LANGUAGE_FR
French language markup.
uint32_t mmd_hash_node(mmd_node *n)
Calculate hash value for an individual node (and its children).
node_types
AST node types.
@ TOKEN_PAIR_STAR_USED
Must immediately follow TOKEN_PAIR_STAR.
@ TOKEN_PAIR_UL_USED
Must immediately follow TOKEN_PAIR_UL.
uint32_t mmd_hash_node_tree(mmd_node *n)
Calculate hash values for AST (and return overall hash value)
smart_quote_language
@ QUOTES_DUTCH
Dutch smart quotes.
@ QUOTES_ENGLISH
English smart quotes.
@ QUOTES_GERMAN_GUILLEMETS
German guillemets smart quotes.
@ QUOTES_FRENCH
French smart quotes.
@ QUOTES_GERMAN
German smart quotes.
@ QUOTES_SWEDISH
Swedish smart quotes.
@ QUOTES_SPANISH
Spanish smart quotes.
output_format
@ FORMAT_AST
Display the AST for informational/debugging purposes.
@ FORMAT_LATEX
LaTeX to generate PDF.
@ FORMAT_OPML
Outline Processor Markup Language for outliners or mind-mapping programs.
@ FORMAT_MMD
Raw MultiMarkdown source text.
@ FORMAT_TEXTBUNDLE
TextBundle is a package file format for macOS/iOS.
@ FORMAT_HTML
Plain HTML.
@ FORMAT_EPUB
EPUB v3.
@ FORMAT_HASH
Display the AST with hash values for informational/debugging purposes.
@ FORMAT_TEXTPACK
Compressed variant of the TextBundle file format.
@ FORMAT_ITMZ
iThoughts Mind Mapping document.
read_ctx * mmd_metadata_filename(const char *fname, uint32_t options)
Line nodes are used specifically for parsing individual lines of text into the block structure.
mmd_node general
mmd_line_node starts with regular mmd_node
size_t c_start
Starting offset (in bytes) for line content (excluding line level markup)
size_t c_len
Byte length for content of the line (excluding line level markup)
Nodes are used to build the AST during parsing.
size_t start
Starting offset (in bytes) in the source text for this node.
struct mmd_node * content
If node was parsed into span-level content, place it here.
struct mmd_node * next
Pointer to next node in the AST.
struct mmd_node * tail
Pointer to last sibling node in the AST.
size_t len
Byte length in the source text for this node.
unsigned char type
type for this node
struct mmd_node * child
Pointer to first child node in the AST.
uint32_t hash
hash for the node, useful when comparing two parse trees for similar branches