Skip to content

Commit

Permalink
Implement flag MD_FLAG_SKIPBOM on parser side.
Browse files Browse the repository at this point in the history
Also update md2html to enable this on the parser side rather than on the
renderer side.

Deprecate MD_HTML_FLAG_SKIP_UTF8_BOM. For backward compatibility, md_html()
still accepts it and translates it into the parser flag MD_FLAG_SKIPBOM.
  • Loading branch information
mity committed Feb 25, 2024
1 parent a00df7d commit ca4dd29
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 30 deletions.
8 changes: 2 additions & 6 deletions md2html/md2html.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,8 @@


/* Global options. */
static unsigned parser_flags = 0;
#ifndef MD4C_USE_ASCII
static unsigned renderer_flags = MD_HTML_FLAG_DEBUG | MD_HTML_FLAG_SKIP_UTF8_BOM;
#else
static unsigned renderer_flags = MD_HTML_FLAG_DEBUG;
#endif
static unsigned parser_flags = MD_FLAG_SKIPBOM;
static unsigned renderer_flags = MD_HTML_FLAG_DEBUG;
static int want_fullhtml = 0;
static int want_xhtml = 0;
static int want_stat = 0;
Expand Down
11 changes: 3 additions & 8 deletions src/md4c-html.c
Original file line number Diff line number Diff line change
Expand Up @@ -553,14 +553,9 @@ md_html(const MD_CHAR* input, MD_SIZE input_size,
render.escape_map[i] |= NEED_URL_ESC_FLAG;
}

/* Consider skipping UTF-8 byte order mark (BOM). */
if(renderer_flags & MD_HTML_FLAG_SKIP_UTF8_BOM && sizeof(MD_CHAR) == 1) {
static const MD_CHAR bom[3] = { (char)0xef, (char)0xbb, (char)0xbf };
if(input_size >= sizeof(bom) && memcmp(input, bom, sizeof(bom)) == 0) {
input += sizeof(bom);
input_size -= sizeof(bom);
}
}
/* For compatibility with old apps: translate the deprecated renderer flag
 * MD_HTML_FLAG_SKIP_UTF8_BOM into the parser flag MD_FLAG_SKIPBOM, so that
 * the BOM handling is requested through the parser structure handed to
 * md_parse() below. */
if(renderer_flags & MD_HTML_FLAG_SKIP_UTF8_BOM)
parser.flags |= MD_FLAG_SKIPBOM;

return md_parse(input, input_size, &parser, (void*) &render);
}
Expand Down
2 changes: 1 addition & 1 deletion src/md4c-html.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
/* If set, debug output from md_parse() is sent to stderr. */
#define MD_HTML_FLAG_DEBUG 0x0001
#define MD_HTML_FLAG_VERBATIM_ENTITIES 0x0002
#define MD_HTML_FLAG_SKIP_UTF8_BOM 0x0004
#define MD_HTML_FLAG_SKIP_UTF8_BOM 0x0004 /* Deprecated; use MD_FLAG_SKIPBOM on the parser side in new code. */
#define MD_HTML_FLAG_XHTML 0x0008


Expand Down
15 changes: 15 additions & 0 deletions src/md4c.c
Original file line number Diff line number Diff line change
Expand Up @@ -6468,6 +6468,21 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userd
memcpy(&ctx.parser, parser, parser_size);
memset((uint8_t*)&ctx.parser + parser_size, 0, sizeof(ctx.parser) - parser_size);

/* Optionally skip a leading Unicode byte order mark (BOM).
 *
 * This section is compiled in only when MD4C_USE_UTF8 or MD4C_USE_UTF16 is
 * defined; in any other build MD_FLAG_SKIPBOM is not examined here. */
#if defined MD4C_USE_UTF8 || defined MD4C_USE_UTF16
if(parser->flags & MD_FLAG_SKIPBOM) {
#ifdef MD4C_USE_UTF8
/* The BOM encoded in UTF-8 occupies three MD_CHAR elements. */
static const MD_CHAR bom[3] = { (char)0xef, (char)0xbb, (char)0xbf };
#endif
#ifdef MD4C_USE_UTF16
/* The BOM in UTF-16 is a single MD_CHAR element. */
static const MD_CHAR bom[1] = { (WCHAR)0xfeff };
#endif
/* Only a BOM at the very start of the input is matched. memcmp() is given
 * the size of bom in bytes (sizeof), whereas text and size are adjusted by
 * SIZEOF_ARRAY(bom) -- presumably the element count of the array; the macro
 * is defined elsewhere, confirm there. */
if(size >= SIZEOF_ARRAY(bom) && memcmp(text, bom, sizeof(bom)) == 0) {
text += SIZEOF_ARRAY(bom);
size -= SIZEOF_ARRAY(bom);
}
}
#endif

ctx.text = text;
ctx.size = size;
ctx.userdata = userdata;
Expand Down
32 changes: 17 additions & 15 deletions src/md4c.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,21 +303,23 @@ typedef struct MD_SPAN_WIKILINK {
* By default (when MD_PARSER::flags == 0), we follow CommonMark specification.
* The following flags may allow some extensions or deviations from it.
*/
#define MD_FLAG_COLLAPSEWHITESPACE 0x0001 /* In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' ' */
#define MD_FLAG_PERMISSIVEATXHEADERS 0x0002 /* Do not require space in ATX headers ( ###header ) */
#define MD_FLAG_PERMISSIVEURLAUTOLINKS 0x0004 /* Recognize URLs as autolinks even without '<', '>' */
#define MD_FLAG_PERMISSIVEEMAILAUTOLINKS 0x0008 /* Recognize e-mails as autolinks even without '<', '>' and 'mailto:' */
#define MD_FLAG_NOINDENTEDCODEBLOCKS 0x0010 /* Disable indented code blocks. (Only fenced code works.) */
#define MD_FLAG_NOHTMLBLOCKS 0x0020 /* Disable raw HTML blocks. */
#define MD_FLAG_NOHTMLSPANS 0x0040 /* Disable raw HTML (inline). */
#define MD_FLAG_TABLES 0x0100 /* Enable tables extension. */
#define MD_FLAG_STRIKETHROUGH 0x0200 /* Enable strikethrough extension. */
#define MD_FLAG_PERMISSIVEWWWAUTOLINKS 0x0400 /* Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.') */
#define MD_FLAG_TASKLISTS 0x0800 /* Enable task list extension. */
#define MD_FLAG_LATEXMATHSPANS 0x1000 /* Enable $ and $$ containing LaTeX equations. */
#define MD_FLAG_WIKILINKS 0x2000 /* Enable wiki links extension. */
#define MD_FLAG_UNDERLINE 0x4000 /* Enable underline extension (and disables '_' for normal emphasis). */
#define MD_FLAG_HARD_SOFT_BREAKS 0x8000 /* Force all soft breaks to act as hard breaks. */
#define MD_FLAG_COLLAPSEWHITESPACE 0x00000001 /* In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' ' */
#define MD_FLAG_PERMISSIVEATXHEADERS 0x00000002 /* Do not require space in ATX headers ( ###header ) */
#define MD_FLAG_PERMISSIVEURLAUTOLINKS 0x00000004 /* Recognize URLs as autolinks even without '<', '>' */
#define MD_FLAG_PERMISSIVEEMAILAUTOLINKS 0x00000008 /* Recognize e-mails as autolinks even without '<', '>' and 'mailto:' */
#define MD_FLAG_NOINDENTEDCODEBLOCKS 0x00000010 /* Disable indented code blocks. (Only fenced code works.) */
#define MD_FLAG_NOHTMLBLOCKS 0x00000020 /* Disable raw HTML blocks. */
#define MD_FLAG_NOHTMLSPANS 0x00000040 /* Disable raw HTML (inline). */
#define MD_FLAG_TABLES 0x00000100 /* Enable tables extension. */
#define MD_FLAG_STRIKETHROUGH 0x00000200 /* Enable strikethrough extension. */
#define MD_FLAG_PERMISSIVEWWWAUTOLINKS 0x00000400 /* Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.') */
#define MD_FLAG_TASKLISTS 0x00000800 /* Enable task list extension. */
#define MD_FLAG_LATEXMATHSPANS 0x00001000 /* Enable $ and $$ containing LaTeX equations. */
#define MD_FLAG_WIKILINKS 0x00002000 /* Enable wiki links extension. */
#define MD_FLAG_UNDERLINE 0x00004000 /* Enable underline extension (and disables '_' for normal emphasis). */
#define MD_FLAG_HARD_SOFT_BREAKS 0x00008000 /* Force all soft breaks to act as hard breaks. */
#define MD_FLAG_SKIPBOM 0x00010000 /* Skip Unicode BOM at the very beginning of the input, if present. (Has an effect only in builds with MD4C_USE_UTF8 or MD4C_USE_UTF16.) */


#define MD_FLAG_PERMISSIVEAUTOLINKS (MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS)
#define MD_FLAG_NOHTML (MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS)
Expand Down

0 comments on commit ca4dd29

Please sign in to comment.