1 /* markdown.c - generic markdown parser */
4 * Copyright (c) 2009, Natacha Porté
5 * Copyright (c) 2011, Vicent Marti
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
29 #define strncasecmp _strnicmp
32 #define REF_TABLE_SIZE 8
34 #define BUFFER_BLOCK 0
37 #define MKD_LI_END 8 /* internal list flag */
39 #define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n)
40 #define GPERF_DOWNCASE 1
41 #define GPERF_CASE_STRNCMP 1
42 #include "html_blocks.h"
48 /* link_ref: reference to a link */
55 struct link_ref *next;
58 /* char_trigger: function pointer to render active chars */
59 /* returns the number of chars taken care of */
60 /* data is the pointer of the beginning of the span */
61 /* offset is the number of valid chars before data */
64 (*char_trigger)(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
66 static size_t char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
67 static size_t char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
68 static size_t char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
69 static size_t char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
70 static size_t char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
71 static size_t char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
72 static size_t char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
73 static size_t char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
74 static size_t char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
75 static size_t char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
76 static size_t char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
78 enum markdown_char_t {
88 MD_CHAR_AUTOLINK_EMAIL,
93 static char_trigger markdown_char_ptrs[] = {
103 &char_autolink_email,
108 /* render • structure containing one particular render */
110 struct sd_callbacks cb;
113 struct link_ref *refs[REF_TABLE_SIZE];
114 uint8_t active_char[256];
115 struct stack work_bufs[2];
116 unsigned int ext_flags;
121 /***************************
123 ***************************/
125 static inline struct buf *
126 rndr_newbuf(struct sd_markdown *rndr, int type)
128 static const size_t buf_size[2] = {256, 64};
129 struct buf *work = NULL;
130 struct stack *pool = &rndr->work_bufs[type];
132 if (pool->size < pool->asize &&
133 pool->item[pool->size] != NULL) {
134 work = pool->item[pool->size++];
137 work = bufnew(buf_size[type]);
138 stack_push(pool, work);
145 rndr_popbuf(struct sd_markdown *rndr, int type)
147 rndr->work_bufs[type].size--;
151 unscape_text(struct buf *ob, struct buf *src)
154 while (i < src->size) {
156 while (i < src->size && src->data[i] != '\\')
160 bufput(ob, src->data + org, i - org);
162 if (i + 1 >= src->size)
165 bufputc(ob, src->data[i + 1]);
171 hash_link_ref(const uint8_t *link_ref, size_t length)
174 unsigned int hash = 0;
176 for (i = 0; i < length; ++i)
177 hash = tolower(link_ref[i]) + (hash << 6) + (hash << 16) - hash;
182 static struct link_ref *
184 struct link_ref **references,
185 const uint8_t *name, size_t name_size)
187 struct link_ref *ref = calloc(1, sizeof(struct link_ref));
192 ref->id = hash_link_ref(name, name_size);
193 ref->next = references[ref->id % REF_TABLE_SIZE];
195 references[ref->id % REF_TABLE_SIZE] = ref;
199 static struct link_ref *
200 find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
202 unsigned int hash = hash_link_ref(name, length);
203 struct link_ref *ref = NULL;
205 ref = references[hash % REF_TABLE_SIZE];
207 while (ref != NULL) {
218 free_link_refs(struct link_ref **references)
222 for (i = 0; i < REF_TABLE_SIZE; ++i) {
223 struct link_ref *r = references[i];
224 struct link_ref *next;
229 bufrelease(r->title);
237 * Check whether a char is a Markdown space.
239 * Right now we only consider spaces the actual
240 * space and a newline: tabs and carriage returns
241 * are filtered out during the preprocessing phase.
243 * If we wanted to actually be UTF-8 compliant, we
244 * should instead extract a Unicode codepoint from
245 * this character and check for space properties.
250 return c == ' ' || c == '\n';
253 /****************************
254 * INLINE PARSING FUNCTIONS *
255 ****************************/
257 /* is_mail_autolink • looks for the address part of a mail autolink and '>' */
258 /* this is less strict than the original markdown e-mail address matching */
260 is_mail_autolink(uint8_t *data, size_t size)
262 size_t i = 0, nb = 0;
264 /* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */
265 for (i = 0; i < size; ++i) {
266 if (isalnum(data[i]))
279 return (nb == 1) ? i + 1 : 0;
289 /* tag_length • returns the length of the given tag, or 0 if it's not valid */
291 tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink)
295 /* a valid tag can't be shorter than 3 chars */
296 if (size < 3) return 0;
298 /* begins with a '<' optionally followed by '/', followed by letter or number */
299 if (data[0] != '<') return 0;
300 i = (data[1] == '/') ? 2 : 1;
302 if (!isalnum(data[i]))
306 *autolink = MKDA_NOT_AUTOLINK;
308 /* try to find the beginning of a URI */
309 while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
312 if (i > 1 && data[i] == '@') {
313 if ((j = is_mail_autolink(data + i, size - i)) != 0) {
314 *autolink = MKDA_EMAIL;
319 if (i > 2 && data[i] == ':') {
320 *autolink = MKDA_NORMAL;
324 /* completing autolink test: no whitespace or ' or " */
326 *autolink = MKDA_NOT_AUTOLINK;
328 else if (*autolink) {
332 if (data[i] == '\\') i += 2;
333 else if (data[i] == '>' || data[i] == '\'' ||
334 data[i] == '"' || data[i] == ' ' || data[i] == '\n')
339 if (i >= size) return 0;
340 if (i > j && data[i] == '>') return i + 1;
341 /* one of the forbidden chars has been found */
342 *autolink = MKDA_NOT_AUTOLINK;
345 /* looking for something that looks like a tag end */
346 while (i < size && data[i] != '>') i++;
347 if (i >= size) return 0;
351 /* parse_inline • parses inline markdown elements */
353 parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
355 size_t i = 0, end = 0;
357 struct buf work = { 0, 0, 0, 0 };
359 if (rndr->work_bufs[BUFFER_SPAN].size +
360 rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
364 /* copying inactive chars into the output */
365 while (end < size && (action = rndr->active_char[data[end]]) == 0) {
369 if (rndr->cb.normal_text) {
370 work.data = data + i;
372 rndr->cb.normal_text(ob, &work, rndr->opaque);
375 bufput(ob, data + i, end - i);
377 if (end >= size) break;
380 end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i, size - i);
381 if (!end) /* no action from the callback */
390 /* find_emph_char • looks for the next emph char, skipping other constructs */
392 find_emph_char(uint8_t *data, size_t size, uint8_t c)
397 while (i < size && data[i] != c && data[i] != '`' && data[i] != '[')
406 /* not counting escaped chars */
407 if (i && data[i - 1] == '\\') {
411 if (data[i] == '`') {
412 size_t span_nb = 0, bt;
415 /* counting the number of opening backticks */
416 while (i < size && data[i] == '`') {
420 if (i >= size) return 0;
422 /* finding the matching closing sequence */
424 while (i < size && bt < span_nb) {
425 if (!tmp_i && data[i] == c) tmp_i = i;
426 if (data[i] == '`') bt++;
431 if (i >= size) return tmp_i;
433 /* skipping a link */
434 else if (data[i] == '[') {
439 while (i < size && data[i] != ']') {
440 if (!tmp_i && data[i] == c) tmp_i = i;
445 while (i < size && (data[i] == ' ' || data[i] == '\n'))
466 while (i < size && data[i] != cc) {
467 if (!tmp_i && data[i] == c) tmp_i = i;
481 /* parse_emph1 • parsing single emphasis */
482 /* closed by a symbol not preceded by whitespace and not followed by symbol */
484 parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
487 struct buf *work = 0;
490 if (!rndr->cb.emphasis) return 0;
492 /* skipping one symbol if coming from emph3 */
493 if (size > 1 && data[0] == c && data[1] == c) i = 1;
496 len = find_emph_char(data + i, size - i, c);
499 if (i >= size) return 0;
501 if (data[i] == c && !_isspace(data[i - 1])) {
503 if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
504 if (i + 1 < size && isalnum(data[i + 1]))
508 work = rndr_newbuf(rndr, BUFFER_SPAN);
509 parse_inline(work, rndr, data, i);
510 r = rndr->cb.emphasis(ob, work, rndr->opaque);
511 rndr_popbuf(rndr, BUFFER_SPAN);
512 return r ? i + 1 : 0;
519 /* parse_emph2 • parsing double emphasis */
521 parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
523 int (*render_method)(struct buf *ob, const struct buf *text, void *opaque);
525 struct buf *work = 0;
528 render_method = (c == '~') ? rndr->cb.strikethrough : rndr->cb.double_emphasis;
534 len = find_emph_char(data + i, size - i, c);
538 if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
539 work = rndr_newbuf(rndr, BUFFER_SPAN);
540 parse_inline(work, rndr, data, i);
541 r = render_method(ob, work, rndr->opaque);
542 rndr_popbuf(rndr, BUFFER_SPAN);
543 return r ? i + 2 : 0;
550 /* parse_emph3 • parsing triple emphasis */
551 /* finds the first closing tag, and delegates to the other emph */
553 parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
559 len = find_emph_char(data + i, size - i, c);
563 /* skip symbols that are preceded by whitespace */
564 if (data[i] != c || _isspace(data[i - 1]))
567 if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) {
568 /* triple symbol found */
569 struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
571 parse_inline(work, rndr, data, i);
572 r = rndr->cb.triple_emphasis(ob, work, rndr->opaque);
573 rndr_popbuf(rndr, BUFFER_SPAN);
574 return r ? i + 3 : 0;
576 } else if (i + 1 < size && data[i + 1] == c) {
577 /* double symbol found, handing over to emph1 */
578 len = parse_emph1(ob, rndr, data - 2, size + 2, c);
583 /* single symbol found, handing over to emph2 */
584 len = parse_emph2(ob, rndr, data - 1, size + 1, c);
592 /* char_emphasis • single and double emphasis parsing */
594 char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
599 if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
600 if (offset > 0 && !_isspace(data[-1]) && data[-1] != '>')
604 if (size > 2 && data[1] != c) {
605 /* whitespace cannot follow an opening emphasis;
606 * strikethrough only takes two characters '~~' */
607 if (c == '~' || _isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
613 if (size > 3 && data[1] == c && data[2] != c) {
614 if (_isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0)
620 if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
621 if (c == '~' || _isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0)
631 /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
633 char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
635 if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
638 /* removing the last space from ob and rendering */
639 while (ob->size && ob->data[ob->size - 1] == ' ')
642 return rndr->cb.linebreak(ob, rndr->opaque) ? 1 : 0;
646 /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
648 char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
650 size_t end, nb = 0, i, f_begin, f_end;
652 /* counting the number of backticks in the delimiter */
653 while (nb < size && data[nb] == '`')
656 /* finding the next delimiter */
658 for (end = nb; end < size && i < nb; end++) {
659 if (data[end] == '`') i++;
663 if (i < nb && end >= size)
664 return 0; /* no matching delimiter */
666 /* trimming leading and trailing spaces */
668 while (f_begin < end && data[f_begin] == ' ')
672 while (f_end > nb && data[f_end-1] == ' ')
676 if (f_begin < f_end) {
677 struct buf work = { data + f_begin, f_end - f_begin, 0, 0 };
678 if (!rndr->cb.codespan(ob, &work, rndr->opaque))
681 if (!rndr->cb.codespan(ob, 0, rndr->opaque))
689 /* char_escape • '\\' backslash escape */
691 char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
693 static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~";
694 struct buf work = { 0, 0, 0, 0 };
697 if (strchr(escape_chars, data[1]) == NULL)
700 if (rndr->cb.normal_text) {
701 work.data = data + 1;
703 rndr->cb.normal_text(ob, &work, rndr->opaque);
705 else bufputc(ob, data[1]);
706 } else if (size == 1) {
707 bufputc(ob, data[0]);
713 /* char_entity • '&' escaped when it doesn't belong to an entity */
714 /* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
716 char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
719 struct buf work = { 0, 0, 0, 0 };
721 if (end < size && data[end] == '#')
724 while (end < size && isalnum(data[end]))
727 if (end < size && data[end] == ';')
728 end++; /* real entity */
730 return 0; /* lone '&' */
732 if (rndr->cb.entity) {
735 rndr->cb.entity(ob, &work, rndr->opaque);
737 else bufput(ob, data, end);
742 /* char_langle_tag • '<' when tags or autolinks are allowed */
744 char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
746 enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
747 size_t end = tag_length(data, size, &altype);
748 struct buf work = { data, end, 0, 0 };
752 if (rndr->cb.autolink && altype != MKDA_NOT_AUTOLINK) {
753 struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
754 work.data = data + 1;
756 unscape_text(u_link, &work);
757 ret = rndr->cb.autolink(ob, u_link, altype, rndr->opaque);
758 rndr_popbuf(rndr, BUFFER_SPAN);
760 else if (rndr->cb.raw_html_tag)
761 ret = rndr->cb.raw_html_tag(ob, &work, rndr->opaque);
769 char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
771 struct buf *link, *link_url, *link_text;
772 size_t link_len, rewind;
774 if (!rndr->cb.link || rndr->in_link_body)
777 link = rndr_newbuf(rndr, BUFFER_SPAN);
779 if ((link_len = sd_autolink__www(&rewind, link, data, offset, size, 0)) > 0) {
780 link_url = rndr_newbuf(rndr, BUFFER_SPAN);
781 BUFPUTSL(link_url, "http://");
782 bufput(link_url, link->data, link->size);
785 if (rndr->cb.normal_text) {
786 link_text = rndr_newbuf(rndr, BUFFER_SPAN);
787 rndr->cb.normal_text(link_text, link, rndr->opaque);
788 rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque);
789 rndr_popbuf(rndr, BUFFER_SPAN);
791 rndr->cb.link(ob, link_url, NULL, link, rndr->opaque);
793 rndr_popbuf(rndr, BUFFER_SPAN);
796 rndr_popbuf(rndr, BUFFER_SPAN);
801 char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
804 size_t link_len, rewind;
806 if (!rndr->cb.autolink || rndr->in_link_body)
809 link = rndr_newbuf(rndr, BUFFER_SPAN);
811 if ((link_len = sd_autolink__email(&rewind, link, data, offset, size, 0)) > 0) {
813 rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque);
816 rndr_popbuf(rndr, BUFFER_SPAN);
821 char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
824 size_t link_len, rewind;
826 if (!rndr->cb.autolink || rndr->in_link_body)
829 link = rndr_newbuf(rndr, BUFFER_SPAN);
831 if ((link_len = sd_autolink__url(&rewind, link, data, offset, size, 0)) > 0) {
833 rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
836 rndr_popbuf(rndr, BUFFER_SPAN);
840 /* char_link • '[': parsing a link or an image */
842 char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
844 int is_img = (offset && data[-1] == '!'), level;
845 size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
846 struct buf *content = 0;
847 struct buf *link = 0;
848 struct buf *title = 0;
849 struct buf *u_link = 0;
850 size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size;
851 int text_has_nl = 0, ret = 0;
852 int in_title = 0, qtype = 0;
854 /* checking whether the correct renderer exists */
855 if ((is_img && !rndr->cb.image) || (!is_img && !rndr->cb.link))
858 /* looking for the matching closing bracket */
859 for (level = 1; i < size; i++) {
863 else if (data[i - 1] == '\\')
866 else if (data[i] == '[')
869 else if (data[i] == ']') {
882 /* skip any amount of whitespace or newline */
883 /* (this is much laxer than the original markdown syntax) */
884 while (i < size && _isspace(data[i]))
887 /* inline style link */
888 if (i < size && data[i] == '(') {
889 /* skipping initial whitespace */
892 while (i < size && _isspace(data[i]))
897 /* looking for link end: ' " ) */
899 if (data[i] == '\\') i += 2;
900 else if (data[i] == ')') break;
901 else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
905 if (i >= size) goto cleanup;
908 /* looking for title end if present */
909 if (data[i] == '\'' || data[i] == '"') {
916 if (data[i] == '\\') i += 2;
917 else if (data[i] == qtype) {in_title = 0; i++;}
918 else if ((data[i] == ')') && !in_title) break;
922 if (i >= size) goto cleanup;
924 /* skipping whitespaces after title */
926 while (title_e > title_b && _isspace(data[title_e]))
929 /* checking for closing quote presence */
930 if (data[title_e] != '\'' && data[title_e] != '"') {
931 title_b = title_e = 0;
936 /* remove whitespace at the end of the link */
937 while (link_e > link_b && _isspace(data[link_e - 1]))
940 /* remove optional angle brackets around the link */
941 if (data[link_b] == '<') link_b++;
942 if (data[link_e - 1] == '>') link_e--;
944 /* building escaped link and title */
945 if (link_e > link_b) {
946 link = rndr_newbuf(rndr, BUFFER_SPAN);
947 bufput(link, data + link_b, link_e - link_b);
950 if (title_e > title_b) {
951 title = rndr_newbuf(rndr, BUFFER_SPAN);
952 bufput(title, data + title_b, title_e - title_b);
958 /* reference style link */
959 else if (i < size && data[i] == '[') {
960 struct buf id = { 0, 0, 0, 0 };
963 /* looking for the id */
966 while (i < size && data[i] != ']') i++;
967 if (i >= size) goto cleanup;
970 /* finding the link_ref */
971 if (link_b == link_e) {
973 struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
976 for (j = 1; j < txt_e; j++) {
979 else if (data[j - 1] != ' ')
990 id.data = data + link_b;
991 id.size = link_e - link_b;
994 lr = find_link_ref(rndr->refs, id.data, id.size);
998 /* keeping link and title from link_ref */
1004 /* shortcut reference style link */
1006 struct buf id = { 0, 0, 0, 0 };
1007 struct link_ref *lr;
1009 /* crafting the id */
1011 struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
1014 for (j = 1; j < txt_e; j++) {
1015 if (data[j] != '\n')
1016 bufputc(b, data[j]);
1017 else if (data[j - 1] != ' ')
1025 id.size = txt_e - 1;
1028 /* finding the link_ref */
1029 lr = find_link_ref(rndr->refs, id.data, id.size);
1033 /* keeping link and title from link_ref */
1037 /* rewinding the whitespace */
1041 /* building content: img alt is escaped, link content is parsed */
1043 content = rndr_newbuf(rndr, BUFFER_SPAN);
1045 bufput(content, data + 1, txt_e - 1);
1047 /* disable autolinking while parsing the inline
1048 * content of a link */
1049 rndr->in_link_body = 1;
1050 parse_inline(content, rndr, data + 1, txt_e - 1);
1051 rndr->in_link_body = 0;
1056 u_link = rndr_newbuf(rndr, BUFFER_SPAN);
1057 unscape_text(u_link, link);
1060 /* calling the relevant rendering function */
1062 if (ob->size && ob->data[ob->size - 1] == '!')
1065 ret = rndr->cb.image(ob, u_link, title, content, rndr->opaque);
1067 ret = rndr->cb.link(ob, u_link, title, content, rndr->opaque);
1072 rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
1077 char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
1079 size_t sup_start, sup_len;
1082 if (!rndr->cb.superscript)
1088 if (data[1] == '(') {
1089 sup_start = sup_len = 2;
1091 while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
1094 if (sup_len == size)
1097 sup_start = sup_len = 1;
1099 while (sup_len < size && !_isspace(data[sup_len]))
1103 if (sup_len - sup_start == 0)
1104 return (sup_start == 2) ? 3 : 0;
1106 sup = rndr_newbuf(rndr, BUFFER_SPAN);
1107 parse_inline(sup, rndr, data + sup_start, sup_len - sup_start);
1108 rndr->cb.superscript(ob, sup, rndr->opaque);
1109 rndr_popbuf(rndr, BUFFER_SPAN);
1111 return (sup_start == 2) ? sup_len + 1 : sup_len;
1114 /*********************************
1115 * BLOCK-LEVEL PARSING FUNCTIONS *
1116 *********************************/
1118 /* is_empty • returns the line length when it is empty, 0 otherwise */
1120 is_empty(uint8_t *data, size_t size)
1124 for (i = 0; i < size && data[i] != '\n'; i++)
1131 /* is_hrule • returns whether a line is a horizontal rule */
1133 is_hrule(uint8_t *data, size_t size)
1135 size_t i = 0, n = 0;
1138 /* skipping initial spaces */
1139 if (size < 3) return 0;
1140 if (data[0] == ' ') { i++;
1141 if (data[1] == ' ') { i++;
1142 if (data[2] == ' ') { i++; } } }
1144 /* looking at the hrule char */
1146 || (data[i] != '*' && data[i] != '-' && data[i] != '_'))
1150 /* the whole line must be the char or whitespace */
1151 while (i < size && data[i] != '\n') {
1152 if (data[i] == c) n++;
1153 else if (data[i] != ' ')
1162 /* check if a line begins with a code fence; return the
1163 * width of the code fence */
1165 prefix_codefence(uint8_t *data, size_t size)
1167 size_t i = 0, n = 0;
1170 /* skipping initial spaces */
1171 if (size < 3) return 0;
1172 if (data[0] == ' ') { i++;
1173 if (data[1] == ' ') { i++;
1174 if (data[2] == ' ') { i++; } } }
1176 /* looking at the code fence char ('~' or '`') */
1177 if (i + 2 >= size || !(data[i] == '~' || data[i] == '`'))
1182 /* scanning the run of consecutive fence chars */
1183 while (i < size && data[i] == c) {
1193 /* check if a line is a code fence; return its size if it is */
1195 is_codefence(uint8_t *data, size_t size, struct buf *syntax)
1197 size_t i = 0, syn_len = 0;
1200 i = prefix_codefence(data, size);
1204 while (i < size && data[i] == ' ')
1207 syn_start = data + i;
1209 if (i < size && data[i] == '{') {
1212 while (i < size && data[i] != '}' && data[i] != '\n') {
1216 if (i == size || data[i] != '}')
1219 /* strip all whitespace at the beginning and the end
1220 * of the {} block */
1221 while (syn_len > 0 && _isspace(syn_start[0])) {
1222 syn_start++; syn_len--;
1225 while (syn_len > 0 && _isspace(syn_start[syn_len - 1]))
1230 while (i < size && !_isspace(data[i])) {
1236 syntax->data = syn_start;
1237 syntax->size = syn_len;
1240 while (i < size && data[i] != '\n') {
1241 if (!_isspace(data[i]))
1250 /* is_atxheader • returns whether the line is a hash-prefixed header */
1252 is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size)
1257 if (rndr->ext_flags & MKDEXT_SPACE_HEADERS) {
1260 while (level < size && level < 6 && data[level] == '#')
1263 if (level < size && data[level] != ' ')
1270 /* is_headerline • returns whether the line is a setext-style hdr underline */
1272 is_headerline(uint8_t *data, size_t size)
1276 /* test of level 1 header */
1277 if (data[i] == '=') {
1278 for (i = 1; i < size && data[i] == '='; i++);
1279 while (i < size && data[i] == ' ') i++;
1280 return (i >= size || data[i] == '\n') ? 1 : 0; }
1282 /* test of level 2 header */
1283 if (data[i] == '-') {
1284 for (i = 1; i < size && data[i] == '-'; i++);
1285 while (i < size && data[i] == ' ') i++;
1286 return (i >= size || data[i] == '\n') ? 2 : 0; }
1292 is_next_headerline(uint8_t *data, size_t size)
1296 while (i < size && data[i] != '\n')
1302 return is_headerline(data + i, size - i);
1305 /* prefix_quote • returns blockquote prefix length */
1307 prefix_quote(uint8_t *data, size_t size)
1310 if (i < size && data[i] == ' ') i++;
1311 if (i < size && data[i] == ' ') i++;
1312 if (i < size && data[i] == ' ') i++;
1314 if (i < size && data[i] == '>') {
1315 if (i + 1 < size && data[i + 1] == ' ')
1324 /* prefix_code • returns prefix length for block code */
1326 prefix_code(uint8_t *data, size_t size)
1328 if (size > 3 && data[0] == ' ' && data[1] == ' '
1329 && data[2] == ' ' && data[3] == ' ') return 4;
1334 /* prefix_oli • returns ordered list item prefix */
1336 prefix_oli(uint8_t *data, size_t size)
1340 if (i < size && data[i] == ' ') i++;
1341 if (i < size && data[i] == ' ') i++;
1342 if (i < size && data[i] == ' ') i++;
1344 if (i >= size || data[i] < '0' || data[i] > '9')
1347 while (i < size && data[i] >= '0' && data[i] <= '9')
1350 if (i + 1 >= size || data[i] != '.' || data[i + 1] != ' ')
1353 if (is_next_headerline(data + i, size - i))
1359 /* prefix_uli • returns unordered list item prefix */
1361 prefix_uli(uint8_t *data, size_t size)
1365 if (i < size && data[i] == ' ') i++;
1366 if (i < size && data[i] == ' ') i++;
1367 if (i < size && data[i] == ' ') i++;
1369 if (i + 1 >= size ||
1370 (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
1374 if (is_next_headerline(data + i, size - i))
1381 /* parse_block • parsing of one block (forward declaration; returns nothing) */
1382 static void parse_block(struct buf *ob, struct sd_markdown *rndr,
1383 uint8_t *data, size_t size);
1386 /* parse_blockquote • handles parsing of a blockquote fragment */
1388 parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1390 size_t beg, end = 0, pre, work_size = 0;
1391 uint8_t *work_data = 0;
1392 struct buf *out = 0;
1394 out = rndr_newbuf(rndr, BUFFER_BLOCK);
1396 while (beg < size) {
1397 for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1399 pre = prefix_quote(data + beg, end - beg);
1402 beg += pre; /* skipping prefix */
1404 /* empty line followed by non-quote line */
1405 else if (is_empty(data + beg, end - beg) &&
1406 (end >= size || (prefix_quote(data + end, size - end) == 0 &&
1407 !is_empty(data + end, size - end))))
1410 if (beg < end) { /* copy into the in-place working buffer */
1411 /* bufput(work, data + beg, end - beg); */
1413 work_data = data + beg;
1414 else if (data + beg != work_data + work_size)
1415 memmove(work_data + work_size, data + beg, end - beg);
1416 work_size += end - beg;
1421 parse_block(out, rndr, work_data, work_size);
1422 if (rndr->cb.blockquote)
1423 rndr->cb.blockquote(ob, out, rndr->opaque);
1424 rndr_popbuf(rndr, BUFFER_BLOCK);
1429 parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render);
1431 /* parse_paragraph • handles parsing of a regular paragraph */
1433 parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1435 size_t i = 0, end = 0;
1437 struct buf work = { data, 0, 0, 0 };
1440 for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
1442 if (is_empty(data + i, size - i))
1445 if ((level = is_headerline(data + i, size - i)) != 0)
1448 if (is_atxheader(rndr, data + i, size - i) ||
1449 is_hrule(data + i, size - i) ||
1450 prefix_quote(data + i, size - i)) {
1456 * Early termination of a paragraph with the same logic
1457 * as Markdown 1.0.0. If this logic is applied, the
1458 * Markdown 1.0.3 test suite won't pass cleanly
1460 * :: If the first character in a new line is not a letter,
1461 * let's check to see if there's some kind of block starting
1464 if ((rndr->ext_flags & MKDEXT_LAX_SPACING) && !isalnum(data[i])) {
1465 if (prefix_oli(data + i, size - i) ||
1466 prefix_uli(data + i, size - i)) {
1471 /* see if an html block starts here */
1472 if (data[i] == '<' && rndr->cb.blockhtml &&
1473 parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
1478 /* see if a code fence starts here */
1479 if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
1480 is_codefence(data + i, size - i, NULL) != 0) {
1490 while (work.size && data[work.size - 1] == '\n')
1494 struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
1495 parse_inline(tmp, rndr, work.data, work.size);
1496 if (rndr->cb.paragraph)
1497 rndr->cb.paragraph(ob, tmp, rndr->opaque);
1498 rndr_popbuf(rndr, BUFFER_BLOCK);
1500 struct buf *header_work;
1507 while (work.size && data[work.size] != '\n')
1510 beg = work.size + 1;
1511 while (work.size && data[work.size - 1] == '\n')
1514 if (work.size > 0) {
1515 struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
1516 parse_inline(tmp, rndr, work.data, work.size);
1518 if (rndr->cb.paragraph)
1519 rndr->cb.paragraph(ob, tmp, rndr->opaque);
1521 rndr_popbuf(rndr, BUFFER_BLOCK);
1523 work.size = i - beg;
1528 header_work = rndr_newbuf(rndr, BUFFER_SPAN);
1529 parse_inline(header_work, rndr, work.data, work.size);
1531 if (rndr->cb.header)
1532 rndr->cb.header(ob, header_work, (int)level, rndr->opaque);
1534 rndr_popbuf(rndr, BUFFER_SPAN);
1540 /* parse_fencedcode • handles parsing of a block-level code fragment */
1542 parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1545 struct buf *work = 0;
1546 struct buf lang = { 0, 0, 0, 0 };
1548 beg = is_codefence(data, size, &lang);
1549 if (beg == 0) return 0;
1551 work = rndr_newbuf(rndr, BUFFER_BLOCK);
1553 while (beg < size) {
1555 struct buf fence_trail = { 0, 0, 0, 0 };
1557 fence_end = is_codefence(data + beg, size - beg, &fence_trail);
1558 if (fence_end != 0 && fence_trail.size == 0) {
1563 for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1566 /* verbatim copy to the working buffer,
1567 escaping entities */
1568 if (is_empty(data + beg, end - beg))
1569 bufputc(work, '\n');
1570 else bufput(work, data + beg, end - beg);
1575 if (work->size && work->data[work->size - 1] != '\n')
1576 bufputc(work, '\n');
1578 if (rndr->cb.blockcode)
1579 rndr->cb.blockcode(ob, work, lang.size ? &lang : NULL, rndr->opaque);
1581 rndr_popbuf(rndr, BUFFER_BLOCK);
1586 parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1588 size_t beg, end, pre;
1589 struct buf *work = 0;
1591 work = rndr_newbuf(rndr, BUFFER_BLOCK);
1594 while (beg < size) {
1595 for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
1596 pre = prefix_code(data + beg, end - beg);
1599 beg += pre; /* skipping prefix */
1600 else if (!is_empty(data + beg, end - beg))
1601 /* non-empty non-prefixed line breaks the pre */
1605 /* verbatim copy to the working buffer,
1606 escaping entities */
1607 if (is_empty(data + beg, end - beg))
1608 bufputc(work, '\n');
1609 else bufput(work, data + beg, end - beg);
1614 while (work->size && work->data[work->size - 1] == '\n')
1617 bufputc(work, '\n');
1619 if (rndr->cb.blockcode)
1620 rndr->cb.blockcode(ob, work, NULL, rndr->opaque);
1622 rndr_popbuf(rndr, BUFFER_BLOCK);
1626 /* parse_listitem • parsing of a single list item */
1627 /* assuming initial prefix is already removed */
/* ob receives the rendered item through the listitem callback.
 * flags is read and updated through the pointer:
 *   MKD_LI_END    is set when the list type switches or when an unindented
 *                 line follows an empty one
 *   MKD_LI_BLOCK  is set when an empty line was recorded inside the item
 *                 (has_inside_empty)
 * NOTE(review): the embedded line numbers in this listing have gaps, so
 * several statements (loop increments, breaks, the return value, closing
 * braces) are not visible here. */
1629 parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags)
1631 struct buf *work = 0, *inter = 0;
1632 size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
1633 int in_empty = 0, has_inside_empty = 0, in_fence = 0;
1635 /* keeping track of the first indentation prefix */
1636 while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
/* beg comes from the unordered- or ordered-list prefix helper; the
 * condition that selects between the two calls is not visible */
1639 beg = prefix_uli(data, size);
1641 beg = prefix_oli(data, size);
1646 /* skipping to the beginning of the following line */
1648 while (end < size && data[end - 1] != '\n')
1651 /* getting working buffers */
/* work holds the raw item text, inter the rendered result passed to the
 * listitem callback */
1652 work = rndr_newbuf(rndr, BUFFER_SPAN);
1653 inter = rndr_newbuf(rndr, BUFFER_SPAN);
1655 /* putting the first line into the working buffer */
1656 bufput(work, data + beg, end - beg);
1659 /* process the following lines */
1660 while (beg < size) {
1661 size_t has_next_uli = 0, has_next_oli = 0;
1665 while (end < size && data[end - 1] != '\n')
1668 /* process an empty line */
1669 if (is_empty(data + beg, end - beg)) {
1675 /* calculating the indentation */
/* i counts leading spaces of the line, at most 4 */
1677 while (i < 4 && beg + i < end && data[beg + i] == ' ')
/* with the fenced-code extension on, every fence line toggles in_fence */
1682 if (rndr->ext_flags & MKDEXT_FENCED_CODE) {
1683 if (is_codefence(data + beg + i, end - beg - i, NULL) != 0)
1684 in_fence = !in_fence;
1687 /* Only check for new list items if we are **not** inside
1688 * a fenced code block */
1690 has_next_uli = prefix_uli(data + beg + i, end - beg - i);
1691 has_next_oli = prefix_oli(data + beg + i, end - beg - i);
1694 /* checking for ul/ol switch */
1696 ((*flags & MKD_LIST_ORDERED) && has_next_uli) ||
1697 (!(*flags & MKD_LIST_ORDERED) && has_next_oli))){
1698 *flags |= MKD_LI_END;
1699 break; /* the following item must have same list type */
1702 /* checking for a new item */
/* a line that also qualifies as a horizontal rule is not counted as an
 * unordered item */
1703 if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
1705 has_inside_empty = 1;
1707 if (pre == orgpre) /* the following item must have */
1708 break; /* the same indentation */
/* sublist records an offset into work; further down, the text before
 * it and the text from it onward are rendered separately */
1711 sublist = work->size;
1713 /* joining only indented stuff after empty lines;
1714 * note that now we only require 1 space of indentation
1715 * to continue a list */
1716 else if (in_empty && pre == 0) {
1717 *flags |= MKD_LI_END;
1720 else if (in_empty) {
1721 bufputc(work, '\n');
1722 has_inside_empty = 1;
1727 /* adding the line without prefix into the working buffer */
1728 bufput(work, data + beg + i, end - beg - i);
1732 /* render of li contents */
1733 if (has_inside_empty)
1734 *flags |= MKD_LI_BLOCK;
1736 if (*flags & MKD_LI_BLOCK) {
1737 /* intermediate render of block li */
1738 if (sublist && sublist < work->size) {
1739 parse_block(inter, rndr, work->data, sublist);
1740 parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1743 parse_block(inter, rndr, work->data, work->size);
1745 /* intermediate render of inline li */
/* in this variant the leading part goes through parse_inline while the
 * part from sublist onward still goes through parse_block */
1746 if (sublist && sublist < work->size) {
1747 parse_inline(inter, rndr, work->data, sublist);
1748 parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1751 parse_inline(inter, rndr, work->data, work->size);
1754 /* render of li itself */
1755 if (rndr->cb.listitem)
1756 rndr->cb.listitem(ob, inter, *flags, rndr->opaque);
/* two BUFFER_SPAN buffers were requested above (work and inter), hence
 * two pops */
1758 rndr_popbuf(rndr, BUFFER_SPAN);
1759 rndr_popbuf(rndr, BUFFER_SPAN);
1764 /* parse_list • parsing ordered or unordered list block */
/* Items are rendered by parse_listitem() into a working buffer, which is
 * then passed to the list callback together with flags.
 * NOTE(review): the embedded line numbers in this listing have gaps; the
 * item loop, the declarations of i and j and the return value are not
 * visible here. */
1766 parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags)
1768 struct buf *work = 0;
1771 work = rndr_newbuf(rndr, BUFFER_BLOCK);
/* flags is passed by address so parse_listitem() can update it */
1774 j = parse_listitem(work, rndr, data + i, size - i, &flags);
/* tested after each item: nothing consumed, or MKD_LI_END raised; the
 * action taken is not visible - presumably it leaves the item loop */
1777 if (!j || (flags & MKD_LI_END))
1782 rndr->cb.list(ob, work, flags, rndr->opaque);
1783 rndr_popbuf(rndr, BUFFER_BLOCK);
1787 /* parse_atxheader • parsing of atx-style headers */
/* The header text is rendered with parse_inline() into a working buffer
 * and passed to the header callback together with the level.
 * NOTE(review): the embedded line numbers in this listing have gaps; the
 * declaration of level, several loop bodies and the return value are not
 * visible here. */
1789 parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1792 size_t i, end, skip;
/* leading '#' characters are examined, at most 6 of them */
1794 while (level < size && level < 6 && data[level] == '#')
/* i skips the spaces after the markers; end stops at the next '\n' */
1797 for (i = level; i < size && data[i] == ' '; i++);
1799 for (end = i; end < size && data[end] != '\n'; end++);
/* trailing '#' and then trailing spaces are examined from the right
 * (loop bodies not visible - presumably end is moved left) */
1802 while (end && data[end - 1] == '#')
1805 while (end && data[end - 1] == ' ')
1809 struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
1811 parse_inline(work, rndr, data + i, end - i);
1813 if (rndr->cb.header)
1814 rndr->cb.header(ob, work, (int)level, rndr->opaque);
1816 rndr_popbuf(rndr, BUFFER_SPAN);
1823 /* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t]*\n */
1824 /* returns the length on match, 0 otherwise */
/* NOTE(review): the line carrying this function's name is not part of the
 * listing; the call in htmlblock_end() suggests it is htmlblock_end_tag,
 * taking (tag, tag_len, rndr, data, size). Other lines are missing too
 * (see the gaps in the embedded numbering). */
1829 struct sd_markdown *rndr,
1835 /* checking if tag is a match */
/* the tag name is compared case-insensitively starting at data + 2 and
 * has to be followed by '>' at data[tag_len + 2], so data is expected to
 * start with the two characters that open a closing tag */
1836 if (tag_len + 3 >= size ||
1837 strncasecmp((char *)data + 2, tag, tag_len) != 0 ||
1838 data[tag_len + 2] != '>')
1841 /* checking white lines */
/* is_empty() is consulted twice, matching the two line ends in the
 * pattern above */
1844 if (i < size && (w = is_empty(data + i, size - i)) == 0)
1845 return 0; /* non-blank after tag */
1850 w = is_empty(data + i, size - i);
/* htmlblock_end - scans data for a closing tag of curtag and returns an
 * offset computed from the scan position and the value returned by
 * htmlblock_end_tag().
 *   curtag         tag name; its length is taken with strlen()
 *   start_of_line  when non-zero, a closing tag found after the first line
 *                  is only considered if it directly follows a newline
 * NOTE(review): the embedded line numbers in this listing have gaps; the
 * remaining parameters, the outer loop and the failure returns are not
 * visible here. The terminator of the multi-line comment further down is
 * among the missing lines as well. */
1856 htmlblock_end(const char *curtag,
1857 struct sd_markdown *rndr,
1862 size_t tag_size = strlen(curtag);
1863 size_t i = 1, end_tag;
1864 int block_lines = 0;
/* advance to the next '<' immediately followed by '/'; newlines met on
 * the way are tested (presumably counted in block_lines) */
1868 while (i < size && !(data[i - 1] == '<' && data[i] == '/')) {
1869 if (data[i] == '\n')
1875 /* If we are only looking for unindented tags, skip the tag
1876 * if it doesn't follow a newline.
1878 * The only exception to this is if the tag is still on the
1879 * initial line; in that case it still counts as a closing
1882 if (start_of_line && block_lines > 0 && data[i - 2] != '\n')
1885 if (i + 2 + tag_size >= size)
1888 end_tag = htmlblock_end_tag(curtag, tag_size, rndr, data + i - 1, size - i + 1);
1890 return i + end_tag - 1;
1897 /* parse_htmlblock • parsing of inline HTML block */
/*   ob         output buffer handed to the blockhtml callback
 *   data/size  input, expected to start with '<'
 *   do_render  when zero the blockhtml callback is never invoked
 * Three shapes are handled: HTML comments, <hr> tags, and block tags
 * recognised by find_block_tag() that have a matching closing tag.
 * NOTE(review): the embedded line numbers in this listing have gaps; the
 * return statements and several loop bodies are not visible here. */
1899 parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render)
1901 size_t i, j = 0, tag_end;
1902 const char *curtag = NULL;
/* work is initialised with data as its first member - presumably it views
 * the input in place rather than copying it; only its size is set later */
1903 struct buf work = { data, 0, 0, 0 };
1905 /* identification of the opening tag */
1906 if (size < 2 || data[0] != '<')
/* the tag name runs up to the first '>' or ' ' */
1910 while (i < size && data[i] != '>' && data[i] != ' ')
1914 curtag = find_block_tag((char *)data + 1, (int)i - 1);
1916 /* handling of special cases */
1919 /* HTML comment, laxist form */
1920 if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
/* search for the "-->" terminator */
1923 while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
1929 j = is_empty(data + i, size - i);
1933 if (do_render && rndr->cb.blockhtml)
1934 rndr->cb.blockhtml(ob, &work, rndr->opaque);
1939 /* HR, which is the only self-closing block tag considered */
1940 if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
1942 while (i < size && data[i] != '>')
1947 j = is_empty(data + i, size - i);
1950 if (do_render && rndr->cb.blockhtml)
1951 rndr->cb.blockhtml(ob, &work, rndr->opaque);
1957 /* no special case recognised */
1961 /* looking for an unindented matching closing tag */
1962 /* followed by a blank line */
1963 tag_end = htmlblock_end(curtag, rndr, data, size, 1);
1965 /* if not found, trying a second pass looking for indented match */
1966 /* but not if tag is "ins" or "del" (following original Markdown.pl) */
1967 if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
1968 tag_end = htmlblock_end(curtag, rndr, data, size, 0);
1974 /* the end of the block has been found */
1975 work.size = tag_end;
1976 if (do_render && rndr->cb.blockhtml)
1977 rndr->cb.blockhtml(ob, &work, rndr->opaque);
/* Table row renderer: splits one row at '|' characters, renders each cell
 * with parse_inline() and emits it through the table_cell callback, then
 * emits the whole row through the table_row callback.
 * Each cell is tagged with col_data[col] | header_flag.
 * NOTE(review): the line carrying the function name and most of the
 * parameter list are not part of this listing (presumably this is
 * parse_table_row); loop bodies are missing too, see the gaps in the
 * embedded numbering. */
1985 struct sd_markdown *rndr,
1993 struct buf *row_work = 0;
/* both callbacks are required; what happens when one is missing is not
 * visible - presumably an early return */
1995 if (!rndr->cb.table_cell || !rndr->cb.table_row)
1998 row_work = rndr_newbuf(rndr, BUFFER_SPAN);
/* a leading '|' is tested for (presumably skipped) */
2000 if (i < size && data[i] == '|')
2003 for (col = 0; col < columns && i < size; ++col) {
2004 size_t cell_start, cell_end;
2005 struct buf *cell_work;
2007 cell_work = rndr_newbuf(rndr, BUFFER_SPAN);
/* whitespace before the cell, the cell text up to the next '|', and
 * whitespace at the end of the cell are scanned in turn */
2009 while (i < size && _isspace(data[i]))
2014 while (i < size && data[i] != '|')
2019 while (cell_end > cell_start && _isspace(data[cell_end]))
/* cell_end is used as an inclusive index here, hence the 1 + */
2022 parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
2023 rndr->cb.table_cell(row_work, cell_work, col_data[col] | header_flag, rndr->opaque);
2025 rndr_popbuf(rndr, BUFFER_SPAN);
/* columns not present in the input row are emitted as empty cells */
2029 for (; col < columns; ++col) {
2030 struct buf empty_cell = { 0, 0, 0, 0 };
2031 rndr->cb.table_cell(row_work, &empty_cell, col_data[col] | header_flag, rndr->opaque);
2034 rndr->cb.table_row(ob, row_work, rndr->opaque);
2036 rndr_popbuf(rndr, BUFFER_SPAN);
/* Table header parser: counts the columns of the header line, allocates
 * the per-column flag array, and reads alignment markers from the
 * underline row.
 *   *columns      set to pipes + 1
 *   *column_data  set to a calloc()ed array of *columns ints; entries get
 *                 MKD_TABLE_ALIGN_L / MKD_TABLE_ALIGN_R or-ed in
 * The visible return value is under_end + 1, i.e. one past the end of
 * the underline row.
 * NOTE(review): the function name line is not part of this listing; the
 * call in the table parser suggests parse_table_header(ob, rndr, data,
 * size, &columns, &col_data). Other lines are missing too (see the gaps in
 * the embedded numbering). */
2042 struct sd_markdown *rndr,
2049 size_t i = 0, col, header_end, under_end;
/* scan the first line, testing each byte for '|' */
2052 while (i < size && data[i] != '\n')
2053 if (data[i++] == '|')
/* no newline found, or no '|' on the line */
2056 if (i == size || pipes == 0)
/* trailing whitespace and a trailing '|' of the header line are examined */
2061 while (header_end > 0 && _isspace(data[header_end - 1]))
2067 if (header_end && data[header_end - 1] == '|')
/* NOTE(review): no NULL check of the calloc() result is visible before
 * (*column_data)[col] is written below - confirm against the full file */
2070 *columns = pipes + 1;
2071 *column_data = calloc(*columns, sizeof(int));
2073 /* Parse the header underline */
2075 if (i < size && data[i] == '|')
2079 while (under_end < size && data[under_end] != '\n')
2082 for (col = 0; col < *columns && i < under_end; ++col) {
2085 while (i < under_end && data[i] == ' ')
/* a ':' before the dashes marks left alignment, one after them right
 * alignment.
 * NOTE(review): this first test reads data[i] without the i < under_end
 * guard used by the neighbouring tests - confirm it cannot read past
 * the row */
2088 if (data[i] == ':') {
2089 i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L;
2093 while (i < under_end && data[i] == '-') {
2097 if (i < under_end && data[i] == ':') {
2098 i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R;
2102 while (i < under_end && data[i] == ' ')
/* anything other than '|' after a column's markers is tested for here;
 * the action taken is not visible */
2105 if (i < under_end && data[i] != '|')
2125 return under_end + 1;
/* Table parser: renders the header through parse_table_header() into
 * header_work, collects body rows in body_work, and passes both to the
 * table callback.
 * NOTE(review): the function name line is not part of this listing; the
 * call in parse_block() suggests parse_table(ob, rndr, data, size). The
 * row loop, the release of col_data and the return value are missing too
 * (see the gaps in the embedded numbering). */
2131 struct sd_markdown *rndr,
2137 struct buf *header_work = 0;
2138 struct buf *body_work = 0;
2141 int *col_data = NULL;
/* the two working buffers use different buffer kinds and are popped
 * separately at the end */
2143 header_work = rndr_newbuf(rndr, BUFFER_SPAN);
2144 body_work = rndr_newbuf(rndr, BUFFER_BLOCK);
/* columns and col_data are filled in by the header parser */
2146 i = parse_table_header(header_work, rndr, data, size, &columns, &col_data);
/* scan one body line, testing each byte for '|' */
2155 while (i < size && data[i] != '\n')
2156 if (data[i++] == '|')
/* a line without '|' or without a terminating newline is singled out;
 * the branch body is not visible */
2159 if (pipes == 0 || i == size) {
2177 rndr->cb.table(ob, header_work, body_work, rndr->opaque);
2181 rndr_popbuf(rndr, BUFFER_SPAN);
2182 rndr_popbuf(rndr, BUFFER_BLOCK);
2186 /* parse_block • parsing of one block, returning next uint8_t to parse */
/* Walks data from beg and tries the block kinds in a fixed order: atx
 * header, HTML block, empty line, horizontal rule, fenced code (extension),
 * table (extension), blockquote, indented code, unordered list, ordered
 * list, and finally paragraph. The first kind that applies consumes input.
 * NOTE(review): the embedded line numbers in this listing have gaps; the
 * declarations, the computation of end and the statements that advance
 * beg by i are not visible here. */
2188 parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
/* nesting guard: the number of working buffers currently in use (span
 * plus block) is compared with max_nesting; the action taken is not
 * visible - presumably an early return */
2194 if (rndr->work_bufs[BUFFER_SPAN].size +
2195 rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
2198 while (beg < size) {
2199 txt_data = data + beg;
2202 if (is_atxheader(rndr, txt_data, end))
2203 beg += parse_atxheader(ob, rndr, txt_data, end);
2205 else if (data[beg] == '<' && rndr->cb.blockhtml &&
2206 (i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0)
2209 else if ((i = is_empty(txt_data, end)) != 0)
2212 else if (is_hrule(txt_data, end)) {
2214 rndr->cb.hrule(ob, rndr->opaque);
2216 while (beg < size && data[beg] != '\n')
2222 else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
2223 (i = parse_fencedcode(ob, rndr, txt_data, end)) != 0)
2226 else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 &&
2227 (i = parse_table(ob, rndr, txt_data, end)) != 0)
2230 else if (prefix_quote(txt_data, end))
2231 beg += parse_blockquote(ob, rndr, txt_data, end);
2233 else if (prefix_code(txt_data, end))
2234 beg += parse_blockcode(ob, rndr, txt_data, end);
2236 else if (prefix_uli(txt_data, end))
2237 beg += parse_list(ob, rndr, txt_data, end, 0);
2239 else if (prefix_oli(txt_data, end))
2240 beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED);
2243 beg += parse_paragraph(ob, rndr, txt_data, end);
2249 /*********************
2250 * REFERENCE PARSING *
2251 *********************/
2253 /* is_ref • returns whether a line is a reference or not */
/* Recognises a link reference definition in data[beg, end): an id in
 * square brackets, a colon, a link, and an optional title.
 *   last  not referenced in the visible lines - presumably receives the
 *         end of the matched definition
 *   refs  table a recognised reference is added to via add_link_ref()
 * Returns 0 as soon as the text stops matching.
 * NOTE(review): the spacer comments below mention tabs, but the visible
 * loops only test for ' ' - confirm whether tabs are meant to be accepted.
 * NOTE(review): the embedded line numbers in this listing have gaps; some
 * declarations, index updates and the final return are not visible. */
2255 is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs)
2259 size_t id_offset, id_end;
2260 size_t link_offset, link_end;
2261 size_t title_offset, title_end;
2264 /* up to 3 optional leading spaces */
2265 if (beg + 3 >= end) return 0;
2266 if (data[beg] == ' ') { i = 1;
2267 if (data[beg + 1] == ' ') { i = 2;
2268 if (data[beg + 2] == ' ') { i = 3;
2269 if (data[beg + 3] == ' ') return 0; } } }
2272 /* id part: anything but a newline between brackets */
2273 if (data[i] != '[') return 0;
2276 while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
2278 if (i >= end || data[i] != ']') return 0;
2281 /* spacer: colon (space | tab)* newline? (space | tab)* */
2283 if (i >= end || data[i] != ':') return 0;
2285 while (i < end && data[i] == ' ') i++;
2286 if (i < end && (data[i] == '\n' || data[i] == '\r')) {
2288 if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
2289 while (i < end && data[i] == ' ') i++;
2290 if (i >= end) return 0;
2292 /* link: whitespace-free sequence, optionally between angle brackets */
2298 while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r')
/* a trailing '>' is left out of the link */
2301 if (data[i - 1] == '>') link_end = i - 1;
2304 /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
2305 while (i < end && data[i] == ' ') i++;
2306 if (i < end && data[i] != '\n' && data[i] != '\r'
2307 && data[i] != '\'' && data[i] != '"' && data[i] != '(')
2310 /* computing end-of-line */
2311 if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
2312 if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
2315 /* optional (space|tab)* spacer after a newline */
2318 while (i < end && data[i] == ' ') i++; }
2320 /* optional title: any non-newline sequence enclosed in '"()
2321 alone on its line */
2322 title_offset = title_end = 0;
2324 && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
2327 /* looking for EOL */
2328 while (i < end && data[i] != '\n' && data[i] != '\r') i++;
2329 if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
/* walk back over trailing spaces, then require a closing delimiter;
 * line_end is replaced by title_end when one is found */
2334 while (i > title_offset && data[i] == ' ')
2336 if (i > title_offset
2337 && (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
2338 line_end = title_end;
2341 if (!line_end || link_end == link_offset)
2342 return 0; /* garbage after the link, or empty link */
2344 /* a valid ref has been found, filling-in return structures */
2349 struct link_ref *ref;
2351 ref = add_link_ref(refs, data + id_offset, id_end - id_offset);
/* link and title are copied into newly created buffers */
2355 ref->link = bufnew(link_end - link_offset);
2356 bufput(ref->link, data + link_offset, link_end - link_offset);
2358 if (title_end > title_offset) {
2359 ref->title = bufnew(title_end - title_offset);
2360 bufput(ref->title, data + title_offset, title_end - title_offset);
/* expand_tabs - copies line (size bytes) into ob, writing runs of non-tab
 * bytes unchanged and emitting space characters where tabs occur.
 * NOTE(review): the embedded line numbers in this listing have gaps; the
 * outer loop, the declaration of org and the condition controlling how
 * many spaces are written per tab are not visible here. */
2367 static void expand_tabs(struct buf *ob, const uint8_t *line, size_t size)
2369 size_t i = 0, tab = 0;
/* advance over a run of bytes that are not tabs */
2374 while (i < size && line[i] != '\t') {
/* copy the run that started at org */
2379 bufput(ob, line + org, i - org);
/* one space per step, with tab incremented alongside */
2385 bufputc(ob, ' '); tab++;
2392 /**********************
2393 * EXPORTED FUNCTIONS *
2394 **********************/
/* Constructor for the parser state: allocates a struct sd_markdown,
 * copies the callbacks into it, initialises the two working-buffer
 * stacks and fills the active-character table.
 *   extensions   MKDEXT_* bit flags, stored in ext_flags
 *   max_nesting  asserted to be > 0, stored in max_nesting
 *   callbacks    asserted non-NULL; copied by value with memcpy()
 *   opaque       stored and later handed to every callback
 * NOTE(review): the line carrying the function name is not part of this
 * listing (presumably sd_markdown_new); the handling of a failed malloc()
 * and the return statement are not visible either. */
2396 struct sd_markdown *
2398 unsigned int extensions,
2400 const struct sd_callbacks *callbacks,
2403 struct sd_markdown *md = NULL;
2405 assert(max_nesting > 0 && callbacks);
2407 md = malloc(sizeof(struct sd_markdown));
2411 memcpy(&md->cb, callbacks, sizeof(struct sd_callbacks));
2413 stack_init(&md->work_bufs[BUFFER_BLOCK], 4);
2414 stack_init(&md->work_bufs[BUFFER_SPAN], 8);
/* active_char has one entry per byte value; a non-zero entry names the
 * handler kind for that character. Entries are set only when the related
 * callback is present or the related extension is enabled. */
2416 memset(md->active_char, 0x0, 256);
2418 if (md->cb.emphasis || md->cb.double_emphasis || md->cb.triple_emphasis) {
2419 md->active_char['*'] = MD_CHAR_EMPHASIS;
2420 md->active_char['_'] = MD_CHAR_EMPHASIS;
2421 if (extensions & MKDEXT_STRIKETHROUGH)
2422 md->active_char['~'] = MD_CHAR_EMPHASIS;
2425 if (md->cb.codespan)
2426 md->active_char['`'] = MD_CHAR_CODESPAN;
2428 if (md->cb.linebreak)
2429 md->active_char['\n'] = MD_CHAR_LINEBREAK;
2431 if (md->cb.image || md->cb.link)
2432 md->active_char['['] = MD_CHAR_LINK;
/* these three are registered unconditionally */
2434 md->active_char['<'] = MD_CHAR_LANGLE;
2435 md->active_char['\\'] = MD_CHAR_ESCAPE;
2436 md->active_char['&'] = MD_CHAR_ENTITITY;
2438 if (extensions & MKDEXT_AUTOLINK) {
2439 md->active_char[':'] = MD_CHAR_AUTOLINK_URL;
2440 md->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
2441 md->active_char['w'] = MD_CHAR_AUTOLINK_WWW;
2444 if (extensions & MKDEXT_SUPERSCRIPT)
2445 md->active_char['^'] = MD_CHAR_SUPERSCRIPT;
2447 /* Extension data */
2448 md->ext_flags = extensions;
2449 md->opaque = opaque;
2450 md->max_nesting = max_nesting;
2451 md->in_link_body = 0;
/* sd_markdown_render - renders a Markdown document into ob.
 *   ob        output buffer; also handed to the doc_header and doc_footer
 *             callbacks
 *   document  input bytes, doc_size of them
 *   md        parser state; its reference table is cleared at the start
 *             and released again at the end
 * Pass 1 walks the input line by line: lines accepted by is_ref() are not
 * copied, all other lines go into text through expand_tabs() with line
 * ends rewritten as '\n'. Pass 2 runs parse_block() over text.
 * NOTE(review): the embedded line numbers in this listing have gaps; the
 * creation and release of text, several index updates and some guarding
 * conditions are not visible here. */
2457 sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md)
/* grows a size by half, i.e. x * 1.5 with integer arithmetic */
2459 #define MARKDOWN_GROW(x) ((x) + ((x) >> 1))
2460 static const char UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
2469 /* Preallocate enough space for our buffer to avoid expanding while copying */
2470 bufgrow(text, doc_size);
2472 /* reset the references table */
2473 memset(&md->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
2475 /* first pass: looking for references, copying everything else */
2478 /* Skip a possible UTF-8 BOM, even though the Unicode standard
2479 * discourages having these in UTF-8 documents */
2480 if (doc_size >= 3 && memcmp(document, UTF8_BOM, 3) == 0)
2483 while (beg < doc_size) /* iterating over lines */
2484 if (is_ref(document, beg, doc_size, &end, md->refs))
2486 else { /* skipping to the next line */
2488 while (end < doc_size && document[end] != '\n' && document[end] != '\r')
2491 /* adding the line body if present */
2493 expand_tabs(text, document + beg, end - beg);
/* a '\r' directly followed by '\n' writes nothing itself, so a CR LF pair
 * yields a single '\n'; a '\r' that is the last byte of the document
 * writes nothing either */
2495 while (end < doc_size && (document[end] == '\n' || document[end] == '\r')) {
2496 /* add one \n per newline */
2497 if (document[end] == '\n' || (end + 1 < doc_size && document[end + 1] != '\n'))
2498 bufputc(text, '\n');
2505 /* pre-grow the output buffer to minimize allocations */
2506 bufgrow(ob, MARKDOWN_GROW(text->size));
2508 /* second pass: actual rendering */
2509 if (md->cb.doc_header)
2510 md->cb.doc_header(ob, md->opaque);
2513 /* adding a final newline if not already present */
/* NOTE(review): this indexes text->data[text->size - 1]; whatever guards
 * against an empty text buffer is not visible in this listing - confirm */
2514 if (text->data[text->size - 1] != '\n' && text->data[text->size - 1] != '\r')
2515 bufputc(text, '\n');
2517 parse_block(ob, md, text->data, text->size);
2520 if (md->cb.doc_footer)
2521 md->cb.doc_footer(ob, md->opaque);
2525 free_link_refs(md->refs);
/* no working buffer may still be in use once rendering is finished */
2527 assert(md->work_bufs[BUFFER_SPAN].size == 0);
2528 assert(md->work_bufs[BUFFER_BLOCK].size == 0);
/* sd_markdown_free - releases the working buffers held by md and frees
 * both buffer stacks.
 * NOTE(review): the embedded line numbers in this listing have gaps; the
 * declaration of i and any release of md itself are not visible here. */
2532 sd_markdown_free(struct sd_markdown *md)
/* the loops run up to asize rather than size - presumably so that every
 * slot is released, including buffers not currently in use; confirm
 * against the stack implementation */
2536 for (i = 0; i < (size_t)md->work_bufs[BUFFER_SPAN].asize; ++i)
2537 bufrelease(md->work_bufs[BUFFER_SPAN].item[i]);
2539 for (i = 0; i < (size_t)md->work_bufs[BUFFER_BLOCK].asize; ++i)
2540 bufrelease(md->work_bufs[BUFFER_BLOCK].item[i]);
2542 stack_free(&md->work_bufs[BUFFER_SPAN]);
2543 stack_free(&md->work_bufs[BUFFER_BLOCK]);
/* sd_version - stores the library version in the three output parameters.
 *   ver_major, ver_minor, ver_revision  written unconditionally, so each
 *                                       must point to a valid int */
2549 sd_version(int *ver_major, int *ver_minor, int *ver_revision)
2551 *ver_major = SUNDOWN_VER_MAJOR;
2552 *ver_minor = SUNDOWN_VER_MINOR;
2553 *ver_revision = SUNDOWN_VER_REVISION;
2556 /* vim: set filetype=c: */