markdown-tokens.h
Go to the documentation of this file.
1
2/*
3 Copyright (c) 2009 by Chad Nelson
4 Released under the MIT License.
5 See the provided LICENSE.TXT file for details.
6*/
7
8#pragma once
9
10#include <optional>
11#include <unordered_map>
12#include <vector>
13
14#include "markdown.h"
15
16namespace markdown
17{
18 using TokenGroupIter = TokenGroup::iterator;
19 using CTokenGroupIter = TokenGroup::const_iterator;
20
/// Table of link targets keyed by a string id.
///
/// Entries are stored in an unordered_map from id string to Target; `add`
/// and `find` are defined out of line.
class LinkIds
{
public:
    /// Destination recorded for one id: a URL together with its title text.
    struct Target
    {
        std::string url;
        std::string title;

        /// Copies `url_` and `title_` into the new Target.
        Target(const std::string& url_, const std::string& title_) : url(url_), title(title_)
        {
        }
    };

    /// Looks up `id`.
    /// @return the associated Target, or an empty optional.
    ///         NOTE(review): presumably empty when the id is unknown — confirm
    ///         against the out-of-line definition.
    std::optional<Target> find(const std::string& id) const;

    /// Records `url` and `title` under `id`.
    void add(const std::string& id, const std::string& url, const std::string& title);

private:
    using Table = std::unordered_map<std::string, Target>;

    // NOTE(review): presumably normalizes an id before it is used as a table
    // key — confirm against the out-of-line definition.
    static std::string _scrubKey(std::string str);

    Table mTable;
};
44
45 class Token
46 {
47 public:
49 {
50 }
51
52 virtual void writeAsHtml(std::ostream&) const = 0;
53
54 virtual void
55 writeAsOriginal(std::ostream& out) const
56 {
57 writeAsHtml(out);
58 }
59
60 virtual void writeToken(std::ostream& out) const = 0;
61
62 virtual void
63 writeToken(size_t indent, std::ostream& out) const
64 {
65 out << std::string(indent * 2, ' ');
66 writeToken(out);
67 }
68
69 virtual std::optional<TokenGroup>
71 {
72 return std::nullopt;
73 }
74
75 virtual std::optional<std::string>
76 text() const
77 {
78 return std::nullopt;
79 }
80
81 virtual bool
83 {
84 return false;
85 }
86
87 virtual bool
89 {
90 return false;
91 }
92
93 virtual bool
95 {
96 return false;
97 }
98
99 virtual bool
101 {
102 return false;
103 }
104
105 virtual bool
107 {
108 return false;
109 }
110
111 virtual bool
113 {
114 return false;
115 }
116
117 virtual bool
119 {
120 return false;
121 }
122
123 virtual bool
125 {
126 return false;
127 }
128
129 protected:
130 virtual void
131 preWrite(std::ostream& out) const
132 {
133 }
134
135 virtual void
136 postWrite(std::ostream& out) const
137 {
138 }
139 };
140} // namespace markdown
141
142namespace markdown::token
143{
144
/// Checks `tag` against the set of recognised HTML tags (defined out of line).
/// @param tag           tag text to check.
/// @param nonBlockFirst defaults to false.
///        NOTE(review): presumably selects whether non-block tags are tried
///        before block tags — confirm against the definition.
/// @return a size_t; NOTE(review): presumably 0 means "not a valid tag" —
///         confirm against the definition.
size_t isValidTag(const std::string& tag, bool nonBlockFirst = false);
146
148 {
149 cAmps = 0x01,
151 cAngles = 0x04,
152 cQuotes = 0x08
153 };
154
155 class TextHolder : public Token
156 {
157 public:
158 TextHolder(const std::string& text, bool canContainMarkup, unsigned int encodingFlags) :
159 mText(text), mCanContainMarkup(canContainMarkup), mEncodingFlags(encodingFlags)
160 {
161 }
162
163 void writeAsHtml(std::ostream& out) const override;
164
165 void
166 writeToken(std::ostream& out) const override
167 {
168 out << "TextHolder: " << mText << '\n';
169 }
170
171 std::optional<std::string>
172 text() const override
173 {
174 return mText;
175 }
176
177 bool
178 canContainMarkup() const override
179 {
180 return mCanContainMarkup;
181 }
182
183 private:
184 const std::string mText;
185 const bool mCanContainMarkup;
186 const int mEncodingFlags;
187 };
188
189 class RawText : public TextHolder
190 {
191 public:
192 RawText(const std::string& text, bool canContainMarkup = true) :
194 {
195 }
196
197 void
198 writeToken(std::ostream& out) const override
199 {
200 out << "RawText: " << *text() << '\n';
201 }
202
203 std::optional<TokenGroup> processSpanElements(const LinkIds& idTable) override;
204
205 private:
206 using ReplacementTable = std::vector<TokenPtr>;
207
208 static std::string _processHtmlTagAttributes(std::string src,
209 ReplacementTable& replacements);
210 static std::string _processCodeSpans(std::string src, ReplacementTable& replacements);
211 static std::string _processEscapedCharacters(const std::string& src);
212 static std::string _processLinksImagesAndTags(const std::string& src,
213 ReplacementTable& replacements,
214 const LinkIds& idTable);
215 static std::string _processSpaceBracketedGroupings(const std::string& src,
216 ReplacementTable& replacements);
217 static TokenGroup _processBoldAndItalicSpans(const std::string& src,
218 ReplacementTable& replacements);
219
220 static TokenGroup _encodeProcessedItems(const std::string& src,
221 ReplacementTable& replacements);
222 static std::string _restoreProcessedItems(const std::string& src,
223 ReplacementTable& replacements);
224 };
225
226 class HtmlTag : public TextHolder
227 {
228 public:
229 HtmlTag(const std::string& contents) : TextHolder(contents, false, cAmps | cAngles)
230 {
231 }
232
233 void
234 writeToken(std::ostream& out) const override
235 {
236 out << "HtmlTag: " << *text() << '\n';
237 }
238
239 protected:
240 void
241 preWrite(std::ostream& out) const override
242 {
243 out << '<';
244 }
245
246 void
247 postWrite(std::ostream& out) const override
248 {
249 out << '>';
250 }
251 };
252
254 {
255 public:
256 HtmlAnchorTag(const std::string& url, const std::string& title = std::string());
257
258 void
259 writeToken(std::ostream& out) const override
260 {
261 out << "HtmlAnchorTag: " << *text() << '\n';
262 }
263 };
264
266 {
267 public:
268 InlineHtmlContents(const std::string& contents) :
269 TextHolder(contents, false, cAmps | cAngles)
270 {
271 }
272
273 void
274 writeToken(std::ostream& out) const override
275 {
276 out << "InlineHtmlContents: " << *text() << '\n';
277 }
278 };
279
281 {
282 public:
283 InlineHtmlComment(const std::string& contents) : TextHolder(contents, false, 0)
284 {
285 }
286
287 void
288 writeToken(std::ostream& out) const override
289 {
290 out << "InlineHtmlComment: " << *text() << '\n';
291 }
292 };
293
294 class CodeBlock : public TextHolder
295 {
296 public:
297 CodeBlock(const std::string& actualContents) :
298 TextHolder(actualContents, false, cDoubleAmps | cAngles | cQuotes)
299 {
300 }
301
302 void writeAsHtml(std::ostream& out) const override;
303
304 void
305 writeToken(std::ostream& out) const override
306 {
307 out << "CodeBlock: " << *text() << '\n';
308 }
309 };
310
311 class CodeSpan : public TextHolder
312 {
313 public:
314 CodeSpan(const std::string& actualContents) :
315 TextHolder(actualContents, false, cDoubleAmps | cAngles | cQuotes)
316 {
317 }
318
319 void writeAsHtml(std::ostream& out) const override;
320 void writeAsOriginal(std::ostream& out) const override;
321
322 void
323 writeToken(std::ostream& out) const override
324 {
325 out << "CodeSpan: " << *text() << '\n';
326 }
327 };
328
329 class Header : public TextHolder
330 {
331 public:
332 Header(size_t level, const std::string& text) :
333 TextHolder(text, true, cAmps | cAngles | cQuotes), mLevel(level)
334 {
335 }
336
337 void
338 writeToken(std::ostream& out) const override
339 {
340 out << "Header " << mLevel << ": " << *text() << '\n';
341 }
342
343 bool
344 inhibitParagraphs() const override
345 {
346 return true;
347 }
348
349 protected:
350 void
351 preWrite(std::ostream& out) const override
352 {
353 out << "<h" << mLevel << ">";
354 }
355
356 void
357 postWrite(std::ostream& out) const override
358 {
359 out << "</h" << mLevel << ">\n";
360 }
361
362 private:
363 size_t mLevel;
364 };
365
366 class BlankLine : public TextHolder
367 {
368 public:
369 BlankLine(const std::string& actualContents = std::string()) :
370 TextHolder(actualContents, false, 0)
371 {
372 }
373
374 void
375 writeToken(std::ostream& out) const override
376 {
377 out << "BlankLine: " << *text() << '\n';
378 }
379
380 bool
381 isBlankLine() const override
382 {
383 return true;
384 }
385 };
386
387 class EscapedCharacter : public Token
388 {
389 public:
390 EscapedCharacter(char c) : mChar(c)
391 {
392 }
393
394 void
395 writeAsHtml(std::ostream& out) const override
396 {
397 out << mChar;
398 }
399
400 void
401 writeAsOriginal(std::ostream& out) const override
402 {
403 out << '\\' << mChar;
404 }
405
406 void
407 writeToken(std::ostream& out) const override
408 {
409 out << "EscapedCharacter: " << mChar << '\n';
410 }
411
412 private:
413 const char mChar;
414 };
415
416 class Container : public Token
417 {
418 public:
419 Container(const TokenGroup& contents = TokenGroup()) :
420 mSubTokens(contents), mParagraphMode(false)
421 {
422 }
423
424 const TokenGroup&
425 subTokens() const
426 {
427 return mSubTokens;
428 }
429
430 void
432 {
433 mSubTokens.splice(mSubTokens.end(), tokens);
434 }
435
436 void
438 {
439 mSubTokens.swap(tokens);
440 }
441
442 bool
443 isContainer() const override
444 {
445 return true;
446 }
447
448 void writeAsHtml(std::ostream& out) const override;
449
450 void
451 writeToken(std::ostream& out) const override
452 {
453 out << "Container: error!" << '\n';
454 }
455
456 void writeToken(size_t indent, std::ostream& out) const override;
457
458 std::optional<TokenGroup> processSpanElements(const LinkIds& idTable) override;
459
460 virtual TokenPtr
461 clone(const TokenGroup& newContents) const
462 {
463 return TokenPtr(new Container(newContents));
464 }
465
466 virtual std::string
468 {
469 return "Container";
470 }
471
472 protected:
475 };
476
478 {
479 public:
480 InlineHtmlBlock(const TokenGroup& contents, bool isBlockTag = false) :
481 Container(contents), mIsBlockTag(isBlockTag)
482 {
483 }
484
485 InlineHtmlBlock(const std::string& contents) : mIsBlockTag(false)
486 {
487 mSubTokens.push_back(TokenPtr(new InlineHtmlContents(contents)));
488 }
489
490 bool
491 inhibitParagraphs() const override
492 {
493 return !mIsBlockTag;
494 }
495
497 clone(const TokenGroup& newContents) const override
498 {
499 return TokenPtr(new InlineHtmlBlock(newContents));
500 }
501
502 std::string
503 containerName() const override
504 {
505 return "InlineHtmlBlock";
506 }
507
508 // Inline HTML blocks always end with a blank line, so report it as one for
509 // parsing purposes.
510 bool
511 isBlankLine() const override
512 {
513 return true;
514 }
515
516 private:
517 bool mIsBlockTag;
518 };
519
520 class ListItem : public Container
521 {
522 public:
523 ListItem(const TokenGroup& contents) : Container(contents), mInhibitParagraphs(true)
524 {
525 }
526
527 void
529 {
530 mInhibitParagraphs = set;
531 }
532
533 bool
534 inhibitParagraphs() const override
535 {
536 return mInhibitParagraphs;
537 }
538
540 clone(const TokenGroup& newContents) const override
541 {
542 return TokenPtr(new ListItem(newContents));
543 }
544
545 std::string
546 containerName() const override
547 {
548 return "ListItem";
549 }
550
551 protected:
552 void
553 preWrite(std::ostream& out) const override
554 {
555 out << "<li>";
556 }
557
558 void
559 postWrite(std::ostream& out) const override
560 {
561 out << "</li>\n";
562 }
563
564 private:
565 bool mInhibitParagraphs;
566 };
567
569 {
570 public:
571 UnorderedList(const TokenGroup& contents, bool paragraphMode = false);
572
574 clone(const TokenGroup& newContents) const override
575 {
576 return TokenPtr(new UnorderedList(newContents));
577 }
578
579 std::string
580 containerName() const override
581 {
582 return "UnorderedList";
583 }
584
585 protected:
586 void
587 preWrite(std::ostream& out) const override
588 {
589 out << "\n<ul>\n";
590 }
591
592 void
593 postWrite(std::ostream& out) const override
594 {
595 out << "</ul>\n\n";
596 }
597 };
598
600 {
601 public:
602 OrderedList(const TokenGroup& contents, bool paragraphMode = false) :
603 UnorderedList(contents, paragraphMode)
604 {
605 }
606
608 clone(const TokenGroup& newContents) const override
609 {
610 return TokenPtr(new OrderedList(newContents));
611 }
612
613 std::string
614 containerName() const override
615 {
616 return "OrderedList";
617 }
618
619 protected:
620 void
621 preWrite(std::ostream& out) const override
622 {
623 out << "<ol>\n";
624 }
625
626 void
627 postWrite(std::ostream& out) const override
628 {
629 out << "</ol>\n\n";
630 }
631 };
632
633 class BlockQuote : public Container
634 {
635 public:
636 BlockQuote(const TokenGroup& contents) : Container(contents)
637 {
638 }
639
641 clone(const TokenGroup& newContents) const override
642 {
643 return TokenPtr(new BlockQuote(newContents));
644 }
645
646 std::string
647 containerName() const override
648 {
649 return "BlockQuote";
650 }
651
652 protected:
653 void
654 preWrite(std::ostream& out) const override
655 {
656 out << "<blockquote>\n";
657 }
658
659 void
660 postWrite(std::ostream& out) const override
661 {
662 out << "\n</blockquote>\n";
663 }
664 };
665
666 class Paragraph : public Container
667 {
668 public:
670 {
671 }
672
673 Paragraph(const TokenGroup& contents) : Container(contents)
674 {
675 }
676
678 clone(const TokenGroup& newContents) const override
679 {
680 return TokenPtr(new Paragraph(newContents));
681 }
682
683 std::string
684 containerName() const override
685 {
686 return "Paragraph";
687 }
688
689 protected:
690 void
691 preWrite(std::ostream& out) const override
692 {
693 out << "<p>";
694 }
695
696 void
697 postWrite(std::ostream& out) const override
698 {
699 out << "</p>\n\n";
700 }
701 };
702
704 {
705 public:
706 BoldOrItalicMarker(bool open, char c, size_t size) :
707 mOpenMarker(open),
708 mTokenCharacter(c),
709 mSize(size),
710 mMatch(0),
711 mCannotMatch(false),
712 mDisabled(false),
713 mId(-1)
714 {
715 }
716
717 bool
718 isUnmatchedOpenMarker() const override
719 {
720 return (mOpenMarker && mMatch == 0 && !mCannotMatch);
721 }
722
723 bool
724 isUnmatchedCloseMarker() const override
725 {
726 return (!mOpenMarker && mMatch == 0 && !mCannotMatch);
727 }
728
729 bool
730 isMatchedOpenMarker() const override
731 {
732 return (mOpenMarker && mMatch != 0);
733 }
734
735 bool
736 isMatchedCloseMarker() const override
737 {
738 return (!mOpenMarker && mMatch != 0);
739 }
740
741 void writeAsHtml(std::ostream& out) const override;
742 void writeToken(std::ostream& out) const override;
743
744 bool
746 {
747 return mOpenMarker;
748 }
749
750 char
752 {
753 return mTokenCharacter;
754 }
755
756 size_t
757 size() const
758 {
759 return mSize;
760 }
761
762 bool
763 matched() const
764 {
765 return (mMatch != 0);
766 }
767
769 matchedTo() const
770 {
771 return mMatch;
772 }
773
774 int
775 id() const
776 {
777 return mId;
778 }
779
780 void
781 matched(BoldOrItalicMarker* match, int id = -1)
782 {
783 mMatch = match;
784 mId = id;
785 }
786
787 void
788 cannotMatch(bool set)
789 {
790 mCannotMatch = set;
791 }
792
793 void
795 {
796 mCannotMatch = mDisabled = true;
797 }
798
799 private:
800 bool mOpenMarker; // Otherwise it's a close-marker
801 char mTokenCharacter; // Underscore or asterisk
802 size_t mSize; // 1=italics, 2=bold, 3=both
803 BoldOrItalicMarker* mMatch;
804 bool mCannotMatch;
805 bool mDisabled;
806 int mId;
807 };
808
809 class Image : public Token
810 {
811 public:
812 Image(const std::string& altText, const std::string& url, const std::string& title) :
813 mAltText(altText), mUrl(url), mTitle(title)
814 {
815 }
816
817 void writeAsHtml(std::ostream& out) const override;
818
819 void
820 writeToken(std::ostream& out) const override
821 {
822 out << "Image: " << mUrl << '\n';
823 }
824
825 private:
826 const std::string mAltText, mUrl, mTitle;
827 };
828
829} // namespace markdown::token
constexpr T c
std::string str(const T &t)
std::optional< Target > find(const std::string &id) const
Definition markdown.cpp:903
void add(const std::string &id, const std::string &url, const std::string &title)
Definition markdown.cpp:918
virtual bool inhibitParagraphs() const
virtual bool isUnmatchedOpenMarker() const
virtual std::optional< TokenGroup > processSpanElements(const LinkIds &idTable)
virtual void writeToken(size_t indent, std::ostream &out) const
virtual void writeToken(std::ostream &out) const =0
virtual bool isBlankLine() const
virtual void postWrite(std::ostream &out) const
virtual bool isMatchedOpenMarker() const
virtual bool isContainer() const
virtual bool canContainMarkup() const
virtual bool isUnmatchedCloseMarker() const
virtual bool isMatchedCloseMarker() const
virtual void preWrite(std::ostream &out) const
virtual void writeAsOriginal(std::ostream &out) const
virtual std::optional< std::string > text() const
virtual void writeAsHtml(std::ostream &) const =0
void writeToken(std::ostream &out) const override
bool isBlankLine() const override
BlankLine(const std::string &actualContents=std::string())
void preWrite(std::ostream &out) const override
void postWrite(std::ostream &out) const override
TokenPtr clone(const TokenGroup &newContents) const override
std::string containerName() const override
BlockQuote(const TokenGroup &contents)
void writeAsHtml(std::ostream &out) const override
bool isMatchedCloseMarker() const override
bool isUnmatchedOpenMarker() const override
BoldOrItalicMarker(bool open, char c, size_t size)
bool isUnmatchedCloseMarker() const override
bool isMatchedOpenMarker() const override
void writeToken(std::ostream &out) const override
BoldOrItalicMarker * matchedTo() const
void matched(BoldOrItalicMarker *match, int id=-1)
void writeAsHtml(std::ostream &out) const override
void writeToken(std::ostream &out) const override
CodeBlock(const std::string &actualContents)
void writeAsHtml(std::ostream &out) const override
void writeAsOriginal(std::ostream &out) const override
void writeToken(std::ostream &out) const override
CodeSpan(const std::string &actualContents)
void writeAsHtml(std::ostream &out) const override
bool isContainer() const override
void swapSubtokens(TokenGroup &tokens)
void appendSubtokens(TokenGroup &tokens)
const TokenGroup & subTokens() const
std::optional< TokenGroup > processSpanElements(const LinkIds &idTable) override
void writeToken(std::ostream &out) const override
Container(const TokenGroup &contents=TokenGroup())
virtual TokenPtr clone(const TokenGroup &newContents) const
virtual std::string containerName() const
void writeAsHtml(std::ostream &out) const override
void writeAsOriginal(std::ostream &out) const override
void writeToken(std::ostream &out) const override
void preWrite(std::ostream &out) const override
void postWrite(std::ostream &out) const override
bool inhibitParagraphs() const override
void writeToken(std::ostream &out) const override
Header(size_t level, const std::string &text)
void writeToken(std::ostream &out) const override
HtmlAnchorTag(const std::string &url, const std::string &title=std::string())
void preWrite(std::ostream &out) const override
void postWrite(std::ostream &out) const override
HtmlTag(const std::string &contents)
void writeToken(std::ostream &out) const override
void writeAsHtml(std::ostream &out) const override
void writeToken(std::ostream &out) const override
Image(const std::string &altText, const std::string &url, const std::string &title)
InlineHtmlBlock(const TokenGroup &contents, bool isBlockTag=false)
TokenPtr clone(const TokenGroup &newContents) const override
bool inhibitParagraphs() const override
InlineHtmlBlock(const std::string &contents)
bool isBlankLine() const override
std::string containerName() const override
void writeToken(std::ostream &out) const override
InlineHtmlComment(const std::string &contents)
InlineHtmlContents(const std::string &contents)
void writeToken(std::ostream &out) const override
void preWrite(std::ostream &out) const override
void postWrite(std::ostream &out) const override
TokenPtr clone(const TokenGroup &newContents) const override
bool inhibitParagraphs() const override
ListItem(const TokenGroup &contents)
std::string containerName() const override
void preWrite(std::ostream &out) const override
void postWrite(std::ostream &out) const override
TokenPtr clone(const TokenGroup &newContents) const override
OrderedList(const TokenGroup &contents, bool paragraphMode=false)
std::string containerName() const override
void preWrite(std::ostream &out) const override
void postWrite(std::ostream &out) const override
TokenPtr clone(const TokenGroup &newContents) const override
Paragraph(const TokenGroup &contents)
std::string containerName() const override
RawText(const std::string &text, bool canContainMarkup=true)
std::optional< TokenGroup > processSpanElements(const LinkIds &idTable) override
void writeToken(std::ostream &out) const override
void writeAsHtml(std::ostream &out) const override
TextHolder(const std::string &text, bool canContainMarkup, unsigned int encodingFlags)
void writeToken(std::ostream &out) const override
bool canContainMarkup() const override
std::optional< std::string > text() const override
void preWrite(std::ostream &out) const override
void postWrite(std::ostream &out) const override
TokenPtr clone(const TokenGroup &newContents) const override
UnorderedList(const TokenGroup &contents, bool paragraphMode=false)
std::string containerName() const override
size_t isValidTag(const std::string &tag, bool nonBlockFirst)
std::shared_ptr< Token > TokenPtr
Definition markdown.h:21
TokenGroup::iterator TokenGroupIter
std::list< TokenPtr > TokenGroup
Definition markdown.h:22
TokenGroup::const_iterator CTokenGroupIter
Target(const std::string &url_, const std::string &title_)