markdown-tokens.h
Go to the documentation of this file.
1 
2 /*
3  Copyright (c) 2009 by Chad Nelson
4  Released under the MIT License.
5  See the provided LICENSE.TXT file for details.
6 */
7 
8 #pragma once
9 
10 #include "markdown.h"
11 
12 #include <unordered_map>
13 #include <optional>
14 #include <vector>
15 
16 namespace markdown
17 {
18  using TokenGroupIter = TokenGroup::iterator;
19  using CTokenGroupIter = TokenGroup::const_iterator;
20 
/// Table of link targets keyed by a string id.
///
/// Entries are registered with add() and looked up with find(); both are
/// defined outside this header.
class LinkIds
{
public:
    /// What a link id resolves to: a URL and its title.
    struct Target
    {
        std::string url;
        std::string title;

        /// Copies both strings into the new target.
        Target(const std::string& url_, const std::string& title_)
            : url(url_)
            , title(title_)
        {
        }
    };

    /// Looks up @p id; the result is empty when no target is returned.
    std::optional<Target> find(const std::string& id) const;

    /// Registers @p url and @p title under @p id.
    void add(const std::string& id,
             const std::string& url,
             const std::string& title);

private:
    using Table = std::unordered_map<std::string, Target>;

    // NOTE(review): presumably normalises an id before it is used as a table
    // key -- confirm against the definition in the .cpp file.
    static std::string _scrubKey(std::string str);

    Table mTable;
};
44 
45  class Token
46  {
47  public:
48  Token() { }
49 
50  virtual void writeAsHtml(std::ostream&) const = 0;
51  virtual void writeAsOriginal(std::ostream& out) const
52  {
53  writeAsHtml(out);
54  }
55  virtual void writeToken(std::ostream& out) const = 0;
56  virtual void writeToken(size_t indent, std::ostream& out) const
57  {
58  out << std::string(indent * 2, ' ');
59  writeToken(out);
60  }
61 
62  virtual std::optional<TokenGroup> processSpanElements(const LinkIds& idTable)
63  {
64  return std::nullopt;
65  }
66 
67  virtual std::optional<std::string> text() const
68  {
69  return std::nullopt;
70  }
71 
72  virtual bool canContainMarkup() const
73  {
74  return false;
75  }
76  virtual bool isBlankLine() const
77  {
78  return false;
79  }
80  virtual bool isContainer() const
81  {
82  return false;
83  }
84  virtual bool isUnmatchedOpenMarker() const
85  {
86  return false;
87  }
88  virtual bool isUnmatchedCloseMarker() const
89  {
90  return false;
91  }
92  virtual bool isMatchedOpenMarker() const
93  {
94  return false;
95  }
96  virtual bool isMatchedCloseMarker() const
97  {
98  return false;
99  }
100  virtual bool inhibitParagraphs() const
101  {
102  return false;
103  }
104 
105  protected:
106  virtual void preWrite(std::ostream& out) const { }
107  virtual void postWrite(std::ostream& out) const { }
108  };
109 }
110 
111 namespace markdown::token
112 {
113 
/// Declared here; defined in markdown-tokens.cpp.
/// NOTE(review): presumably checks @p tag against the set of known HTML tags.
/// The meaning of the size_t result and of @p nonBlockFirst is not visible
/// from this header -- confirm against the definition.
size_t isValidTag(const std::string& tag, bool nonBlockFirst = false);
115 
/// Bit flags combined with `|` and passed to TextHolder as its encodingFlags.
/// NOTE(review): judging by the names, each flag selects a class of characters
/// to encode on output -- confirm in markdown-tokens.cpp.
enum EncodingFlags
{
    cAmps = 0x01,
    cDoubleAmps = 0x02,
    cAngles = 0x04,
    cQuotes = 0x08
};
117 
118  class TextHolder: public Token
119  {
120  public:
121  TextHolder(const std::string& text, bool canContainMarkup, unsigned int
122  encodingFlags): mText(text), mCanContainMarkup(canContainMarkup),
123  mEncodingFlags(encodingFlags) { }
124 
125  void writeAsHtml(std::ostream& out) const override;
126 
127  void writeToken(std::ostream& out) const override
128  {
129  out << "TextHolder: " << mText << '\n';
130  }
131 
132  std::optional<std::string> text() const override
133  {
134  return mText;
135  }
136 
137  bool canContainMarkup() const override
138  {
139  return mCanContainMarkup;
140  }
141 
142  private:
143  const std::string mText;
144  const bool mCanContainMarkup;
145  const int mEncodingFlags;
146  };
147 
148  class RawText: public TextHolder
149  {
150  public:
151  RawText(const std::string& text, bool canContainMarkup = true):
153 
154  void writeToken(std::ostream& out) const override
155  {
156  out << "RawText: " << *text() << '\n';
157  }
158 
159  std::optional<TokenGroup> processSpanElements(const LinkIds& idTable) override;
160 
161  private:
162  using ReplacementTable = std::vector<TokenPtr>;
163 
164  static std::string _processHtmlTagAttributes(std::string src, ReplacementTable& replacements);
165  static std::string _processCodeSpans(std::string src, ReplacementTable& replacements);
166  static std::string _processEscapedCharacters(const std::string& src);
167  static std::string _processLinksImagesAndTags(const std::string& src, ReplacementTable& replacements, const LinkIds& idTable);
168  static std::string _processSpaceBracketedGroupings(const std::string& src, ReplacementTable& replacements);
169  static TokenGroup _processBoldAndItalicSpans(const std::string& src, ReplacementTable& replacements);
170 
171  static TokenGroup _encodeProcessedItems(const std::string& src, ReplacementTable& replacements);
172  static std::string _restoreProcessedItems(const std::string& src, ReplacementTable& replacements);
173  };
174 
175  class HtmlTag: public TextHolder
176  {
177  public:
178  HtmlTag(const std::string& contents): TextHolder(contents, false, cAmps | cAngles) { }
179 
180  void writeToken(std::ostream& out) const override
181  {
182  out << "HtmlTag: " << *text() << '\n';
183  }
184 
185  protected:
186  void preWrite(std::ostream& out) const override
187  {
188  out << '<';
189  }
190  void postWrite(std::ostream& out) const override
191  {
192  out << '>';
193  }
194  };
195 
196  class HtmlAnchorTag: public TextHolder
197  {
198  public:
199  HtmlAnchorTag(const std::string& url, const std::string& title = std::string());
200 
201  void writeToken(std::ostream& out) const override
202  {
203  out << "HtmlAnchorTag: " << *text() << '\n';
204  }
205  };
206 
207  class InlineHtmlContents: public TextHolder
208  {
209  public:
210  InlineHtmlContents(const std::string& contents): TextHolder(contents, false,
211  cAmps | cAngles) { }
212 
213  void writeToken(std::ostream& out) const override
214  {
215  out << "InlineHtmlContents: " << *text() << '\n';
216  }
217  };
218 
219  class InlineHtmlComment: public TextHolder
220  {
221  public:
222  InlineHtmlComment(const std::string& contents): TextHolder(contents, false,
223  0) { }
224 
225  void writeToken(std::ostream& out) const override
226  {
227  out << "InlineHtmlComment: " << *text() << '\n';
228  }
229  };
230 
231  class CodeBlock: public TextHolder
232  {
233  public:
234  CodeBlock(const std::string& actualContents): TextHolder(actualContents,
235  false, cDoubleAmps | cAngles | cQuotes) { }
236 
237  void writeAsHtml(std::ostream& out) const override;
238 
239  void writeToken(std::ostream& out) const override
240  {
241  out << "CodeBlock: " << *text() << '\n';
242  }
243  };
244 
245  class CodeSpan: public TextHolder
246  {
247  public:
248  CodeSpan(const std::string& actualContents): TextHolder(actualContents,
249  false, cDoubleAmps | cAngles | cQuotes) { }
250 
251  void writeAsHtml(std::ostream& out) const override;
252  void writeAsOriginal(std::ostream& out) const override;
253  void writeToken(std::ostream& out) const override
254  {
255  out << "CodeSpan: " << *text() << '\n';
256  }
257  };
258 
259  class Header: public TextHolder
260  {
261  public:
262  Header(size_t level, const std::string& text): TextHolder(text, true,
263  cAmps | cAngles | cQuotes), mLevel(level) { }
264 
265  void writeToken(std::ostream& out) const override
266  {
267  out << "Header " <<
268  mLevel << ": " << *text() << '\n';
269  }
270 
271  bool inhibitParagraphs() const override
272  {
273  return true;
274  }
275 
276  protected:
277  void preWrite(std::ostream& out) const override
278  {
279  out << "<h" << mLevel << ">";
280  }
281  void postWrite(std::ostream& out) const override
282  {
283  out << "</h" << mLevel << ">\n";
284  }
285 
286  private:
287  size_t mLevel;
288  };
289 
290  class BlankLine: public TextHolder
291  {
292  public:
293  BlankLine(const std::string& actualContents = std::string()):
294  TextHolder(actualContents, false, 0) { }
295 
296  void writeToken(std::ostream& out) const override
297  {
298  out << "BlankLine: " << *text() << '\n';
299  }
300 
301  bool isBlankLine() const override
302  {
303  return true;
304  }
305  };
306 
307 
308 
309  class EscapedCharacter: public Token
310  {
311  public:
312  EscapedCharacter(char c): mChar(c) { }
313 
314  void writeAsHtml(std::ostream& out) const override
315  {
316  out << mChar;
317  }
318  void writeAsOriginal(std::ostream& out) const override
319  {
320  out << '\\' << mChar;
321  }
322  void writeToken(std::ostream& out) const override
323  {
324  out << "EscapedCharacter: " << mChar << '\n';
325  }
326 
327  private:
328  const char mChar;
329  };
330 
331 
332 
333  class Container: public Token
334  {
335  public:
336  Container(const TokenGroup& contents = TokenGroup()): mSubTokens(contents),
337  mParagraphMode(false) { }
338 
339  const TokenGroup& subTokens() const
340  {
341  return mSubTokens;
342  }
343  void appendSubtokens(TokenGroup& tokens)
344  {
345  mSubTokens.splice(mSubTokens.end(), tokens);
346  }
347  void swapSubtokens(TokenGroup& tokens)
348  {
349  mSubTokens.swap(tokens);
350  }
351 
352  bool isContainer() const override
353  {
354  return true;
355  }
356 
357  void writeAsHtml(std::ostream& out) const override;
358 
359  void writeToken(std::ostream& out) const override
360  {
361  out << "Container: error!" << '\n';
362  }
363  void writeToken(size_t indent, std::ostream& out) const override;
364 
365  std::optional<TokenGroup> processSpanElements(const LinkIds& idTable) override;
366 
367  virtual TokenPtr clone(const TokenGroup& newContents) const
368  {
369  return TokenPtr(new Container(newContents));
370  }
371  virtual std::string containerName() const
372  {
373  return "Container";
374  }
375 
376  protected:
377  TokenGroup mSubTokens;
378  bool mParagraphMode;
379  };
380 
381  class InlineHtmlBlock: public Container
382  {
383  public:
384  InlineHtmlBlock(const TokenGroup& contents, bool isBlockTag = false):
385  Container(contents), mIsBlockTag(isBlockTag) { }
386  InlineHtmlBlock(const std::string& contents): mIsBlockTag(false)
387  {
388  mSubTokens.push_back(TokenPtr(new InlineHtmlContents(contents)));
389  }
390 
391  bool inhibitParagraphs() const override
392  {
393  return !mIsBlockTag;
394  }
395 
396  TokenPtr clone(const TokenGroup& newContents) const override
397  {
398  return TokenPtr(new InlineHtmlBlock(newContents));
399  }
400  std::string containerName() const override
401  {
402  return "InlineHtmlBlock";
403  }
404 
405  // Inline HTML blocks always end with a blank line, so report it as one for
406  // parsing purposes.
407  bool isBlankLine() const override
408  {
409  return true;
410  }
411 
412  private:
413  bool mIsBlockTag;
414  };
415 
416  class ListItem: public Container
417  {
418  public:
419  ListItem(const TokenGroup& contents): Container(contents),
420  mInhibitParagraphs(true) { }
421 
422  void inhibitParagraphs(bool set)
423  {
424  mInhibitParagraphs = set;
425  }
426 
427  bool inhibitParagraphs() const override
428  {
429  return mInhibitParagraphs;
430  }
431 
432  TokenPtr clone(const TokenGroup& newContents) const override
433  {
434  return TokenPtr(new ListItem(newContents));
435  }
436  std::string containerName() const override
437  {
438  return "ListItem";
439  }
440 
441  protected:
442  void preWrite(std::ostream& out) const override
443  {
444  out << "<li>";
445  }
446  void postWrite(std::ostream& out) const override
447  {
448  out << "</li>\n";
449  }
450 
451  private:
452  bool mInhibitParagraphs;
453  };
454 
455  class UnorderedList: public Container
456  {
457  public:
458  UnorderedList(const TokenGroup& contents, bool paragraphMode = false);
459 
460  TokenPtr clone(const TokenGroup& newContents) const override
461  {
462  return TokenPtr(new UnorderedList(newContents));
463  }
464  std::string containerName() const override
465  {
466  return "UnorderedList";
467  }
468 
469  protected:
470  void preWrite(std::ostream& out) const override
471  {
472  out << "\n<ul>\n";
473  }
474  void postWrite(std::ostream& out) const override
475  {
476  out << "</ul>\n\n";
477  }
478  };
479 
480  class OrderedList: public UnorderedList
481  {
482  public:
483  OrderedList(const TokenGroup& contents, bool paragraphMode = false):
484  UnorderedList(contents, paragraphMode) { }
485 
486  TokenPtr clone(const TokenGroup& newContents) const override
487  {
488  return TokenPtr(new OrderedList(newContents));
489  }
490  std::string containerName() const override
491  {
492  return "OrderedList";
493  }
494 
495  protected:
496  void preWrite(std::ostream& out) const override
497  {
498  out << "<ol>\n";
499  }
500  void postWrite(std::ostream& out) const override
501  {
502  out << "</ol>\n\n";
503  }
504  };
505 
506  class BlockQuote: public Container
507  {
508  public:
509  BlockQuote(const TokenGroup& contents): Container(contents) { }
510 
511  TokenPtr clone(const TokenGroup& newContents) const override
512  {
513  return TokenPtr(new BlockQuote(newContents));
514  }
515  std::string containerName() const override
516  {
517  return "BlockQuote";
518  }
519 
520  protected:
521  void preWrite(std::ostream& out) const override
522  {
523  out << "<blockquote>\n";
524  }
525  void postWrite(std::ostream& out) const override
526  {
527  out << "\n</blockquote>\n";
528  }
529  };
530 
531  class Paragraph: public Container
532  {
533  public:
534  Paragraph() { }
535  Paragraph(const TokenGroup& contents): Container(contents) { }
536 
537  TokenPtr clone(const TokenGroup& newContents) const override
538  {
539  return TokenPtr(new Paragraph(newContents));
540  }
541  std::string containerName() const override
542  {
543  return "Paragraph";
544  }
545 
546  protected:
547  void preWrite(std::ostream& out) const override
548  {
549  out << "<p>";
550  }
551  void postWrite(std::ostream& out) const override
552  {
553  out << "</p>\n\n";
554  }
555  };
556 
557 
558 
559  class BoldOrItalicMarker: public Token
560  {
561  public:
562  BoldOrItalicMarker(bool open, char c, size_t size): mOpenMarker(open),
563  mTokenCharacter(c), mSize(size), mMatch(0), mCannotMatch(false),
564  mDisabled(false), mId(-1) { }
565 
566  bool isUnmatchedOpenMarker() const override
567  {
568  return (mOpenMarker && mMatch == 0 && !mCannotMatch);
569  }
570  bool isUnmatchedCloseMarker() const override
571  {
572  return (!mOpenMarker && mMatch == 0 && !mCannotMatch);
573  }
574  bool isMatchedOpenMarker() const override
575  {
576  return (mOpenMarker && mMatch != 0);
577  }
578  bool isMatchedCloseMarker() const override
579  {
580  return (!mOpenMarker && mMatch != 0);
581  }
582  void writeAsHtml(std::ostream& out) const override;
583  void writeToken(std::ostream& out) const override;
584 
585  bool isOpenMarker() const
586  {
587  return mOpenMarker;
588  }
589  char tokenCharacter() const
590  {
591  return mTokenCharacter;
592  }
593  size_t size() const
594  {
595  return mSize;
596  }
597  bool matched() const
598  {
599  return (mMatch != 0);
600  }
601  BoldOrItalicMarker* matchedTo() const
602  {
603  return mMatch;
604  }
605  int id() const
606  {
607  return mId;
608  }
609 
610  void matched(BoldOrItalicMarker* match, int id = -1)
611  {
612  mMatch = match;
613  mId = id;
614  }
615  void cannotMatch(bool set)
616  {
617  mCannotMatch = set;
618  }
619  void disable()
620  {
621  mCannotMatch = mDisabled = true;
622  }
623 
624  private:
625  bool mOpenMarker; // Otherwise it's a close-marker
626  char mTokenCharacter; // Underscore or asterisk
627  size_t mSize; // 1=italics, 2=bold, 3=both
628  BoldOrItalicMarker* mMatch;
629  bool mCannotMatch;
630  bool mDisabled;
631  int mId;
632  };
633 
634  class Image: public Token
635  {
636  public:
637  Image(const std::string& altText, const std::string& url, const std::string&
638  title): mAltText(altText), mUrl(url), mTitle(title) { }
639 
640  void writeAsHtml(std::ostream& out) const override;
641 
642  void writeToken(std::ostream& out) const override
643  {
644  out << "Image: " << mUrl << '\n';
645  }
646 
647  private:
648  const std::string mAltText, mUrl, mTitle;
649  };
650 
651 }
markdown::token::TextHolder::text
std::optional< std::string > text() const override
Definition: markdown-tokens.h:132
markdown::Token::isUnmatchedOpenMarker
virtual bool isUnmatchedOpenMarker() const
Definition: markdown-tokens.h:84
markdown::token::TextHolder::writeToken
void writeToken(std::ostream &out) const override
Definition: markdown-tokens.h:127
markdown::Token::isMatchedOpenMarker
virtual bool isMatchedOpenMarker() const
Definition: markdown-tokens.h:92
str
std::string str(const T &t)
Definition: UserAssistedSegmenterGuiWidgetController.cpp:42
markdown::LinkIds::Target::Target
Target(const std::string &url_, const std::string &title_)
Definition: markdown-tokens.h:29
markdown::token::RawText::RawText
RawText(const std::string &text, bool canContainMarkup=true)
Definition: markdown-tokens.h:151
markdown::token::HtmlTag::HtmlTag
HtmlTag(const std::string &contents)
Definition: markdown-tokens.h:178
markdown::Token::text
virtual std::optional< std::string > text() const
Definition: markdown-tokens.h:67
markdown::token::TextHolder::TextHolder
TextHolder(const std::string &text, bool canContainMarkup, unsigned int encodingFlags)
Definition: markdown-tokens.h:121
markdown::token::cDoubleAmps
@ cDoubleAmps
Definition: markdown-tokens.h:116
markdown::Token::Token
Token()
Definition: markdown-tokens.h:48
markdown::Token::inhibitParagraphs
virtual bool inhibitParagraphs() const
Definition: markdown-tokens.h:100
markdown::LinkIds::Target::title
std::string title
Definition: markdown-tokens.h:27
markdown.h
markdown::Token::isBlankLine
virtual bool isBlankLine() const
Definition: markdown-tokens.h:76
markdown::token::EncodingFlags
EncodingFlags
Definition: markdown-tokens.h:116
markdown::Token::isContainer
virtual bool isContainer() const
Definition: markdown-tokens.h:80
markdown::Token
Definition: markdown-tokens.h:45
markdown::token::isValidTag
size_t isValidTag(const std::string &tag, bool nonBlockFirst)
Definition: markdown-tokens.cpp:271
markdown::token::BoldOrItalicMarker
Definition: markdown-tokens.h:559
markdown::token::TextHolder
Definition: markdown-tokens.h:118
markdown::token::HtmlTag::postWrite
void postWrite(std::ostream &out) const override
Definition: markdown-tokens.h:190
markdown::token::TextHolder::canContainMarkup
bool canContainMarkup() const override
Definition: markdown-tokens.h:137
markdown::LinkIds::add
void add(const std::string &id, const std::string &url, const std::string &title)
Definition: markdown.cpp:871
markdown::LinkIds::Target::url
std::string url
Definition: markdown-tokens.h:26
markdown::token::RawText
Definition: markdown-tokens.h:148
markdown::token::HtmlTag::preWrite
void preWrite(std::ostream &out) const override
Definition: markdown-tokens.h:186
markdown::token::Image
Definition: markdown-tokens.h:634
armarx::ctrlutil::a
double a(double t, double a0, double j)
Definition: CtrlUtil.h:45
markdown::Token::writeToken
virtual void writeToken(size_t indent, std::ostream &out) const
Definition: markdown-tokens.h:56
markdown::token
Definition: markdown-tokens.cpp:16
markdown::Token::processSpanElements
virtual std::optional< TokenGroup > processSpanElements(const LinkIds &idTable)
Definition: markdown-tokens.h:62
markdown::TokenGroupIter
TokenGroup::iterator TokenGroupIter
Definition: markdown-tokens.h:18
markdown::Token::writeToken
virtual void writeToken(std::ostream &out) const =0
markdown::Token::canContainMarkup
virtual bool canContainMarkup() const
Definition: markdown-tokens.h:72
markdown
Definition: markdown-tokens.cpp:16
markdown::Token::writeAsHtml
virtual void writeAsHtml(std::ostream &) const =0
markdown::Token::preWrite
virtual void preWrite(std::ostream &out) const
Definition: markdown-tokens.h:106
markdown::Token::writeAsOriginal
virtual void writeAsOriginal(std::ostream &out) const
Definition: markdown-tokens.h:51
markdown::LinkIds
Definition: markdown-tokens.h:21
markdown::CTokenGroupIter
TokenGroup::const_iterator CTokenGroupIter
Definition: markdown-tokens.h:19
markdown::Token::postWrite
virtual void postWrite(std::ostream &out) const
Definition: markdown-tokens.h:107
markdown::Token::isMatchedCloseMarker
virtual bool isMatchedCloseMarker() const
Definition: markdown-tokens.h:96
markdown::token::HtmlTag::writeToken
void writeToken(std::ostream &out) const override
Definition: markdown-tokens.h:180
markdown::token::TextHolder::writeAsHtml
void writeAsHtml(std::ostream &out) const override
Definition: markdown-tokens.cpp:309
markdown::token::RawText::writeToken
void writeToken(std::ostream &out) const override
Definition: markdown-tokens.h:154
markdown::token::cAngles
@ cAngles
Definition: markdown-tokens.h:116
markdown::LinkIds::Target
Definition: markdown-tokens.h:24
markdown::TokenGroup
std::list< TokenPtr > TokenGroup
Definition: markdown.h:22
markdown::token::RawText::processSpanElements
std::optional< TokenGroup > processSpanElements(const LinkIds &idTable) override
Definition: markdown-tokens.cpp:325
markdown::token::Image::writeToken
void writeToken(std::ostream &out) const override
Definition: markdown-tokens.h:642
markdown::LinkIds::find
std::optional< Target > find(const std::string &id) const
Definition: markdown.cpp:857
armarx::ctrlutil::s
double s(double t, double s0, double v0, double a0, double j)
Definition: CtrlUtil.h:33
markdown::token::cAmps
@ cAmps
Definition: markdown-tokens.h:116
markdown::Token::isUnmatchedCloseMarker
virtual bool isUnmatchedCloseMarker() const
Definition: markdown-tokens.h:88
markdown::token::cQuotes
@ cQuotes
Definition: markdown-tokens.h:116
markdown::token::Image::Image
Image(const std::string &altText, const std::string &url, const std::string &title)
Definition: markdown-tokens.h:637
markdown::token::HtmlTag
Definition: markdown-tokens.h:175