markdown-tokens.cpp
Go to the documentation of this file.
1 
2 /*
3  Copyright (c) 2009 by Chad Nelson
4  Released under the MIT License.
5  See the provided LICENSE.TXT file for details.
6 */
7 
8 #include "markdown-tokens.h"
9 
10 #include <set>
11 #include <sstream>
12 #include <stack>
13 
14 #include <boost/regex.hpp>
15 
16 namespace markdown::token
17 {
18 
19  namespace
20  {
21 
// Characters that may be backslash-escaped in the input. The position of a
// character in this string is the index embedded in "escaped" placeholders.
const std::string cEscapedCharacters("\\`*_{}[]()#+-.!>");

// Returns the position of `c` within cEscapedCharacters, or std::nullopt
// when `c` is not one of the escapable characters.
std::optional<size_t>
isEscapedCharacter(char c)
{
    const std::string::size_type position = cEscapedCharacters.find(c);

    if (position == std::string::npos)
    {
        return std::nullopt;
    }

    return position;
}
39 
40  char
41  escapedCharacter(size_t index)
42  {
43  return cEscapedCharacters[index];
44  }
45 
46  std::string
47  encodeString(const std::string& src, int encodingFlags)
48  {
49  bool amps = (encodingFlags & cAmps) != 0,
50  doubleAmps = (encodingFlags & cDoubleAmps) != 0,
51  angleBrackets = (encodingFlags & cAngles) != 0,
52  quotes = (encodingFlags & cQuotes) != 0;
53 
54  std::string tgt;
55 
56  for (std::string::const_iterator i = src.begin(), ie = src.end(); i != ie; ++i)
57  {
58  if (*i == '&' && amps)
59  {
60  static const boost::regex cIgnore(
61  "^(&amp;)|(&#[0-9]{1,3};)|(&#[xX][0-9a-fA-F]{1,2};)");
62 
63  if (boost::regex_search(i, ie, cIgnore))
64  {
65  tgt.push_back(*i);
66  }
67  else
68  {
69  tgt += "&amp;";
70  }
71  }
72  else if (*i == '&' && doubleAmps)
73  {
74  tgt += "&amp;";
75  }
76  else if (*i == '<' && angleBrackets)
77  {
78  tgt += "&lt;";
79  }
80  else if (*i == '>' && angleBrackets)
81  {
82  tgt += "&gt;";
83  }
84  else if (*i == '\"' && quotes)
85  {
86  tgt += "&quot;";
87  }
88  else
89  {
90  tgt.push_back(*i);
91  }
92  }
93 
94  return tgt;
95  }
96 
// Returns true when `str` begins with one of the recognised URL prefixes
// (a scheme such as "http://" or a bare "www." / "ftp." host prefix).
bool
looksLikeUrl(const std::string& str)
{
    static const char* const cPrefixes[] = {
        "http://", "https://", "ftp://", "ftps://", "file://", "www.", "ftp."};

    for (const char* prefix : cPrefixes)
    {
        // rfind() with a start position of 0 can only match at the very
        // beginning of the string, i.e. it is a prefix test.
        if (str.rfind(prefix, 0) == 0)
        {
            return true;
        }
    }

    return false;
}
121 
// Predicate for std::find_if: true when `c` may NOT appear in the name
// (local) part of an e-mail address. Allowed: alphanumerics and "._%-+".
bool
notValidNameCharacter(char c)
{
    // The <cctype> classifiers have undefined behaviour for negative values,
    // so bytes with the high bit set must go through unsigned char.
    const unsigned char uc = static_cast<unsigned char>(c);
    return !(isalnum(uc) || c == '.' || c == '_' || c == '%' || c == '-' || c == '+');
}
127 
// Predicate for std::find_if: true when `c` may NOT appear in the site
// (domain) part of an e-mail address.
bool
notValidSiteCharacter(char c)
{
    // NOTE: Kludge alert! The official spec for site characters is only
    // "a-zA-Z._%-". However, MDTest supports "international domain names,"
    // which use characters other than that; I'm kind of cheating here, handling
    // those by allowing all utf8-encoded characters too.
    if (c & 0x80)
    {
        // Byte of a multi-byte UTF-8 sequence. Tested first so that it never
        // reaches isalnum(), whose behaviour is undefined for negative values.
        return false;
    }

    return !(isalnum(static_cast<unsigned char>(c)) || c == '.' || c == '_' || c == '%' ||
             c == '-');
}
137 
// Predicate for std::find_if: true when `c` is not an alphabetic character.
bool
isNotAlpha(char c)
{
    // Cast first: isalpha() has undefined behaviour for negative char values.
    return !isalpha(static_cast<unsigned char>(c));
}
143 
// Obfuscates `src` by writing each 7-bit character as a numeric character
// reference, alternating between decimal ("&#NN;") at even positions and
// hexadecimal ("&#xHH;") at odd positions. Bytes with the high bit set are
// copied through unchanged (they still count as a position).
std::string
emailEncode(const std::string& src)
{
    std::ostringstream encoded;

    for (std::string::size_type pos = 0; pos < src.size(); ++pos)
    {
        const char ch = src[pos];
        const bool useHex = (pos % 2) != 0;

        if (ch & 0x80)
        {
            encoded << ch;
        }
        else if (useHex)
        {
            encoded << "&#x" << std::hex << static_cast<int>(ch) << ';';
        }
        else
        {
            encoded << "&#" << std::dec << static_cast<int>(ch) << ';';
        }
    }

    return encoded.str();
}
170 
171  bool
172  looksLikeEmailAddress(const std::string& str)
173  {
174  using Iter = std::string::const_iterator;
175  using RIter = std::string::const_reverse_iterator;
176  Iter i = std::find_if(str.begin(), str.end(), notValidNameCharacter);
177 
178  if (i != str.end() && *i == '@' && i != str.begin())
179  {
180  // The name part is valid.
181  i = std::find_if(i + 1, str.end(), notValidSiteCharacter);
182 
183  if (i == str.end())
184  {
185  // The site part doesn't contain any invalid characters.
186  RIter ri = std::find_if(str.rbegin(), str.rend(), isNotAlpha);
187 
188  if (ri != str.rend() && *ri == '.')
189  {
190  // It ends with a dot and only alphabetic characters.
191  size_t d = std::distance(ri.base(), str.end());
192 
193  if (d >= 2 && d <= 4)
194  {
195  // There are two-to-four of them. It's valid.
196  return true;
197  }
198  }
199  }
200  }
201 
202  return false;
203  }
204 
205  // From <http://en.wikipedia.org/wiki/HTML_element>
206 
// Non-block HTML tags recognised by the parser, nullptr-terminated.
// A trailing '/' marks a tag that can have a closing tag; initTag() strips it.
const char* cOtherTagInit[] = {
    // Header tags
    "title/", "base", "link", "basefont", "script/", "style/", "object/", "meta",

    // Inline tags
    "em/", "strong/", "q/", "cite/", "dfn/", "abbr/", "acronym/",
    "code/", "samp/", "kbd/", "var/", "sub/", "sup/", "del/", "ins/",
    "isindex", "a/", "img", "br", "map/", "area", "object/", "param",
    "applet/", "span/",

    nullptr};
246 
// Block-level HTML tags recognised by the parser, nullptr-terminated.
// A trailing '/' marks a tag that can have a closing tag; initTag() strips it.
const char* cBlockTagInit[] = {
    "p/", "blockquote/", "hr",
    "h1/", "h2/", "h3/", "h4/", "h5/", "h6/",
    "dl/", "dt/", "dd/", "ol/", "ul/", "li/", "dir/", "menu/",
    "table/", "tr/", "th/", "td/", "col", "colgroup/", "caption/",
    "thead/", "tbody/", "tfoot/",
    "form/", "select/", "option", "input", "label/", "textarea/",
    "div/", "pre/", "address/",
    "iframe/", "frame/", "frameset/", "noframes/",
    "center/", "b/", "i/", "big/", "small/", /*"s/",*/ "strike/",
    "tt/", "u/", "font/", "ins/", "del/",
    nullptr};
273 
274  // Other official ones (not presently in use in this code)
275  //"!doctype", "bdo", "body", "button", "fieldset", "head", "html",
276  //"legend", "noscript", "optgroup", "xmp",
277 
// Tag-name lookup sets, filled from the init tables by initTag().
std::set<std::string> otherTags;
std::set<std::string> blockTags;

// Inserts every entry of the nullptr-terminated array `init` into `set`.
// A trailing '/' on an entry (meaning the tag can have a closing tag) is
// stripped before insertion, so the set holds bare tag names.
void
initTag(std::set<std::string>& set, const char* init[])
{
    for (size_t x = 0; init[x] != nullptr; ++x)
    {
        std::string name = init[x];

        // Guard against an empty entry: looking at the last character of an
        // empty string is undefined behaviour.
        if (!name.empty() && name.back() == '/')
        {
            name.pop_back();
        }

        set.insert(name);
    }
}
296 
// Returns a copy of `ref` in which every run of consecutive spaces is
// collapsed into a single space. Other characters are copied unchanged.
std::string
cleanTextLinkRef(const std::string& ref)
{
    std::string cleaned;
    bool previousWasSpace = false;

    for (const char ch : ref)
    {
        const bool isSpace = (ch == ' ');

        // Drop a space only when the character emitted before it was a space.
        if (!(isSpace && previousWasSpace))
        {
            cleaned.push_back(ch);
        }

        previousWasSpace = isSpace;
    }

    return cleaned;
}
319 
320  } // namespace
321 
322  size_t
323  isValidTag(const std::string& tag, bool nonBlockFirst)
324  {
325  if (blockTags.empty())
326  {
327  initTag(otherTags, cOtherTagInit);
328  initTag(blockTags, cBlockTagInit);
329  }
330 
331  if (nonBlockFirst)
332  {
333  if (otherTags.find(tag) != otherTags.end())
334  {
335  return 1;
336  }
337 
338  if (blockTags.find(tag) != blockTags.end())
339  {
340  return 2;
341  }
342  }
343  else
344  {
345  if (blockTags.find(tag) != blockTags.end())
346  {
347  return 2;
348  }
349 
350  if (otherTags.find(tag) != otherTags.end())
351  {
352  return 1;
353  }
354  }
355 
356  return 0;
357  }
358 
359  void
360  TextHolder::writeAsHtml(std::ostream& out) const
361  {
362  preWrite(out);
363 
364  if (mEncodingFlags != 0)
365  {
366  out << encodeString(mText, mEncodingFlags);
367  }
368  else
369  {
370  out << mText;
371  }
372 
373  postWrite(out);
374  }
375 
376  std::optional<TokenGroup>
378  {
379  if (!canContainMarkup())
380  {
381  return std::nullopt;
382  }
383 
384  ReplacementTable replacements;
385  std::string str = _processHtmlTagAttributes(*text(), replacements);
386  str = _processCodeSpans(str, replacements);
387  str = _processEscapedCharacters(str);
388  str = _processLinksImagesAndTags(str, replacements, idTable);
389  return _processBoldAndItalicSpans(str, replacements);
390  }
391 
392  std::string
393  RawText::_processHtmlTagAttributes(std::string src, ReplacementTable& replacements)
394  {
395  // Because "Attribute Content Is Not A Code Span"
396  std::string tgt;
397  std::string::const_iterator prev = src.begin(), end = src.end();
398 
399  while (true)
400  {
401  static const boost::regex cHtmlToken(
402  "<((/?)([a-zA-Z0-9]+)(?:( +[a-zA-Z0-9]+?(?: ?= ?(\"|').*?\\5))+? */? *))>");
403  boost::smatch m;
404 
405  if (boost::regex_search(prev, end, m, cHtmlToken))
406  {
407  // NOTE: Kludge alert! The `isValidTag` test is a cheat, only here
408  // to handle some edge cases between the Markdown test suite and the
409  // PHP-Markdown one, which seem to conflict.
410  if (isValidTag(m[3]))
411  {
412  tgt += std::string(prev, m[0].first);
413 
414  std::string fulltag = m[0], tgttag;
415  std::string::const_iterator prevtag = fulltag.begin(), endtag = fulltag.end();
416 
417  while (true)
418  {
419  static const boost::regex cAttributeStrings("= ?(\"|').*?\\1");
420  boost::smatch mtag;
421 
422  if (boost::regex_search(prevtag, endtag, mtag, cAttributeStrings))
423  {
424  tgttag += std::string(prevtag, mtag[0].first);
425  tgttag +=
426  "\x01@" + std::to_string(replacements.size()) + "@htmlTagAttr\x01";
427  prevtag = mtag[0].second;
428 
429  replacements.push_back(TokenPtr(
430  new TextHolder(std::string(mtag[0]), false, cAmps | cAngles)));
431  }
432  else
433  {
434  tgttag += std::string(prevtag, endtag);
435  break;
436  }
437  }
438 
439  tgt += tgttag;
440  prev = m[0].second;
441  }
442  else
443  {
444  tgt += std::string(prev, m[0].second);
445  prev = m[0].second;
446  }
447  }
448  else
449  {
450  tgt += std::string(prev, end);
451  break;
452  }
453  }
454 
455  return tgt;
456  }
457 
458  std::string
459  RawText::_processCodeSpans(std::string src, ReplacementTable& replacements)
460  {
461  static const boost::regex cCodeSpan[2] = {boost::regex("(?:^|(?<=[^\\\\]))`` (.+?) ``"),
462  boost::regex("(?:^|(?<=[^\\\\]))`(.+?)`")};
463 
464  for (const auto& pass : cCodeSpan)
465  {
466  std::string tgt;
467  std::string::const_iterator prev = src.begin(), end = src.end();
468 
469  while (true)
470  {
471  boost::smatch m;
472 
473  if (boost::regex_search(prev, end, m, pass))
474  {
475  tgt += std::string(prev, m[0].first);
476  tgt += "\x01@" + std::to_string(replacements.size()) + "@codeSpan\x01";
477  prev = m[0].second;
478  replacements.push_back(
479  TokenPtr(new CodeSpan(_restoreProcessedItems(m[1], replacements))));
480  }
481  else
482  {
483  tgt += std::string(prev, end);
484  break;
485  }
486  }
487 
488  src.swap(tgt);
489  tgt.clear();
490  }
491 
492  return src;
493  }
494 
495  std::string
496  RawText::_processEscapedCharacters(const std::string& src)
497  {
498  std::string tgt;
499  std::string::const_iterator prev = src.begin(), end = src.end();
500 
501  while (true)
502  {
503  std::string::const_iterator i = std::find(prev, end, '\\');
504 
505  if (i != end)
506  {
507  tgt += std::string(prev, i);
508  ++i;
509 
510  if (i != end)
511  {
512  std::optional<size_t> e = isEscapedCharacter(*i);
513 
514  if (e)
515  {
516  tgt += "\x01@#" + std::to_string(*e) + "@escaped\x01";
517  }
518  else
519  {
520  tgt = tgt + '\\' + *i;
521  }
522 
523  prev = i + 1;
524  }
525  else
526  {
527  tgt += '\\';
528  break;
529  }
530  }
531  else
532  {
533  tgt += std::string(prev, end);
534  break;
535  }
536  }
537 
538  return tgt;
539  }
540 
541  std::string
542  RawText::_processSpaceBracketedGroupings(const std::string& src, ReplacementTable& replacements)
543  {
544  static const boost::regex cRemove("(?:(?: \\*+ )|(?: _+ ))");
545 
546  std::string tgt;
547  std::string::const_iterator prev = src.begin(), end = src.end();
548 
549  while (true)
550  {
551  boost::smatch m;
552 
553  if (boost::regex_search(prev, end, m, cRemove))
554  {
555  tgt += std::string(prev, m[0].first);
556  tgt += "\x01@" + std::to_string(replacements.size()) + "@spaceBracketed\x01";
557  replacements.push_back(TokenPtr(new RawText(m[0])));
558  prev = m[0].second;
559  }
560  else
561  {
562  tgt += std::string(prev, end);
563  break;
564  }
565  }
566 
567  return tgt;
568  }
569 
// Finds inline links/images, reference links/images, HTML tags and
// auto-links in `src`.
//
// Every match is replaced in the returned string by a
// "\x01@<index>@links&Images1\x01" placeholder whose index refers to the token
// pushed onto `replacements` for it. For links, the link text stays in the
// returned string (so it can still be processed for emphasis) and is followed
// by a second placeholder for the closing "/a" tag.
//
// `idTable` is consulted to resolve reference-style links and images.
std::string
RawText::_processLinksImagesAndTags(const std::string& src,
                                    ReplacementTable& replacements,
                                    const LinkIds& idTable)
{
    // NOTE: Kludge alert! The "inline link or image" regex should be...
    //
    // "(?:(!?)\\[(.+?)\\] *\\((.*?)\\))"
    //
    // ...but that fails on the 'Images' test because it includes a "stupid URL"
    // that has parentheses within it. The proper way to deal with this would be
    // to match any nested parentheses, but regular expressions can't handle an
    // unknown number of nested items, so I'm cheating -- the regex for it
    // allows for one (and *only* one) pair of matched parentheses within the
    // URL. It makes the regex hard to follow (it was even harder to get right),
    // but it allows it to pass the test.
    //
    // The "reference link or image" one has a similar problem; it should be...
    //
    // "|(?:(!?)\\[(.+?)\\](?: *\\[(.*?)\\])?)"
    //
    static const boost::regex cExpression(
        "(?:(!?)\\[([^\\]]+?)\\] *\\(([^\\(]*(?:\\(.*?\\).*?)*?)\\))" // Inline link or image
        "|(?:(!?)\\[((?:[^]]*?\\[.*?\\].*?)|(?:.+?))\\](?: *\\[(.*?)\\])?)" // Reference link or image
        "|(?:<(/?([a-zA-Z0-9]+).*?)>)" // potential HTML tag or auto-link
    );
    // Important captures: 1/4=image indicator, 2/5=contents/alttext,
    // 3=URL/title, 6=optional link ID, 7=potential HTML tag or auto-link
    // contents, 8=actual tag from 7.

    std::string tgt;
    std::string::const_iterator prev = src.begin(), end = src.end();

    while (true)
    {
        boost::smatch m;

        if (boost::regex_search(prev, end, m, cExpression))
        {
            assert(m[0].matched);
            assert(m[0].length() != 0);

            // The placeholder is emitted before the token is created: the
            // index written here is the slot the push_back below will fill.
            tgt += std::string(prev, m[0].first);
            tgt += "\x01@" + std::to_string(replacements.size()) + "@links&Images1\x01";
            prev = m[0].second;

            // Work out which alternative of the expression matched.
            bool isImage = false, isLink = false, isReference = false;

            if (m[4].matched && m[4].length())
            {
                isImage = isReference = true;
            }
            else if (m[1].matched && m[1].length())
            {
                isImage = true;
            }
            else if (m[5].matched)
            {
                isLink = isReference = true;
            }
            else if (m[2].matched)
            {
                isLink = true;
            }

            if (isImage || isLink)
            {
                std::string contentsOrAlttext, url, title;
                bool resolved = false;

                if (isReference)
                {
                    contentsOrAlttext = m[5];
                    std::string linkId = (m[6].matched ? std::string(m[6]) : std::string());

                    // No explicit ID: fall back to the link text itself, with
                    // runs of spaces collapsed.
                    if (linkId.empty())
                    {
                        linkId = cleanTextLinkRef(contentsOrAlttext);
                    }

                    std::optional<markdown::LinkIds::Target> target = idTable.find(linkId);

                    if (target)
                    {
                        url = target->url;
                        title = target->title;
                        resolved = true;
                    };
                }
                else
                {
                    static const boost::regex cReference(
                        "^<?([^ >]*)>?(?: *(?:('|\")(.*)\\2)|(?:\\((.*)\\)))? *$");
                    // Useful captures: 1=url, 3/4=title
                    contentsOrAlttext = m[2];
                    std::string urlAndTitle = m[3];
                    boost::smatch mm;

                    if (boost::regex_match(urlAndTitle, mm, cReference))
                    {
                        url = mm[1];

                        if (mm[3].matched)
                        {
                            title = mm[3];
                        }
                        else if (mm[4].matched)
                        {
                            title = mm[4];
                        }

                        resolved = true;
                    }
                }

                if (!resolved)
                {
                    // Just encode the first character as-is, and continue
                    // searching after it.
                    prev = m[0].first + 1;
                    replacements.push_back(
                        TokenPtr(new RawText(std::string(m[0].first, prev))));
                }
                else if (isImage)
                {
                    replacements.push_back(TokenPtr(new Image(contentsOrAlttext, url, title)));
                }
                else
                {
                    // Link: opening tag token, then the link text left in the
                    // output string, then a second placeholder for the
                    // closing tag.
                    replacements.push_back(TokenPtr(new HtmlAnchorTag(url, title)));
                    tgt += contentsOrAlttext;
                    tgt += "\x01@" + std::to_string(replacements.size()) + "@links&Images2\x01";
                    replacements.push_back(TokenPtr(new HtmlTag("/a")));
                }
            }
            else
            {
                // Otherwise it's an HTML tag or auto-link.
                std::string contents = m[7];

                // cerr << "Evaluating potential HTML or auto-link: " << contents << endl;
                // cerr << "m[8]=" << m[8] << endl;

                if (looksLikeUrl(contents))
                {
                    TokenGroup subgroup;
                    subgroup.push_back(TokenPtr(new HtmlAnchorTag(contents)));
                    subgroup.push_back(TokenPtr(new RawText(contents, false)));
                    subgroup.push_back(TokenPtr(new HtmlTag("/a")));
                    replacements.push_back(TokenPtr(new Container(subgroup)));
                }
                else if (looksLikeEmailAddress(contents))
                {
                    // Both the href and the visible text are written as
                    // numeric character references.
                    TokenGroup subgroup;
                    subgroup.push_back(
                        TokenPtr(new HtmlAnchorTag(emailEncode("mailto:" + contents))));
                    subgroup.push_back(TokenPtr(new RawText(emailEncode(contents), false)));
                    subgroup.push_back(TokenPtr(new HtmlTag("/a")));
                    replacements.push_back(TokenPtr(new Container(subgroup)));
                }
                else if (isValidTag(m[8]))
                {
                    replacements.push_back(
                        TokenPtr(new HtmlTag(_restoreProcessedItems(contents, replacements))));
                }
                else
                {
                    // Just encode it as-is
                    replacements.push_back(TokenPtr(new RawText(m[0])));
                }
            }
        }
        else
        {
            // No further matches: copy the remainder and stop.
            tgt += std::string(prev, end);
            break;
        }
    }

    return tgt;
}
751 
// Splits `src` into emphasis markers and the text between them, pairs up
// open and close markers, and finally expands the placeholders left by the
// earlier processing stages.
//
// The work is done in four steps:
//  1. tokenise `src` into RawText and BoldOrItalicMarker tokens;
//  2. match each open marker with a later close marker, splitting
//     three-character markers where only part of them can be matched;
//  3. undo matches that are not properly nested;
//  4. run every markup-capable text token through _encodeProcessedItems().
TokenGroup
RawText::_processBoldAndItalicSpans(const std::string& src, ReplacementTable& replacements)
{
    // Captures: 1/2 = marker and contents of a mid-word emphasis,
    // 3 = open marker, 4 = close marker.
    static const boost::regex cEmphasisExpression(
        "(?:(?<![*_])([*_]{1,3})([^*_ ]+?)\\1(?![*_]))" // Mid-word emphasis
        "|((?:(?<!\\*)\\*{1,3}(?!\\*)|(?<!_)_{1,3}(?!_))(?=.)(?! "
        ")(?![.,:;] )(?![.,:;]$))" // Open
        "|((?<![* ])\\*{1,3}(?!\\*)|(?<![ _])_{1,3}(?!_))" // Close
    );

    // Step 1: tokenise.
    TokenGroup tgt;
    std::string::const_iterator i = src.begin(), end = src.end(), prev = i;

    while (true)
    {
        boost::smatch m;

        if (boost::regex_search(prev, end, m, cEmphasisExpression))
        {
            // Text preceding the marker, if any.
            if (prev != m[0].first)
                tgt.push_back(TokenPtr(new RawText(std::string(prev, m[0].first))));

            if (m[3].matched)
            {
                std::string token = m[3];
                tgt.push_back(TokenPtr(new BoldOrItalicMarker(true, token[0], token.length())));
                prev = m[0].second;
            }
            else if (m[4].matched)
            {
                std::string token = m[4];
                tgt.push_back(
                    TokenPtr(new BoldOrItalicMarker(false, token[0], token.length())));
                prev = m[0].second;
            }
            else
            {
                // Mid-word emphasis: emit the open marker, the contents and
                // the close marker in one go.
                std::string token = m[1], contents = m[2];
                tgt.push_back(TokenPtr(new BoldOrItalicMarker(true, token[0], token.length())));
                tgt.push_back(TokenPtr(new RawText(std::string(contents))));
                tgt.push_back(
                    TokenPtr(new BoldOrItalicMarker(false, token[0], token.length())));
                prev = m[0].second;
            }
        }
        else
        {
            if (prev != end)
                tgt.push_back(TokenPtr(new RawText(std::string(prev, end))));

            break;
        }
    }

    // Step 2: pair open markers with close markers. `id` numbers the pairs.
    int id = 0;

    for (TokenGroup::iterator ii = tgt.begin(), iie = tgt.end(); ii != iie; ++ii)
    {
        if ((*ii)->isUnmatchedOpenMarker())
        {
            BoldOrItalicMarker* openToken = dynamic_cast<BoldOrItalicMarker*>(ii->get());

            // Find a matching close-marker, if it's there
            TokenGroup::iterator iii = ii;

            for (++iii; iii != iie; ++iii)
            {
                if ((*iii)->isUnmatchedCloseMarker())
                {
                    BoldOrItalicMarker* closeToken =
                        dynamic_cast<BoldOrItalicMarker*>(iii->get());

                    if (closeToken->size() == 3 && openToken->size() != 3)
                    {
                        // Split the close-token into a match for the open-token
                        // and a second for the leftovers.
                        closeToken->disable();
                        TokenGroup g;
                        g.push_back(TokenPtr(
                            new BoldOrItalicMarker(false,
                                                   closeToken->tokenCharacter(),
                                                   closeToken->size() - openToken->size())));
                        g.push_back(TokenPtr(new BoldOrItalicMarker(
                            false, closeToken->tokenCharacter(), openToken->size())));
                        // The two new markers are inserted right after the
                        // disabled one, so the inner loop reaches them next.
                        TokenGroup::iterator after = iii;
                        ++after;
                        tgt.splice(after, g);
                        continue;
                    }

                    if (closeToken->tokenCharacter() == openToken->tokenCharacter() &&
                        closeToken->size() == openToken->size())
                    {
                        openToken->matched(closeToken, id);
                        closeToken->matched(openToken, id);
                        ++id;
                        break;
                    }
                    else if (openToken->size() == 3)
                    {
                        // Split the open-token into a match for the close-token
                        // and a second for the leftovers.
                        openToken->disable();
                        TokenGroup g;
                        g.push_back(TokenPtr(
                            new BoldOrItalicMarker(true,
                                                   openToken->tokenCharacter(),
                                                   openToken->size() - closeToken->size())));
                        g.push_back(TokenPtr(new BoldOrItalicMarker(
                            true, openToken->tokenCharacter(), closeToken->size())));
                        // The new open markers follow the disabled one, so the
                        // outer loop visits them on its next iterations.
                        TokenGroup::iterator after = ii;
                        ++after;
                        tgt.splice(after, g);
                        break;
                    }
                }
            }
        }
    }

    // "Unmatch" invalidly-nested matches.
    std::stack<BoldOrItalicMarker*> openMatches;

    for (auto& ii : tgt)
    {
        if (ii->isMatchedOpenMarker())
        {
            BoldOrItalicMarker* open = dynamic_cast<BoldOrItalicMarker*>(ii.get());
            openMatches.push(open);
        }
        else if (ii->isMatchedCloseMarker())
        {
            BoldOrItalicMarker* close = dynamic_cast<BoldOrItalicMarker*>(ii.get());

            if (close->id() != openMatches.top()->id())
            {
                // This close marker does not belong to the innermost open
                // marker: dissolve its pair.
                close->matchedTo()->matched(nullptr);
                close->matched(nullptr);
            }
            else
            {
                openMatches.pop();

                // Also discard open markers whose pair was dissolved above.
                while (!openMatches.empty() && openMatches.top()->matchedTo() == nullptr)
                {
                    openMatches.pop();
                }
            }
        }
    }

    // Step 4: expand placeholders inside the remaining text tokens.
    TokenGroup r;

    for (auto& ii : tgt)
    {
        if (ii->text() && ii->canContainMarkup())
        {
            TokenGroup t = _encodeProcessedItems(*ii->text(), replacements);
            r.splice(r.end(), t);
        }
        else
        {
            r.push_back(ii);
        }
    }

    return r;
}
920 
921  TokenGroup
922  RawText::_encodeProcessedItems(const std::string& src, ReplacementTable& replacements)
923  {
924  static const boost::regex cReplaced("\x01@(#?[0-9]*)@.+?\x01");
925 
926  TokenGroup r;
927  std::string::const_iterator prev = src.begin();
928 
929  while (true)
930  {
931  boost::smatch m;
932 
933  if (boost::regex_search(prev, src.end(), m, cReplaced))
934  {
935  std::string pre = std::string(prev, m[0].first);
936 
937  if (!pre.empty())
938  {
939  r.push_back(TokenPtr(new RawText(pre)));
940  }
941 
942  prev = m[0].second;
943 
944  std::string ref = m[1];
945 
946  if (ref[0] == '#')
947  {
948  size_t n = std::stoul(ref.substr(1));
949  r.push_back(TokenPtr(new EscapedCharacter(escapedCharacter(n))));
950  }
951  else if (!ref.empty())
952  {
953  size_t n = std::stoul(ref);
954 
955  assert(n < replacements.size());
956  r.push_back(replacements[n]);
957  } // Otherwise just eat it
958  }
959  else
960  {
961  std::string pre = std::string(prev, src.end());
962 
963  if (!pre.empty())
964  {
965  r.push_back(TokenPtr(new RawText(pre)));
966  }
967 
968  break;
969  }
970  }
971 
972  return r;
973  }
974 
975  std::string
976  RawText::_restoreProcessedItems(const std::string& src, ReplacementTable& replacements)
977  {
978  static const boost::regex cReplaced("\x01@(#?[0-9]*)@.+?\x01");
979 
980  std::ostringstream r;
981  std::string::const_iterator prev = src.begin();
982 
983  while (true)
984  {
985  boost::smatch m;
986 
987  if (boost::regex_search(prev, src.end(), m, cReplaced))
988  {
989  std::string pre = std::string(prev, m[0].first);
990 
991  if (!pre.empty())
992  {
993  r << pre;
994  }
995 
996  prev = m[0].second;
997 
998  std::string ref = m[1];
999 
1000  if (ref[0] == '#')
1001  {
1002  size_t n = std::stoul(ref.substr(1));
1003  r << '\\' << escapedCharacter(n);
1004  }
1005  else if (!ref.empty())
1006  {
1007  size_t n = std::stoul(ref);
1008 
1009  assert(n < replacements.size());
1010  replacements[n]->writeAsOriginal(r);
1011  } // Otherwise just eat it
1012  }
1013  else
1014  {
1015  std::string pre = std::string(prev, src.end());
1016 
1017  if (!pre.empty())
1018  {
1019  r << pre;
1020  }
1021 
1022  break;
1023  }
1024  }
1025 
1026  return r.str();
1027  }
1028 
1029  HtmlAnchorTag::HtmlAnchorTag(const std::string& url, const std::string& title) :
1030  TextHolder("<a href=\"" + encodeString(url, cQuotes | cAmps) + "\"" +
1031  (title.empty() ? std::string()
1032  : " title=\"" + encodeString(title, cQuotes | cAmps) + "\"") +
1033  ">",
1034  false,
1035  0)
1036  {
1037  // This space deliberately blank. ;-)
1038  }
1039 
// Writes the opening "<pre><code>" and the closing "</code></pre>" followed
// by two newlines.
// NOTE(review): this listing jumps from original line 1043 to 1045, so one
// statement between the two writes is not visible here -- presumably the one
// that emits the block's text. Confirm against the upstream file.
void
CodeBlock::writeAsHtml(std::ostream& out) const
{
    out << "<pre><code>";
    out << "</code></pre>\n\n";
}
1047 
// Writes the opening "<code>" and the closing "</code>".
// NOTE(review): this listing jumps from original line 1051 to 1053, so one
// statement between the two writes is not visible here -- presumably the one
// that emits the span's text. Confirm against the upstream file.
void
CodeSpan::writeAsHtml(std::ostream& out) const
{
    out << "<code>";
    out << "</code>";
}
1055 
1056  void
1057  CodeSpan::writeAsOriginal(std::ostream& out) const
1058  {
1059  out << '`' << *text() << '`';
1060  }
1061 
1062  void
1063  Container::writeAsHtml(std::ostream& out) const
1064  {
1065  preWrite(out);
1066 
1067  for (const auto& mSubToken : mSubTokens)
1068  {
1069  mSubToken->writeAsHtml(out);
1070  }
1071 
1072  postWrite(out);
1073  }
1074 
1075  void
1076  Container::writeToken(size_t indent, std::ostream& out) const
1077  {
1078  out << std::string(indent * 2, ' ') << containerName() << "\n";
1079 
1080  for (const auto& mSubToken : mSubTokens)
1081  {
1082  mSubToken->writeToken(indent + 1, out);
1083  }
1084  }
1085 
1086  std::optional<TokenGroup>
1088  {
1089  TokenGroup t;
1090 
1091  for (CTokenGroupIter ii = mSubTokens.begin(), iie = mSubTokens.end(); ii != iie; ++ii)
1092  {
1093  if ((*ii)->text())
1094  {
1095  std::optional<TokenGroup> subt = (*ii)->processSpanElements(idTable);
1096 
1097  if (subt)
1098  {
1099  if (subt->size() > 1)
1100  {
1101  t.push_back(TokenPtr(new Container(*subt)));
1102  }
1103  else if (!subt->empty())
1104  {
1105  t.push_back(*subt->begin());
1106  }
1107  }
1108  else
1109  {
1110  t.push_back(*ii);
1111  }
1112  }
1113  else
1114  {
1115  std::optional<TokenGroup> subt = (*ii)->processSpanElements(idTable);
1116 
1117  if (subt)
1118  {
1119  const Container* c = dynamic_cast<const Container*>((*ii).get());
1120  assert(c != 0);
1121  t.push_back(c->clone(*subt));
1122  }
1123  else
1124  {
1125  t.push_back(*ii);
1126  }
1127  }
1128  }
1129 
1130  swapSubtokens(t);
1131  return std::nullopt;
1132  }
1133 
1134  UnorderedList::UnorderedList(const TokenGroup& contents, bool paragraphMode)
1135  {
1136  if (paragraphMode)
1137  {
1138  // Change each of the text items into paragraphs
1139  for (const auto& content : contents)
1140  {
1141  token::ListItem* item = dynamic_cast<token::ListItem*>(content.get());
1142  assert(item != 0);
1143  item->inhibitParagraphs(false);
1144  mSubTokens.push_back(content);
1145  }
1146  }
1147  else
1148  {
1149  mSubTokens = contents;
1150  }
1151  }
1152 
1153  void
1154  BoldOrItalicMarker::writeAsHtml(std::ostream& out) const
1155  {
1156  if (!mDisabled)
1157  {
1158  if (mMatch != nullptr)
1159  {
1160  assert(mSize >= 1 && mSize <= 3);
1161 
1162  if (mOpenMarker)
1163  {
1164  out << (mSize == 1 ? "<em>" : mSize == 2 ? "<strong>" : "<strong><em>");
1165  }
1166  else
1167  {
1168  out << (mSize == 1 ? "</em>" : mSize == 2 ? "</strong>" : "</em></strong>");
1169  }
1170  }
1171  else
1172  {
1173  out << std::string(mSize, mTokenCharacter);
1174  }
1175  }
1176  }
1177 
1178  void
1179  BoldOrItalicMarker::writeToken(std::ostream& out) const
1180  {
1181  if (!mDisabled)
1182  {
1183  if (mMatch != nullptr)
1184  {
1185  std::string type = (mSize == 1 ? "italic" : mSize == 2 ? "bold" : "italic&bold");
1186 
1187  if (mOpenMarker)
1188  {
1189  out << "Matched open-" << type << " marker\n";
1190  }
1191  else
1192  {
1193  out << "Matched close-" << type << " marker\n";
1194  }
1195  }
1196  else
1197  {
1198  if (mOpenMarker)
1199  out << "Unmatched bold/italic open marker: "
1200  << std::string(mSize, mTokenCharacter) << "\n";
1201  else
1202  out << "Unmatched bold/italic close marker: "
1203  << std::string(mSize, mTokenCharacter) << "\n";
1204  }
1205  }
1206  }
1207 
1208  void
1209  Image::writeAsHtml(std::ostream& out) const
1210  {
1211  out << "<img src=\"" << mUrl << "\" alt=\"" << mAltText << "\"";
1212 
1213  if (!mTitle.empty())
1214  {
1215  out << " title=\"" << mTitle << "\"";
1216  }
1217 
1218  out << "/>";
1219  }
1220 
1221 } // namespace markdown::token
markdown::token::TextHolder::text
std::optional< std::string > text() const override
Definition: markdown-tokens.h:172
str
std::string str(const T &t)
Definition: UserAssistedSegmenterGuiWidgetController.cpp:43
markdown::token::RawText::RawText
RawText(const std::string &text, bool canContainMarkup=true)
Definition: markdown-tokens.h:192
markdown::token::Image::writeAsHtml
void writeAsHtml(std::ostream &out) const override
Definition: markdown-tokens.cpp:1209
index
uint8_t index
Definition: EtherCATFrame.h:59
markdown::token::HtmlAnchorTag::HtmlAnchorTag
HtmlAnchorTag(const std::string &url, const std::string &title=std::string())
Definition: markdown-tokens.cpp:1029
markdown::token::Container::mSubTokens
TokenGroup mSubTokens
Definition: markdown-tokens.h:473
markdown::token::BoldOrItalicMarker::writeToken
void writeToken(std::ostream &out) const override
Definition: markdown-tokens.cpp:1179
markdown::token::CodeSpan::writeAsOriginal
void writeAsOriginal(std::ostream &out) const override
Definition: markdown-tokens.cpp:1057
markdown::token::TextHolder::TextHolder
TextHolder(const std::string &text, bool canContainMarkup, unsigned int encodingFlags)
Definition: markdown-tokens.h:158
markdown::token::cDoubleAmps
@ cDoubleAmps
Definition: markdown-tokens.h:150
boost::target
Vertex target(const detail::edge_base< Directed, Vertex > &e, const PCG &)
Definition: point_cloud_graph.h:668
markdown::token::Container::containerName
virtual std::string containerName() const
Definition: markdown-tokens.h:467
markdown::token::Container::swapSubtokens
void swapSubtokens(TokenGroup &tokens)
Definition: markdown-tokens.h:437
c
constexpr T c
Definition: UnscentedKalmanFilterTest.cpp:46
markdown::token::isValidTag
size_t isValidTag(const std::string &tag, bool nonBlockFirst)
Definition: markdown-tokens.cpp:323
markdown::token::Container
Definition: markdown-tokens.h:416
markdown::token::Container::processSpanElements
std::optional< TokenGroup > processSpanElements(const LinkIds &idTable) override
Definition: markdown-tokens.cpp:1087
markdown::token::TextHolder
Definition: markdown-tokens.h:155
markdown::token::TextHolder::canContainMarkup
bool canContainMarkup() const override
Definition: markdown-tokens.h:178
cxxopts::empty
bool empty(const std::string &s)
Definition: cxxopts.hpp:234
markdown::TokenPtr
std::shared_ptr< Token > TokenPtr
Definition: markdown.h:21
markdown::token::Container::writeToken
void writeToken(std::ostream &out) const override
Definition: markdown-tokens.h:451
markdown::token::UnorderedList::UnorderedList
UnorderedList(const TokenGroup &contents, bool paragraphMode=false)
Definition: markdown-tokens.cpp:1134
markdown::token::ListItem::inhibitParagraphs
void inhibitParagraphs(bool set)
Definition: markdown-tokens.h:528
markdown::token::Container::writeAsHtml
void writeAsHtml(std::ostream &out) const override
Definition: markdown-tokens.cpp:1063
markdown::token
Definition: markdown-tokens.cpp:16
markdown::token::CodeBlock::writeAsHtml
void writeAsHtml(std::ostream &out) const override
Definition: markdown-tokens.cpp:1041
markdown::token::BoldOrItalicMarker::writeAsHtml
void writeAsHtml(std::ostream &out) const override
Definition: markdown-tokens.cpp:1154
markdown::Token::preWrite
virtual void preWrite(std::ostream &out) const
Definition: markdown-tokens.h:131
markdown::LinkIds
Definition: markdown-tokens.h:21
markdown::CTokenGroupIter
TokenGroup::const_iterator CTokenGroupIter
Definition: markdown-tokens.h:19
markdown::token::ListItem
Definition: markdown-tokens.h:520
armarx::to_string
const std::string & to_string(const std::string &s)
Definition: StringHelpers.h:41
markdown::Token::postWrite
virtual void postWrite(std::ostream &out) const
Definition: markdown-tokens.h:136
markdown::token::TextHolder::writeAsHtml
void writeAsHtml(std::ostream &out) const override
Definition: markdown-tokens.cpp:360
std
Definition: Application.h:66
set
set(LIBS ArmarXCoreInterfaces ${CMAKE_THREAD_LIBS_INIT} ${dl_LIBRARIES} ${rt_LIBRARIES} ${QT_LIBRARIES} ${Boost_LIBRARIES} BoostAssertionHandler ArmarXCPPUtility SimoxUtility) set(LIB_FILES ArmarXManager.cpp ArmarXMultipleObjectsScheduler.cpp ArmarXObjectScheduler.cpp ManagedIceObject.cpp ManagedIceObjectPlugin.cpp Component.cpp ComponentPlugin.cpp IceGridAdmin.cpp ArmarXObjectObserver.cpp IceManager.cpp PackagePath.cpp RemoteReferenceCount.cpp logging/LoggingUtil.cpp logging/Logging.cpp logging/LogSender.cpp logging/ArmarXLogBuf.cpp system/ArmarXDataPath.cpp system/DynamicLibrary.cpp system/ProcessWatcher.cpp system/FactoryCollectionBase.cpp system/cmake/CMakePackageFinder.cpp system/cmake/CMakePackageFinderCache.cpp system/cmake/ArmarXPackageToolInterface.cpp system/RemoteObjectNode.cpp services/sharedmemory/HardwareId.cpp services/tasks/RunningTask.cpp services/tasks/ThreadList.cpp services/tasks/ThreadPool.cpp services/profiler/Profiler.cpp services/profiler/FileLoggingStrategy.cpp services/profiler/IceLoggingStrategy.cpp application/Application.cpp application/ApplicationOptions.cpp application/ApplicationProcessFacet.cpp application/ApplicationNetworkStats.cpp application/properties/PropertyUser.cpp application/properties/Property.cpp application/properties/PropertyDefinition.cpp application/properties/PropertyDefinitionContainer.cpp application/properties/PropertyDefinitionHelpFormatter.cpp application/properties/PropertyDefinitionConfigFormatter.cpp application/properties/PropertyDefinitionBriefHelpFormatter.cpp application/properties/PropertyDefinitionXmlFormatter.cpp application/properties/PropertyDefinitionDoxygenFormatter.cpp application/properties/PropertyDefinitionDoxygenComponentPagesFormatter.cpp application/properties/PropertyDefinitionContainerBriefHelpFormatter.cpp application/properties/IceProperties.cpp exceptions/Exception.cpp exceptions/local/UnexpectedEnumValueException.cpp util/FileSystemPathBuilder.cpp util/StringHelpers.cpp 
util/IceReportSkipper.cpp util/Throttler.cpp util/distributed/AMDCallbackCollection.cpp util/distributed/RemoteHandle/ClientSideRemoteHandleControlBlock.cpp util/distributed/RemoteHandle/RemoteHandle.cpp util/distributed/RemoteHandle/RemoteHandleControlBlock.cpp time/ice_conversions.cpp time/json_conversions.cpp time/CallbackWaitLock.cpp time/Clock.cpp time/ClockType.cpp time/ClockTypeNames.cpp time/CycleUtil.cpp time/DateTime.cpp time/Duration.cpp time/Frequency.cpp time/LocalTimeServer.cpp time/Metronome.cpp time/ScopedStopWatch.cpp time/StopWatch.cpp time/Timer.cpp time/TimeKeeper.cpp time/TimeUtil.cpp csv/CsvWriter.cpp csv/CsvReader.cpp eigen/conversions.cpp eigen/ice_conversions.cpp) set(LIB_HEADERS ArmarXManager.h ArmarXDummyManager.h ArmarXMultipleObjectsScheduler.h ArmarXObjectObserver.h ArmarXObjectScheduler.h ArmarXFwd.h Component.h ComponentPlugin.h ComponentFactories.h CoreObjectFactories.h IceGridAdmin.h IceManager.h IceManagerImpl.h json_conversions.h ManagedIceObject.h ManagedIceObjectPlugin.h ManagedIceObjectImpl.h ManagedIceObjectDependency.h ManagedIceObjectRegistryInterface.h PackagePath.h RemoteReferenceCount.h system/ImportExport.h system/ImportExportComponent.h system/AbstractFactoryMethod.h system/FactoryCollectionBase.h system/Synchronization.h system/ArmarXDataPath.h system/DynamicLibrary.h system/ProcessWatcher.h system/ConditionSynchronization.h system/cmake/CMakePackageFinder.h system/cmake/CMakePackageFinderCache.h system/cmake/FindPackageX.cmake system/cmake/ArmarXPackageToolInterface.h system/RemoteObjectNode.h logging/LoggingUtil.h logging/LogSender.h logging/Logging.h logging/ArmarXLogBuf.h logging/SpamFilterData.h services/tasks/RunningTask.h services/tasks/PeriodicTask.h services/tasks/ThreadList.h services/tasks/TaskUtil.h services/tasks/ThreadPool.h services/sharedmemory/SharedMemoryProvider.h services/sharedmemory/SharedMemoryConsumer.h services/sharedmemory/IceSharedMemoryProvider.h 
services/sharedmemory/IceSharedMemoryConsumer.h services/sharedmemory/HardwareIdentifierProvider.h services/sharedmemory/HardwareId.h services/sharedmemory/exceptions/SharedMemoryExceptions.h services/profiler/Profiler.h services/profiler/LoggingStrategy.h services/profiler/FileLoggingStrategy.h services/profiler/IceLoggingStrategy.h application/Application.h application/ApplicationOptions.h application/ApplicationProcessFacet.h application/ApplicationNetworkStats.h application/properties/forward_declarations.h application/properties/Properties.h application/properties/Property.h application/properties/PluginEigen.h application/properties/PluginEnumNames.h application/properties/PluginCfgStruct.h application/properties/PluginAll.h application/properties/PropertyUser.h application/properties/PropertyDefinition.h application/properties/PropertyDefinition.hpp application/properties/PropertyDefinitionInterface.h application/properties/PropertyDefinitionContainer.h application/properties/PropertyDefinitionFormatter.h application/properties/PropertyDefinitionContainerFormatter.h application/properties/PropertyDefinitionConfigFormatter.h application/properties/PropertyDefinitionHelpFormatter.h application/properties/PropertyDefinitionBriefHelpFormatter.h application/properties/PropertyDefinitionXmlFormatter.h application/properties/PropertyDefinitionDoxygenFormatter.h application/properties/PropertyDefinitionDoxygenComponentPagesFormatter.h application/properties/PropertyDefinitionContainerBriefHelpFormatter.h application/properties/ProxyPropertyDefinition.h application/properties/IceProperties.h exceptions/Exception.h exceptions/LocalException.h exceptions/local/DynamicLibraryException.h exceptions/local/ExpressionException.h exceptions/local/FileIOException.h exceptions/local/InvalidPropertyValueException.h exceptions/local/MissingRequiredPropertyException.h exceptions/local/PropertyInheritanceCycleException.h exceptions/local/ProxyNotInitializedException.h 
exceptions/local/UnexpectedEnumValueException.h exceptions/local/UnmappedValueException.h exceptions/local/ValueRangeExceededException.h exceptions/user/NotImplementedYetException.h rapidxml/rapidxml.hpp rapidxml/rapidxml_print.hpp rapidxml/rapidxml_iterators.hpp rapidxml/rapidxml_utils.hpp rapidxml/wrapper/RapidXmlReader.h rapidxml/wrapper/RapidXmlWriter.h rapidxml/wrapper/DefaultRapidXmlReader.h rapidxml/wrapper/MultiNodeRapidXMLReader.h util/IceBlobToObject.h util/ObjectToIceBlob.h util/FileSystemPathBuilder.h util/FiniteStateMachine.h util/StringHelpers.h util/StringHelperTemplates.h util/algorithm.h util/OnScopeExit.h util/Predicates.h util/Preprocessor.h util/PropagateConst.h util/Registrar.h util/TemplateMetaProgramming.h util/TripleBuffer.h util/IceReportSkipper.h util/Throttler.h util/distributed/AMDCallbackCollection.h util/distributed/RemoteHandle/ClientSideRemoteHandleControlBlock.h util/distributed/RemoteHandle/RemoteHandle.h util/distributed/RemoteHandle/RemoteHandleControlBlock.h util/SimpleStatemachine.h time.h time_minimal.h time/forward_declarations.h time/ice_conversions.h time/json_conversions.h time/CallbackWaitLock.h time/Clock.h time/ClockType.h time/ClockTypeNames.h time/CycleUtil.h time/DateTime.h time/Duration.h time/Frequency.h time/LocalTimeServer.h time/Metronome.h time/ScopedStopWatch.h time/StopWatch.h time/Timer.h time/TimeUtil.h time/TimeKeeper.h csv/CsvWriter.h csv/CsvReader.h eigen/conversions.h eigen/ice_conversions.h ice_conversions.h ice_conversions/ice_conversions_boost_templates.h ice_conversions/ice_conversions_templates.h ice_conversions/ice_conversions_templates.tpp $
Definition: CMakeLists.txt:12
markdown::token::cAngles
@ cAngles
Definition: markdown-tokens.h:151
markdown-tokens.h
markdown::TokenGroup
std::list< TokenPtr > TokenGroup
Definition: markdown.h:22
markdown::token::CodeSpan::writeAsHtml
void writeAsHtml(std::ostream &out) const override
Definition: markdown-tokens.cpp:1049
markdown::token::RawText::processSpanElements
std::optional< TokenGroup > processSpanElements(const LinkIds &idTable) override
Definition: markdown-tokens.cpp:377
distance
double distance(const Point &a, const Point &b)
Definition: point.hpp:95
markdown::token::Container::Container
Container(const TokenGroup &contents=TokenGroup())
Definition: markdown-tokens.h:419
armarx::ctrlutil::s
double s(double t, double s0, double v0, double a0, double j)
Definition: CtrlUtil.h:33
markdown::token::cAmps
@ cAmps
Definition: markdown-tokens.h:149
markdown::token::cQuotes
@ cQuotes
Definition: markdown-tokens.h:152