markdown-tokens.cpp
Go to the documentation of this file.
1 
2 /*
3  Copyright (c) 2009 by Chad Nelson
4  Released under the MIT License.
5  See the provided LICENSE.TXT file for details.
6 */
7 
8 #include "markdown-tokens.h"
9 
10 #include <boost/regex.hpp>
11 
12 #include <stack>
13 #include <set>
14 #include <sstream>
15 
16 namespace markdown::token
17 {
18 
19  namespace
20  {
21 
/// Characters that may be backslash-escaped. A character's position in this
/// string is the index exchanged between isEscapedCharacter() and
/// escapedCharacter().
const std::string cEscapedCharacters("\\`*_{}[]()#+-.!>");

/// Looks `c` up in cEscapedCharacters.
/// @return the zero-based position of `c` in the table, or std::nullopt when
///         `c` is not one of the escapable characters.
std::optional<size_t> isEscapedCharacter(char c)
{
    const std::string::size_type position = cEscapedCharacters.find(c);

    if (position == std::string::npos)
    {
        return std::nullopt;
    }

    return position;
}

/// Inverse of isEscapedCharacter(): returns the character stored at `index`.
/// @throws std::out_of_range when `index` is not a valid table position. The
///         index may ultimately be parsed out of document text, so it is
///         bounds-checked rather than read unchecked.
char escapedCharacter(size_t index)
{
    return cEscapedCharacters.at(index);
}
43 
44  std::string encodeString(const std::string& src, int encodingFlags)
45  {
46  bool amps = (encodingFlags & cAmps) != 0,
47  doubleAmps = (encodingFlags & cDoubleAmps) != 0,
48  angleBrackets = (encodingFlags & cAngles) != 0,
49  quotes = (encodingFlags & cQuotes) != 0;
50 
51  std::string tgt;
52 
53  for (std::string::const_iterator i = src.begin(), ie = src.end(); i != ie; ++i)
54  {
55  if (*i == '&' && amps)
56  {
57  static const boost::regex cIgnore("^(&amp;)|(&#[0-9]{1,3};)|(&#[xX][0-9a-fA-F]{1,2};)");
58 
59  if (boost::regex_search(i, ie, cIgnore))
60  {
61  tgt.push_back(*i);
62  }
63  else
64  {
65  tgt += "&amp;";
66  }
67  }
68  else if (*i == '&' && doubleAmps)
69  {
70  tgt += "&amp;";
71  }
72  else if (*i == '<' && angleBrackets)
73  {
74  tgt += "&lt;";
75  }
76  else if (*i == '>' && angleBrackets)
77  {
78  tgt += "&gt;";
79  }
80  else if (*i == '\"' && quotes)
81  {
82  tgt += "&quot;";
83  }
84  else
85  {
86  tgt.push_back(*i);
87  }
88  }
89 
90  return tgt;
91  }
92 
/// Reports whether `str` begins with one of the URL prefixes recognised for
/// auto-links: "http://", "https://", "ftp://", "ftps://", "file://", "www."
/// or "ftp.". The comparison is case-sensitive.
bool looksLikeUrl(const std::string& str)
{
    static const char* const cPrefixes[] =
    {
        "http://", "https://", "ftp://", "ftps://", "file://", "www.", "ftp."
    };

    for (const char* candidate : cPrefixes)
    {
        const std::string prefix(candidate);

        // compare() clamps the length, so a `str` shorter than the prefix
        // simply compares unequal.
        if (str.compare(0, prefix.size(), prefix) == 0)
        {
            return true;
        }
    }

    return false;
}
117 
/// Predicate for the local ("name") part of an e-mail address: returns true
/// when `c` is NOT an alphanumeric character or one of ". _ % - +".
bool notValidNameCharacter(char c)
{
    // <cctype> classifiers require a value representable as unsigned char;
    // a plain (possibly negative) char is undefined behaviour for non-ASCII
    // input.
    const unsigned char uc = static_cast<unsigned char>(c);
    return !(isalnum(uc) || c == '.' || c == '_' || c == '%' || c == '-' || c == '+');
}
122 
/// Predicate for the site (domain) part of an e-mail address: returns true
/// when `c` is NOT acceptable there.
bool notValidSiteCharacter(char c)
{
    // NOTE: Kludge alert! The official spec for site characters is only
    // "a-zA-Z._%-". However, MDTest supports "international domain names,"
    // which use characters other than that; I'm kind of cheating here, handling
    // those by allowing all utf8-encoded characters too.
    //
    // The character is converted to unsigned char before it reaches isalnum():
    // bytes with the high bit set are negative as plain char on many
    // platforms, which is undefined behaviour for <cctype> classifiers.
    const unsigned char uc = static_cast<unsigned char>(c);
    return !((uc & 0x80) || isalnum(uc) || c == '.' || c == '_' || c == '%' || c == '-');
}
131 
/// Returns true when `c` is not an alphabetic character.
bool isNotAlpha(char c)
{
    // Cast first: passing a negative char to isalpha() is undefined behaviour.
    return !isalpha(static_cast<unsigned char>(c));
}
136 
/// Obfuscates `src` by writing each 7-bit character as a numeric character
/// reference, alternating between decimal ("&#NN;") and hexadecimal
/// ("&#xHH;") form. Bytes with the high bit set are copied through unchanged,
/// but still advance the decimal/hex alternation.
std::string emailEncode(const std::string& src)
{
    std::ostringstream encoded;
    bool useHex = false;

    for (std::string::const_iterator it = src.begin(); it != src.end(); ++it, useHex = !useHex)
    {
        const char ch = *it;

        if (ch & 0x80)
        {
            encoded << ch;
            continue;
        }

        const int code = static_cast<int>(ch);

        if (useHex)
        {
            encoded << "&#x" << std::hex << code << ';';
        }
        else
        {
            encoded << "&#" << std::dec << code << ';';
        }
    }

    return encoded.str();
}
162 
163  bool looksLikeEmailAddress(const std::string& str)
164  {
165  using Iter = std::string::const_iterator;
166  using RIter = std::string::const_reverse_iterator;
167  Iter i = std::find_if(str.begin(), str.end(), notValidNameCharacter);
168 
169  if (i != str.end() && *i == '@' && i != str.begin())
170  {
171  // The name part is valid.
172  i = std::find_if(i + 1, str.end(), notValidSiteCharacter);
173 
174  if (i == str.end())
175  {
176  // The site part doesn't contain any invalid characters.
177  RIter ri = std::find_if(str.rbegin(), str.rend(), isNotAlpha);
178 
179  if (ri != str.rend() && *ri == '.')
180  {
181  // It ends with a dot and only alphabetic characters.
182  size_t d = std::distance(ri.base(), str.end());
183 
184  if (d >= 2 && d <= 4)
185  {
186  // There are two-to-four of them. It's valid.
187  return true;
188  }
189  }
190  }
191  }
192 
193  return false;
194  }
195 
// Tables of the HTML tag names this parser recognises.
// From <http://en.wikipedia.org/wiki/HTML_element>
//
// Entry format (as consumed by initTag()): a trailing '/' marks a tag that can
// have a closing tag; initTag() strips it before storing the name. Each array
// ends with a nullptr sentinel, which is how initTag() finds the end.
196  // From <http://en.wikipedia.org/wiki/HTML_element>
197 
// Non-block tags; isValidTag() returns 1 for names found in this table.
198  const char* cOtherTagInit[] =
199  {
200  // Header tags
201  "title/", "base", "link", "basefont", "script/", "style/",
202  "object/", "meta",
203 
204  // Inline tags
205  "em/", "strong/", "q/", "cite/", "dfn/", "abbr/", "acronym/",
206  "code/", "samp/", "kbd/", "var/", "sub/", "sup/", "del/", "ins/",
207  "isindex", "a/", "img", "br", "map/", "area", "object/", "param",
208  "applet/", "span/",
209 
210  nullptr
211  };
212 
// Block-level tags; isValidTag() returns 2 for names found in this table.
// "ins" and "del" are listed in both tables, so the result for them depends
// on the lookup order chosen by the caller of isValidTag().
213  const char* cBlockTagInit[] = { "p/", "blockquote/", "hr", "h1/", "h2/",
214  "h3/", "h4/", "h5/", "h6/", "dl/", "dt/", "dd/", "ol/", "ul/",
215  "li/", "dir/", "menu/", "table/", "tr/", "th/", "td/", "col",
216  "colgroup/", "caption/", "thead/", "tbody/", "tfoot/", "form/",
217  "select/", "option", "input", "label/", "textarea/", "div/", "pre/",
218  "address/", "iframe/", "frame/", "frameset/", "noframes/",
219  "center/", "b/", "i/", "big/", "small/", /*"s/",*/ "strike/", "tt/",
220  "u/", "font/", "ins/", "del/", nullptr
221  };
222 
223  // Other official ones (not presently in use in this code)
224  //"!doctype", "bdo", "body", "button", "fieldset", "head", "html",
225  //"legend", "noscript", "optgroup", "xmp",
226 
// Lookup sets built from the two tables above; they start empty and are
// filled by isValidTag() on its first call.
227  std::set<std::string> otherTags, blockTags;
228 
/// Inserts every tag name from `init` into `set`.
/// @param set   destination set; existing contents are kept.
/// @param init  array of C strings terminated by a nullptr entry. A trailing
///              '/' on an entry means the tag can have a closing tag; the
///              slash is removed before the name is stored.
void initTag(std::set<std::string>& set, const char* init[])
{
    for (size_t x = 0; init[x] != nullptr; ++x)
    {
        std::string str = init[x];

        // Guard against an empty entry: inspecting the last character of an
        // empty string is undefined behaviour.
        if (!str.empty() && str.back() == '/')
        {
            // Means it can have a closing tag
            str.pop_back();
        }

        set.insert(str);
    }
}
244 
/// Normalises a link reference by collapsing every run of consecutive space
/// characters into a single space. Other characters are copied unchanged;
/// leading and trailing spaces are kept (as one space each).
std::string cleanTextLinkRef(const std::string& ref)
{
    std::string cleaned;
    bool previousWasSpace = false;

    for (std::string::const_iterator it = ref.begin(); it != ref.end(); ++it)
    {
        const bool isSpace = (*it == ' ');

        if (!isSpace || !previousWasSpace)
        {
            cleaned.push_back(*it);
        }

        previousWasSpace = isSpace;
    }

    return cleaned;
}
266 
267  } // namespace
268 
269 
270 
271  size_t isValidTag(const std::string& tag, bool nonBlockFirst)
272  {
273  if (blockTags.empty())
274  {
275  initTag(otherTags, cOtherTagInit);
276  initTag(blockTags, cBlockTagInit);
277  }
278 
279  if (nonBlockFirst)
280  {
281  if (otherTags.find(tag) != otherTags.end())
282  {
283  return 1;
284  }
285 
286  if (blockTags.find(tag) != blockTags.end())
287  {
288  return 2;
289  }
290  }
291  else
292  {
293  if (blockTags.find(tag) != blockTags.end())
294  {
295  return 2;
296  }
297 
298  if (otherTags.find(tag) != otherTags.end())
299  {
300  return 1;
301  }
302  }
303 
304  return 0;
305  }
306 
307 
308 
309  void TextHolder::writeAsHtml(std::ostream& out) const
310  {
311  preWrite(out);
312 
313  if (mEncodingFlags != 0)
314  {
315  out << encodeString(mText, mEncodingFlags);
316  }
317  else
318  {
319  out << mText;
320  }
321 
322  postWrite(out);
323  }
324 
325  std::optional<TokenGroup> RawText::processSpanElements(const LinkIds& idTable)
326  {
327  if (!canContainMarkup())
328  {
329  return std::nullopt;
330  }
331 
332  ReplacementTable replacements;
333  std::string str = _processHtmlTagAttributes(*text(), replacements);
334  str = _processCodeSpans(str, replacements);
335  str = _processEscapedCharacters(str);
336  str = _processLinksImagesAndTags(str, replacements, idTable);
337  return _processBoldAndItalicSpans(str, replacements);
338  }
339 
340  std::string RawText::_processHtmlTagAttributes(std::string src, ReplacementTable&
341  replacements)
342  {
343  // Because "Attribute Content Is Not A Code Span"
344  std::string tgt;
345  std::string::const_iterator prev = src.begin(), end = src.end();
346 
347  while (true)
348  {
349  static const boost::regex cHtmlToken("<((/?)([a-zA-Z0-9]+)(?:( +[a-zA-Z0-9]+?(?: ?= ?(\"|').*?\\5))+? */? *))>");
350  boost::smatch m;
351 
352  if (boost::regex_search(prev, end, m, cHtmlToken))
353  {
354  // NOTE: Kludge alert! The `isValidTag` test is a cheat, only here
355  // to handle some edge cases between the Markdown test suite and the
356  // PHP-Markdown one, which seem to conflict.
357  if (isValidTag(m[3]))
358  {
359  tgt += std::string(prev, m[0].first);
360 
361  std::string fulltag = m[0], tgttag;
362  std::string::const_iterator prevtag = fulltag.begin(), endtag = fulltag.end();
363 
364  while (true)
365  {
366  static const boost::regex cAttributeStrings("= ?(\"|').*?\\1");
367  boost::smatch mtag;
368 
369  if (boost::regex_search(prevtag, endtag, mtag, cAttributeStrings))
370  {
371  tgttag += std::string(prevtag, mtag[0].first);
372  tgttag += "\x01@" + std::to_string(replacements.size()) + "@htmlTagAttr\x01";
373  prevtag = mtag[0].second;
374 
375  replacements.push_back(TokenPtr(new TextHolder(std::string(mtag[0]), false, cAmps | cAngles)));
376  }
377  else
378  {
379  tgttag += std::string(prevtag, endtag);
380  break;
381  }
382  }
383 
384  tgt += tgttag;
385  prev = m[0].second;
386  }
387  else
388  {
389  tgt += std::string(prev, m[0].second);
390  prev = m[0].second;
391  }
392  }
393  else
394  {
395  tgt += std::string(prev, end);
396  break;
397  }
398  }
399 
400  return tgt;
401  }
402 
403  std::string RawText::_processCodeSpans(std::string src, ReplacementTable&
404  replacements)
405  {
406  static const boost::regex cCodeSpan[2] =
407  {
408  boost::regex("(?:^|(?<=[^\\\\]))`` (.+?) ``"),
409  boost::regex("(?:^|(?<=[^\\\\]))`(.+?)`")
410  };
411 
412  for (const auto& pass : cCodeSpan)
413  {
414  std::string tgt;
415  std::string::const_iterator prev = src.begin(), end = src.end();
416 
417  while (true)
418  {
419  boost::smatch m;
420 
421  if (boost::regex_search(prev, end, m, pass))
422  {
423  tgt += std::string(prev, m[0].first);
424  tgt += "\x01@" + std::to_string(replacements.size()) + "@codeSpan\x01";
425  prev = m[0].second;
426  replacements.push_back(TokenPtr(new CodeSpan(_restoreProcessedItems(m[1], replacements))));
427  }
428  else
429  {
430  tgt += std::string(prev, end);
431  break;
432  }
433  }
434 
435  src.swap(tgt);
436  tgt.clear();
437  }
438 
439  return src;
440  }
441 
442  std::string RawText::_processEscapedCharacters(const std::string& src)
443  {
444  std::string tgt;
445  std::string::const_iterator prev = src.begin(), end = src.end();
446 
447  while (true)
448  {
449  std::string::const_iterator i = std::find(prev, end, '\\');
450 
451  if (i != end)
452  {
453  tgt += std::string(prev, i);
454  ++i;
455 
456  if (i != end)
457  {
458  std::optional<size_t> e = isEscapedCharacter(*i);
459 
460  if (e)
461  {
462  tgt += "\x01@#" + std::to_string(*e) + "@escaped\x01";
463  }
464  else
465  {
466  tgt = tgt + '\\' + *i;
467  }
468 
469  prev = i + 1;
470  }
471  else
472  {
473  tgt += '\\';
474  break;
475  }
476  }
477  else
478  {
479  tgt += std::string(prev, end);
480  break;
481  }
482  }
483 
484  return tgt;
485  }
486 
487  std::string RawText::_processSpaceBracketedGroupings(const std::string& src,
488  ReplacementTable& replacements)
489  {
490  static const boost::regex cRemove("(?:(?: \\*+ )|(?: _+ ))");
491 
492  std::string tgt;
493  std::string::const_iterator prev = src.begin(), end = src.end();
494 
495  while (true)
496  {
497  boost::smatch m;
498 
499  if (boost::regex_search(prev, end, m, cRemove))
500  {
501  tgt += std::string(prev, m[0].first);
502  tgt += "\x01@" + std::to_string(replacements.size()) + "@spaceBracketed\x01";
503  replacements.push_back(TokenPtr(new RawText(m[0])));
504  prev = m[0].second;
505  }
506  else
507  {
508  tgt += std::string(prev, end);
509  break;
510  }
511  }
512 
513  return tgt;
514  }
515 
// Replaces inline links/images, reference links/images and "<...>" constructs
// (HTML tags and auto-links) in `src` with placeholders, storing the tokens
// they stand for in `replacements`.
//
// Parameters:
//   src          - text to scan.
//   replacements - table that receives the generated tokens; a placeholder
//                  carries the index of its token in this table.
//   idTable      - link-ID table used to resolve reference links and images.
//
// Returns the rewritten text. For a link, the link text stays in the returned
// string, bracketed by one placeholder for the opening anchor and one for the
// closing "/a" tag. When a link or image cannot be resolved, only the first
// character of the match is stored (as RawText) and scanning resumes right
// after that character.
516  std::string RawText::_processLinksImagesAndTags(const std::string& src,
517  ReplacementTable& replacements, const LinkIds& idTable)
518  {
519  // NOTE: Kludge alert! The "inline link or image" regex should be...
520  //
521  // "(?:(!?)\\[(.+?)\\] *\\((.*?)\\))"
522  //
523  // ...but that fails on the 'Images' test because it includes a "stupid URL"
524  // that has parentheses within it. The proper way to deal with this would be
525  // to match any nested parentheses, but regular expressions can't handle an
526  // unknown number of nested items, so I'm cheating -- the regex for it
527  // allows for one (and *only* one) pair of matched parentheses within the
528  // URL. It makes the regex hard to follow (it was even harder to get right),
529  // but it allows it to pass the test.
530  //
531  // The "reference link or image" one has a similar problem; it should be...
532  //
533  // "|(?:(!?)\\[(.+?)\\](?: *\\[(.*?)\\])?)"
534  //
535  static const boost::regex cExpression(
536  "(?:(!?)\\[([^\\]]+?)\\] *\\(([^\\(]*(?:\\(.*?\\).*?)*?)\\))" // Inline link or image
537  "|(?:(!?)\\[((?:[^]]*?\\[.*?\\].*?)|(?:.+?))\\](?: *\\[(.*?)\\])?)" // Reference link or image
538  "|(?:<(/?([a-zA-Z0-9]+).*?)>)" // potential HTML tag or auto-link
539  );
540  // Important captures: 1/4=image indicator, 2/5=contents/alttext,
541  // 3=URL/title, 6=optional link ID, 7=potential HTML tag or auto-link
542  // contents, 8=actual tag from 7.
543 
544  std::string tgt;
545  std::string::const_iterator prev = src.begin(), end = src.end();
546 
547  while (true)
548  {
549  boost::smatch m;
550 
551  if (boost::regex_search(prev, end, m, cExpression))
552  {
553  assert(m[0].matched);
554  assert(m[0].length() != 0);
555 
// The placeholder is emitted before the token is created; every branch
// below pushes exactly one token at this index.
556  tgt += std::string(prev, m[0].first);
557  tgt += "\x01@" + std::to_string(replacements.size()) + "@links&Images1\x01";
558  prev = m[0].second;
559 
// Work out which alternative of cExpression matched.
560  bool isImage = false, isLink = false, isReference = false;
561 
562  if (m[4].matched && m[4].length())
563  {
564  isImage = isReference = true;
565  }
566  else if (m[1].matched && m[1].length())
567  {
568  isImage = true;
569  }
570  else if (m[5].matched)
571  {
572  isLink = isReference = true;
573  }
574  else if (m[2].matched)
575  {
576  isLink = true;
577  }
578 
579  if (isImage || isLink)
580  {
581  std::string contentsOrAlttext, url, title;
582  bool resolved = false;
583 
584  if (isReference)
585  {
586  contentsOrAlttext = m[5];
587  std::string linkId = (m[6].matched ? std::string(m[6]) : std::string());
588 
// No explicit ID (absent or empty): the space-normalised link text is used
// as the ID instead.
589  if (linkId.empty())
590  {
591  linkId = cleanTextLinkRef(contentsOrAlttext);
592  }
593 
594  std::optional<markdown::LinkIds::Target> target = idTable.find(linkId);
595 
596  if (target)
597  {
598  url = target->url;
599  title = target->title;
600  resolved = true;
601  };
602  }
603  else
604  {
605  static const boost::regex cReference("^<?([^ >]*)>?(?: *(?:('|\")(.*)\\2)|(?:\\((.*)\\)))? *$");
606  // Useful captures: 1=url, 3/4=title
607  contentsOrAlttext = m[2];
608  std::string urlAndTitle = m[3];
609  boost::smatch mm;
610 
611  if (boost::regex_match(urlAndTitle, mm, cReference))
612  {
613  url = mm[1];
614 
615  if (mm[3].matched)
616  {
617  title = mm[3];
618  }
619  else if (mm[4].matched)
620  {
621  title = mm[4];
622  }
623 
624  resolved = true;
625  }
626  }
627 
628  if (!resolved)
629  {
630  // Just encode the first character as-is, and continue
631  // searching after it.
632  prev = m[0].first + 1;
633  replacements.push_back(TokenPtr(new RawText(std::string(m[0].first, prev))));
634  }
635  else if (isImage)
636  {
637  replacements.push_back(TokenPtr(new Image(contentsOrAlttext,
638  url, title)));
639  }
640  else
641  {
// Link: the opening anchor goes at the first placeholder, the link text
// stays in tgt, and a second placeholder marks the closing tag.
642  replacements.push_back(TokenPtr(new HtmlAnchorTag(url, title)));
643  tgt += contentsOrAlttext;
644  tgt += "\x01@" + std::to_string(replacements.size()) + "@links&Images2\x01";
645  replacements.push_back(TokenPtr(new HtmlTag("/a")));
646  }
647  }
648  else
649  {
650  // Otherwise it's an HTML tag or auto-link.
651  std::string contents = m[7];
652 
653  // cerr << "Evaluating potential HTML or auto-link: " << contents << endl;
654  // cerr << "m[8]=" << m[8] << endl;
655 
656  if (looksLikeUrl(contents))
657  {
658  TokenGroup subgroup;
659  subgroup.push_back(TokenPtr(new HtmlAnchorTag(contents)));
660  subgroup.push_back(TokenPtr(new RawText(contents, false)));
661  subgroup.push_back(TokenPtr(new HtmlTag("/a")));
662  replacements.push_back(TokenPtr(new Container(subgroup)));
663  }
664  else if (looksLikeEmailAddress(contents))
665  {
// Both the mailto: target and the visible text go through emailEncode().
666  TokenGroup subgroup;
667  subgroup.push_back(TokenPtr(new HtmlAnchorTag(emailEncode("mailto:" + contents))));
668  subgroup.push_back(TokenPtr(new RawText(emailEncode(contents), false)));
669  subgroup.push_back(TokenPtr(new HtmlTag("/a")));
670  replacements.push_back(TokenPtr(new Container(subgroup)));
671  }
672  else if (isValidTag(m[8]))
673  {
674  replacements.push_back(TokenPtr(new HtmlTag(_restoreProcessedItems(contents, replacements))));
675  }
676  else
677  {
678  // Just encode it as-is
679  replacements.push_back(TokenPtr(new RawText(m[0])));
680  }
681  }
682  }
683  else
684  {
685  tgt += std::string(prev, end);
686  break;
687  }
688  }
689 
690  return tgt;
691  }
692 
// Splits `src` into RawText tokens and BoldOrItalicMarker tokens, pairs the
// open and close markers, and expands the placeholders left by the earlier
// passes.
//
// Steps:
//   1. Tokenise `src` with cEmphasisExpression into text and markers.
//   2. Pair each unmatched open marker with a later close marker of the same
//      character and size, splitting three-character markers where needed.
//   3. "Unmatch" pairs that are nested invalidly.
//   4. Replace every text token that can contain markup with the tokens
//      produced by _encodeProcessedItems(); other tokens are kept as they are.
//
// Returns the final token group.
693  TokenGroup RawText::_processBoldAndItalicSpans(const std::string& src,
694  ReplacementTable& replacements)
695  {
696  static const boost::regex cEmphasisExpression(
697  "(?:(?<![*_])([*_]{1,3})([^*_ ]+?)\\1(?![*_]))" // Mid-word emphasis
698  "|((?:(?<!\\*)\\*{1,3}(?!\\*)|(?<!_)_{1,3}(?!_))(?=.)(?! )(?![.,:;] )(?![.,:;]$))" // Open
699  "|((?<![* ])\\*{1,3}(?!\\*)|(?<![ _])_{1,3}(?!_))" // Close
700  );
// Captures: 1=mid-word marker, 2=mid-word contents, 3=open marker,
// 4=close marker.
701 
702  TokenGroup tgt;
703  std::string::const_iterator i = src.begin(), end = src.end(), prev = i;
704 
705  while (true)
706  {
707  boost::smatch m;
708 
709  if (boost::regex_search(prev, end, m, cEmphasisExpression))
710  {
711  if (prev != m[0].first) tgt.push_back(TokenPtr(new
712  RawText(std::string(prev, m[0].first))));
713 
714  if (m[3].matched)
715  {
716  std::string token = m[3];
717  tgt.push_back(TokenPtr(new BoldOrItalicMarker(true, token[0],
718  token.length())));
719  prev = m[0].second;
720  }
721  else if (m[4].matched)
722  {
723  std::string token = m[4];
724  tgt.push_back(TokenPtr(new BoldOrItalicMarker(false, token[0],
725  token.length())));
726  prev = m[0].second;
727  }
728  else
729  {
// Mid-word emphasis: emit an open marker, the contents and a close marker.
730  std::string token = m[1], contents = m[2];
731  tgt.push_back(TokenPtr(new BoldOrItalicMarker(true, token[0],
732  token.length())));
733  tgt.push_back(TokenPtr(new RawText(std::string(contents))));
734  tgt.push_back(TokenPtr(new BoldOrItalicMarker(false, token[0],
735  token.length())));
736  prev = m[0].second;
737  }
738  }
739  else
740  {
741  if (prev != end) tgt.push_back(TokenPtr(new RawText(std::string(prev,
742  end))));
743 
744  break;
745  }
746  }
747 
// Running ID handed to each matched open/close pair.
748  int id = 0;
749 
750  for (TokenGroup::iterator ii = tgt.begin(), iie = tgt.end(); ii != iie; ++ii)
751  {
752  if ((*ii)->isUnmatchedOpenMarker())
753  {
754  BoldOrItalicMarker* openToken = dynamic_cast<BoldOrItalicMarker*>(ii->get());
755 
756  // Find a matching close-marker, if it's there
757  TokenGroup::iterator iii = ii;
758 
759  for (++iii; iii != iie; ++iii)
760  {
761  if ((*iii)->isUnmatchedCloseMarker())
762  {
763  BoldOrItalicMarker* closeToken = dynamic_cast<BoldOrItalicMarker*>(iii->get());
764 
765  if (closeToken->size() == 3 && openToken->size() != 3)
766  {
767  // Split the close-token into a match for the open-token
768  // and a second for the leftovers.
769  closeToken->disable();
770  TokenGroup g;
771  g.push_back(TokenPtr(new BoldOrItalicMarker(false,
772  closeToken->tokenCharacter(), closeToken->size() -
773  openToken->size())));
774  g.push_back(TokenPtr(new BoldOrItalicMarker(false,
775  closeToken->tokenCharacter(), openToken->size())));
776  TokenGroup::iterator after = iii;
777  ++after;
778  tgt.splice(after, g);
// The two replacement markers now follow iii, so the inner loop visits them
// next.
779  continue;
780  }
781 
782  if (closeToken->tokenCharacter() == openToken->tokenCharacter()
783  && closeToken->size() == openToken->size())
784  {
785  openToken->matched(closeToken, id);
786  closeToken->matched(openToken, id);
787  ++id;
788  break;
789  }
790  else if (openToken->size() == 3)
791  {
792  // Split the open-token into a match for the close-token
793  // and a second for the leftovers.
794  openToken->disable();
795  TokenGroup g;
796  g.push_back(TokenPtr(new BoldOrItalicMarker(true,
797  openToken->tokenCharacter(), openToken->size() -
798  closeToken->size())));
799  g.push_back(TokenPtr(new BoldOrItalicMarker(true,
800  openToken->tokenCharacter(), closeToken->size())));
801  TokenGroup::iterator after = ii;
802  ++after;
803  tgt.splice(after, g);
// The two replacement open markers now follow ii and are picked up by the
// outer loop.
804  break;
805  }
806  }
807  }
808  }
809  }
810 
811  // "Unmatch" invalidly-nested matches.
812  std::stack<BoldOrItalicMarker*> openMatches;
813 
814  for (auto& ii : tgt)
815  {
816  if (ii->isMatchedOpenMarker())
817  {
818  BoldOrItalicMarker* open = dynamic_cast<BoldOrItalicMarker*>(ii.get());
819  openMatches.push(open);
820  }
821  else if (ii->isMatchedCloseMarker())
822  {
823  BoldOrItalicMarker* close = dynamic_cast<BoldOrItalicMarker*>(ii.get());
824 
// NOTE(review): top() is called without checking openMatches.empty();
// presumably a matched close marker always has its open marker still on the
// stack -- confirm.
825  if (close->id() != openMatches.top()->id())
826  {
827  close->matchedTo()->matched(nullptr);
828  close->matched(nullptr);
829  }
830  else
831  {
832  openMatches.pop();
833 
// Also discard open markers that were unmatched above.
834  while (!openMatches.empty() && openMatches.top()->matchedTo() == nullptr)
835  {
836  openMatches.pop();
837  }
838  }
839  }
840  }
841 
842  TokenGroup r;
843 
844  for (auto& ii : tgt)
845  {
846  if (ii->text() && ii->canContainMarkup())
847  {
848  TokenGroup t = _encodeProcessedItems(*ii->text(), replacements);
849  r.splice(r.end(), t);
850  }
851  else
852  {
853  r.push_back(ii);
854  }
855  }
856 
857  return r;
858  }
859 
860  TokenGroup RawText::_encodeProcessedItems(const std::string& src,
861  ReplacementTable& replacements)
862  {
863  static const boost::regex cReplaced("\x01@(#?[0-9]*)@.+?\x01");
864 
865  TokenGroup r;
866  std::string::const_iterator prev = src.begin();
867 
868  while (true)
869  {
870  boost::smatch m;
871 
872  if (boost::regex_search(prev, src.end(), m, cReplaced))
873  {
874  std::string pre = std::string(prev, m[0].first);
875 
876  if (!pre.empty())
877  {
878  r.push_back(TokenPtr(new RawText(pre)));
879  }
880 
881  prev = m[0].second;
882 
883  std::string ref = m[1];
884 
885  if (ref[0] == '#')
886  {
887  size_t n = std::stoul(ref.substr(1));
888  r.push_back(TokenPtr(new EscapedCharacter(escapedCharacter(n))));
889  }
890  else if (!ref.empty())
891  {
892  size_t n = std::stoul(ref);
893 
894  assert(n < replacements.size());
895  r.push_back(replacements[n]);
896  } // Otherwise just eat it
897  }
898  else
899  {
900  std::string pre = std::string(prev, src.end());
901 
902  if (!pre.empty())
903  {
904  r.push_back(TokenPtr(new RawText(pre)));
905  }
906 
907  break;
908  }
909  }
910 
911  return r;
912  }
913 
914  std::string RawText::_restoreProcessedItems(const std::string& src,
915  ReplacementTable& replacements)
916  {
917  static const boost::regex cReplaced("\x01@(#?[0-9]*)@.+?\x01");
918 
919  std::ostringstream r;
920  std::string::const_iterator prev = src.begin();
921 
922  while (true)
923  {
924  boost::smatch m;
925 
926  if (boost::regex_search(prev, src.end(), m, cReplaced))
927  {
928  std::string pre = std::string(prev, m[0].first);
929 
930  if (!pre.empty())
931  {
932  r << pre;
933  }
934 
935  prev = m[0].second;
936 
937  std::string ref = m[1];
938 
939  if (ref[0] == '#')
940  {
941  size_t n = std::stoul(ref.substr(1));
942  r << '\\' << escapedCharacter(n);
943  }
944  else if (!ref.empty())
945  {
946  size_t n = std::stoul(ref);
947 
948  assert(n < replacements.size());
949  replacements[n]->writeAsOriginal(r);
950  } // Otherwise just eat it
951  }
952  else
953  {
954  std::string pre = std::string(prev, src.end());
955 
956  if (!pre.empty())
957  {
958  r << pre;
959  }
960 
961  break;
962  }
963  }
964 
965  return r.str();
966  }
967 
968  HtmlAnchorTag::HtmlAnchorTag(const std::string& url, const std::string& title):
969  TextHolder("<a href=\"" + encodeString(url, cQuotes | cAmps) + "\""
970  + (title.empty() ? std::string() : " title=\"" + encodeString(title, cQuotes | cAmps) + "\"")
971  + ">", false, 0)
972  {
973  // This space deliberately blank. ;-)
974  }
975 
// Writes this code block to `out` between "<pre><code>" and "</code></pre>",
// followed by two newlines.
// NOTE(review): original line 979 is absent from this listing; presumably it
// writes the block's text between the two tags -- confirm against the real
// source file before changing this function.
976  void CodeBlock::writeAsHtml(std::ostream& out) const
977  {
978  out << "<pre><code>";
980  out << "</code></pre>\n\n";
981  }
982 
// Writes this code span to `out` between "<code>" and "</code>".
// NOTE(review): original line 986 is absent from this listing; presumably it
// writes the span's text between the two tags -- confirm against the real
// source file before changing this function.
983  void CodeSpan::writeAsHtml(std::ostream& out) const
984  {
985  out << "<code>";
987  out << "</code>";
988  }
989 
990  void CodeSpan::writeAsOriginal(std::ostream& out) const
991  {
992  out << '`' << *text() << '`';
993  }
994 
995 
996 
997  void Container::writeAsHtml(std::ostream& out) const
998  {
999  preWrite(out);
1000 
1001  for (const auto& mSubToken : mSubTokens)
1002  {
1003  mSubToken->writeAsHtml(out);
1004  }
1005 
1006  postWrite(out);
1007  }
1008 
1009  void Container::writeToken(size_t indent, std::ostream& out) const
1010  {
1011  out << std::string(indent * 2, ' ') << containerName() << "\n";
1012 
1013  for (const auto& mSubToken : mSubTokens)
1014  {
1015  mSubToken->writeToken(indent + 1, out);
1016  }
1017  }
1018 
1019  std::optional<TokenGroup> Container::processSpanElements(const LinkIds& idTable)
1020  {
1021  TokenGroup t;
1022 
1023  for (CTokenGroupIter ii = mSubTokens.begin(), iie = mSubTokens.end(); ii != iie;
1024  ++ii)
1025  {
1026  if ((*ii)->text())
1027  {
1028  std::optional<TokenGroup> subt = (*ii)->processSpanElements(idTable);
1029 
1030  if (subt)
1031  {
1032  if (subt->size() > 1)
1033  {
1034  t.push_back(TokenPtr(new Container(*subt)));
1035  }
1036  else if (!subt->empty())
1037  {
1038  t.push_back(*subt->begin());
1039  }
1040  }
1041  else
1042  {
1043  t.push_back(*ii);
1044  }
1045  }
1046  else
1047  {
1048  std::optional<TokenGroup> subt = (*ii)->processSpanElements(idTable);
1049 
1050  if (subt)
1051  {
1052  const Container* c = dynamic_cast<const Container*>((*ii).get());
1053  assert(c != 0);
1054  t.push_back(c->clone(*subt));
1055  }
1056  else
1057  {
1058  t.push_back(*ii);
1059  }
1060  }
1061  }
1062 
1063  swapSubtokens(t);
1064  return std::nullopt;
1065  }
1066 
1067  UnorderedList::UnorderedList(const TokenGroup& contents, bool paragraphMode)
1068  {
1069  if (paragraphMode)
1070  {
1071  // Change each of the text items into paragraphs
1072  for (const auto& content : contents)
1073  {
1074  token::ListItem* item = dynamic_cast<token::ListItem*>(content.get());
1075  assert(item != 0);
1076  item->inhibitParagraphs(false);
1077  mSubTokens.push_back(content);
1078  }
1079  }
1080  else
1081  {
1082  mSubTokens = contents;
1083  }
1084  }
1085 
1086 
1087 
1088  void BoldOrItalicMarker::writeAsHtml(std::ostream& out) const
1089  {
1090  if (!mDisabled)
1091  {
1092  if (mMatch != nullptr)
1093  {
1094  assert(mSize >= 1 && mSize <= 3);
1095 
1096  if (mOpenMarker)
1097  {
1098  out << (mSize == 1 ? "<em>" : mSize == 2 ? "<strong>" : "<strong><em>");
1099  }
1100  else
1101  {
1102  out << (mSize == 1 ? "</em>" : mSize == 2 ? "</strong>" : "</em></strong>");
1103  }
1104  }
1105  else
1106  {
1107  out << std::string(mSize, mTokenCharacter);
1108  }
1109  }
1110  }
1111 
1112  void BoldOrItalicMarker::writeToken(std::ostream& out) const
1113  {
1114  if (!mDisabled)
1115  {
1116  if (mMatch != nullptr)
1117  {
1118  std::string type = (mSize == 1 ? "italic" : mSize == 2 ? "bold" : "italic&bold");
1119 
1120  if (mOpenMarker)
1121  {
1122  out << "Matched open-" << type << " marker\n";
1123  }
1124  else
1125  {
1126  out << "Matched close-" << type << " marker\n";
1127  }
1128  }
1129  else
1130  {
1131  if (mOpenMarker) out << "Unmatched bold/italic open marker: " <<
1132  std::string(mSize, mTokenCharacter) << "\n";
1133  else out << "Unmatched bold/italic close marker: " <<
1134  std::string(mSize, mTokenCharacter) << "\n";
1135  }
1136  }
1137  }
1138 
1139  void Image::writeAsHtml(std::ostream& out) const
1140  {
1141  out << "<img src=\"" << mUrl << "\" alt=\"" << mAltText << "\"";
1142 
1143  if (!mTitle.empty())
1144  {
1145  out << " title=\"" << mTitle << "\"";
1146  }
1147 
1148  out << "/>";
1149  }
1150 
1151 }
1152 
markdown::token::TextHolder::text
std::optional< std::string > text() const override
Definition: markdown-tokens.h:132
str
std::string str(const T &t)
Definition: UserAssistedSegmenterGuiWidgetController.cpp:42
markdown::token::RawText::RawText
RawText(const std::string &text, bool canContainMarkup=true)
Definition: markdown-tokens.h:151
markdown::token::Image::writeAsHtml
void writeAsHtml(std::ostream &out) const override
Definition: markdown-tokens.cpp:1139
index
uint8_t index
Definition: EtherCATFrame.h:59
markdown::token::HtmlAnchorTag::HtmlAnchorTag
HtmlAnchorTag(const std::string &url, const std::string &title=std::string())
Definition: markdown-tokens.cpp:968
markdown::token::Container::mSubTokens
TokenGroup mSubTokens
Definition: markdown-tokens.h:377
markdown::token::BoldOrItalicMarker::writeToken
void writeToken(std::ostream &out) const override
Definition: markdown-tokens.cpp:1112
markdown::token::CodeSpan::writeAsOriginal
void writeAsOriginal(std::ostream &out) const override
Definition: markdown-tokens.cpp:990
markdown::token::TextHolder::TextHolder
TextHolder(const std::string &text, bool canContainMarkup, unsigned int encodingFlags)
Definition: markdown-tokens.h:121
markdown::token::cDoubleAmps
@ cDoubleAmps
Definition: markdown-tokens.h:116
boost::target
Vertex target(const detail::edge_base< Directed, Vertex > &e, const PCG &)
Definition: point_cloud_graph.h:688
markdown::token::Container::containerName
virtual std::string containerName() const
Definition: markdown-tokens.h:371
markdown::token::Container::swapSubtokens
void swapSubtokens(TokenGroup &tokens)
Definition: markdown-tokens.h:347
c
constexpr T c
Definition: UnscentedKalmanFilterTest.cpp:43
markdown::token::isValidTag
size_t isValidTag(const std::string &tag, bool nonBlockFirst)
Definition: markdown-tokens.cpp:271
markdown::token::Container
Definition: markdown-tokens.h:333
markdown::token::Container::processSpanElements
std::optional< TokenGroup > processSpanElements(const LinkIds &idTable) override
Definition: markdown-tokens.cpp:1019
markdown::token::TextHolder
Definition: markdown-tokens.h:118
markdown::token::TextHolder::canContainMarkup
bool canContainMarkup() const override
Definition: markdown-tokens.h:137
cxxopts::empty
bool empty(const std::string &s)
Definition: cxxopts.hpp:255
markdown::TokenPtr
std::shared_ptr< Token > TokenPtr
Definition: markdown.h:21
markdown::token::Container::writeToken
void writeToken(std::ostream &out) const override
Definition: markdown-tokens.h:359
markdown::token::UnorderedList::UnorderedList
UnorderedList(const TokenGroup &contents, bool paragraphMode=false)
Definition: markdown-tokens.cpp:1067
markdown::token::ListItem::inhibitParagraphs
void inhibitParagraphs(bool set)
Definition: markdown-tokens.h:422
markdown::token::Container::writeAsHtml
void writeAsHtml(std::ostream &out) const override
Definition: markdown-tokens.cpp:997
markdown::token
Definition: markdown-tokens.cpp:16
markdown::token::CodeBlock::writeAsHtml
void writeAsHtml(std::ostream &out) const override
Definition: markdown-tokens.cpp:976
markdown::token::BoldOrItalicMarker::writeAsHtml
void writeAsHtml(std::ostream &out) const override
Definition: markdown-tokens.cpp:1088
markdown::Token::preWrite
virtual void preWrite(std::ostream &out) const
Definition: markdown-tokens.h:106
markdown::LinkIds
Definition: markdown-tokens.h:21
markdown::CTokenGroupIter
TokenGroup::const_iterator CTokenGroupIter
Definition: markdown-tokens.h:19
markdown::token::ListItem
Definition: markdown-tokens.h:416
armarx::to_string
const std::string & to_string(const std::string &s)
Definition: StringHelpers.h:40
markdown::Token::postWrite
virtual void postWrite(std::ostream &out) const
Definition: markdown-tokens.h:107
markdown::token::TextHolder::writeAsHtml
void writeAsHtml(std::ostream &out) const override
Definition: markdown-tokens.cpp:309
std
Definition: Application.h:66
set
set(LIBS ArmarXCoreInterfaces ${CMAKE_THREAD_LIBS_INIT} ${dl_LIBRARIES} ${rt_LIBRARIES} ${QT_LIBRARIES} ${Boost_LIBRARIES} BoostAssertionHandler ArmarXCPPUtility SimoxUtility) set(LIB_FILES ArmarXManager.cpp ArmarXMultipleObjectsScheduler.cpp ArmarXObjectScheduler.cpp ManagedIceObject.cpp ManagedIceObjectPlugin.cpp Component.cpp ComponentPlugin.cpp IceGridAdmin.cpp ArmarXObjectObserver.cpp IceManager.cpp PackagePath.cpp RemoteReferenceCount.cpp logging/LoggingUtil.cpp logging/Logging.cpp logging/LogSender.cpp logging/ArmarXLogBuf.cpp system/ArmarXDataPath.cpp system/DynamicLibrary.cpp system/ProcessWatcher.cpp system/FactoryCollectionBase.cpp system/cmake/CMakePackageFinder.cpp system/cmake/CMakePackageFinderCache.cpp system/cmake/ArmarXPackageToolInterface.cpp system/RemoteObjectNode.cpp services/sharedmemory/HardwareId.cpp services/tasks/RunningTask.cpp services/tasks/ThreadList.cpp services/tasks/ThreadPool.cpp services/profiler/Profiler.cpp services/profiler/FileLoggingStrategy.cpp services/profiler/IceLoggingStrategy.cpp application/Application.cpp application/ApplicationOptions.cpp application/ApplicationProcessFacet.cpp application/ApplicationNetworkStats.cpp application/properties/PropertyUser.cpp application/properties/Property.cpp application/properties/PropertyDefinition.cpp application/properties/PropertyDefinitionContainer.cpp application/properties/PropertyDefinitionHelpFormatter.cpp application/properties/PropertyDefinitionConfigFormatter.cpp application/properties/PropertyDefinitionBriefHelpFormatter.cpp application/properties/PropertyDefinitionXmlFormatter.cpp application/properties/PropertyDefinitionDoxygenFormatter.cpp application/properties/PropertyDefinitionDoxygenComponentPagesFormatter.cpp application/properties/PropertyDefinitionContainerBriefHelpFormatter.cpp application/properties/IceProperties.cpp exceptions/Exception.cpp exceptions/local/UnexpectedEnumValueException.cpp util/FileSystemPathBuilder.cpp util/StringHelpers.cpp 
util/IceReportSkipper.cpp util/Throttler.cpp util/distributed/AMDCallbackCollection.cpp util/distributed/RemoteHandle/ClientSideRemoteHandleControlBlock.cpp util/distributed/RemoteHandle/RemoteHandle.cpp util/distributed/RemoteHandle/RemoteHandleControlBlock.cpp time/ice_conversions.cpp time/json_conversions.cpp time/CallbackWaitLock.cpp time/Clock.cpp time/ClockType.cpp time/ClockTypeNames.cpp time/CycleUtil.cpp time/DateTime.cpp time/Duration.cpp time/Frequency.cpp time/LocalTimeServer.cpp time/Metronome.cpp time/ScopedStopWatch.cpp time/StopWatch.cpp time/Timer.cpp time/TimeKeeper.cpp time/TimeUtil.cpp csv/CsvWriter.cpp csv/CsvReader.cpp eigen/conversions.cpp eigen/ice_conversions.cpp) set(LIB_HEADERS ArmarXManager.h ArmarXDummyManager.h ArmarXMultipleObjectsScheduler.h ArmarXObjectObserver.h ArmarXObjectScheduler.h ArmarXFwd.h Component.h ComponentPlugin.h ComponentFactories.h CoreObjectFactories.h IceGridAdmin.h IceManager.h IceManagerImpl.h json_conversions.h ManagedIceObject.h ManagedIceObjectPlugin.h ManagedIceObjectImpl.h ManagedIceObjectDependency.h ManagedIceObjectRegistryInterface.h PackagePath.h RemoteReferenceCount.h system/ImportExport.h system/ImportExportComponent.h system/AbstractFactoryMethod.h system/FactoryCollectionBase.h system/Synchronization.h system/ArmarXDataPath.h system/DynamicLibrary.h system/ProcessWatcher.h system/ConditionSynchronization.h system/cmake/CMakePackageFinder.h system/cmake/CMakePackageFinderCache.h system/cmake/FindPackageX.cmake system/cmake/ArmarXPackageToolInterface.h system/RemoteObjectNode.h logging/LoggingUtil.h logging/LogSender.h logging/Logging.h logging/ArmarXLogBuf.h logging/SpamFilterData.h services/tasks/RunningTask.h services/tasks/PeriodicTask.h services/tasks/ThreadList.h services/tasks/TaskUtil.h services/tasks/ThreadPool.h services/sharedmemory/SharedMemoryProvider.h services/sharedmemory/SharedMemoryConsumer.h services/sharedmemory/IceSharedMemoryProvider.h 
services/sharedmemory/IceSharedMemoryConsumer.h services/sharedmemory/HardwareIdentifierProvider.h services/sharedmemory/HardwareId.h services/sharedmemory/exceptions/SharedMemoryExceptions.h services/profiler/Profiler.h services/profiler/LoggingStrategy.h services/profiler/FileLoggingStrategy.h services/profiler/IceLoggingStrategy.h application/Application.h application/ApplicationOptions.h application/ApplicationProcessFacet.h application/ApplicationNetworkStats.h application/properties/forward_declarations.h application/properties/Properties.h application/properties/Property.h application/properties/PluginEigen.h application/properties/PluginEnumNames.h application/properties/PluginCfgStruct.h application/properties/PluginAll.h application/properties/PropertyUser.h application/properties/PropertyDefinition.h application/properties/PropertyDefinition.hpp application/properties/PropertyDefinitionInterface.h application/properties/PropertyDefinitionContainer.h application/properties/PropertyDefinitionFormatter.h application/properties/PropertyDefinitionContainerFormatter.h application/properties/PropertyDefinitionConfigFormatter.h application/properties/PropertyDefinitionHelpFormatter.h application/properties/PropertyDefinitionBriefHelpFormatter.h application/properties/PropertyDefinitionXmlFormatter.h application/properties/PropertyDefinitionDoxygenFormatter.h application/properties/PropertyDefinitionDoxygenComponentPagesFormatter.h application/properties/PropertyDefinitionContainerBriefHelpFormatter.h application/properties/ProxyPropertyDefinition.h application/properties/IceProperties.h exceptions/Exception.h exceptions/LocalException.h exceptions/local/DynamicLibraryException.h exceptions/local/ExpressionException.h exceptions/local/FileIOException.h exceptions/local/InvalidPropertyValueException.h exceptions/local/MissingRequiredPropertyException.h exceptions/local/PropertyInheritanceCycleException.h exceptions/local/ProxyNotInitializedException.h 
exceptions/local/UnexpectedEnumValueException.h exceptions/local/UnmappedValueException.h exceptions/local/ValueRangeExceededException.h exceptions/user/NotImplementedYetException.h rapidxml/rapidxml.hpp rapidxml/rapidxml_print.hpp rapidxml/rapidxml_iterators.hpp rapidxml/rapidxml_utils.hpp rapidxml/wrapper/RapidXmlReader.h rapidxml/wrapper/RapidXmlWriter.h rapidxml/wrapper/DefaultRapidXmlReader.h rapidxml/wrapper/MultiNodeRapidXMLReader.h util/IceBlobToObject.h util/ObjectToIceBlob.h util/FileSystemPathBuilder.h util/FiniteStateMachine.h util/StringHelpers.h util/StringHelperTemplates.h util/algorithm.h util/OnScopeExit.h util/Predicates.h util/Preprocessor.h util/PropagateConst.h util/Registrar.h util/TemplateMetaProgramming.h util/TripleBuffer.h util/IceReportSkipper.h util/Throttler.h util/distributed/AMDCallbackCollection.h util/distributed/RemoteHandle/ClientSideRemoteHandleControlBlock.h util/distributed/RemoteHandle/RemoteHandle.h util/distributed/RemoteHandle/RemoteHandleControlBlock.h util/SimpleStatemachine.h time.h time_minimal.h time/forward_declarations.h time/ice_conversions.h time/json_conversions.h time/CallbackWaitLock.h time/Clock.h time/ClockType.h time/ClockTypeNames.h time/CycleUtil.h time/DateTime.h time/Duration.h time/Frequency.h time/LocalTimeServer.h time/Metronome.h time/ScopedStopWatch.h time/StopWatch.h time/Timer.h time/TimeUtil.h time/TimeKeeper.h csv/CsvWriter.h csv/CsvReader.h eigen/conversions.h eigen/ice_conversions.h ice_conversions.h ice_conversions/ice_conversions_boost_templates.h ice_conversions/ice_conversions_templates.h ice_conversions/ice_conversions_templates.tpp $
Definition: CMakeLists.txt:12
markdown::token::cAngles
@ cAngles
Definition: markdown-tokens.h:116
markdown-tokens.h
markdown::TokenGroup
std::list< TokenPtr > TokenGroup
Definition: markdown.h:22
markdown::token::CodeSpan::writeAsHtml
void writeAsHtml(std::ostream &out) const override
Definition: markdown-tokens.cpp:983
markdown::token::RawText::processSpanElements
std::optional< TokenGroup > processSpanElements(const LinkIds &idTable) override
Definition: markdown-tokens.cpp:325
distance
double distance(const Point &a, const Point &b)
Definition: point.hpp:88
markdown::token::Container::Container
Container(const TokenGroup &contents=TokenGroup())
Definition: markdown-tokens.h:336
armarx::ctrlutil::s
double s(double t, double s0, double v0, double a0, double j)
Definition: CtrlUtil.h:33
markdown::token::cAmps
@ cAmps
Definition: markdown-tokens.h:116
markdown::token::cQuotes
@ cQuotes
Definition: markdown-tokens.h:116