You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

202 lines
6.9 KiB

  1. <?php
  2. /**
  3. * Lexer adapted from Simple Test: http://sourceforge.net/projects/simpletest/
  4. * For an intro to the Lexer see:
  5. * https://web.archive.org/web/20120125041816/http://www.phppatterns.com/docs/develop/simple_test_lexer_notes
  6. *
  7. * @author Marcus Baker http://www.lastcraft.com
  8. */
  9. namespace dokuwiki\Parsing\Lexer;
  10. /**
  11. * Compounded regular expression.
  12. *
  13. * Any of the contained patterns could match and when one does its label is returned.
  14. */
  class ParallelRegex
  {
      /** @var string[] patterns to match, kept exactly as they were passed to addPattern() */
      protected $patterns = [];
      /** @var array labels for the above patterns (same indexes) */
      protected $labels = [];
      /** @var string|null the compound regex matching all patterns, null until it is (re)built */
      protected $regex;
      /** @var bool case sensitive matching? */
      protected $case;

      /**
       * Constructor. Starts with no patterns.
       *
       * @param bool $case True for case sensitive, false for insensitive.
       */
      public function __construct($case)
      {
          $this->case = $case;
      }

      /**
       * Adds a pattern with an optional label.
       *
       * Adding a pattern discards the cached compound regex; it is rebuilt from the
       * untouched source patterns on the next apply()/split() call.
       *
       * @param mixed $pattern Perl style regex. Must be UTF-8 encoded. If it is a string,
       *                       the ( and ) lose their meaning unless they form part of a
       *                       lookahead or lookbehind assertion.
       * @param bool|string $label Label of regex to be returned on a match. Label must be ASCII
       */
      public function addPattern($pattern, $label = true)
      {
          $this->patterns[] = $pattern;
          $this->labels[] = $label;
          $this->regex = null;
      }

      /**
       * Attempts to match all patterns at once against a string.
       *
       * @param string $subject String to match against.
       * @param string $match First matched portion of subject.
       * @return bool|string False if no match found, label if label exists, true if not
       */
      public function apply($subject, &$match)
      {
          if (count($this->patterns) == 0) {
              return false;
          }
          if (!preg_match($this->getCompoundedRegex(), $subject, $matches)) {
              $match = "";
              return false;
          }

          $match = $matches[0];

          // Each pattern is wrapped in exactly one capture group and trailing groups that
          // did not take part in the match are not reported, so the last group belongs to
          // the alternative that matched. Looking at the index instead of the captured
          // text keeps matches such as "0" or "" from being mistaken for "no match".
          return $this->labels[count($matches) - 2] ?? true;
      }

      /**
       * Attempts to split the string against all patterns at once
       *
       * @param string $subject String to match against.
       * @param array $split The split result: array containing, pre-match, match & post-match strings
       * @return bool|string False if nothing matched, otherwise the label of the matching
       *                     pattern (true if it has none).
       *
       * @author Christopher Smith <chris@jalakai.co.uk>
       */
      public function split($subject, &$split)
      {
          if (count($this->patterns) == 0) {
              return false;
          }
          if (!preg_match($this->getCompoundedRegex(), $subject, $matches, PREG_OFFSET_CAPTURE)) {
              $this->reportPcreError();
              $split = [$subject, "", ""];
              return false;
          }

          // With PREG_OFFSET_CAPTURE every entry is [text, byte offset]; cutting the subject
          // around the overall match avoids running a second regex over it.
          [$matched, $offset] = $matches[0];
          $split = [
              substr($subject, 0, $offset),
              $matched,
              substr($subject, $offset + strlen($matched)),
          ];

          // the last reported group identifies the alternative that matched (see apply())
          return $this->labels[count($matches) - 2] ?? true;
      }

      /**
       * Compounds the patterns into a single
       * regular expression separated with the
       * "or" operator. Caches the regex.
       * Will automatically escape (, ) and / tokens.
       *
       * The source patterns are left untouched, so the regex can be rebuilt safely
       * whenever another pattern is added.
       *
       * @return null|string
       */
      protected function getCompoundedRegex()
      {
          if ($this->regex === null) {
              $alternatives = [];
              foreach ($this->patterns as $pattern) {
                  // one capture group per pattern; apply()/split() rely on that
                  $alternatives[] = '(' . $this->escapePattern($pattern) . ')';
              }
              $this->regex = "/" . implode("|", $alternatives) . "/" . $this->getPerlMatchingFlags();
          }
          return $this->regex;
      }

      /**
       * Escapes a single pattern for use inside the "/"-delimited compound regex.
       *
       * Plain "(" and ")" are turned into literals, "(?...)" groups are kept and
       * every "/" that is not already escaped gets a backslash.
       *
       * @param string $pattern pattern as passed to addPattern()
       * @return string the escaped pattern (without the surrounding capture group)
       */
      protected function escapePattern($pattern)
      {
          /*
           * decompose the input pattern into "(", "(?", ")",
           * "[...]", "[]..]", "[^]..]", "[...[:...:]..]", "\x"...
           * elements.
           */
          preg_match_all(
              '/\\\\.|' .
              '\(\?|' .
              '[()]|' .
              '\[\^?\]?(?:\\\\.|\[:[^]]*:\]|[^]\\\\])*\]|' .
              '[^[()\\\\]+/',
              $pattern,
              $elts
          );

          $escaped = "";
          $level = 0; // number of currently open "(?" groups
          foreach ($elts[0] as $elt) {
              switch ($elt) {
                  case '(':
                      $escaped .= '\(';
                      break;
                  case ')':
                      if ($level > 0) {
                          $level--; /* closing (? */
                          $escaped .= ')';
                      } else {
                          $escaped .= '\)';
                      }
                      break;
                  case '(?':
                      $level++;
                      $escaped .= '(?';
                      break;
                  default:
                      // Escape bare slashes only. Backslash pairs (including "\/" inside a
                      // character class) are copied verbatim so they are not escaped twice.
                      $escaped .= preg_replace_callback(
                          '~\\\\.|/~s',
                          static function ($hit) {
                              return $hit[0] === '/' ? '\/' : $hit[0];
                          },
                          $elt
                      );
              }
          }
          return $escaped;
      }

      /**
       * Reports the reason for a failed preg_match() call, if PCRE recorded one.
       *
       * Does nothing when the regex simply did not match.
       */
      protected function reportPcreError()
      {
          $hints = [
              PREG_BACKTRACK_LIMIT_ERROR =>
                  'A PCRE backtrack error occured. Try to increase the pcre.backtrack_limit in php.ini',
              PREG_RECURSION_LIMIT_ERROR =>
                  'A PCRE recursion error occured. Try to increase the pcre.recursion_limit in php.ini',
              PREG_BAD_UTF8_ERROR =>
                  'A PCRE UTF-8 error occured. This might be caused by a faulty plugin',
              PREG_INTERNAL_ERROR =>
                  'A PCRE internal error occured. This might be caused by a faulty plugin',
          ];

          $err = preg_last_error();
          if (isset($hints[$err])) {
              msg($hints[$err], -1);
          }
      }

      /**
       * Accessor for perl regex mode flags to use.
       * @return string Perl regex flags.
       */
      protected function getPerlMatchingFlags()
      {
          return ($this->case ? "msS" : "msSi");
      }
  }