2 namespace wcf\system\bbcode
;
3 use wcf\system\exception\SystemException
;
5 use wcf\util\StringUtil
;
/**
 * Parses bbcodes and transforms them into the custom HTML element <woltlab-bbcode>
 * that can be safely passed through HTMLPurifier's validation mechanism.
 * 
 * Although this is not strictly required for every bbcode, the actual output of a bbcode
 * cannot be foreseen and could conflict with HTMLPurifier's whitelist. Examples
 * are <iframe> or other embedded media that are allowed as the result of a bbcode, but
 * must not be provided directly by a user.
 * 
 * @author	Alexander Ebert
 * @copyright	2001-2016 WoltLab GmbH
 * @license	GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
 * @package	WoltLabSuite\Core\System\Bbcode
 */
22 class HtmlBBCodeParser
extends BBCodeParser
{
/**
 * list of open tags, each entry holding the tag's name and uuid
 * @var	array
 */
27 protected $openTagIdentifiers = [];
/**
 * regex that a (lowercased) bbcode name must match to be considered valid
 * @var	string
 */
33 protected $validBBCodePattern = '~^[a-z](?:[a-z0-9\-_]+)?$~';
/**
 * Runs the complete parsing pipeline on the given text and returns the result.
 * 
 * @param	string		$text		text to parse
 * @return	string		parsed text
 */
public function parse($text) {
	$this->setText($text);
	
	// unlike the original implementation, source code bbcodes are
	// part of the tag detection as well
	$this->buildTagArray(false);
	
	// unclosed tags are deliberately not repaired: they are flagged as invalid
	// and dropped later on, the remaining lonely opening tags will eventually
	// be removed within the marker processor
	$this->buildXMLStructure();
	
	// bbcodes inside code bbcodes are flagged for reversal to their source
	$this->handleSourceBBCodes();
	
	$this->buildParsedString();
	
	return $this->parsedText;
}
/**
 * Rebuilds the tag and text arrays so that the tags form a properly nested sequence.
 * 
 * Tags that are not allowed in their context, fail validation or close a tag that
 * is not open are merged into the surrounding text. Tags that are still open when
 * an outer tag closes (or when the input ends) receive a generated closing tag
 * that is flagged as 'invalid'.
 */
public function buildXMLStructure() {
	// names of the currently open tags, innermost tag last
	$openTagStack = [];
	$newTagArray = [];
	$newTextArray = [];
	
	// remains -1 for an empty tag list, the trailing text lookup below then reads index 0
	$i = -1;
	foreach ($this->tagArray as $i => $tag) {
		if ($tag['closing']) {
			// closing tag
			if (in_array($tag['name'], $openTagStack) && $this->isAllowed($openTagStack, $tag['name'], true)) {
				// close unclosed tags
				while (end($openTagStack) != $tag['name']) {
					$nextIndex = count($newTagArray);
					
					// mark as invalid and do not flag as opened tag
					$newTag = $this->buildTag('[/'.end($openTagStack).']');
					$newTag['invalid'] = true;
					
					$newTagArray[$nextIndex] = $newTag;
					if (!isset($newTextArray[$nextIndex])) $newTextArray[$nextIndex] = '';
					$newTextArray[$nextIndex] .= $this->textArray[$i];
					
					// the text was consumed, it must not be appended a second time
					$this->textArray[$i] = '';
					array_pop($openTagStack);
				}
				
				$nextIndex = count($newTagArray);
				$newTagArray[$nextIndex] = $tag;
				array_pop($openTagStack);
				if (!isset($newTextArray[$nextIndex])) $newTextArray[$nextIndex] = '';
				$newTextArray[$nextIndex] .= $this->textArray[$i];
			}
			else {
				// no matching open tag
				// handle as plain text
				$this->textArray[$i] .= $tag['source'];
				$last = count($newTagArray);
				if (!isset($newTextArray[$last])) $newTextArray[$last] = '';
				$newTextArray[$last] .= $this->textArray[$i];
			}
		}
		else {
			// opening tag
			if ($this->isAllowed($openTagStack, $tag['name']) && $this->isValidTag($tag)) {
				$openTagStack[] = $tag['name'];
				$nextIndex = count($newTagArray);
				$newTagArray[$nextIndex] = $tag;
				if (!isset($newTextArray[$nextIndex])) $newTextArray[$nextIndex] = '';
				$newTextArray[$nextIndex] .= $this->textArray[$i];
			}
			else {
				// not allowed or invalid, handle as plain text
				$this->textArray[$i] .= $tag['source'];
				$last = count($newTagArray);
				if (!isset($newTextArray[$last])) $newTextArray[$last] = '';
				$newTextArray[$last] .= $this->textArray[$i];
			}
		}
	}
	
	// append the text that follows the last tag
	$last = count($newTagArray);
	if (!isset($newTextArray[$last])) $newTextArray[$last] = '';
	$newTextArray[$last] .= $this->textArray[$i + 1];
	
	// close unclosed open tags
	while (end($openTagStack)) {
		$nextIndex = count($newTagArray);
		
		// mark as invalid
		$newTag = $this->buildTag('[/'.end($openTagStack).']');
		$newTag['invalid'] = true;
		
		$newTagArray[$nextIndex] = $newTag;
		if (!isset($newTextArray[$nextIndex])) $newTextArray[$nextIndex] = '';
		array_pop($openTagStack);
	}
	
	$this->tagArray = $newTagArray;
	$this->textArray = $newTextArray;
}
/**
 * Flags bbcodes inside code bbcodes for reversal, turning them back
 * into their source state (= textual representation).
 * 
 * Affected entries of the tag array receive the key 'inCode'. Tags already
 * flagged as 'invalid' are ignored.
 */
protected function handleSourceBBCodes() {
	$sourceBBCodes = $this->getSourceBBCodes();
	
	// name of the source bbcode that is currently open, empty if there is none
	$inCode = '';
	$openTagStack = [];
	
	for ($i = 0, $length = count($this->tagArray); $i < $length; $i++) {
		$tag = $this->tagArray[$i];
		
		if (!empty($tag['invalid'])) {
			// skip invalid tags
			continue;
		}
		
		$name = $tag['name'];
		
		if ($tag['closing']) {
			if ($inCode) {
				// matches opening code tag
				if ($inCode === $name) {
					$inCode = '';
					array_pop($openTagStack);
				}
				else {
					// unrelated tag, flag as invalid
					$this->tagArray[$i]['inCode'] = true;
				}
				
				continue;
			}
			
			array_pop($openTagStack);
		}
		else {
			if ($inCode) {
				// inside code block, flag as invalid
				$this->tagArray[$i]['inCode'] = true;
			}
			else {
				// starts a new code block
				if (in_array($name, $sourceBBCodes)) {
					// look ahead to see if there is a valid closing tag
					// NOTE(review): only name and 'invalid' are compared, a later opening
					// tag with the same name is accepted as well - confirm this is intended
					$hasClosingTag = false;
					for ($j = $i + 1; $j < $length; $j++) {
						if ($this->tagArray[$j]['name'] === $name && empty($this->tagArray[$j]['invalid'])) {
							$hasClosingTag = true;
							break;
						}
					}
					
					if ($hasClosingTag) {
						$inCode = $name;
					}
					else {
						// no closing tag, flag as invalid to avoid the
						// entire content afterwards being treated as code
						$this->tagArray[$i]['inCode'] = true;
					}
				}
				
				$openTagStack[] = $name;
			}
		}
	}
}
/**
 * Assembles the parsed text from the tag and text arrays.
 * 
 * Text is written into a buffer that initially is the parsed text itself. Opening
 * tags that need buffering redirect the buffer until their closing tag is reached,
 * allowing the enclosed content to be used as an attribute value. Tags flagged as
 * 'inCode' are emitted as their source, closing tags flagged as 'invalid' are dropped.
 */
public function buildParsedString() {
	// reset parsed text
	$this->parsedText = '';
	
	// reset identifiers for open tags
	$this->openTagIdentifiers = [];
	
	// create text buffer
	$buffer =& $this->parsedText;
	
	// stack of buffered tags
	$bufferedTagStack = [];
	
	// loop through the tags, $i remains -1 for an empty tag list
	$i = -1;
	foreach ($this->tagArray as $i => $tag) {
		// append text to buffer
		$buffer .= $this->textArray[$i];
		
		if ($tag['closing']) {
			if (!empty($tag['invalid'])) {
				// drop invalid closing tag
				continue;
			}
			else if (!empty($tag['inCode'])) {
				// revert bbcodes inside code
				$buffer .= $tag['source'];
				continue;
			}
			
			// get buffered opening tag
			$openingTag = end($bufferedTagStack);
			
			// closing tag
			if ($openingTag && $openingTag['name'] == $tag['name']) {
				// NOTE(review): despite its name, this flag is the condition for appending
				// the buffer to non-class bbcodes below - confirm against the base parser
				$hideBuffer = false;
				
				// insert buffered content as attribute value
				foreach ($this->bbcodes[$tag['name']]->getAttributes() as $attribute) {
					if ($attribute->useText && !isset($openingTag['attributes'][$attribute->attributeNo])) {
						$openingTag['attributes'][$attribute->attributeNo] = $buffer;
						$hideBuffer = true;
						break;
					}
				}
				
				// validate tag attributes again
				if ($this->isValidTag($openingTag)) {
					if ($this->bbcodes[$tag['name']]->className) {
						// difference to the original implementation: use the custom HTML element
						// rather than processing the bbcode directly
						$parsedTag = $this->compileTag($openingTag, $buffer, $tag);
					}
					else {
						// build tag
						$parsedTag = $this->buildOpeningTag($openingTag);
						$closingTag = $this->buildClosingTag($tag);
						if (!empty($closingTag) && $hideBuffer) $parsedTag .= $buffer.$closingTag;
					}
				}
				else {
					// attributes became invalid, fall back to the textual representation
					$parsedTag = $openingTag['source'].$buffer.$tag['source'];
				}
				
				// close current buffer
				array_pop($bufferedTagStack);
				
				// open previous buffer
				if (count($bufferedTagStack) > 0) {
					$bufferedTag =& $bufferedTagStack[count($bufferedTagStack) - 1];
					$buffer =& $bufferedTag['buffer'];
				}
				else {
					$buffer =& $this->parsedText;
				}
				
				// append parsed tag
				$buffer .= $parsedTag;
			}
			else {
				$buffer .= $this->buildClosingTag($tag);
			}
		}
		else {
			if (!empty($tag['inCode'])) {
				// revert bbcodes inside code
				$buffer .= $tag['source'];
				continue;
			}
			
			// opening tag
			if ($this->needBuffering($tag)) {
				// start buffering
				$tag['buffer'] = '';
				$bufferedTagStack[] = $tag;
				$buffer =& $bufferedTagStack[count($bufferedTagStack) - 1]['buffer'];
			}
			else {
				$buffer .= $this->buildOpeningTag($tag);
			}
		}
	}
	
	// append the text that follows the last tag
	if (isset($this->textArray[$i + 1])) $this->parsedText .= $this->textArray[$i + 1];
}
/**
 * Builds the bbcode output.
 * 
 * The position of the inner content is marked with the placeholder
 * `<!-- META_CODE_INNER_CONTENT -->`.
 * 
 * @param	string		$name		bbcode identifier
 * @param	array		$attributes	list of attributes
 * @param	\DOMElement	$element	element
 * @return	string		parsed bbcode
 */
public function getHtmlOutput($name, array $attributes, \DOMElement $element) {
	// unknown bbcode, output plain tags
	if (!isset($this->bbcodes[$name])) {
		return $this->buildBBCodeTag($name, $attributes);
	}
	
	$bbcode = $this->bbcodes[$name];
	
	// source code bbcodes receive the element's text content as their first attribute
	if ($bbcode->isSourceCode) {
		array_unshift($attributes, $element->textContent);
	}
	
	$openingTag = ['attributes' => $attributes, 'name' => $name];
	$closingTag = ['name' => $name];
	
	/** @var IBBCode $processor */
	$processor = $bbcode->getProcessor();
	if ($processor) {
		return $processor->getParsedTag($openingTag, '<!-- META_CODE_INNER_CONTENT -->', $closingTag, $this);
	}
	
	// no processor available, use the parent's implementation to build the tags
	return parent::buildOpeningTag($openingTag) . '<!-- META_CODE_INNER_CONTENT -->' . parent::buildClosingTag($closingTag);
}
/**
 * Builds a plain bbcode string, used for unknown bbcodes.
 * 
 * Each attribute is wrapped in single quotes, with contained single quotes being
 * escaped by a backslash. The attributes are joined by a comma and prefixed with `=`.
 * 
 * @param	string		$name			bbcode identifier
 * @param	array		$attributes		list of attributes
 * @param	boolean		$openingTagOnly		only render the opening tag
 * @return	string		bbcode string
 */
public function buildBBCodeTag($name, $attributes, $openingTagOnly = false) {
	// an empty attribute list yields no attribute part at all
	$attributeString = '';
	if (!empty($attributes)) {
		$quotedAttributes = array_map(function($attribute) {
			return "'" . addcslashes($attribute, "'") . "'";
		}, $attributes);
		
		$attributeString = '=' . implode(",", $quotedAttributes);
	}
	
	$openingTag = '[' . $name . $attributeString . ']';
	if ($openingTagOnly) {
		return $openingTag;
	}
	
	return $openingTag . '<!-- META_CODE_INNER_CONTENT -->[/' . $name . ']';
}
/**
 * Returns the list of bbcodes that represent block elements.
 * 
 * @return	string[]	list of bbcode block elements
 */
public function getBlockBBCodes() {
	$bbcodes = [];
	foreach ($this->bbcodes as $name => $bbcode) {
		if ($bbcode->isBlockElement) {
			$bbcodes[] = $name;
		}
	}
	
	return $bbcodes;
}
/**
 * Returns the list of bbcodes that represent source code elements.
 * 
 * @return	string[]	list of bbcode source code elements
 */
public function getSourceBBCodes() {
	$bbcodes = [];
	foreach ($this->bbcodes as $name => $bbcode) {
		if ($bbcode->isSourceCode) {
			$bbcodes[] = $name;
		}
	}
	
	return $bbcodes;
}
/**
 * Compiles tag fragments into the custom HTML element.
 * 
 * @param	array		$openingTag	opening tag data
 * @param	string		$content	content between opening and closing tag
 * @param	array		$closingTag	closing tag data
 * @return	string		custom HTML element
 */
protected function compileTag(array $openingTag, $content, array $closingTag) {
	// the opening marker must be built first: it registers the identifier
	// that is consumed when the closing marker is built
	$openingMarker = $this->buildOpeningTag($openingTag);
	$closingMarker = $this->buildClosingTag($closingTag);
	
	return $openingMarker . $content . $closingMarker;
}
/**
 * Builds the opening marker element for the given tag and registers its
 * name and uuid in the list of open tags.
 * 
 * Tags whose lowercased name is not a valid bbcode identifier are returned
 * as their source and are not registered.
 * 
 * @param	array		$tag		opening tag data
 * @return	string		marker element or the tag's source
 */
protected function buildOpeningTag(array $tag) {
	$name = strtolower($tag['name']);
	if (!$this->isValidBBCodeName($name)) {
		return $tag['source'];
	}
	
	// remember the tag, the entry is consumed by `buildClosingTag()`
	$uuid = StringUtil::getUUID();
	$this->openTagIdentifiers[] = [
		'name' => $name,
		'uuid' => $uuid
	];
	
	// tags without attributes do not receive the `data-attributes` attribute
	$attributes = '';
	if (!empty($tag['attributes'])) {
		// strip outer quote tags
		$tag['attributes'] = array_map(function($attribute) {
			if (preg_match('~^([\'"])(?P<content>.*)(\1)$~', $attribute, $matches)) {
				return $matches['content'];
			}
			
			// not enclosed in matching quotes, keep the value as it is
			return $attribute;
		}, $tag['attributes']);
		
		// uses base64 encoding to avoid an "escape" nightmare
		$attributes = ' data-attributes="' . base64_encode(JSON::encode($tag['attributes'])) . '"';
	}
	
	return '<woltlab-metacode-marker data-name="' . $name . '" data-uuid="' . $uuid . '" data-source="' . base64_encode($tag['source']) . '"' . $attributes . ' />';
}
/**
 * Builds the closing marker element for the given tag, consuming the most
 * recently registered open tag identifier.
 * 
 * Tags whose lowercased name is not a valid bbcode identifier are returned
 * as their source. The same applies to source code bbcodes that do not
 * match the most recently opened tag.
 * 
 * @param	array		$tag		closing tag data
 * @return	string		marker element or the tag's source
 * @throws	SystemException		if the tag does not match the most recently opened tag
 */
protected function buildClosingTag(array $tag) {
	$name = strtolower($tag['name']);
	if (!$this->isValidBBCodeName($name)) {
		return $tag['source'];
	}
	
	// `array_pop()` yields null if there is no open tag left, treat this
	// as a mismatch instead of accessing an offset of null
	$data = array_pop($this->openTagIdentifiers);
	$openName = ($data !== null) ? $data['name'] : null;
	
	if ($openName !== $name) {
		// check if this is a source code tag as some people
		// love to nest the same source bbcode
		// NOTE(review): the popped identifier is not restored in this case - confirm this is intended
		if (in_array($name, $this->getSourceBBCodes())) {
			return $tag['source'];
		}
		
		throw new SystemException("Tag mismatch, expected '".$name."', got '".$openName."'.");
	}
	
	return '<woltlab-metacode-marker data-uuid="' . $data['uuid'] . '" data-source="' . base64_encode($tag['source']) . '" />';
}
/**
 * Returns true if provided name is a valid bbcode identifier.
 * 
 * @param	string		$name		bbcode identifier
 * @return	boolean		true if provided name is a valid bbcode identifier
 */
protected function isValidBBCodeName($name) {
	// anything but exactly one match (no match or a regex failure) is treated as invalid
	$matchCount = preg_match($this->validBBCodePattern, $name);
	
	return ($matchCount === 1);
}