Apply PSR-12 code style (#3886)
[GitHub/WoltLab/WCF.git] / wcfsetup / install / files / lib / system / html / input / HtmlInputProcessor.class.php
CommitLineData
4eef6bfd 1<?php
a9229942 2
4eef6bfd 3namespace wcf\system\html\input;
a9229942 4
60a35505 5use wcf\system\bbcode\HtmlBBCodeParser;
a9229942 6use wcf\system\html\AbstractHtmlProcessor;
4eef6bfd
AE
7use wcf\system\html\input\filter\IHtmlInputFilter;
8use wcf\system\html\input\filter\MessageHtmlInputFilter;
b4346e66 9use wcf\system\html\input\node\HtmlInputNodeProcessor;
13825b39 10use wcf\util\DOMUtil;
60a35505 11use wcf\util\StringUtil;
4eef6bfd 12
/**
 * Reads a HTML string, applies filters and parses all nodes including bbcodes.
 *
 * The heavy lifting is delegated to a lazily created `HtmlInputNodeProcessor`;
 * this class orchestrates normalization, bbcode parsing, filtering and node
 * processing, and keeps the embedded content collected by the node processor.
 *
 * @author Alexander Ebert
 * @copyright 2001-2019 WoltLab GmbH
 * @license GNU Lesser General Public License <http://opensource.org/licenses/lgpl-license.php>
 * @package WoltLabSuite\Core\System\Html\Input
 * @since 3.0
 */
class HtmlInputProcessor extends AbstractHtmlProcessor
{
    /**
     * list of embedded content grouped by type
     * @var array
     */
    protected $embeddedContent = [];

    /**
     * lazily created HTML filter, see `getHtmlInputFilter()`
     * @var IHtmlInputFilter
     */
    protected $htmlInputFilter;

    /**
     * lazily created node processor, see `getHtmlInputNodeProcessor()`
     * @var HtmlInputNodeProcessor
     */
    protected $htmlInputNodeProcessor;

    /**
     * skip the HTML filter during message reprocessing
     * @var bool
     */
    protected $skipFilter = false;

    /**
     * Processes the input html string.
     *
     * Steps, in order: reset internal state, set the context, trim and unify
     * newlines, optionally convert a plain bbcode string into HTML, run the
     * bbcode parser, apply the HTML filter (unless `$skipFilter` is set) and
     * finally load and process the result with the node processor.
     *
     * @param string $html html string
     * @param string $objectType object type identifier
     * @param int $objectID object id
     * @param bool $convertFromBBCode interpret input as bbcode
     */
    public function process($html, $objectType, $objectID = 0, $convertFromBBCode = false)
    {
        $this->reset();

        $this->setContext($objectType, $objectID);

        // enforce consistent newlines
        $html = StringUtil::trim(StringUtil::unifyNewlines($html));

        // check if this is true HTML or just a bbcode string
        if ($convertFromBBCode) {
            $html = $this->convertToHtml($html);
        }

        // transform bbcodes into metacode markers
        $html = HtmlBBCodeParser::getInstance()->parse($html);

        // filter HTML
        if (!$this->skipFilter) {
            $html = $this->getHtmlInputFilter()->apply($html);
        }

        // pre-parse HTML
        $this->getHtmlInputNodeProcessor()->load($this, $html);
        $this->getHtmlInputNodeProcessor()->process();
        $this->embeddedContent = $this->getHtmlInputNodeProcessor()->getEmbeddedContent();
    }

    /**
     * Processes a HTML string to provide the general DOM API. This method
     * does not perform any filtering or validation. You SHOULD NOT use this
     * to deal with HTML that has not been filtered previously.
     *
     * @param string $html html string
     */
    public function processIntermediate($html)
    {
        $this->getHtmlInputNodeProcessor()->load($this, $html);
    }

    /**
     * Reprocesses a message by transforming the message into an editor-like
     * state using plain bbcodes instead of metacode elements.
     *
     * Every `<woltlab-metacode>` element is replaced by its children wrapped in
     * the textual `[name=attributes]` / `[/name]` tags. The resulting HTML is
     * then passed through `process()` with the HTML filter disabled.
     *
     * @param string $html html string
     * @param string $objectType object type identifier
     * @param int $objectID object id
     * @since 3.1
     */
    public function reprocess($html, $objectType, $objectID)
    {
        $this->processIntermediate($html);

        // revert embedded bbcodes for re-evaluation
        $metacodes = DOMUtil::getElements($this->getHtmlInputNodeProcessor()->getDocument(), 'woltlab-metacode');
        foreach ($metacodes as $metacode) {
            $name = $metacode->getAttribute('data-name');
            $attributes = $this->getHtmlInputNodeProcessor()
                ->parseAttributes($metacode->getAttribute('data-attributes'));

            $bbcodeAttributes = $this->buildBBCodeAttributeString($attributes);

            // opening tag goes in front of the existing children
            $openingTag = '[' . $name . ($bbcodeAttributes !== '' ? '=' . $bbcodeAttributes : '') . ']';
            $metacode->insertBefore(
                $metacode->ownerDocument->createTextNode($openingTag),
                $metacode->firstChild
            );

            // closing tag goes behind them
            $metacode->appendChild($metacode->ownerDocument->createTextNode('[/' . $name . ']'));

            // drop the metacode element itself; the `true` flag is presumably
            // "preserve child nodes" -- confirm against DOMUtil::removeNode()
            DOMUtil::removeNode($metacode, true);
        }

        // the flag must be restored even if processing throws, otherwise later
        // calls to process() on this instance would silently skip the filter
        try {
            $this->skipFilter = true;
            $this->process($this->getHtml(), $objectType, $objectID, false);
        } finally {
            $this->skipFilter = false;
        }
    }

    /**
     * Serializes parsed metacode attributes into the comma separated list that
     * follows the `=` of a textual bbcode tag.
     *
     * Booleans become the bare words `true`/`false`, strings and numeric values
     * are wrapped in single quotes with embedded single quotes escaped, and any
     * other value is replaced by an empty quoted string.
     *
     * NOTE(review): only single quotes are escaped, a backslash inside a value
     * is emitted verbatim -- confirm the bbcode attribute parser tolerates this.
     *
     * @param mixed[] $attributes attribute values as returned by `parseAttributes()`
     * @return string serialized attributes, empty string if there are none
     */
    private function buildBBCodeAttributeString($attributes)
    {
        $parts = [];
        foreach ($attributes as $attribute) {
            if ($attribute === true) {
                $parts[] = 'true';
            } elseif ($attribute === false) {
                $parts[] = 'false';
            } elseif (\is_string($attribute) || \is_numeric($attribute)) {
                // explicit cast: numeric values may arrive as int/float
                $parts[] = "'" . \addcslashes((string)$attribute, "'") . "'";
            } else {
                // discard anything that is not string-like
                $parts[] = "''";
            }
        }

        return \implode(',', $parts);
    }

    /**
     * Processes only embedded content. This method should only be called when rebuilding
     * data where only embedded content is relevant, but no actual parsing is required.
     *
     * @param string $html html string
     * @param string $objectType object type identifier
     * @param int $objectID object id
     * @throws \UnexpectedValueException if the object id is empty
     */
    public function processEmbeddedContent($html, $objectType, $objectID)
    {
        if (!$objectID) {
            throw new \UnexpectedValueException("Object id parameter must be non-zero.");
        }

        $this->setContext($objectType, $objectID);

        $this->getHtmlInputNodeProcessor()->load($this, $html);
        $this->getHtmlInputNodeProcessor()->processEmbeddedContent();
        $this->embeddedContent = $this->getHtmlInputNodeProcessor()->getEmbeddedContent();
    }

    /**
     * Checks the input html for disallowed bbcodes and returns any matches.
     *
     * @return string[] list of matched disallowed bbcodes
     */
    public function validate()
    {
        return $this->getHtmlInputNodeProcessor()->validate();
    }

    /**
     * Enforces the maximum depth of nested quotes.
     *
     * @param int $depth
     */
    public function enforceQuoteDepth($depth)
    {
        $this->getHtmlInputNodeProcessor()->enforceQuoteDepth($depth);
    }

    /**
     * Returns the parsed HTML ready to store.
     *
     * @return string parsed html
     */
    public function getHtml()
    {
        return $this->getHtmlInputNodeProcessor()->getHtml();
    }

    /**
     * Returns the raw text content of current document.
     *
     * @return string raw text content
     */
    public function getTextContent()
    {
        return $this->getHtmlInputNodeProcessor()->getTextContent();
    }

    /**
     * Returns true if the message appears to be empty.
     *
     * @return bool true if message appears to be empty
     */
    public function appearsToBeEmpty()
    {
        return $this->getHtmlInputNodeProcessor()->appearsToBeEmpty();
    }

    /**
     * Returns all embedded content data collected by the last call to
     * `process()` or `processEmbeddedContent()`.
     *
     * @return array
     */
    public function getEmbeddedContent()
    {
        return $this->embeddedContent;
    }

    /**
     * Returns the node processor, creating it on first access.
     *
     * @return HtmlInputNodeProcessor
     */
    public function getHtmlInputNodeProcessor()
    {
        if ($this->htmlInputNodeProcessor === null) {
            $this->htmlInputNodeProcessor = new HtmlInputNodeProcessor();
        }

        return $this->htmlInputNodeProcessor;
    }

    /**
     * Sets the new object id.
     *
     * @param int $objectID object id
     */
    public function setObjectID($objectID)
    {
        $this->context['objectID'] = $objectID;
    }

    /**
     * Resets internal states and discards references to objects.
     *
     * The HTML filter instance is intentionally kept, only the embedded content
     * and the node processor are discarded.
     */
    protected function reset()
    {
        $this->embeddedContent = [];
        $this->htmlInputNodeProcessor = null;
    }

    /**
     * Returns the HTML filter, creating a `MessageHtmlInputFilter` on first access.
     *
     * @return IHtmlInputFilter
     */
    protected function getHtmlInputFilter()
    {
        if ($this->htmlInputFilter === null) {
            $this->htmlInputFilter = new MessageHtmlInputFilter();
        }

        return $this->htmlInputFilter;
    }

    /**
     * Converts bbcodes using newlines into valid HTML.
     *
     * The input is HTML-encoded, short `[attach=ID]` tags are expanded to
     * `[attach=ID,'none','2']` and the text is then split on runs of newlines:
     * a single newline becomes `<br>`, a run of N >= 2 newlines closes the
     * current paragraph and inserts N - 1 empty paragraphs (`<p><br></p>`).
     *
     * @param string $html html string
     * @return string parsed html string
     */
    protected function convertToHtml($html)
    {
        $html = StringUtil::encodeHTML($html);
        $html = \preg_replace('/\[attach=(\d+)\]/', "[attach=\\1,'none','2']", $html);

        // the captured delimiters yield an alternating list of text segments and
        // newline runs that always starts and ends with a (possibly empty) text segment
        $parts = \preg_split('~(\n+)~', $html, -1, \PREG_SPLIT_DELIM_CAPTURE);

        $openParagraph = false;
        $html = '';
        foreach ($parts as $part) {
            if (\strpos($part, "\n") === false) {
                // text segment: make sure it lives inside a paragraph
                if (!$openParagraph) {
                    $html .= '<p>';
                    $openParagraph = true;
                }

                $html .= $part;

                continue;
            }

            $newlines = \substr_count($part, "\n");
            if ($newlines === 1) {
                $html .= '<br>';

                continue;
            }

            if ($openParagraph) {
                $html .= '</p>';
                $openParagraph = false;
            }

            // ignore one newline because a new paragraph with bbcodes is created
            // using two subsequent newlines; `$newlines` is at least 2 here, thus
            // at least one empty paragraph is inserted
            $html .= \str_repeat('<p><br></p>', $newlines - 1);
        }

        // the last part is always a text segment, thus a paragraph is open here
        return $html . '</p>';
    }
}