// Provenance: git commit cc8ed1659a87152d470fbf46e730679e9ed33d23
// "Added output processor for Google AMP" (Alexander Ebert, Thu, 28 Jul 2016 12:18:04 +0200),
// new file wcfsetup/install/files/lib/system/html/output/AmpHtmlOutputProcessor.class.php
//
// NOTE(review): the file preamble (opening tag, namespace declaration and the
// import that brings DOMUtil into scope) is not part of this excerpt and must
// be present for the class to load.

/**
 * Output processor that rewrites the document produced by the parent
 * processor for Google AMP: it strips a fixed list of elements, unwraps
 * forms, drops scripts other than `application/ld+json` and replaces
 * `img`, `video`, `audio` and `iframe` with their `amp-` prefixed elements.
 *
 * @package WoltLabSuite\Core\System\Html\Output
 * @since 3.0
 */
class AmpHtmlOutputProcessor extends HtmlOutputProcessor {
    /**
     * @inheritDoc
     */
    public function process($html, $objectType, $objectID) {
        parent::process($html, $objectType, $objectID);

        $document = $this->getHtmlOutputNodeProcessor()->getDocument();

        // remove tags and discard their content
        $tags = [
            // general
            'base', 'frame', 'frameset', 'object', 'param', 'applet', 'embed',

            // forms
            'input', 'textarea', 'select', 'option',

            // special
            'style'
        ];
        foreach ($tags as $tag) {
            $elements = $document->getElementsByTagName($tag);
            // the loop relies on the node list being live: every removal
            // shrinks it, so the next candidate is always item 0
            while ($elements->length) DOMUtil::removeNode($elements->item(0), true);
        }

        // remove tags but keep child nodes
        $tags = ['form'];
        foreach ($tags as $tag) {
            $elements = $document->getElementsByTagName($tag);
            while ($elements->length) DOMUtil::removeNode($elements->item(0), false);
        }

        // remove script tags unless the type is application/ld+json;
        // the matches are collected into an array first so that removing
        // them does not disturb iteration over the node list
        $elements = $this->filterElements(
            $document->getElementsByTagName('script'),
            function ($element) {
                /** @var \DOMElement $element */
                return ($element->getAttribute('type') === 'application/ld+json');
            }
        );
        foreach ($elements as $element) DOMUtil::removeNode($element);

        // replace tags with their `amp-` prefixed counterpart
        $tags = ['img', 'video', 'audio', 'iframe'];
        foreach ($tags as $tag) {
            $elements = $document->getElementsByTagName($tag);
            while ($elements->length) {
                /** @var \DOMElement $element */
                $element = $elements->item(0);
                if ($tag === 'img') {
                    $this->applyInlineDimensions($element);

                    // images lacking either dimension are dropped entirely
                    // (presumably because amp-img needs explicit sizes -- TODO confirm)
                    if (!$element->getAttribute('height') || !$element->getAttribute('width')) {
                        DOMUtil::removeNode($element);
                        continue;
                    }

                    $element->removeAttribute('style');
                }

                $newElement = $element->ownerDocument->createElement('amp-' . $tag);

                // copy attributes; child nodes of the original element are
                // not moved into the replacement
                foreach ($element->attributes as $attr) {
                    $newElement->setAttribute($attr->localName, $attr->nodeValue);
                }

                $element->parentNode->insertBefore($newElement, $element);
                DOMUtil::removeNode($element);
            }
        }
    }

    /**
     * Copies pixel values of the `height` and `width` declarations found in
     * the element's inline style to the attributes of the same name.
     * Existing attribute values are kept when the style has no such
     * declaration.
     *
     * @param \DOMElement $element image element to update in place
     */
    private function applyInlineDimensions(\DOMElement $element) {
        $styles = $element->getAttribute('style');

        foreach (['height', 'width'] as $property) {
            // The lookbehind rejects longer property names that merely end in
            // the wanted one (`max-height`, `line-height`, `border-width`, ...);
            // a plain `\b` would accept them because `-` is a non-word character.
            if (preg_match('~(?<![\w-])' . $property . ':\s*(\d+)px\b~i', $styles, $matches)) {
                $element->setAttribute($property, $matches[1]);
            }
        }
    }

    /**
     * Returns the elements for which the callback returns exactly `false`.
     *
     * @param \DOMNodeList $elements list of candidate elements
     * @param callable $callback invoked with each element; any return value other than `false` keeps the element out of the result
     * @return \DOMNode[] rejected elements in document order of the list
     */
    protected function filterElements(\DOMNodeList $elements, callable $callback) {
        $badElements = [];

        foreach ($elements as $element) {
            if ($callback($element) === false) {
                $badElements[] = $element;
            }
        }

        return $badElements;
    }
}