From: Tim Düsterhus Date: Fri, 21 Oct 2016 15:15:11 +0000 (+0200) Subject: Update composer dependencies X-Git-Tag: 3.0.0_Beta_5~85^2 X-Git-Url: https://git.stricted.de/?a=commitdiff_plain;h=3d7c273b473ce911c03f6435e270e861a448249a;p=GitHub%2FWoltLab%2FWCF.git Update composer dependencies --- diff --git a/wcfsetup/install/files/lib/system/api/composer.json b/wcfsetup/install/files/lib/system/api/composer.json index 08eb7faed9..a2ebfdb80b 100644 --- a/wcfsetup/install/files/lib/system/api/composer.json +++ b/wcfsetup/install/files/lib/system/api/composer.json @@ -3,8 +3,8 @@ "vendor-dir": "./" }, "require": { - "ezyang/htmlpurifier": "4.7.*", + "ezyang/htmlpurifier": "4.8.*", "erusev/parsedown": "1.6.*", - "pelago/emogrifier": "1.0.*" + "pelago/emogrifier": "1.1.*" } } diff --git a/wcfsetup/install/files/lib/system/api/composer.lock b/wcfsetup/install/files/lib/system/api/composer.lock index 370165e5eb..3b446b280d 100644 --- a/wcfsetup/install/files/lib/system/api/composer.lock +++ b/wcfsetup/install/files/lib/system/api/composer.lock @@ -4,8 +4,8 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", "This file is @generated automatically" ], - "hash": "7403d7c709a9942dc2f75396d5fe55fe", - "content-hash": "42237c86b167290edb2d1cd117a93105", + "hash": "b368975e316f52b2af949a6d8fdc560e", + "content-hash": "d9f12e8c782481ed844e535718c478c3", "packages": [ { "name": "erusev/parsedown", @@ -48,16 +48,16 @@ }, { "name": "ezyang/htmlpurifier", - "version": "v4.7.0", + "version": "v4.8.0", "source": { "type": "git", "url": "https://github.com/ezyang/htmlpurifier.git", - "reference": "ae1828d955112356f7677c465f94f7deb7d27a40" + "reference": "d0c392f77d2f2a3dcf7fcb79e2a1e2b8804e75b2" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/ezyang/htmlpurifier/zipball/ae1828d955112356f7677c465f94f7deb7d27a40", - "reference": "ae1828d955112356f7677c465f94f7deb7d27a40", + "url": 
"https://api.github.com/repos/ezyang/htmlpurifier/zipball/d0c392f77d2f2a3dcf7fcb79e2a1e2b8804e75b2", + "reference": "d0c392f77d2f2a3dcf7fcb79e2a1e2b8804e75b2", "shasum": "" }, "require": { @@ -88,35 +88,33 @@ "keywords": [ "html" ], - "time": "2015-08-05 01:03:42" + "time": "2016-07-16 12:58:58" }, { "name": "pelago/emogrifier", - "version": "V1.0.0", + "version": "V1.1.0", "source": { "type": "git", "url": "https://github.com/jjriv/emogrifier.git", - "reference": "1160bcbc523c7941d2d0dc2a9e59c51c66420b4b" + "reference": "dd9442740e044a11968bf6a5d94460a5426a2419" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/jjriv/emogrifier/zipball/1160bcbc523c7941d2d0dc2a9e59c51c66420b4b", - "reference": "1160bcbc523c7941d2d0dc2a9e59c51c66420b4b", + "url": "https://api.github.com/repos/jjriv/emogrifier/zipball/dd9442740e044a11968bf6a5d94460a5426a2419", + "reference": "dd9442740e044a11968bf6a5d94460a5426a2419", "shasum": "" }, "require": { - "ext-mbstring": "*", - "php": ">=5.4.0" + "php": ">=5.4.0,<=7.1.99" }, "require-dev": { - "phpunit/phpunit": "4.8.11", - "squizlabs/php_codesniffer": "2.3.4", - "typo3-ci/typo3sniffpool": "2.1.1" + "phpunit/phpunit": "4.8.27", + "squizlabs/php_codesniffer": "2.6.0" }, "type": "library", "extra": { "branch-alias": { - "dev-master": "1.1.x-dev" + "dev-master": "1.2.x-dev" } }, "autoload": { @@ -150,7 +148,7 @@ ], "description": "Converts CSS styles into inline style attributes in your HTML code", "homepage": "http://www.pelagodesign.com/sidecar/emogrifier/", - "time": "2015-10-14 22:22:15" + "time": "2016-09-20 15:15:23" } ], "packages-dev": [], diff --git a/wcfsetup/install/files/lib/system/api/composer/ClassLoader.php b/wcfsetup/install/files/lib/system/api/composer/ClassLoader.php index ff6ecfb822..ac67d302a1 100644 --- a/wcfsetup/install/files/lib/system/api/composer/ClassLoader.php +++ b/wcfsetup/install/files/lib/system/api/composer/ClassLoader.php @@ -53,8 +53,8 @@ class ClassLoader private $useIncludePath = false; 
private $classMap = array(); - private $classMapAuthoritative = false; + private $missingClasses = array(); public function getPrefixes() { @@ -322,20 +322,20 @@ class ClassLoader if (isset($this->classMap[$class])) { return $this->classMap[$class]; } - if ($this->classMapAuthoritative) { + if ($this->classMapAuthoritative || isset($this->missingClasses[$class])) { return false; } $file = $this->findFileWithExtension($class, '.php'); // Search for Hack files if we are running on HHVM - if ($file === null && defined('HHVM_VERSION')) { + if (false === $file && defined('HHVM_VERSION')) { $file = $this->findFileWithExtension($class, '.hh'); } - if ($file === null) { + if (false === $file) { // Remember that this class does not exist. - return $this->classMap[$class] = false; + $this->missingClasses[$class] = true; } return $file; @@ -399,6 +399,8 @@ class ClassLoader if ($this->useIncludePath && $file = stream_resolve_include_path($logicalPathPsr0)) { return $file; } + + return false; } } diff --git a/wcfsetup/install/files/lib/system/api/composer/installed.json b/wcfsetup/install/files/lib/system/api/composer/installed.json index 2c9d6c7777..6d47f57a97 100644 --- a/wcfsetup/install/files/lib/system/api/composer/installed.json +++ b/wcfsetup/install/files/lib/system/api/composer/installed.json @@ -1,50 +1,4 @@ [ - { - "name": "ezyang/htmlpurifier", - "version": "v4.7.0", - "version_normalized": "4.7.0.0", - "source": { - "type": "git", - "url": "https://github.com/ezyang/htmlpurifier.git", - "reference": "ae1828d955112356f7677c465f94f7deb7d27a40" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/ezyang/htmlpurifier/zipball/ae1828d955112356f7677c465f94f7deb7d27a40", - "reference": "ae1828d955112356f7677c465f94f7deb7d27a40", - "shasum": "" - }, - "require": { - "php": ">=5.2" - }, - "time": "2015-08-05 01:03:42", - "type": "library", - "installation-source": "dist", - "autoload": { - "psr-0": { - "HTMLPurifier": "library/" - }, - "files": [ - 
"library/HTMLPurifier.composer.php" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "LGPL" - ], - "authors": [ - { - "name": "Edward Z. Yang", - "email": "admin@htmlpurifier.org", - "homepage": "http://ezyang.com" - } - ], - "description": "Standards compliant HTML filter written in PHP", - "homepage": "http://htmlpurifier.org/", - "keywords": [ - "html" - ] - }, { "name": "erusev/parsedown", "version": "1.6.0", @@ -88,33 +42,31 @@ }, { "name": "pelago/emogrifier", - "version": "V1.0.0", - "version_normalized": "1.0.0.0", + "version": "V1.1.0", + "version_normalized": "1.1.0.0", "source": { "type": "git", "url": "https://github.com/jjriv/emogrifier.git", - "reference": "1160bcbc523c7941d2d0dc2a9e59c51c66420b4b" + "reference": "dd9442740e044a11968bf6a5d94460a5426a2419" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/jjriv/emogrifier/zipball/1160bcbc523c7941d2d0dc2a9e59c51c66420b4b", - "reference": "1160bcbc523c7941d2d0dc2a9e59c51c66420b4b", + "url": "https://api.github.com/repos/jjriv/emogrifier/zipball/dd9442740e044a11968bf6a5d94460a5426a2419", + "reference": "dd9442740e044a11968bf6a5d94460a5426a2419", "shasum": "" }, "require": { - "ext-mbstring": "*", - "php": ">=5.4.0" + "php": ">=5.4.0,<=7.1.99" }, "require-dev": { - "phpunit/phpunit": "4.8.11", - "squizlabs/php_codesniffer": "2.3.4", - "typo3-ci/typo3sniffpool": "2.1.1" + "phpunit/phpunit": "4.8.27", + "squizlabs/php_codesniffer": "2.6.0" }, - "time": "2015-10-14 22:22:15", + "time": "2016-09-20 15:15:23", "type": "library", "extra": { "branch-alias": { - "dev-master": "1.1.x-dev" + "dev-master": "1.2.x-dev" } }, "installation-source": "dist", @@ -149,5 +101,51 @@ ], "description": "Converts CSS styles into inline style attributes in your HTML code", "homepage": "http://www.pelagodesign.com/sidecar/emogrifier/" + }, + { + "name": "ezyang/htmlpurifier", + "version": "v4.8.0", + "version_normalized": "4.8.0.0", + "source": { + "type": "git", + "url": 
"https://github.com/ezyang/htmlpurifier.git", + "reference": "d0c392f77d2f2a3dcf7fcb79e2a1e2b8804e75b2" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/ezyang/htmlpurifier/zipball/d0c392f77d2f2a3dcf7fcb79e2a1e2b8804e75b2", + "reference": "d0c392f77d2f2a3dcf7fcb79e2a1e2b8804e75b2", + "shasum": "" + }, + "require": { + "php": ">=5.2" + }, + "time": "2016-07-16 12:58:58", + "type": "library", + "installation-source": "dist", + "autoload": { + "psr-0": { + "HTMLPurifier": "library/" + }, + "files": [ + "library/HTMLPurifier.composer.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "LGPL" + ], + "authors": [ + { + "name": "Edward Z. Yang", + "email": "admin@htmlpurifier.org", + "homepage": "http://ezyang.com" + } + ], + "description": "Standards compliant HTML filter written in PHP", + "homepage": "http://htmlpurifier.org/", + "keywords": [ + "html" + ] } ] diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/INSTALL b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/INSTALL index 677c04aa04..e6dd02afa7 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/INSTALL +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/INSTALL @@ -15,10 +15,8 @@ with these contents. --------------------------------------------------------------------------- 1. Compatibility -HTML Purifier is PHP 5 only, and is actively tested from PHP 5.0.5 and -up. It has no core dependencies with other libraries. PHP -4 support was deprecated on December 31, 2007 with HTML Purifier 3.0.0. -HTML Purifier is not compatible with zend.ze1_compatibility_mode. +HTML Purifier is PHP 5 and PHP 7, and is actively tested from PHP 5.0.5 +and up. It has no core dependencies with other libraries. 
These optional extensions can enhance the capabilities of HTML Purifier: @@ -29,7 +27,10 @@ These optional extensions can enhance the capabilities of HTML Purifier: These optional libraries can enhance the capabilities of HTML Purifier: * CSSTidy : Clean CSS stylesheets using %Core.ExtractStyleBlocks + Note: You should use the modernized fork of CSSTidy available + at https://github.com/Cerdic/CSSTidy * Net_IDNA2 (PEAR) : IRI support using %Core.EnableIDNA + Note: This is not necessary for PHP 5.3 or later --------------------------------------------------------------------------- 2. Reconnaissance @@ -305,11 +306,9 @@ appropriate permissions using: chmod -R 0755 HTMLPurifier/DefinitionCache/Serializer If the above command doesn't work, you may need to assign write permissions -to all. This may be necessary if your webserver runs as nobody, but is -not recommended since it means any other user can write files in the -directory. Use: +to group: - chmod -R 0777 HTMLPurifier/DefinitionCache/Serializer + chmod -R 0775 HTMLPurifier/DefinitionCache/Serializer You can also chmod files via your FTP client; this option is usually accessible by right clicking the corresponding directory and diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/INSTALL.fr.utf8 b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/INSTALL.fr.utf8 index 06e628cc96..95164abba5 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/INSTALL.fr.utf8 +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/INSTALL.fr.utf8 @@ -1,4 +1,4 @@ - + Installation Comment installer HTML Purifier diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/NEWS b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/NEWS index a9124af1a1..039c31e939 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/NEWS +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/NEWS @@ -9,6 +9,37 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier 
. Internal change ========================== +4.8.0, released 2016-07-16 +# By default, when a link has a target attribute associated + with it, we now also add rel="noreferrer" in order to + prevent the new window from being able to overwrite + the original frame. To disable this protection, + set %HTML.TargetNoreferrer to FALSE. +! Full PHP 7 compatibility, the test suite is ALL GO. +! %CSS.AllowDuplicates permits duplicate CSS properties. +! Support for 'tel' URIs. +! Partial support for 'border-radius' properties when %CSS.AllowProprietary is true. + The slash syntax, i.e., 'border-radius: 2em 1em 4em / 0.5em 3em' is not + yet supported. +! %Attr.ID.HTML5 turns on HTML5-style ID handling. +- alt truncation could result in malformed UTF-8 sequence. Don't + truncate. Thanks Brandon Farber for reporting. +- Linkify regex is smarter, based off of Gruber's regex. +- IDNA supported natively on PHP 5.3 and later. +- Non all-numeric top-level names (e.g., foo.1f, 1f) are now + allowed. +- Minor bounds error fix to squash a PHP 7 notice. +- Support non-/tmp temporary directories for data:// validation +- Give a better error message when a user attempts to allow + ul/ol without allowing li. +- On some versions of PHP, the Serializer DefinitionCache could + infinite loop when the directory exists but is not listable. (#49) +- Don't match for inside comments with + %Core.ConvertDocumentToFragment. (#67) +- SafeObject is now less case sensitive. (#57) +- AutoFormat.RemoveEmpty.Predicate now correctly renders in + web form. (#85) + 4.7.0, released 2015-08-04 # opacity is now considered a "tricky" CSS property rather than a proprietary one. 
diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/README b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/README deleted file mode 100644 index 53f26f1c28..0000000000 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/README +++ /dev/null @@ -1,24 +0,0 @@ - -README - All about HTML Purifier - -HTML Purifier is an HTML filtering solution that uses a unique combination -of robust whitelists and agressive parsing to ensure that not only are -XSS attacks thwarted, but the resulting HTML is standards compliant. - -HTML Purifier is oriented towards richly formatted documents from -untrusted sources that require CSS and a full tag-set. This library can -be configured to accept a more restrictive set of tags, but it won't be -as efficient as more bare-bones parsers. It will, however, do the job -right, which may be more important. - -Places to go: - -* See INSTALL for a quick installation guide -* See docs/ for developer-oriented documentation, code examples and - an in-depth installation guide. -* See WYSIWYG for information on editors like TinyMCE and FCKeditor - -HTML Purifier can be found on the web at: http://htmlpurifier.org/ - - vim: et sw=4 sts=4 diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/README.md b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/README.md new file mode 100644 index 0000000000..029369f6f3 --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/README.md @@ -0,0 +1,29 @@ +HTML Purifier +============= + +HTML Purifier is an HTML filtering solution that uses a unique combination +of robust whitelists and agressive parsing to ensure that not only are +XSS attacks thwarted, but the resulting HTML is standards compliant. + +HTML Purifier is oriented towards richly formatted documents from +untrusted sources that require CSS and a full tag-set. 
This library can +be configured to accept a more restrictive set of tags, but it won't be +as efficient as more bare-bones parsers. It will, however, do the job +right, which may be more important. + +Places to go: + +* See INSTALL for a quick installation guide +* See docs/ for developer-oriented documentation, code examples and + an in-depth installation guide. +* See WYSIWYG for information on editors like TinyMCE and FCKeditor + +HTML Purifier can be found on the web at: [http://htmlpurifier.org/](http://htmlpurifier.org/) + +## Installation + +Package available on [Composer](https://packagist.org/packages/ezyang/htmlpurifier). + +If you're using Composer to manage dependencies, you can use + + $ composer require "ezyang/htmlpurifier": "dev-master" diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/TODO b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/TODO index a92abf2802..1afb33cbf7 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/TODO +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/TODO @@ -32,7 +32,7 @@ Things to do as soon as possible: FUTURE VERSIONS --------------- -4.8 release [OMG CONFIG PONIES] +4.9 release [OMG CONFIG PONIES] ! Fix Printer. It's from the old days when we didn't have decent XML classes ! 
Factor demo.php into a set of Printer classes, and then create a stub file for users here (inside the actual HTML Purifier library) diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/VERSION b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/VERSION index 1163055e28..6ca6df113f 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/VERSION +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/VERSION @@ -1 +1 @@ -4.7.0 \ No newline at end of file +4.8.0 \ No newline at end of file diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/WHATSNEW b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/WHATSNEW index 4e5eb2b691..7acce06dfb 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/WHATSNEW +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/WHATSNEW @@ -1,4 +1,9 @@ -HTML Purifier 4.7.0 is a bugfix release, collecting two years -worth of accumulated bug fixes. Highlighted bugfixes are updated -YouTube filter code, corrected rgb() CSS parsing, and one new -configuration option, %AutoFormat.RemoveEmpty.Predicate. +HTML Purifier 4.8.0 is a bugfix release, collecting a year +of accumulated bug fixes. In particular, we fixed some minor +bugs and now declare full PHP 7 compatibility. The primary +backwards-incompatible change is that HTML Purifier will now +add rel="noreferrer" to all links with target attributes +(you can disable this with %HTML.TargetNoReferrer.) Other +changes: new configuration options %CSS.AllowDuplicates and +%Attr.ID.HTML5; border-radius is partially supported when +%CSS.AllowProprietary, and tel URIs are supported by default. 
diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier.includes.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier.includes.php index fdb58c2d37..b1131ef910 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier.includes.php +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier.includes.php @@ -7,7 +7,7 @@ * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS * FILE, changes will be overwritten the next time the script is run. * - * @version 4.7.0 + * @version 4.8.0 * * @warning * You must *not* include any other HTML Purifier files before this file, @@ -137,6 +137,7 @@ require 'HTMLPurifier/AttrTransform/SafeObject.php'; require 'HTMLPurifier/AttrTransform/SafeParam.php'; require 'HTMLPurifier/AttrTransform/ScriptRequired.php'; require 'HTMLPurifier/AttrTransform/TargetBlank.php'; +require 'HTMLPurifier/AttrTransform/TargetNoreferrer.php'; require 'HTMLPurifier/AttrTransform/Textarea.php'; require 'HTMLPurifier/ChildDef/Chameleon.php'; require 'HTMLPurifier/ChildDef/Custom.php'; @@ -175,6 +176,7 @@ require 'HTMLPurifier/HTMLModule/StyleAttribute.php'; require 'HTMLPurifier/HTMLModule/Tables.php'; require 'HTMLPurifier/HTMLModule/Target.php'; require 'HTMLPurifier/HTMLModule/TargetBlank.php'; +require 'HTMLPurifier/HTMLModule/TargetNoreferrer.php'; require 'HTMLPurifier/HTMLModule/Text.php'; require 'HTMLPurifier/HTMLModule/Tidy.php'; require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php'; @@ -225,5 +227,6 @@ require 'HTMLPurifier/URIScheme/https.php'; require 'HTMLPurifier/URIScheme/mailto.php'; require 'HTMLPurifier/URIScheme/news.php'; require 'HTMLPurifier/URIScheme/nntp.php'; +require 'HTMLPurifier/URIScheme/tel.php'; require 'HTMLPurifier/VarParser/Flexible.php'; require 'HTMLPurifier/VarParser/Native.php'; diff --git 
a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier.php index c6041bc113..38a78e8da8 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier.php +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier.php @@ -19,7 +19,7 @@ */ /* - HTML Purifier 4.7.0 - Standards Compliant HTML Filtering + HTML Purifier 4.8.0 - Standards Compliant HTML Filtering Copyright (C) 2006-2008 Edward Z. Yang This library is free software; you can redistribute it and/or @@ -58,12 +58,12 @@ class HTMLPurifier * Version of HTML Purifier. * @type string */ - public $version = '4.7.0'; + public $version = '4.8.0'; /** * Constant with version of HTML Purifier. */ - const VERSION = '4.7.0'; + const VERSION = '4.8.0'; /** * Global configuration object. @@ -104,7 +104,7 @@ class HTMLPurifier /** * Initializes the purifier. * - * @param HTMLPurifier_Config $config Optional HTMLPurifier_Config object + * @param HTMLPurifier_Config|mixed $config Optional HTMLPurifier_Config object * for all instances of the purifier, if omitted, a default * configuration is supplied (which can be overridden on a * per-use basis). diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier.safe-includes.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier.safe-includes.php index 9dea6d1ed5..fe587c7863 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier.safe-includes.php +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier.safe-includes.php @@ -131,6 +131,7 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php'; require_once $__dir . 
'/HTMLPurifier/AttrTransform/TargetBlank.php'; +require_once $__dir . '/HTMLPurifier/AttrTransform/TargetNoreferrer.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php'; @@ -169,6 +170,7 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/TargetBlank.php'; +require_once $__dir . '/HTMLPurifier/HTMLModule/TargetNoreferrer.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php'; @@ -219,5 +221,6 @@ require_once $__dir . '/HTMLPurifier/URIScheme/https.php'; require_once $__dir . '/HTMLPurifier/URIScheme/mailto.php'; require_once $__dir . '/HTMLPurifier/URIScheme/news.php'; require_once $__dir . '/HTMLPurifier/URIScheme/nntp.php'; +require_once $__dir . '/HTMLPurifier/URIScheme/tel.php'; require_once $__dir . '/HTMLPurifier/VarParser/Flexible.php'; require_once $__dir . 
'/HTMLPurifier/VarParser/Native.php'; diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrCollections.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrCollections.php index 4f6c2e39a2..c7b17cf144 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrCollections.php +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrCollections.php @@ -21,6 +21,11 @@ class HTMLPurifier_AttrCollections * @param HTMLPurifier_HTMLModule[] $modules Hash array of HTMLPurifier_HTMLModule members */ public function __construct($attr_types, $modules) + { + $this->doConstruct($attr_types, $modules); + } + + public function doConstruct($attr_types, $modules) { // load extensions from the modules foreach ($modules as $module) { diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/CSS.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/CSS.php index 02c1641fb2..2b977ca38e 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/CSS.php +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/CSS.php @@ -25,6 +25,7 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef $css = $this->parseCDATA($css); $definition = $config->getCSSDefinition(); + $allow_duplicates = $config->get("CSS.AllowDuplicates"); // we're going to break the spec and explode by semicolons. // This is because semicolon rarely appears in escaped form @@ -34,6 +35,7 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef $declarations = explode(';', $css); $propvalues = array(); + $new_declarations = ''; /** * Name of the current CSS property being validated. 
@@ -83,7 +85,11 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef if ($result === false) { continue; } - $propvalues[$property] = $result; + if ($allow_duplicates) { + $new_declarations .= "$property:$result;"; + } else { + $propvalues[$property] = $result; + } } $context->destroy('CurrentCSSProperty'); @@ -92,7 +98,6 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef // slightly inefficient, but it's the only way of getting rid of // duplicates. Perhaps config to optimize it, but not now. - $new_declarations = ''; foreach ($propvalues as $prop => $value) { $new_declarations .= "$prop:$value;"; } diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/URI.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/URI.php index f9434230e2..6617acace5 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/URI.php +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/URI.php @@ -33,6 +33,9 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI return false; } $uri_string = substr($uri_string, 4); + if (strlen($uri_string) == 0) { + return false; + } $new_length = strlen($uri_string) - 1; if ($uri_string[$new_length] != ')') { return false; diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php index 3d86efb44c..4ba45610fe 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php @@ -72,18 +72,26 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef // we purposely avoid using regex, hopefully this is faster - if 
(ctype_alpha($id)) { - $result = true; - } else { - if (!ctype_alpha(@$id[0])) { + if ($config->get('Attr.ID.HTML5') === true) { + if (preg_match('/[\t\n\x0b\x0c ]/', $id)) { return false; } - // primitive style of regexps, I suppose - $trim = trim( - $id, - 'A..Za..z0..9:-._' - ); - $result = ($trim === ''); + } else { + if (ctype_alpha($id)) { + // OK + } else { + if (!ctype_alpha(@$id[0])) { + return false; + } + // primitive style of regexps, I suppose + $trim = trim( + $id, + 'A..Za..z0..9:-._' + ); + if ($trim !== '') { + return false; + } + } } $regexp = $config->get('Attr.IDBlacklistRegexp'); @@ -91,14 +99,14 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef return false; } - if (!$this->selector && $result) { + if (!$this->selector) { $id_accumulator->add($id); } // if no change was made to the ID, return the result // else, return the new id if stripping whitespace made it // valid, or return false. - return $result ? $id : false; + return $id; } } diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php index e7df800b1e..151f7aff75 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php @@ -76,24 +76,33 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef // fairly well supported. $underscore = $config->get('Core.AllowHostnameUnderscore') ? '_' : ''; + // Based off of RFC 1738, but amended so that + // as per RFC 3696, the top label need only not be all numeric. 
// The productions describing this are: $a = '[a-z]'; // alpha $an = '[a-z0-9]'; // alphanum $and = "[a-z0-9-$underscore]"; // alphanum | "-" // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum - $domainlabel = "$an($and*$an)?"; - // toplabel = alpha | alpha *( alphanum | "-" ) alphanum - $toplabel = "$a($and*$an)?"; + $domainlabel = "$an(?:$and*$an)?"; + // AMENDED as per RFC 3696 + // toplabel = alphanum | alphanum *( alphanum | "-" ) alphanum + // side condition: not all numeric + $toplabel = "$an(?:$and*$an)?"; // hostname = *( domainlabel "." ) toplabel [ "." ] - if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) { - return $string; + if (preg_match("/^(?:$domainlabel\.)*($toplabel)\.?$/i", $string, $matches)) { + if (!ctype_digit($matches[1])) { + return $string; + } } + // PHP 5.3 and later support this functionality natively + if (function_exists('idn_to_ascii')) { + return idn_to_ascii($string); + // If we have Net_IDNA2 support, we can support IRIs by // punycoding them. 
(This is the most portable thing to do, // since otherwise we have to assume browsers support - - if ($config->get('Core.EnableIDNA')) { + } elseif ($config->get('Core.EnableIDNA')) { $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true)); // we need to encode each period separately $parts = explode('.', $string); diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrTransform/ImgRequired.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrTransform/ImgRequired.php index 7df6cb3e1b..235ebb34b6 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrTransform/ImgRequired.php +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrTransform/ImgRequired.php @@ -32,8 +32,7 @@ class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform if ($src) { $alt = $config->get('Attr.DefaultImageAlt'); if ($alt === null) { - // truncate if the alt is too long - $attr['alt'] = substr(basename($attr['src']), 0, 40); + $attr['alt'] = basename($attr['src']); } else { $attr['alt'] = $alt; } diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrTransform/TargetNoreferrer.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrTransform/TargetNoreferrer.php new file mode 100644 index 0000000000..587dc2e079 --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/AttrTransform/TargetNoreferrer.php @@ -0,0 +1,37 @@ +info['page-break-inside'] = new HTMLPurifier_AttrDef_Enum(array('auto', 'avoid')); + $border_radius = new HTMLPurifier_AttrDef_CSS_Composite( + array( + new HTMLPurifier_AttrDef_CSS_Percentage(true), // disallow negative + new HTMLPurifier_AttrDef_CSS_Length('0') // disallow negative + )); + + $this->info['border-top-left-radius'] = + 
$this->info['border-top-right-radius'] = + $this->info['border-bottom-right-radius'] = + $this->info['border-bottom-left-radius'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 2); + // TODO: support SLASH syntax + $this->info['border-radius'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 4); + } /** diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ChildDef/List.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ChildDef/List.php index 891b9f6f5b..5a53a4b494 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ChildDef/List.php +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ChildDef/List.php @@ -38,6 +38,12 @@ class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef return false; } + // if li is not allowed, delete parent node + if (!isset($config->getHTMLDefinition()->info['li'])) { + trigger_error("Cannot allow ul/ol without allowing li", E_USER_WARNING); + return false; + } + // the new set of children $result = array(); diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Config.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Config.php index 2b2db0c264..7b9dcf0ec5 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Config.php +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Config.php @@ -21,7 +21,7 @@ class HTMLPurifier_Config * HTML Purifier's version * @type string */ - public $version = '4.7.0'; + public $version = '4.8.0'; /** * Whether or not to automatically finalize diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema.ser b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema.ser index 1e6ccd2275..0a7a406e13 100644 Binary 
files a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema.ser and b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema.ser differ diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.ID.HTML5.txt b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.ID.HTML5.txt new file mode 100644 index 0000000000..735d4b7a10 --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Attr.ID.HTML5.txt @@ -0,0 +1,10 @@ +Attr.ID.HTML5 +TYPE: bool/null +DEFAULT: null +VERSION: 4.8.0 +--DESCRIPTION-- +In HTML5, restrictions on the format of the id attribute have been significantly +relaxed, such that any string is valid so long as it contains no spaces and +is at least one character. In lieu of a general HTML5 compatibility flag, +set this configuration directive to true to use the relaxed rules. +--# vim: et sw=4 sts=4 diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.AllowDuplicates.txt b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.AllowDuplicates.txt new file mode 100644 index 0000000000..4d054b1f07 --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.AllowDuplicates.txt @@ -0,0 +1,11 @@ +CSS.AllowDuplicates +TYPE: bool +DEFAULT: false +VERSION: 4.8.0 +--DESCRIPTION-- +

+ By default, HTML Purifier removes duplicate CSS properties, + like color:red; color:blue. If this is set to + true, duplicate properties are allowed. +

+--# vim: et sw=4 sts=4 diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPermissions.txt b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPermissions.txt index b2b83d9ab6..2e0cc81044 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPermissions.txt +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPermissions.txt @@ -1,5 +1,5 @@ Cache.SerializerPermissions -TYPE: int +TYPE: int/null VERSION: 4.3.0 DEFAULT: 0755 --DESCRIPTION-- @@ -8,4 +8,9 @@ DEFAULT: 0755 Directory permissions of the files and directories created inside the DefinitionCache/Serializer or other custom serializer path.

+

+ In HTML Purifier 4.8.0, this also supports NULL, + which means that no chmod'ing or directory creation shall + occur. +

--# vim: et sw=4 sts=4 diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoreferrer.txt b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoreferrer.txt new file mode 100644 index 0000000000..cb5a0b0e5e --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoreferrer.txt @@ -0,0 +1,9 @@ +HTML.TargetNoreferrer +TYPE: bool +VERSION: 4.8.0 +DEFAULT: TRUE +--DESCRIPTION-- +If enabled, noreferrer rel attributes are added to links which have +a target attribute associated with them. This prevents malicious +destinations from overwriting the original window. +--# vim: et sw=4 sts=4 diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt index 666635a5ff..eb97307e20 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt @@ -8,6 +8,7 @@ array ( 'ftp' => true, 'nntp' => true, 'news' => true, + 'tel' => true, ) --DESCRIPTION-- Whitelist that defines the schemes that a URI is allowed to have. 
This diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/DefinitionCache.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/DefinitionCache.php index 67bb5b1e69..9aa8ff354f 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/DefinitionCache.php +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/DefinitionCache.php @@ -118,7 +118,7 @@ abstract class HTMLPurifier_DefinitionCache /** * Clears all expired (older version or revision) objects from cache - * @note Be carefuly implementing this method as flush. Flush must + * @note Be careful implementing this method as flush. Flush must * not interfere with other Definition types, and cleanup() * should not be repeatedly called by userland code. * @param HTMLPurifier_Config $config diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/DefinitionCache/Serializer.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/DefinitionCache/Serializer.php index ce268d91b4..f930c6b946 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/DefinitionCache/Serializer.php +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/DefinitionCache/Serializer.php @@ -97,6 +97,12 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac } $dir = $this->generateDirectoryPath($config); $dh = opendir($dir); + // Apparently, on some versions of PHP, readdir will return + // an empty string if you pass an invalid argument to readdir. + // So you need this test. See #49. + if (false === $dh) { + return false; + } while (false !== ($filename = readdir($dh))) { if (empty($filename)) { continue; @@ -106,6 +112,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac } unlink($dir . '/' . 
$filename); } + return true; } /** @@ -119,6 +126,10 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac } $dir = $this->generateDirectoryPath($config); $dh = opendir($dir); + // See #49 (and above). + if (false === $dh) { + return false; + } while (false !== ($filename = readdir($dh))) { if (empty($filename)) { continue; @@ -131,6 +142,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac unlink($dir . '/' . $filename); } } + return true; } /** @@ -186,11 +198,12 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac if ($result !== false) { // set permissions of the new file (no execute) $chmod = $config->get('Cache.SerializerPermissions'); - if (!$chmod) { - $chmod = 0644; // invalid config or simpletest + if ($chmod === null) { + // don't do anything + } else { + $chmod = $chmod & 0666; + chmod($file, $chmod); } - $chmod = $chmod & 0666; - chmod($file, $chmod); } return $result; } @@ -204,9 +217,6 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac { $directory = $this->generateDirectoryPath($config); $chmod = $config->get('Cache.SerializerPermissions'); - if (!$chmod) { - $chmod = 0755; // invalid config or simpletest - } if (!is_dir($directory)) { $base = $this->generateBaseDirectoryPath($config); if (!is_dir($base)) { @@ -219,7 +229,19 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac } elseif (!$this->_testPermissions($base, $chmod)) { return false; } - mkdir($directory, $chmod); + if ($chmod === null) { + trigger_error( + 'Base directory ' . $base . ' does not exist, + please create or change using %Cache.SerializerPath', + E_USER_WARNING + ); + return false; + } + if ($chmod !== null) { + mkdir($directory, $chmod); + } else { + mkdir($directory); + } if (!$this->_testPermissions($directory, $chmod)) { trigger_error( 'Base directory ' . $base . 
' does not exist, @@ -256,7 +278,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac ); return false; } - if (function_exists('posix_getuid')) { + if (function_exists('posix_getuid') && $chmod !== null) { // POSIX system, we can give more specific advice if (fileowner($dir) === posix_getuid()) { // we can chmod it ourselves diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/HTMLModule/TargetNoreferrer.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/HTMLModule/TargetNoreferrer.php new file mode 100644 index 0000000000..32484d6019 --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/HTMLModule/TargetNoreferrer.php @@ -0,0 +1,21 @@ +addBlankElement('a'); + $a->attr_transform_post[] = new HTMLPurifier_AttrTransform_TargetNoreferrer(); + } +} diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php index f3a17cb03b..2546c043c4 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php @@ -271,6 +271,11 @@ class HTMLPurifier_HTMLModuleManager if ($config->get('HTML.TargetBlank')) { $modules[] = 'TargetBlank'; } + // NB: HTML.TargetNoreferrer must be AFTER HTML.TargetBlank + // so that its post-attr-transform gets run afterwards. 
+ if ($config->get('HTML.TargetNoreferrer')) { + $modules[] = 'TargetNoreferrer'; + } // merge in custom modules $modules = array_merge($modules, $this->userModules); diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Injector/Linkify.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Injector/Linkify.php index 069708c250..74f83eaa7d 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Injector/Linkify.php +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Injector/Linkify.php @@ -31,9 +31,14 @@ class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector return; } - // there is/are URL(s). Let's split the string: - // Note: this regex is extremely permissive - $bits = preg_split('#((?:https?|ftp)://[^\s\'",<>()]+)#Su', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE); + // there is/are URL(s). Let's split the string. + // We use this regex: + // https://gist.github.com/gruber/249502 + // but with @cscott's backtracking fix and also + // the Unicode characters un-Unicodified. 
+ $bits = preg_split( + '/\\b((?:[a-z][\\w\\-]+:(?:\\/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}\\/)(?:[^\\s()<>]|\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\))+(?:\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:\'".,<>?\x{00ab}\x{00bb}\x{201c}\x{201d}\x{2018}\x{2019}]))/iu', + $token->data, -1, PREG_SPLIT_DELIM_CAPTURE); $token = array(); diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Injector/RemoveEmpty.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Injector/RemoveEmpty.php index 01353ff1d5..0ebc477c68 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Injector/RemoveEmpty.php +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Injector/RemoveEmpty.php @@ -46,6 +46,12 @@ class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector $this->removeNbsp = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp'); $this->removeNbspExceptions = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions'); $this->exclude = $config->get('AutoFormat.RemoveEmpty.Predicate'); + foreach ($this->exclude as $key => $attrs) { + if (!is_array($attrs)) { + // HACK, see HTMLPurifier/Printer/ConfigForm.php + $this->exclude[$key] = explode(';', $attrs); + } + } $this->attrValidator = new HTMLPurifier_AttrValidator(); } diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Injector/SafeObject.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Injector/SafeObject.php index 3d17e07af2..317f7864dd 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Injector/SafeObject.php +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Injector/SafeObject.php @@ -36,6 +36,7 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector ); /** + * These are all 
lower-case keys. * @type array */ protected $allowedParam = array( @@ -43,7 +44,7 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector 'movie' => true, 'flashvars' => true, 'src' => true, - 'allowFullScreen' => true, // if omitted, assume to be 'false' + 'allowfullscreen' => true, // if omitted, assume to be 'false' ); /** @@ -93,9 +94,11 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector $token->attr['name'] === $this->addParam[$n]) { // keep token, and add to param stack $this->paramStack[$i][$n] = true; - } elseif (isset($this->allowedParam[$n])) { + } elseif (isset($this->allowedParam[strtolower($n)])) { // keep token, don't do anything to it // (could possibly check for duplicates here) + // Note: In principle, parameters should be case sensitive. + // But it seems they are not really; so accept any case. } else { $token = false; } diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Lexer.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Lexer.php index 43732621dc..44c5c659da 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Lexer.php +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Lexer.php @@ -345,12 +345,17 @@ class HTMLPurifier_Lexer public function extractBody($html) { $matches = array(); - $result = preg_match('!]*>(.*)!is', $html, $matches); + $result = preg_match('|(.*?)]*>(.*)|is', $html, $matches); if ($result) { - return $matches[1]; - } else { - return $html; + // Make sure it's not in a comment + $comment_start = strrpos($matches[1], ''); + if ($comment_start === false || + ($comment_end !== false && $comment_end > $comment_start)) { + return $matches[2]; + } } + return $html; } } diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Printer/ConfigForm.php 
b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Printer/ConfigForm.php index 36100ce738..65a7779041 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Printer/ConfigForm.php +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/Printer/ConfigForm.php @@ -327,6 +327,10 @@ class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer case HTMLPurifier_VarParser::HASH: $nvalue = ''; foreach ($value as $i => $v) { + if (is_array($v)) { + // HACK + $v = implode(";", $v); + } $nvalue .= "$i:$v" . PHP_EOL; } $value = $nvalue; diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/URIScheme/data.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/URIScheme/data.php index 6ebca49848..41c49d5533 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/URIScheme/data.php +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/URIScheme/data.php @@ -79,9 +79,18 @@ class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme } else { $raw_data = $data; } + if ( strlen($raw_data) < 12 ) { + // error; exif_imagetype throws exception with small files, + // and this likely indicates a corrupt URI/failed parse anyway + return false; + } // XXX probably want to refactor this into a general mechanism // for filtering arbitrary content types - $file = tempnam("/tmp", ""); + if (function_exists('sys_get_temp_dir')) { + $file = tempnam(sys_get_temp_dir(), ""); + } else { + $file = tempnam("/tmp", ""); + } file_put_contents($file, $raw_data); if (function_exists('exif_imagetype')) { $image_code = exif_imagetype($file); diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/URIScheme/tel.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/URIScheme/tel.php new file mode 
100644 index 0000000000..8cd1933527 --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/library/HTMLPurifier/URIScheme/tel.php @@ -0,0 +1,46 @@ +userinfo = null; + $uri->host = null; + $uri->port = null; + + // Delete all non-numeric characters, non-x characters + // from phone number, EXCEPT for a leading plus sign. + $uri->path = preg_replace('/(?!^\+)[^\dx]/', '', + // Normalize e(x)tension to lower-case + str_replace('X', 'x', $uri->path)); + + return true; + } +} + +// vim: et sw=4 sts=4 diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/.htaccess b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/.htaccess new file mode 100644 index 0000000000..3a42882788 --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/PH5P.patch b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/PH5P.patch new file mode 100644 index 0000000000..763709509b --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/PH5P.patch @@ -0,0 +1,102 @@ +--- C:\Users\Edward\Webs\htmlpurifier\maintenance\PH5P.php 2008-07-07 09:12:12.000000000 -0400 ++++ C:\Users\Edward\Webs\htmlpurifier\maintenance/PH5P.new.php 2008-12-06 02:29:34.988800000 -0500 +@@ -65,7 +65,7 @@ + + public function __construct($data) { + $data = str_replace("\r\n", "\n", $data); +- $date = str_replace("\r", null, $data); ++ $data = str_replace("\r", null, $data); + + $this->data = $data; + $this->char = -1; +@@ -211,7 +211,10 @@ + // If nothing is returned, emit a U+0026 AMPERSAND character token. + // Otherwise, emit the character token that was returned. + $char = (!$entity) ? '&' : $entity; +- $this->emitToken($char); ++ $this->emitToken(array( ++ 'type' => self::CHARACTR, ++ 'data' => $char ++ )); + + // Finally, switch to the data state. 
+ $this->state = 'data'; +@@ -708,7 +711,7 @@ + } elseif($char === '&') { + /* U+0026 AMPERSAND (&) + Switch to the entity in attribute value state. */ +- $this->entityInAttributeValueState('non'); ++ $this->entityInAttributeValueState(); + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) +@@ -738,7 +741,8 @@ + ? '&' + : $entity; + +- $this->emitToken($char); ++ $last = count($this->token['attr']) - 1; ++ $this->token['attr'][$last]['value'] .= $char; + } + + private function bogusCommentState() { +@@ -1066,6 +1070,11 @@ + $this->char++; + + if(in_array($id, $this->entities)) { ++ if ($e_name[$c-1] !== ';') { ++ if ($c < $len && $e_name[$c] == ';') { ++ $this->char++; // consume extra semicolon ++ } ++ } + $entity = $id; + break; + } +@@ -2084,7 +2093,7 @@ + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + +- $this->insertElement($token); ++ $this->insertElement($token, true, true); + break; + } + break; +@@ -3465,7 +3474,18 @@ + } + } + +- private function insertElement($token, $append = true) { ++ private function insertElement($token, $append = true, $check = false) { ++ // Proprietary workaround for libxml2's limitations with tag names ++ if ($check) { ++ // Slightly modified HTML5 tag-name modification, ++ // removing anything that's not an ASCII letter, digit, or hyphen ++ $token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']); ++ // Remove leading hyphens and numbers ++ $token['name'] = ltrim($token['name'], '-0..9'); ++ // In theory, this should ever be needed, but just in case ++ if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice ++ } ++ + $el = $this->dom->createElement($token['name']); + + foreach($token['attr'] as $attr) { +@@ -3659,7 +3679,7 @@ + } + } + +- private function generateImpliedEndTags(array $exclude = array()) { ++ private function generateImpliedEndTags($exclude = array()) { + /* When the steps below require the UA to generate 
implied end tags, + then, if the current node is a dd element, a dt element, an li element, + a p element, a td element, a th element, or a tr element, the UA must +@@ -3673,7 +3693,8 @@ + } + } + +- private function getElementCategory($name) { ++ private function getElementCategory($node) { ++ $name = $node->tagName; + if(in_array($name, $this->special)) + return self::SPECIAL; + diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/PH5P.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/PH5P.php new file mode 100644 index 0000000000..9d83dcbf55 --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/PH5P.php @@ -0,0 +1,3889 @@ +data = $data; + $this->char = -1; + $this->EOF = strlen($data); + $this->tree = new HTML5TreeConstructer; + $this->content_model = self::PCDATA; + + $this->state = 'data'; + + while($this->state !== null) { + $this->{$this->state.'State'}(); + } + } + + public function save() + { + return $this->tree->save(); + } + + private function char() + { + return ($this->char < $this->EOF) + ? $this->data[$this->char] + : false; + } + + private function character($s, $l = 0) + { + if($s + $l < $this->EOF) { + if($l === 0) { + return $this->data[$s]; + } else { + return substr($this->data, $s, $l); + } + } + } + + private function characters($char_class, $start) + { + return preg_replace('#^(['.$char_class.']+).*#s', '\\1', substr($this->data, $start)); + } + + private function dataState() + { + // Consume the next input character + $this->char++; + $char = $this->char(); + + if($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) { + /* U+0026 AMPERSAND (&) + When the content model flag is set to one of the PCDATA or RCDATA + states: switch to the entity data state. Otherwise: treat it as per + the "anything else" entry below. 
*/ + $this->state = 'entityData'; + + } elseif($char === '-') { + /* If the content model flag is set to either the RCDATA state or + the CDATA state, and the escape flag is false, and there are at + least three characters before this one in the input stream, and the + last four characters in the input stream, including this one, are + U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS, + and U+002D HYPHEN-MINUS (""), + set the escape flag to false. */ + if(($this->content_model === self::RCDATA || + $this->content_model === self::CDATA) && $this->escape === true && + $this->character($this->char, 3) === '-->') { + $this->escape = false; + } + + /* In any case, emit the input character as a character token. + Stay in the data state. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => $char + )); + + } elseif($this->char === $this->EOF) { + /* EOF + Emit an end-of-file token. */ + $this->EOF(); + + } elseif($this->content_model === self::PLAINTEXT) { + /* When the content model flag is set to the PLAINTEXT state + THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of + the text and emit it as a character token. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => substr($this->data, $this->char) + )); + + $this->EOF(); + + } else { + /* Anything else + THIS DIFFERS GREATLY FROM THE SPEC: Get as many character that + otherwise would also be treated as a character token and emit it + as a single character token. Stay in the data state. */ + $len = strcspn($this->data, '<&', $this->char); + $char = substr($this->data, $this->char, $len); + $this->char += $len - 1; + + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => $char + )); + + $this->state = 'data'; + } + } + + private function entityDataState() + { + // Attempt to consume an entity. + $entity = $this->entity(); + + // If nothing is returned, emit a U+0026 AMPERSAND character token. 
+ // Otherwise, emit the character token that was returned. + $char = (!$entity) ? '&' : $entity; + $this->emitToken($char); + + // Finally, switch to the data state. + $this->state = 'data'; + } + + private function tagOpenState() + { + switch($this->content_model) { + case self::RCDATA: + case self::CDATA: + /* If the next input character is a U+002F SOLIDUS (/) character, + consume it and switch to the close tag open state. If the next + input character is not a U+002F SOLIDUS (/) character, emit a + U+003C LESS-THAN SIGN character token and switch to the data + state to process the next input character. */ + if($this->character($this->char + 1) === '/') { + $this->char++; + $this->state = 'closeTagOpen'; + + } else { + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => '<' + )); + + $this->state = 'data'; + } + break; + + case self::PCDATA: + // If the content model flag is set to the PCDATA state + // Consume the next input character: + $this->char++; + $char = $this->char(); + + if($char === '!') { + /* U+0021 EXCLAMATION MARK (!) + Switch to the markup declaration open state. */ + $this->state = 'markupDeclarationOpen'; + + } elseif($char === '/') { + /* U+002F SOLIDUS (/) + Switch to the close tag open state. */ + $this->state = 'closeTagOpen'; + + } elseif(preg_match('/^[A-Za-z]$/', $char)) { + /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z + Create a new start tag token, set its tag name to the lowercase + version of the input character (add 0x0020 to the character's code + point), then switch to the tag name state. (Don't emit the token + yet; further details will be filled in before it is emitted.) */ + $this->token = array( + 'name' => strtolower($char), + 'type' => self::STARTTAG, + 'attr' => array() + ); + + $this->state = 'tagName'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Parse error. Emit a U+003C LESS-THAN SIGN character token and a + U+003E GREATER-THAN SIGN character token. 
Switch to the data state. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => '<>' + )); + + $this->state = 'data'; + + } elseif($char === '?') { + /* U+003F QUESTION MARK (?) + Parse error. Switch to the bogus comment state. */ + $this->state = 'bogusComment'; + + } else { + /* Anything else + Parse error. Emit a U+003C LESS-THAN SIGN character token and + reconsume the current input character in the data state. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => '<' + )); + + $this->char--; + $this->state = 'data'; + } + break; + } + } + + private function closeTagOpenState() + { + $next_node = strtolower($this->characters('A-Za-z', $this->char + 1)); + $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName; + + if(($this->content_model === self::RCDATA || $this->content_model === self::CDATA) && + (!$the_same || ($the_same && (!preg_match('/[\t\n\x0b\x0c >\/]/', + $this->character($this->char + 1 + strlen($next_node))) || $this->EOF === $this->char)))) { + /* If the content model flag is set to the RCDATA or CDATA states then + examine the next few characters. If they do not match the tag name of + the last start tag token emitted (case insensitively), or if they do but + they are not immediately followed by one of the following characters: + * U+0009 CHARACTER TABULATION + * U+000A LINE FEED (LF) + * U+000B LINE TABULATION + * U+000C FORM FEED (FF) + * U+0020 SPACE + * U+003E GREATER-THAN SIGN (>) + * U+002F SOLIDUS (/) + * EOF + ...then there is a parse error. Emit a U+003C LESS-THAN SIGN character + token, a U+002F SOLIDUS character token, and switch to the data state + to process the next input character. 
*/ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => 'state = 'data'; + + } else { + /* Otherwise, if the content model flag is set to the PCDATA state, + or if the next few characters do match that tag name, consume the + next input character: */ + $this->char++; + $char = $this->char(); + + if(preg_match('/^[A-Za-z]$/', $char)) { + /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z + Create a new end tag token, set its tag name to the lowercase version + of the input character (add 0x0020 to the character's code point), then + switch to the tag name state. (Don't emit the token yet; further details + will be filled in before it is emitted.) */ + $this->token = array( + 'name' => strtolower($char), + 'type' => self::ENDTAG + ); + + $this->state = 'tagName'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Parse error. Switch to the data state. */ + $this->state = 'data'; + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F + SOLIDUS character token. Reconsume the EOF character in the data state. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => 'char--; + $this->state = 'data'; + + } else { + /* Parse error. Switch to the bogus comment state. */ + $this->state = 'bogusComment'; + } + } + } + + private function tagNameState() + { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Switch to the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. 
Emit the current tag token. Reconsume the EOF + character in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } elseif($char === '/') { + /* U+002F SOLIDUS (/) + Parse error unless this is a permitted slash. Switch to the before + attribute name state. */ + $this->state = 'beforeAttributeName'; + + } else { + /* Anything else + Append the current input character to the current tag token's tag name. + Stay in the tag name state. */ + $this->token['name'] .= strtolower($char); + $this->state = 'tagName'; + } + } + + private function beforeAttributeNameState() + { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($char === '/') { + /* U+002F SOLIDUS (/) + Parse error unless this is a permitted slash. Stay in the before + attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Start a new attribute in the current tag token. Set that attribute's + name to the current input character, and its value to the empty string. + Switch to the attribute name state. 
*/ + $this->token['attr'][] = array( + 'name' => strtolower($char), + 'value' => null + ); + + $this->state = 'attributeName'; + } + } + + private function attributeNameState() + { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the before attribute name state. */ + $this->state = 'afterAttributeName'; + + } elseif($char === '=') { + /* U+003D EQUALS SIGN (=) + Switch to the before attribute value state. */ + $this->state = 'beforeAttributeValue'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($char === '/' && $this->character($this->char + 1) !== '>') { + /* U+002F SOLIDUS (/) + Parse error unless this is a permitted slash. Switch to the before + attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's name. + Stay in the attribute name state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['name'] .= strtolower($char); + + $this->state = 'attributeName'; + } + } + + private function afterAttributeNameState() + { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the after attribute name state. 
*/ + $this->state = 'afterAttributeName'; + + } elseif($char === '=') { + /* U+003D EQUALS SIGN (=) + Switch to the before attribute value state. */ + $this->state = 'beforeAttributeValue'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($char === '/' && $this->character($this->char + 1) !== '>') { + /* U+002F SOLIDUS (/) + Parse error unless this is a permitted slash. Switch to the + before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Start a new attribute in the current tag token. Set that attribute's + name to the current input character, and its value to the empty string. + Switch to the attribute name state. */ + $this->token['attr'][] = array( + 'name' => strtolower($char), + 'value' => null + ); + + $this->state = 'attributeName'; + } + } + + private function beforeAttributeValueState() + { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the before attribute value state. */ + $this->state = 'beforeAttributeValue'; + + } elseif($char === '"') { + /* U+0022 QUOTATION MARK (") + Switch to the attribute value (double-quoted) state. */ + $this->state = 'attributeValueDoubleQuoted'; + + } elseif($char === '&') { + /* U+0026 AMPERSAND (&) + Switch to the attribute value (unquoted) state and reconsume + this input character. 
*/ + $this->char--; + $this->state = 'attributeValueUnquoted'; + + } elseif($char === '\'') { + /* U+0027 APOSTROPHE (') + Switch to the attribute value (single-quoted) state. */ + $this->state = 'attributeValueSingleQuoted'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's value. + Switch to the attribute value (unquoted) state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + + $this->state = 'attributeValueUnquoted'; + } + } + + private function attributeValueDoubleQuotedState() + { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if($char === '"') { + /* U+0022 QUOTATION MARK (") + Switch to the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($char === '&') { + /* U+0026 AMPERSAND (&) + Switch to the entity in attribute value state. */ + $this->entityInAttributeValueState('double'); + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the character + in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's value. + Stay in the attribute value (double-quoted) state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + + $this->state = 'attributeValueDoubleQuoted'; + } + } + + private function attributeValueSingleQuotedState() + { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if($char === '\'') { + /* U+0022 QUOTATION MARK (') + Switch to the before attribute name state. 
*/ + $this->state = 'beforeAttributeName'; + + } elseif($char === '&') { + /* U+0026 AMPERSAND (&) + Switch to the entity in attribute value state. */ + $this->entityInAttributeValueState('single'); + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the character + in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's value. + Stay in the attribute value (single-quoted) state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + + $this->state = 'attributeValueSingleQuoted'; + } + } + + private function attributeValueUnquotedState() + { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Switch to the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($char === '&') { + /* U+0026 AMPERSAND (&) + Switch to the entity in attribute value state. */ + $this->entityInAttributeValueState('non'); + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's value. + Stay in the attribute value (unquoted) state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + + $this->state = 'attributeValueUnquoted'; + } + } + + private function entityInAttributeValueState() + { + // Attempt to consume an entity. + $entity = $this->entity(); + + // If nothing is returned, append a U+0026 AMPERSAND character to the + // current attribute's value. 
Otherwise, emit the character token that + // was returned. + $char = (!$entity) + ? '&' + : $entity; + + $this->emitToken($char); + } + + private function bogusCommentState() + { + /* Consume every character up to the first U+003E GREATER-THAN SIGN + character (>) or the end of the file (EOF), whichever comes first. Emit + a comment token whose data is the concatenation of all the characters + starting from and including the character that caused the state machine + to switch into the bogus comment state, up to and including the last + consumed character before the U+003E character, if any, or up to the + end of the file otherwise. (If the comment was started by the end of + the file (EOF), the token is empty.) */ + $data = $this->characters('^>', $this->char); + $this->emitToken(array( + 'data' => $data, + 'type' => self::COMMENT + )); + + $this->char += strlen($data); + + /* Switch to the data state. */ + $this->state = 'data'; + + /* If the end of the file was reached, reconsume the EOF character. */ + if($this->char === $this->EOF) { + $this->char = $this->EOF - 1; + } + } + + private function markupDeclarationOpenState() + { + /* If the next two characters are both U+002D HYPHEN-MINUS (-) + characters, consume those two characters, create a comment token whose + data is the empty string, and switch to the comment state. */ + if($this->character($this->char + 1, 2) === '--') { + $this->char += 2; + $this->state = 'comment'; + $this->token = array( + 'data' => null, + 'type' => self::COMMENT + ); + + /* Otherwise if the next seven chacacters are a case-insensitive match + for the word "DOCTYPE", then consume those characters and switch to the + DOCTYPE state. */ + } elseif(strtolower($this->character($this->char + 1, 7)) === 'doctype') { + $this->char += 7; + $this->state = 'doctype'; + + /* Otherwise, is is a parse error. Switch to the bogus comment state. + The next character that is consumed, if any, is the first character + that will be in the comment. 
*/ + } else { + $this->char++; + $this->state = 'bogusComment'; + } + } + + private function commentState() + { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + /* U+002D HYPHEN-MINUS (-) */ + if($char === '-') { + /* Switch to the comment dash state */ + $this->state = 'commentDash'; + + /* EOF */ + } elseif($this->char === $this->EOF) { + /* Parse error. Emit the comment token. Reconsume the EOF character + in the data state. */ + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + /* Anything else */ + } else { + /* Append the input character to the comment token's data. Stay in + the comment state. */ + $this->token['data'] .= $char; + } + } + + private function commentDashState() + { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + /* U+002D HYPHEN-MINUS (-) */ + if($char === '-') { + /* Switch to the comment end state */ + $this->state = 'commentEnd'; + + /* EOF */ + } elseif($this->char === $this->EOF) { + /* Parse error. Emit the comment token. Reconsume the EOF character + in the data state. */ + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + /* Anything else */ + } else { + /* Append a U+002D HYPHEN-MINUS (-) character and the input + character to the comment token's data. Switch to the comment state. 
*/ + $this->token['data'] .= '-'.$char; + $this->state = 'comment'; + } + } + + private function commentEndState() + { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if($char === '>') { + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($char === '-') { + $this->token['data'] .= '-'; + + } elseif($this->char === $this->EOF) { + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + } else { + $this->token['data'] .= '--'.$char; + $this->state = 'comment'; + } + } + + private function doctypeState() + { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + $this->state = 'beforeDoctypeName'; + + } else { + $this->char--; + $this->state = 'beforeDoctypeName'; + } + } + + private function beforeDoctypeNameState() + { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + // Stay in the before DOCTYPE name state. 
+ + } elseif(preg_match('/^[a-z]$/', $char)) { + $this->token = array( + 'name' => strtoupper($char), + 'type' => self::DOCTYPE, + 'error' => true + ); + + $this->state = 'doctypeName'; + + } elseif($char === '>') { + $this->emitToken(array( + 'name' => null, + 'type' => self::DOCTYPE, + 'error' => true + )); + + $this->state = 'data'; + + } elseif($this->char === $this->EOF) { + $this->emitToken(array( + 'name' => null, + 'type' => self::DOCTYPE, + 'error' => true + )); + + $this->char--; + $this->state = 'data'; + + } else { + $this->token = array( + 'name' => $char, + 'type' => self::DOCTYPE, + 'error' => true + ); + + $this->state = 'doctypeName'; + } + } + + private function doctypeNameState() + { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + $this->state = 'AfterDoctypeName'; + + } elseif($char === '>') { + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif(preg_match('/^[a-z]$/', $char)) { + $this->token['name'] .= strtoupper($char); + + } elseif($this->char === $this->EOF) { + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + } else { + $this->token['name'] .= $char; + } + + $this->token['error'] = ($this->token['name'] === 'HTML') + ? false + : true; + } + + private function afterDoctypeNameState() + { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + // Stay in the DOCTYPE name state. 
+ + } elseif($char === '>') { + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($this->char === $this->EOF) { + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + } else { + $this->token['error'] = true; + $this->state = 'bogusDoctype'; + } + } + + private function bogusDoctypeState() + { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if($char === '>') { + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($this->char === $this->EOF) { + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + } else { + // Stay in the bogus DOCTYPE state. + } + } + + private function entity() + { + $start = $this->char; + + // This section defines how to consume an entity. This definition is + // used when parsing entities in text and in attributes. + + // The behaviour depends on the identity of the next character (the + // one immediately after the U+0026 AMPERSAND character): + + switch($this->character($this->char + 1)) { + // U+0023 NUMBER SIGN (#) + case '#': + + // The behaviour further depends on the character after the + // U+0023 NUMBER SIGN: + switch($this->character($this->char + 1)) { + // U+0078 LATIN SMALL LETTER X + // U+0058 LATIN CAPITAL LETTER X + case 'x': + case 'X': + // Follow the steps below, but using the range of + // characters U+0030 DIGIT ZERO through to U+0039 DIGIT + // NINE, U+0061 LATIN SMALL LETTER A through to U+0066 + // LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER + // A, through to U+0046 LATIN CAPITAL LETTER F (in other + // words, 0-9, A-F, a-f). + $char = 1; + $char_class = '0-9A-Fa-f'; + break; + + // Anything else + default: + // Follow the steps below, but using the range of + // characters U+0030 DIGIT ZERO through to U+0039 DIGIT + // NINE (i.e. just 0-9). + $char = 0; + $char_class = '0-9'; + break; + } + + // Consume as many characters as match the range of characters + // given above. 
+ $this->char++; + $e_name = $this->characters($char_class, $this->char + $char + 1); + $entity = $this->character($start, $this->char); + $cond = strlen($e_name) > 0; + + // The rest of the parsing happens bellow. + break; + + // Anything else + default: + // Consume the maximum number of characters possible, with the + // consumed characters case-sensitively matching one of the + // identifiers in the first column of the entities table. + $e_name = $this->characters('0-9A-Za-z;', $this->char + 1); + $len = strlen($e_name); + + for($c = 1; $c <= $len; $c++) { + $id = substr($e_name, 0, $c); + $this->char++; + + if(in_array($id, $this->entities)) { + $entity = $id; + break; + } + } + + $cond = isset($entity); + // The rest of the parsing happens bellow. + break; + } + + if(!$cond) { + // If no match can be made, then this is a parse error. No + // characters are consumed, and nothing is returned. + $this->char = $start; + return false; + } + + // Return a character token for the character corresponding to the + // entity name (as given by the second column of the entities table). 
+ return html_entity_decode('&'.$entity.';', ENT_QUOTES, 'UTF-8'); + } + + private function emitToken($token) + { + $emit = $this->tree->emitToken($token); + + if(is_int($emit)) { + $this->content_model = $emit; + + } elseif($token['type'] === self::ENDTAG) { + $this->content_model = self::PCDATA; + } + } + + private function EOF() + { + $this->state = null; + $this->tree->emitToken(array( + 'type' => self::EOF + )); + } +} + +class HTML5TreeConstructer +{ + public $stack = array(); + + private $phase; + private $mode; + private $dom; + private $foster_parent = null; + private $a_formatting = array(); + + private $head_pointer = null; + private $form_pointer = null; + + private $scoping = array('button','caption','html','marquee','object','table','td','th'); + private $formatting = array('a','b','big','em','font','i','nobr','s','small','strike','strong','tt','u'); + private $special = array('address','area','base','basefont','bgsound', + 'blockquote','body','br','center','col','colgroup','dd','dir','div','dl', + 'dt','embed','fieldset','form','frame','frameset','h1','h2','h3','h4','h5', + 'h6','head','hr','iframe','image','img','input','isindex','li','link', + 'listing','menu','meta','noembed','noframes','noscript','ol','optgroup', + 'option','p','param','plaintext','pre','script','select','spacer','style', + 'tbody','textarea','tfoot','thead','title','tr','ul','wbr'); + + // The different phases. + const INIT_PHASE = 0; + const ROOT_PHASE = 1; + const MAIN_PHASE = 2; + const END_PHASE = 3; + + // The different insertion modes for the main phase. + const BEFOR_HEAD = 0; + const IN_HEAD = 1; + const AFTER_HEAD = 2; + const IN_BODY = 3; + const IN_TABLE = 4; + const IN_CAPTION = 5; + const IN_CGROUP = 6; + const IN_TBODY = 7; + const IN_ROW = 8; + const IN_CELL = 9; + const IN_SELECT = 10; + const AFTER_BODY = 11; + const IN_FRAME = 12; + const AFTR_FRAME = 13; + + // The different types of elements. 
+ const SPECIAL = 0; + const SCOPING = 1; + const FORMATTING = 2; + const PHRASING = 3; + + const MARKER = 0; + + public function __construct() + { + $this->phase = self::INIT_PHASE; + $this->mode = self::BEFOR_HEAD; + $this->dom = new DOMDocument; + + $this->dom->encoding = 'UTF-8'; + $this->dom->preserveWhiteSpace = true; + $this->dom->substituteEntities = true; + $this->dom->strictErrorChecking = false; + } + + // Process tag tokens + public function emitToken($token) + { + switch($this->phase) { + case self::INIT_PHASE: return $this->initPhase($token); break; + case self::ROOT_PHASE: return $this->rootElementPhase($token); break; + case self::MAIN_PHASE: return $this->mainPhase($token); break; + case self::END_PHASE : return $this->trailingEndPhase($token); break; + } + } + + private function initPhase($token) + { + /* Initially, the tree construction stage must handle each token + emitted from the tokenisation stage as follows: */ + + /* A DOCTYPE token that is marked as being in error + A comment token + A start tag token + An end tag token + A character token that is not one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE + An end-of-file token */ + if((isset($token['error']) && $token['error']) || + $token['type'] === HTML5::COMMENT || + $token['type'] === HTML5::STARTTAG || + $token['type'] === HTML5::ENDTAG || + $token['type'] === HTML5::EOF || + ($token['type'] === HTML5::CHARACTR && isset($token['data']) && + !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))) { + /* This specification does not define how to handle this case. In + particular, user agents may ignore the entirety of this specification + altogether for such documents, and instead invoke special parse modes + with a greater emphasis on backwards compatibility. 
*/ + + $this->phase = self::ROOT_PHASE; + return $this->rootElementPhase($token); + + /* A DOCTYPE token marked as being correct */ + } elseif(isset($token['error']) && !$token['error']) { + /* Append a DocumentType node to the Document node, with the name + attribute set to the name given in the DOCTYPE token (which will be + "HTML"), and the other attributes specific to DocumentType objects + set to null, empty lists, or the empty string as appropriate. */ + $doctype = new DOMDocumentType(null, null, 'HTML'); + + /* Then, switch to the root element phase of the tree construction + stage. */ + $this->phase = self::ROOT_PHASE; + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + } elseif(isset($token['data']) && preg_match('/^[\t\n\x0b\x0c ]+$/', + $token['data'])) { + /* Append that character to the Document node. */ + $text = $this->dom->createTextNode($token['data']); + $this->dom->appendChild($text); + } + } + + private function rootElementPhase($token) + { + /* After the initial phase, as each token is emitted from the tokenisation + stage, it must be processed as described in this section. */ + + /* A DOCTYPE token */ + if($token['type'] === HTML5::DOCTYPE) { + // Parse error. Ignore the token. + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the Document object with the data + attribute set to the data given in the comment token. */ + $comment = $this->dom->createComment($token['data']); + $this->dom->appendChild($comment); + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + } elseif($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append that character to the Document node. 
*/ + $text = $this->dom->createTextNode($token['data']); + $this->dom->appendChild($text); + + /* A character token that is not one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED + (FF), or U+0020 SPACE + A start tag token + An end tag token + An end-of-file token */ + } elseif(($token['type'] === HTML5::CHARACTR && + !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || + $token['type'] === HTML5::STARTTAG || + $token['type'] === HTML5::ENDTAG || + $token['type'] === HTML5::EOF) { + /* Create an HTMLElement node with the tag name html, in the HTML + namespace. Append it to the Document object. Switch to the main + phase and reprocess the current token. */ + $html = $this->dom->createElement('html'); + $this->dom->appendChild($html); + $this->stack[] = $html; + + $this->phase = self::MAIN_PHASE; + return $this->mainPhase($token); + } + } + + private function mainPhase($token) + { + /* Tokens in the main phase must be handled as follows: */ + + /* A DOCTYPE token */ + if($token['type'] === HTML5::DOCTYPE) { + // Parse error. Ignore the token. + + /* A start tag token with the tag name "html" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') { + /* If this start tag token was not the first start tag token, then + it is a parse error. */ + + /* For each attribute on the token, check to see if the attribute + is already present on the top element of the stack of open elements. + If it is not, add the attribute and its corresponding value to that + element. */ + foreach($token['attr'] as $attr) { + if(!$this->stack[0]->hasAttribute($attr['name'])) { + $this->stack[0]->setAttribute($attr['name'], $attr['value']); + } + } + + /* An end-of-file token */ + } elseif($token['type'] === HTML5::EOF) { + /* Generate implied end tags. */ + $this->generateImpliedEndTags(); + + /* Anything else. 
*/
+        } else {
+            /* Depends on the insertion mode: */
+            switch($this->mode) {
+                case self::BEFOR_HEAD: return $this->beforeHead($token); break;
+                case self::IN_HEAD: return $this->inHead($token); break;
+                case self::AFTER_HEAD: return $this->afterHead($token); break;
+                case self::IN_BODY: return $this->inBody($token); break;
+                case self::IN_TABLE: return $this->inTable($token); break;
+                case self::IN_CAPTION: return $this->inCaption($token); break;
+                case self::IN_CGROUP: return $this->inColumnGroup($token); break;
+                case self::IN_TBODY: return $this->inTableBody($token); break;
+                case self::IN_ROW: return $this->inRow($token); break;
+                case self::IN_CELL: return $this->inCell($token); break;
+                case self::IN_SELECT: return $this->inSelect($token); break;
+                case self::AFTER_BODY: return $this->afterBody($token); break;
+                case self::IN_FRAME: return $this->inFrameset($token); break;
+                case self::AFTR_FRAME: return $this->afterFrameset($token); break;
+                case self::END_PHASE: return $this->trailingEndPhase($token); break;
+            }
+        }
+    }
+
+    /**
+     * Handles one token while the insertion mode is "before head".
+     *
+     * Whitespace and comments are appended to the current node, a "head"
+     * start tag creates the head element and switches to "in head"; any
+     * other start tag, an "html" end tag or non-whitespace text implies a
+     * "head" start tag and is then reprocessed by inHead().
+     *
+     * @param array $token Token array; this method reads the keys 'type',
+     *                     'name' and 'data'.
+     * @return mixed Whatever inHead() returns when the token is reprocessed
+     *               there; null in every other branch.
+     */
+    private function beforeHead($token)
+    {
+        /* Handle the token as follows: */
+
+        /* A character token that is one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE */
+        if($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            /* Append the character to the current node. */
+            $this->insertText($token['data']);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the current node with the data attribute
+            set to the data given in the comment token. */
+            $this->insertComment($token['data']);
+
+        /* A start tag token with the tag name "head" */
+        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') {
+            /* Create an element for the token, append the new element to the
+            current node and push it onto the stack of open elements. */
+            $element = $this->insertElement($token);
+
+            /* Set the head element pointer to this new element node. */
+            $this->head_pointer = $element;
+
+            /* Change the insertion mode to "in head". */
+            $this->mode = self::IN_HEAD;
+
+        /* A start tag token whose tag name is one of: "base", "link", "meta",
+        "script", "style", "title". Or an end tag with the tag name "html".
+        Or a character token that is not one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE. Or any other start tag token.
+
+        (Whitespace-only runs were already consumed by the first branch, so
+        every character token that reaches this point qualifies.) */
+        } elseif($token['type'] === HTML5::STARTTAG ||
+        ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') ||
+        ($token['type'] === HTML5::CHARACTR && !preg_match('/^[\t\n\x0b\x0c ]$/',
+        $token['data']))) {
+            /* Act as if a start tag token with the tag name "head" and no
+            attributes had been seen, then reprocess the current token. */
+            $this->beforeHead(array(
+                'name' => 'head',
+                'type' => HTML5::STARTTAG,
+                'attr' => array()
+            ));
+
+            return $this->inHead($token);
+
+        /* Any other end tag */
+        } elseif($token['type'] === HTML5::ENDTAG) {
+            /* Parse error. Ignore the token. */
+        }
+    }
+
+    /**
+     * Handles one token while the insertion mode is "in head".
+     *
+     * Also invoked from inBody() for "script", "style", "base", "link",
+     * "meta" and "title" start tags, so it must cope with the head element
+     * pointer being null (innerHTML case): in that situation new elements
+     * are inserted at the current node instead of under the head element.
+     *
+     * @param array $token Token array; this method reads the keys 'type',
+     *                     'name' and 'data'.
+     * @return mixed HTML5::PCDATA, HTML5::RCDATA or HTML5::CDATA when the
+     *               tokeniser's content model flag has to change, the result
+     *               of afterHead() when the token is reprocessed there, and
+     *               null otherwise.
+     */
+    private function inHead($token)
+    {
+        /* Handle the token as follows: */
+
+        /* A character token that is one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE.
+
+        THIS DIFFERS FROM THE SPEC: If the current node is either a title, style
+        or script element, append the character to the current node regardless
+        of its content. */
+        if(($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || (
+        $token['type'] === HTML5::CHARACTR && in_array(end($this->stack)->nodeName,
+        array('title', 'style', 'script')))) {
+            /* Append the character to the current node. */
+            $this->insertText($token['data']);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the current node with the data attribute
+            set to the data given in the comment token. */
+            $this->insertComment($token['data']);
+
+        /* An end tag whose tag name is "title", "style" or "script": close
+        the element and switch the tokeniser back to PCDATA. */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        in_array($token['name'], array('title', 'style', 'script'))) {
+            array_pop($this->stack);
+            return HTML5::PCDATA;
+
+        /* A start tag with the tag name "title" */
+        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') {
+            /* Create an element for the token and append the new element to the
+            node pointed to by the head element pointer, or, if that is null
+            (innerHTML case), to the current node. */
+            if($this->head_pointer !== null) {
+                $element = $this->insertElement($token, false);
+                $this->head_pointer->appendChild($element);
+
+            } else {
+                $this->insertElement($token);
+            }
+
+            /* Switch the tokeniser's content model flag to the RCDATA state. */
+            return HTML5::RCDATA;
+
+        /* A start tag with the tag name "style" */
+        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') {
+            /* Create an element for the token and append the new element to the
+            node pointed to by the head element pointer, or, if that is null
+            (innerHTML case), to the current node. */
+            if($this->head_pointer !== null) {
+                $element = $this->insertElement($token, false);
+                $this->head_pointer->appendChild($element);
+
+            } else {
+                $this->insertElement($token);
+            }
+
+            /* Switch the tokeniser's content model flag to the CDATA state. */
+            return HTML5::CDATA;
+
+        /* A start tag with the tag name "script" */
+        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') {
+            /* Create an element for the token. As for "title" and "style",
+            fall back to the current node when the head element pointer is
+            null (innerHTML case) instead of dereferencing null. */
+            if($this->head_pointer !== null) {
+                $element = $this->insertElement($token, false);
+                $this->head_pointer->appendChild($element);
+
+            } else {
+                $this->insertElement($token);
+            }
+
+            /* Switch the tokeniser's content model flag to the CDATA state. */
+            return HTML5::CDATA;
+
+        /* A start tag with the tag name "base", "link", or "meta" */
+        } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+        array('base', 'link', 'meta'))) {
+            /* Create an element for the token and append the new element to the
+            node pointed to by the head element pointer, or, if that is null
+            (innerHTML case), to the current node. */
+            if($this->head_pointer !== null) {
+                $element = $this->insertElement($token, false);
+                $this->head_pointer->appendChild($element);
+
+            } else {
+                $this->insertElement($token);
+            }
+
+            /* These elements never receive content and no end tag closes
+            them here, so take the element off the stack of open elements
+            again in both cases. */
+            array_pop($this->stack);
+
+        /* An end tag with the tag name "head" */
+        } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') {
+            /* If the current node is a head element, pop the current node off
+            the stack of open elements. A null head element pointer
+            (innerHTML case) can never be the current node. */
+            if($this->head_pointer !== null &&
+            $this->head_pointer->isSameNode(end($this->stack))) {
+                array_pop($this->stack);
+
+            /* Otherwise, this is a parse error. */
+            } else {
+                // k
+            }
+
+            /* Change the insertion mode to "after head". */
+            $this->mode = self::AFTER_HEAD;
+
+        /* A start tag with the tag name "head" or an end tag except "html". */
+        } elseif(($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') ||
+        ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')) {
+            // Parse error. Ignore the token.
+
+        /* Anything else */
+        } else {
+            /* If the current node is a head element, act as if an end tag
+            token with the tag name "head" had been seen. */
+            if($this->head_pointer !== null &&
+            $this->head_pointer->isSameNode(end($this->stack))) {
+                $this->inHead(array(
+                    'name' => 'head',
+                    'type' => HTML5::ENDTAG
+                ));
+
+            /* Otherwise, change the insertion mode to "after head". */
+            } else {
+                $this->mode = self::AFTER_HEAD;
+            }
+
+            /* Then, reprocess the current token. */
+            return $this->afterHead($token);
+        }
+    }
+
+    /**
+     * Handles one token while the insertion mode is "after head".
+     *
+     * Whitespace and comments are appended to the current node, "body" and
+     * "frameset" start tags open the respective element and switch the
+     * insertion mode, head-only start tags are sent back to inHead(), and
+     * everything else implies a "body" start tag before being reprocessed
+     * by inBody().
+     *
+     * @param array $token Token array; this method reads the keys 'type',
+     *                     'name' and 'data'.
+     * @return mixed The result of inHead() or inBody() when the token is
+     *               reprocessed there; null in every other branch.
+     */
+    private function afterHead($token)
+    {
+        /* Handle the token as follows: */
+
+        /* A character token that is one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE */
+        if($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            /* Append the character to the current node. */
+            $this->insertText($token['data']);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the current node with the data attribute
+            set to the data given in the comment token. */
+            $this->insertComment($token['data']);
+
+        /* A start tag token with the tag name "body" */
+        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'body') {
+            /* Insert a body element for the token. */
+            $this->insertElement($token);
+
+            /* Change the insertion mode to "in body". */
+            $this->mode = self::IN_BODY;
+
+        /* A start tag token with the tag name "frameset" */
+        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'frameset') {
+            /* Insert a frameset element for the token. */
+            $this->insertElement($token);
+
+            /* Change the insertion mode to "in frameset". */
+            $this->mode = self::IN_FRAME;
+
+        /* A start tag token whose tag name is one of: "base", "link", "meta",
+        "script", "style", "title" */
+        } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+        array('base', 'link', 'meta', 'script', 'style', 'title'))) {
+            /* Parse error. Switch the insertion mode back to "in head" and
+            reprocess the token. */
+            $this->mode = self::IN_HEAD;
+            return $this->inHead($token);
+
+        /* Anything else */
+        } else {
+            /* Act as if a start tag token with the tag name "body" and no
+            attributes had been seen, and then reprocess the current token. */
+            $this->afterHead(array(
+                'name' => 'body',
+                'type' => HTML5::STARTTAG,
+                'attr' => array()
+            ));
+
+            return $this->inBody($token);
+        }
+    }
+
+    private function inBody($token)
+    {
+        /* Handle the token as follows: */
+
+        switch($token['type']) {
+            /* A character token */
+            case HTML5::CHARACTR:
+                /* Reconstruct the active formatting elements, if any. */
+                $this->reconstructActiveFormattingElements();
+
+                /* Append the token's character to the current node. */
+                $this->insertText($token['data']);
+            break;
+
+            /* A comment token */
+            case HTML5::COMMENT:
+                /* Append a Comment node to the current node with the data
+                attribute set to the data given in the comment token. */
+                $this->insertComment($token['data']);
+            break;
+
+            case HTML5::STARTTAG:
+            switch($token['name']) {
+                /* A start tag token whose tag name is one of: "script",
+                "style" */
+                case 'script': case 'style':
+                    /* Process the token as if the insertion mode had been "in
+                    head". */
+                    return $this->inHead($token);
+                break;
+
+                /* A start tag token whose tag name is one of: "base", "link",
+                "meta", "title" */
+                case 'base': case 'link': case 'meta': case 'title':
+                    /* Parse error. Process the token as if the insertion mode
+                    had been "in head". */
+                    return $this->inHead($token);
+                break;
+
+                /* A start tag token with the tag name "body" */
+                case 'body':
+                    /* Parse error. If the second element on the stack of open
+                    elements is not a body element, or, if the stack of open
+                    elements has only one node on it, then ignore the token.
+                    (innerHTML case) */
+                    if(count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') {
+                        // Ignore
+
+                    /* Otherwise, for each attribute on the token, check to see
+                    if the attribute is already present on the body element (the
+                    second element) on the stack of open elements. If it is not,
+                    add the attribute and its corresponding value to that
+                    element.
*/ + } else { + foreach($token['attr'] as $attr) { + if(!$this->stack[1]->hasAttribute($attr['name'])) { + $this->stack[1]->setAttribute($attr['name'], $attr['value']); + } + } + } + break; + + /* A start tag whose tag name is one of: "address", + "blockquote", "center", "dir", "div", "dl", "fieldset", + "listing", "menu", "ol", "p", "ul" */ + case 'address': case 'blockquote': case 'center': case 'dir': + case 'div': case 'dl': case 'fieldset': case 'listing': + case 'menu': case 'ol': case 'p': case 'ul': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been + seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + break; + + /* A start tag whose tag name is "form" */ + case 'form': + /* If the form element pointer is not null, ignore the + token with a parse error. */ + if($this->form_pointer !== null) { + // Ignore. + + /* Otherwise: */ + } else { + /* If the stack of open elements has a p element in + scope, then act as if an end tag with the tag name p + had been seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token, and set the + form element pointer to point to the element created. */ + $element = $this->insertElement($token); + $this->form_pointer = $element; + } + break; + + /* A start tag whose tag name is "li", "dd" or "dt" */ + case 'li': case 'dd': case 'dt': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been + seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + $stack_length = count($this->stack) - 1; + + for($n = $stack_length; 0 <= $n; $n--) { + /* 1. 
Initialise node to be the current node (the + bottommost node of the stack). */ + $stop = false; + $node = $this->stack[$n]; + $cat = $this->getElementCategory($node->tagName); + + /* 2. If node is an li, dd or dt element, then pop all + the nodes from the current node up to node, including + node, then stop this algorithm. */ + if($token['name'] === $node->tagName || ($token['name'] !== 'li' + && ($node->tagName === 'dd' || $node->tagName === 'dt'))) { + for($x = $stack_length; $x >= $n ; $x--) { + array_pop($this->stack); + } + + break; + } + + /* 3. If node is not in the formatting category, and is + not in the phrasing category, and is not an address or + div element, then stop this algorithm. */ + if($cat !== self::FORMATTING && $cat !== self::PHRASING && + $node->tagName !== 'address' && $node->tagName !== 'div') { + break; + } + } + + /* Finally, insert an HTML element with the same tag + name as the token's. */ + $this->insertElement($token); + break; + + /* A start tag token whose tag name is "plaintext" */ + case 'plaintext': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been + seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + return HTML5::PLAINTEXT; + break; + + /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4", + "h5", "h6" */ + case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been seen. 
*/ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* If the stack of open elements has in scope an element whose + tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then + this is a parse error; pop elements from the stack until an + element with one of those tag names has been popped from the + stack. */ + while($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) { + array_pop($this->stack); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + break; + + /* A start tag whose tag name is "a" */ + case 'a': + /* If the list of active formatting elements contains + an element whose tag name is "a" between the end of the + list and the last marker on the list (or the start of + the list if there is no marker on the list), then this + is a parse error; act as if an end tag with the tag name + "a" had been seen, then remove that element from the list + of active formatting elements and the stack of open + elements if the end tag didn't already remove it (it + might not have if the element is not in table scope). */ + $leng = count($this->a_formatting); + + for($n = $leng - 1; $n >= 0; $n--) { + if($this->a_formatting[$n] === self::MARKER) { + break; + + } elseif($this->a_formatting[$n]->nodeName === 'a') { + $this->emitToken(array( + 'name' => 'a', + 'type' => HTML5::ENDTAG + )); + break; + } + } + + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $el = $this->insertElement($token); + + /* Add that element to the list of active formatting + elements. 
*/ + $this->a_formatting[] = $el; + break; + + /* A start tag whose tag name is one of: "b", "big", "em", "font", + "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ + case 'b': case 'big': case 'em': case 'font': case 'i': + case 'nobr': case 's': case 'small': case 'strike': + case 'strong': case 'tt': case 'u': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $el = $this->insertElement($token); + + /* Add that element to the list of active formatting + elements. */ + $this->a_formatting[] = $el; + break; + + /* A start tag token whose tag name is "button" */ + case 'button': + /* If the stack of open elements has a button element in scope, + then this is a parse error; act as if an end tag with the tag + name "button" had been seen, then reprocess the token. (We don't + do that. Unnecessary.) */ + if($this->elementInScope('button')) { + $this->inBody(array( + 'name' => 'button', + 'type' => HTML5::ENDTAG + )); + } + + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Insert a marker at the end of the list of active + formatting elements. */ + $this->a_formatting[] = self::MARKER; + break; + + /* A start tag token whose tag name is one of: "marquee", "object" */ + case 'marquee': case 'object': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Insert a marker at the end of the list of active + formatting elements. */ + $this->a_formatting[] = self::MARKER; + break; + + /* A start tag token whose tag name is "xmp" */ + case 'xmp': + /* Reconstruct the active formatting elements, if any. 
*/ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Switch the content model flag to the CDATA state. */ + return HTML5::CDATA; + break; + + /* A start tag whose tag name is "table" */ + case 'table': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Change the insertion mode to "in table". */ + $this->mode = self::IN_TABLE; + break; + + /* A start tag whose tag name is one of: "area", "basefont", + "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */ + case 'area': case 'basefont': case 'bgsound': case 'br': + case 'embed': case 'img': case 'param': case 'spacer': + case 'wbr': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Immediately pop the current node off the stack of open elements. */ + array_pop($this->stack); + break; + + /* A start tag whose tag name is "hr" */ + case 'hr': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Immediately pop the current node off the stack of open elements. */ + array_pop($this->stack); + break; + + /* A start tag whose tag name is "image" */ + case 'image': + /* Parse error. Change the token's tag name to "img" and + reprocess it. (Don't ask.) 
*/ + $token['name'] = 'img'; + return $this->inBody($token); + break; + + /* A start tag whose tag name is "input" */ + case 'input': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an input element for the token. */ + $element = $this->insertElement($token, false); + + /* If the form element pointer is not null, then associate the + input element with the form element pointed to by the form + element pointer. */ + $this->form_pointer !== null + ? $this->form_pointer->appendChild($element) + : end($this->stack)->appendChild($element); + + /* Pop that input element off the stack of open elements. */ + array_pop($this->stack); + break; + + /* A start tag whose tag name is "isindex" */ + case 'isindex': + /* Parse error. */ + // w/e + + /* If the form element pointer is not null, + then ignore the token. */ + if($this->form_pointer === null) { + /* Act as if a start tag token with the tag name "form" had + been seen. */ + $this->inBody(array( + 'name' => 'body', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + /* Act as if a start tag token with the tag name "hr" had + been seen. */ + $this->inBody(array( + 'name' => 'hr', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + /* Act as if a start tag token with the tag name "p" had + been seen. */ + $this->inBody(array( + 'name' => 'p', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + /* Act as if a start tag token with the tag name "label" + had been seen. */ + $this->inBody(array( + 'name' => 'label', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + /* Act as if a stream of character tokens had been seen. */ + $this->insertText('This is a searchable index. '. 
+ 'Insert your search keywords here: '); + + /* Act as if a start tag token with the tag name "input" + had been seen, with all the attributes from the "isindex" + token, except with the "name" attribute set to the value + "isindex" (ignoring any explicit "name" attribute). */ + $attr = $token['attr']; + $attr[] = array('name' => 'name', 'value' => 'isindex'); + + $this->inBody(array( + 'name' => 'input', + 'type' => HTML5::STARTTAG, + 'attr' => $attr + )); + + /* Act as if a stream of character tokens had been seen + (see below for what they should say). */ + $this->insertText('This is a searchable index. '. + 'Insert your search keywords here: '); + + /* Act as if an end tag token with the tag name "label" + had been seen. */ + $this->inBody(array( + 'name' => 'label', + 'type' => HTML5::ENDTAG + )); + + /* Act as if an end tag token with the tag name "p" had + been seen. */ + $this->inBody(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + + /* Act as if a start tag token with the tag name "hr" had + been seen. */ + $this->inBody(array( + 'name' => 'hr', + 'type' => HTML5::ENDTAG + )); + + /* Act as if an end tag token with the tag name "form" had + been seen. */ + $this->inBody(array( + 'name' => 'form', + 'type' => HTML5::ENDTAG + )); + } + break; + + /* A start tag whose tag name is "textarea" */ + case 'textarea': + $this->insertElement($token); + + /* Switch the tokeniser's content model flag to the + RCDATA state. */ + return HTML5::RCDATA; + break; + + /* A start tag whose tag name is one of: "iframe", "noembed", + "noframes" */ + case 'iframe': case 'noembed': case 'noframes': + $this->insertElement($token); + + /* Switch the tokeniser's content model flag to the CDATA state. */ + return HTML5::CDATA; + break; + + /* A start tag whose tag name is "select" */ + case 'select': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. 
*/ + $this->insertElement($token); + + /* Change the insertion mode to "in select". */ + $this->mode = self::IN_SELECT; + break; + + /* A start or end tag whose tag name is one of: "caption", "col", + "colgroup", "frame", "frameset", "head", "option", "optgroup", + "tbody", "td", "tfoot", "th", "thead", "tr". */ + case 'caption': case 'col': case 'colgroup': case 'frame': + case 'frameset': case 'head': case 'option': case 'optgroup': + case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': + case 'tr': + // Parse error. Ignore the token. + break; + + /* A start or end tag whose tag name is one of: "event-source", + "section", "nav", "article", "aside", "header", "footer", + "datagrid", "command" */ + case 'event-source': case 'section': case 'nav': case 'article': + case 'aside': case 'header': case 'footer': case 'datagrid': + case 'command': + // Work in progress! + break; + + /* A start tag token not covered by the previous entries */ + default: + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + $this->insertElement($token); + break; + } + break; + + case HTML5::ENDTAG: + switch($token['name']) { + /* An end tag with the tag name "body" */ + case 'body': + /* If the second element in the stack of open elements is + not a body element, this is a parse error. Ignore the token. + (innerHTML case) */ + if(count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') { + // Ignore. + + /* If the current node is not the body element, then this + is a parse error. */ + } elseif(end($this->stack)->nodeName !== 'body') { + // Parse error. + } + + /* Change the insertion mode to "after body". */ + $this->mode = self::AFTER_BODY; + break; + + /* An end tag with the tag name "html" */ + case 'html': + /* Act as if an end tag with tag name "body" had been seen, + then, if that token wasn't ignored, reprocess the current + token. 
*/ + $this->inBody(array( + 'name' => 'body', + 'type' => HTML5::ENDTAG + )); + + return $this->afterBody($token); + break; + + /* An end tag whose tag name is one of: "address", "blockquote", + "center", "dir", "div", "dl", "fieldset", "listing", "menu", + "ol", "pre", "ul" */ + case 'address': case 'blockquote': case 'center': case 'dir': + case 'div': case 'dl': case 'fieldset': case 'listing': + case 'menu': case 'ol': case 'pre': case 'ul': + /* If the stack of open elements has an element in scope + with the same tag name as that of the token, then generate + implied end tags. */ + if($this->elementInScope($token['name'])) { + $this->generateImpliedEndTags(); + + /* Now, if the current node is not an element with + the same tag name as that of the token, then this + is a parse error. */ + // w/e + + /* If the stack of open elements has an element in + scope with the same tag name as that of the token, + then pop elements from this stack until an element + with that tag name has been popped from the stack. */ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->stack[$n]->nodeName === $token['name']) { + $n = -1; + } + + array_pop($this->stack); + } + } + break; + + /* An end tag whose tag name is "form" */ + case 'form': + /* If the stack of open elements has an element in scope + with the same tag name as that of the token, then generate + implied end tags. */ + if($this->elementInScope($token['name'])) { + $this->generateImpliedEndTags(); + + } + + if(end($this->stack)->nodeName !== $token['name']) { + /* Now, if the current node is not an element with the + same tag name as that of the token, then this is a parse + error. */ + // w/e + + } else { + /* Otherwise, if the current node is an element with + the same tag name as that of the token pop that element + from the stack. */ + array_pop($this->stack); + } + + /* In any case, set the form element pointer to null. 
*/ + $this->form_pointer = null; + break; + + /* An end tag whose tag name is "p" */ + case 'p': + /* If the stack of open elements has a p element in scope, + then generate implied end tags, except for p elements. */ + if($this->elementInScope('p')) { + $this->generateImpliedEndTags(array('p')); + + /* If the current node is not a p element, then this is + a parse error. */ + // k + + /* If the stack of open elements has a p element in + scope, then pop elements from this stack until the stack + no longer has a p element in scope. */ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->elementInScope('p')) { + array_pop($this->stack); + + } else { + break; + } + } + } + break; + + /* An end tag whose tag name is "dd", "dt", or "li" */ + case 'dd': case 'dt': case 'li': + /* If the stack of open elements has an element in scope + whose tag name matches the tag name of the token, then + generate implied end tags, except for elements with the + same tag name as the token. */ + if($this->elementInScope($token['name'])) { + $this->generateImpliedEndTags(array($token['name'])); + + /* If the current node is not an element with the same + tag name as the token, then this is a parse error. */ + // w/e + + /* If the stack of open elements has an element in scope + whose tag name matches the tag name of the token, then + pop elements from this stack until an element with that + tag name has been popped from the stack. */ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->stack[$n]->nodeName === $token['name']) { + $n = -1; + } + + array_pop($this->stack); + } + } + break; + + /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4", + "h5", "h6" */ + case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': + $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'); + + /* If the stack of open elements has in scope an element whose + tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then + generate implied end tags. 
*/ + if($this->elementInScope($elements)) { + $this->generateImpliedEndTags(); + + /* Now, if the current node is not an element with the same + tag name as that of the token, then this is a parse error. */ + // w/e + + /* If the stack of open elements has in scope an element + whose tag name is one of "h1", "h2", "h3", "h4", "h5", or + "h6", then pop elements from the stack until an element + with one of those tag names has been popped from the stack. */ + while($this->elementInScope($elements)) { + array_pop($this->stack); + } + } + break; + + /* An end tag whose tag name is one of: "a", "b", "big", "em", + "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ + case 'a': case 'b': case 'big': case 'em': case 'font': + case 'i': case 'nobr': case 's': case 'small': case 'strike': + case 'strong': case 'tt': case 'u': + /* 1. Let the formatting element be the last element in + the list of active formatting elements that: + * is between the end of the list and the last scope + marker in the list, if any, or the start of the list + otherwise, and + * has the same tag name as the token. + */ + while(true) { + for($a = count($this->a_formatting) - 1; $a >= 0; $a--) { + if($this->a_formatting[$a] === self::MARKER) { + break; + + } elseif($this->a_formatting[$a]->tagName === $token['name']) { + $formatting_element = $this->a_formatting[$a]; + $in_stack = in_array($formatting_element, $this->stack, true); + $fe_af_pos = $a; + break; + } + } + + /* If there is no such node, or, if that node is + also in the stack of open elements but the element + is not in scope, then this is a parse error. Abort + these steps. The token is ignored. */ + if(!isset($formatting_element) || ($in_stack && + !$this->elementInScope($token['name']))) { + break; + + /* Otherwise, if there is such a node, but that node + is not in the stack of open elements, then this is a + parse error; remove the element from the list, and + abort these steps. 
*/ + } elseif(isset($formatting_element) && !$in_stack) { + unset($this->a_formatting[$fe_af_pos]); + $this->a_formatting = array_merge($this->a_formatting); + break; + } + + /* 2. Let the furthest block be the topmost node in the + stack of open elements that is lower in the stack + than the formatting element, and is not an element in + the phrasing or formatting categories. There might + not be one. */ + $fe_s_pos = array_search($formatting_element, $this->stack, true); + $length = count($this->stack); + + for($s = $fe_s_pos + 1; $s < $length; $s++) { + $category = $this->getElementCategory($this->stack[$s]->nodeName); + + if($category !== self::PHRASING && $category !== self::FORMATTING) { + $furthest_block = $this->stack[$s]; + } + } + + /* 3. If there is no furthest block, then the UA must + skip the subsequent steps and instead just pop all + the nodes from the bottom of the stack of open + elements, from the current node up to the formatting + element, and remove the formatting element from the + list of active formatting elements. */ + if(!isset($furthest_block)) { + for($n = $length - 1; $n >= $fe_s_pos; $n--) { + array_pop($this->stack); + } + + unset($this->a_formatting[$fe_af_pos]); + $this->a_formatting = array_merge($this->a_formatting); + break; + } + + /* 4. Let the common ancestor be the element + immediately above the formatting element in the stack + of open elements. */ + $common_ancestor = $this->stack[$fe_s_pos - 1]; + + /* 5. If the furthest block has a parent node, then + remove the furthest block from its parent node. */ + if($furthest_block->parentNode !== null) { + $furthest_block->parentNode->removeChild($furthest_block); + } + + /* 6. Let a bookmark note the position of the + formatting element in the list of active formatting + elements relative to the elements on either side + of it in the list. */ + $bookmark = $fe_af_pos; + + /* 7. Let node and last node be the furthest block. 
+ Follow these steps: */ + $node = $furthest_block; + $last_node = $furthest_block; + + while(true) { + for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) { + /* 7.1 Let node be the element immediately + prior to node in the stack of open elements. */ + $node = $this->stack[$n]; + + /* 7.2 If node is not in the list of active + formatting elements, then remove node from + the stack of open elements and then go back + to step 1. */ + if(!in_array($node, $this->a_formatting, true)) { + unset($this->stack[$n]); + $this->stack = array_merge($this->stack); + + } else { + break; + } + } + + /* 7.3 Otherwise, if node is the formatting + element, then go to the next step in the overall + algorithm. */ + if($node === $formatting_element) { + break; + + /* 7.4 Otherwise, if last node is the furthest + block, then move the aforementioned bookmark to + be immediately after the node in the list of + active formatting elements. */ + } elseif($last_node === $furthest_block) { + $bookmark = array_search($node, $this->a_formatting, true) + 1; + } + + /* 7.5 If node has any children, perform a + shallow clone of node, replace the entry for + node in the list of active formatting elements + with an entry for the clone, replace the entry + for node in the stack of open elements with an + entry for the clone, and let node be the clone. */ + if($node->hasChildNodes()) { + $clone = $node->cloneNode(); + $s_pos = array_search($node, $this->stack, true); + $a_pos = array_search($node, $this->a_formatting, true); + + $this->stack[$s_pos] = $clone; + $this->a_formatting[$a_pos] = $clone; + $node = $clone; + } + + /* 7.6 Insert last node into node, first removing + it from its previous parent node if any. */ + if($last_node->parentNode !== null) { + $last_node->parentNode->removeChild($last_node); + } + + $node->appendChild($last_node); + + /* 7.7 Let last node be node. */ + $last_node = $node; + } + + /* 8. 
Insert whatever last node ended up being in + the previous step into the common ancestor node, + first removing it from its previous parent node if + any. */ + if($last_node->parentNode !== null) { + $last_node->parentNode->removeChild($last_node); + } + + $common_ancestor->appendChild($last_node); + + /* 9. Perform a shallow clone of the formatting + element. */ + $clone = $formatting_element->cloneNode(); + + /* 10. Take all of the child nodes of the furthest + block and append them to the clone created in the + last step. */ + while($furthest_block->hasChildNodes()) { + $child = $furthest_block->firstChild; + $furthest_block->removeChild($child); + $clone->appendChild($child); + } + + /* 11. Append that clone to the furthest block. */ + $furthest_block->appendChild($clone); + + /* 12. Remove the formatting element from the list + of active formatting elements, and insert the clone + into the list of active formatting elements at the + position of the aforementioned bookmark. */ + $fe_af_pos = array_search($formatting_element, $this->a_formatting, true); + unset($this->a_formatting[$fe_af_pos]); + $this->a_formatting = array_merge($this->a_formatting); + + $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1); + $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting)); + $this->a_formatting = array_merge($af_part1, array($clone), $af_part2); + + /* 13. Remove the formatting element from the stack + of open elements, and insert the clone into the stack + of open elements immediately after (i.e. in a more + deeply nested position than) the position of the + furthest block in that stack. 
*/ + $fe_s_pos = array_search($formatting_element, $this->stack, true); + $fb_s_pos = array_search($furthest_block, $this->stack, true); + unset($this->stack[$fe_s_pos]); + + $s_part1 = array_slice($this->stack, 0, $fb_s_pos); + $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack)); + $this->stack = array_merge($s_part1, array($clone), $s_part2); + + /* 14. Jump back to step 1 in this series of steps. */ + unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block); + } + break; + + /* An end tag token whose tag name is one of: "button", + "marquee", "object" */ + case 'button': case 'marquee': case 'object': + /* If the stack of open elements has an element in scope whose + tag name matches the tag name of the token, then generate implied + tags. */ + if($this->elementInScope($token['name'])) { + $this->generateImpliedEndTags(); + + /* Now, if the current node is not an element with the same + tag name as the token, then this is a parse error. */ + // k + + /* Now, if the stack of open elements has an element in scope + whose tag name matches the tag name of the token, then pop + elements from the stack until that element has been popped from + the stack, and clear the list of active formatting elements up + to the last marker. 
*/ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->stack[$n]->nodeName === $token['name']) { + $n = -1; + } + + array_pop($this->stack); + } + + $marker = end(array_keys($this->a_formatting, self::MARKER, true)); + + for($n = count($this->a_formatting) - 1; $n > $marker; $n--) { + array_pop($this->a_formatting); + } + } + break; + + /* Or an end tag whose tag name is one of: "area", "basefont", + "bgsound", "br", "embed", "hr", "iframe", "image", "img", + "input", "isindex", "noembed", "noframes", "param", "select", + "spacer", "table", "textarea", "wbr" */ + case 'area': case 'basefont': case 'bgsound': case 'br': + case 'embed': case 'hr': case 'iframe': case 'image': + case 'img': case 'input': case 'isindex': case 'noembed': + case 'noframes': case 'param': case 'select': case 'spacer': + case 'table': case 'textarea': case 'wbr': + // Parse error. Ignore the token. + break; + + /* An end tag token not covered by the previous entries */ + default: + for($n = count($this->stack) - 1; $n >= 0; $n--) { + /* Initialise node to be the current node (the bottommost + node of the stack). */ + $node = end($this->stack); + + /* If node has the same tag name as the end tag token, + then: */ + if($token['name'] === $node->nodeName) { + /* Generate implied end tags. */ + $this->generateImpliedEndTags(); + + /* If the tag name of the end tag token does not + match the tag name of the current node, this is a + parse error. */ + // k + + /* Pop all the nodes from the current node up to + node, including node, then stop this algorithm. */ + for($x = count($this->stack) - $n; $x >= $n; $x--) { + array_pop($this->stack); + } + + } else { + $category = $this->getElementCategory($node); + + if($category !== self::SPECIAL && $category !== self::SCOPING) { + /* Otherwise, if node is in neither the formatting + category nor the phrasing category, then this is a + parse error. Stop this algorithm. The end tag token + is ignored. 
*/ + return false; + } + } + } + break; + } + break; + } + } + + private function inTable($token) + { + $clear = array('html', 'table'); + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + if($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append the character to the current node. */ + $text = $this->dom->createTextNode($token['data']); + end($this->stack)->appendChild($text); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $comment = $this->dom->createComment($token['data']); + end($this->stack)->appendChild($comment); + + /* A start tag whose tag name is "caption" */ + } elseif($token['type'] === HTML5::STARTTAG && + $token['name'] === 'caption') { + /* Clear the stack back to a table context. */ + $this->clearStackToTableContext($clear); + + /* Insert a marker at the end of the list of active + formatting elements. */ + $this->a_formatting[] = self::MARKER; + + /* Insert an HTML element for the token, then switch the + insertion mode to "in caption". */ + $this->insertElement($token); + $this->mode = self::IN_CAPTION; + + /* A start tag whose tag name is "colgroup" */ + } elseif($token['type'] === HTML5::STARTTAG && + $token['name'] === 'colgroup') { + /* Clear the stack back to a table context. */ + $this->clearStackToTableContext($clear); + + /* Insert an HTML element for the token, then switch the + insertion mode to "in column group". 
*/ + $this->insertElement($token); + $this->mode = self::IN_CGROUP; + + /* A start tag whose tag name is "col" */ + } elseif($token['type'] === HTML5::STARTTAG && + $token['name'] === 'col') { + $this->inTable(array( + 'name' => 'colgroup', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + $this->inColumnGroup($token); + + /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */ + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('tbody', 'tfoot', 'thead'))) { + /* Clear the stack back to a table context. */ + $this->clearStackToTableContext($clear); + + /* Insert an HTML element for the token, then switch the insertion + mode to "in table body". */ + $this->insertElement($token); + $this->mode = self::IN_TBODY; + + /* A start tag whose tag name is one of: "td", "th", "tr" */ + } elseif($token['type'] === HTML5::STARTTAG && + in_array($token['name'], array('td', 'th', 'tr'))) { + /* Act as if a start tag token with the tag name "tbody" had been + seen, then reprocess the current token. */ + $this->inTable(array( + 'name' => 'tbody', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + return $this->inTableBody($token); + + /* A start tag whose tag name is "table" */ + } elseif($token['type'] === HTML5::STARTTAG && + $token['name'] === 'table') { + /* Parse error. Act as if an end tag token with the tag name "table" + had been seen, then, if that token wasn't ignored, reprocess the + current token. */ + $this->inTable(array( + 'name' => 'table', + 'type' => HTML5::ENDTAG + )); + + return $this->mainPhase($token); + + /* An end tag whose tag name is "table" */ + } elseif($token['type'] === HTML5::ENDTAG && + $token['name'] === 'table') { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. 
(innerHTML case) */ + if(!$this->elementInScope($token['name'], true)) { + return false; + + /* Otherwise: */ + } else { + /* Generate implied end tags. */ + $this->generateImpliedEndTags(); + + /* Now, if the current node is not a table element, then this + is a parse error. */ + // w/e + + /* Pop elements from this stack until a table element has been + popped from the stack. */ + while(true) { + $current = end($this->stack)->nodeName; + array_pop($this->stack); + + if($current === 'table') { + break; + } + } + + /* Reset the insertion mode appropriately. */ + $this->resetInsertionMode(); + } + + /* An end tag whose tag name is one of: "body", "caption", "col", + "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], + array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', + 'tfoot', 'th', 'thead', 'tr'))) { + // Parse error. Ignore the token. + + /* Anything else */ + } else { + /* Parse error. Process the token as if the insertion mode was "in + body", with the following exception: */ + + /* If the current node is a table, tbody, tfoot, thead, or tr + element, then, whenever a node would be inserted into the current + node, it must instead be inserted into the foster parent element. */ + if(in_array(end($this->stack)->nodeName, + array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { + /* The foster parent element is the parent element of the last + table element in the stack of open elements, if there is a + table element and it has such a parent element. If there is no + table element in the stack of open elements (innerHTML case), + then the foster parent element is the first element in the + stack of open elements (the html element). 
Otherwise, if there + is a table element in the stack of open elements, but the last + table element in the stack of open elements has no parent, or + its parent node is not an element, then the foster parent + element is the element before the last table element in the + stack of open elements. */ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->stack[$n]->nodeName === 'table') { + $table = $this->stack[$n]; + break; + } + } + + if(isset($table) && $table->parentNode !== null) { + $this->foster_parent = $table->parentNode; + + } elseif(!isset($table)) { + $this->foster_parent = $this->stack[0]; + + } elseif(isset($table) && ($table->parentNode === null || + $table->parentNode->nodeType !== XML_ELEMENT_NODE)) { + $this->foster_parent = $this->stack[$n - 1]; + } + } + + $this->inBody($token); + } + } + + private function inCaption($token) + { + /* An end tag whose tag name is "caption" */ + if($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. (innerHTML case) */ + if(!$this->elementInScope($token['name'], true)) { + // Ignore + + /* Otherwise: */ + } else { + /* Generate implied end tags. */ + $this->generateImpliedEndTags(); + + /* Now, if the current node is not a caption element, then this + is a parse error. */ + // w/e + + /* Pop elements from this stack until a caption element has + been popped from the stack. */ + while(true) { + $node = end($this->stack)->nodeName; + array_pop($this->stack); + + if($node === 'caption') { + break; + } + } + + /* Clear the list of active formatting elements up to the last + marker. */ + $this->clearTheActiveFormattingElementsUpToTheLastMarker(); + + /* Switch the insertion mode to "in table". 
*/ + $this->mode = self::IN_TABLE; + } + + /* A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag + name is "table" */ + } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', + 'thead', 'tr'))) || ($token['type'] === HTML5::ENDTAG && + $token['name'] === 'table')) { + /* Parse error. Act as if an end tag with the tag name "caption" + had been seen, then, if that token wasn't ignored, reprocess the + current token. */ + $this->inCaption(array( + 'name' => 'caption', + 'type' => HTML5::ENDTAG + )); + + return $this->inTable($token); + + /* An end tag whose tag name is one of: "body", "col", "colgroup", + "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], + array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th', + 'thead', 'tr'))) { + // Parse error. Ignore the token. + + /* Anything else */ + } else { + /* Process the token as if the insertion mode was "in body". */ + $this->inBody($token); + } + } + + private function inColumnGroup($token) + { + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + if($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append the character to the current node. */ + $text = $this->dom->createTextNode($token['data']); + end($this->stack)->appendChild($text); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. 
*/ + $comment = $this->dom->createComment($token['data']); + end($this->stack)->appendChild($comment); + + /* A start tag whose tag name is "col" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') { + /* Insert a col element for the token. Immediately pop the current + node off the stack of open elements. */ + $this->insertElement($token); + array_pop($this->stack); + + /* An end tag whose tag name is "colgroup" */ + } elseif($token['type'] === HTML5::ENDTAG && + $token['name'] === 'colgroup') { + /* If the current node is the root html element, then this is a + parse error, ignore the token. (innerHTML case) */ + if(end($this->stack)->nodeName === 'html') { + // Ignore + + /* Otherwise, pop the current node (which will be a colgroup + element) from the stack of open elements. Switch the insertion + mode to "in table". */ + } else { + array_pop($this->stack); + $this->mode = self::IN_TABLE; + } + + /* An end tag whose tag name is "col" */ + } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') { + /* Parse error. Ignore the token. */ + + /* Anything else */ + } else { + /* Act as if an end tag with the tag name "colgroup" had been seen, + and then, if that token wasn't ignored, reprocess the current token. */ + $this->inColumnGroup(array( + 'name' => 'colgroup', + 'type' => HTML5::ENDTAG + )); + + return $this->inTable($token); + } + } + + private function inTableBody($token) + { + $clear = array('tbody', 'tfoot', 'thead', 'html'); + + /* A start tag whose tag name is "tr" */ + if($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') { + /* Clear the stack back to a table body context. */ + $this->clearStackToTableContext($clear); + + /* Insert a tr element for the token, then switch the insertion + mode to "in row". 
*/ + $this->insertElement($token); + $this->mode = self::IN_ROW; + + /* A start tag whose tag name is one of: "th", "td" */ + } elseif($token['type'] === HTML5::STARTTAG && + ($token['name'] === 'th' || $token['name'] === 'td')) { + /* Parse error. Act as if a start tag with the tag name "tr" had + been seen, then reprocess the current token. */ + $this->inTableBody(array( + 'name' => 'tr', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + return $this->inRow($token); + + /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ + } elseif($token['type'] === HTML5::ENDTAG && + in_array($token['name'], array('tbody', 'tfoot', 'thead'))) { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. */ + if(!$this->elementInScope($token['name'], true)) { + // Ignore + + /* Otherwise: */ + } else { + /* Clear the stack back to a table body context. */ + $this->clearStackToTableContext($clear); + + /* Pop the current node from the stack of open elements. Switch + the insertion mode to "in table". */ + array_pop($this->stack); + $this->mode = self::IN_TABLE; + } + + /* A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */ + } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('caption', 'col', 'colgroup', 'tbody', 'tfoor', 'thead'))) || + ($token['type'] === HTML5::STARTTAG && $token['name'] === 'table')) { + /* If the stack of open elements does not have a tbody, thead, or + tfoot element in table scope, this is a parse error. Ignore the + token. (innerHTML case) */ + if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) { + // Ignore. + + /* Otherwise: */ + } else { + /* Clear the stack back to a table body context. 
*/ + $this->clearStackToTableContext($clear); + + /* Act as if an end tag with the same tag name as the current + node ("tbody", "tfoot", or "thead") had been seen, then + reprocess the current token. */ + $this->inTableBody(array( + 'name' => end($this->stack)->nodeName, + 'type' => HTML5::ENDTAG + )); + + return $this->mainPhase($token); + } + + /* An end tag whose tag name is one of: "body", "caption", "col", + "colgroup", "html", "td", "th", "tr" */ + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], + array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) { + /* Parse error. Ignore the token. */ + + /* Anything else */ + } else { + /* Process the token as if the insertion mode was "in table". */ + $this->inTable($token); + } + } + + private function inRow($token) + { + $clear = array('tr', 'html'); + + /* A start tag whose tag name is one of: "th", "td" */ + if($token['type'] === HTML5::STARTTAG && + ($token['name'] === 'th' || $token['name'] === 'td')) { + /* Clear the stack back to a table row context. */ + $this->clearStackToTableContext($clear); + + /* Insert an HTML element for the token, then switch the insertion + mode to "in cell". */ + $this->insertElement($token); + $this->mode = self::IN_CELL; + + /* Insert a marker at the end of the list of active formatting + elements. */ + $this->a_formatting[] = self::MARKER; + + /* An end tag whose tag name is "tr" */ + } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. (innerHTML case) */ + if(!$this->elementInScope($token['name'], true)) { + // Ignore. + + /* Otherwise: */ + } else { + /* Clear the stack back to a table row context. */ + $this->clearStackToTableContext($clear); + + /* Pop the current node (which will be a tr element) from the + stack of open elements. 
Switch the insertion mode to "in table + body". */ + array_pop($this->stack); + $this->mode = self::IN_TBODY; + } + + /* A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */ + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) { + /* Act as if an end tag with the tag name "tr" had been seen, then, + if that token wasn't ignored, reprocess the current token. */ + $this->inRow(array( + 'name' => 'tr', + 'type' => HTML5::ENDTAG + )); + + return $this->inCell($token); + + /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ + } elseif($token['type'] === HTML5::ENDTAG && + in_array($token['name'], array('tbody', 'tfoot', 'thead'))) { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. */ + if(!$this->elementInScope($token['name'], true)) { + // Ignore. + + /* Otherwise: */ + } else { + /* Otherwise, act as if an end tag with the tag name "tr" had + been seen, then reprocess the current token. */ + $this->inRow(array( + 'name' => 'tr', + 'type' => HTML5::ENDTAG + )); + + return $this->inCell($token); + } + + /* An end tag whose tag name is one of: "body", "caption", "col", + "colgroup", "html", "td", "th" */ + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], + array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) { + /* Parse error. Ignore the token. */ + + /* Anything else */ + } else { + /* Process the token as if the insertion mode was "in table". 
*/ + $this->inTable($token); + } + } + + private function inCell($token) + { + /* An end tag whose tag name is one of: "td", "th" */ + if($token['type'] === HTML5::ENDTAG && + ($token['name'] === 'td' || $token['name'] === 'th')) { + /* If the stack of open elements does not have an element in table + scope with the same tag name as that of the token, then this is a + parse error and the token must be ignored. */ + if(!$this->elementInScope($token['name'], true)) { + // Ignore. + + /* Otherwise: */ + } else { + /* Generate implied end tags, except for elements with the same + tag name as the token. */ + $this->generateImpliedEndTags(array($token['name'])); + + /* Now, if the current node is not an element with the same tag + name as the token, then this is a parse error. */ + // k + + /* Pop elements from this stack until an element with the same + tag name as the token has been popped from the stack. */ + while(true) { + $node = end($this->stack)->nodeName; + array_pop($this->stack); + + if($node === $token['name']) { + break; + } + } + + /* Clear the list of active formatting elements up to the last + marker. */ + $this->clearTheActiveFormattingElementsUpToTheLastMarker(); + + /* Switch the insertion mode to "in row". (The current node + will be a tr element at this point.) */ + $this->mode = self::IN_ROW; + } + + /* A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "td", "tfoot", "th", "thead", "tr" */ + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', + 'thead', 'tr'))) { + /* If the stack of open elements does not have a td or th element + in table scope, then this is a parse error; ignore the token. + (innerHTML case) */ + if(!$this->elementInScope(array('td', 'th'), true)) { + // Ignore. + + /* Otherwise, close the cell (see below) and reprocess the current + token. 
*/ + } else { + $this->closeCell(); + return $this->inRow($token); + } + + /* A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "td", "tfoot", "th", "thead", "tr" */ + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', + 'thead', 'tr'))) { + /* If the stack of open elements does not have a td or th element + in table scope, then this is a parse error; ignore the token. + (innerHTML case) */ + if(!$this->elementInScope(array('td', 'th'), true)) { + // Ignore. + + /* Otherwise, close the cell (see below) and reprocess the current + token. */ + } else { + $this->closeCell(); + return $this->inRow($token); + } + + /* An end tag whose tag name is one of: "body", "caption", "col", + "colgroup", "html" */ + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], + array('body', 'caption', 'col', 'colgroup', 'html'))) { + /* Parse error. Ignore the token. */ + + /* An end tag whose tag name is one of: "table", "tbody", "tfoot", + "thead", "tr" */ + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], + array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { + /* If the stack of open elements does not have an element in table + scope with the same tag name as that of the token (which can only + happen for "tbody", "tfoot" and "thead", or, in the innerHTML case), + then this is a parse error and the token must be ignored. */ + if(!$this->elementInScope($token['name'], true)) { + // Ignore. + + /* Otherwise, close the cell (see below) and reprocess the current + token. */ + } else { + $this->closeCell(); + return $this->inRow($token); + } + + /* Anything else */ + } else { + /* Process the token as if the insertion mode was "in body". 
*/ + $this->inBody($token); + } + } + + private function inSelect($token) + { + /* Handle the token as follows: */ + + /* A character token */ + if($token['type'] === HTML5::CHARACTR) { + /* Append the token's character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $this->insertComment($token['data']); + + /* A start tag token whose tag name is "option" */ + } elseif($token['type'] === HTML5::STARTTAG && + $token['name'] === 'option') { + /* If the current node is an option element, act as if an end tag + with the tag name "option" had been seen. */ + if(end($this->stack)->nodeName === 'option') { + $this->inSelect(array( + 'name' => 'option', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* A start tag token whose tag name is "optgroup" */ + } elseif($token['type'] === HTML5::STARTTAG && + $token['name'] === 'optgroup') { + /* If the current node is an option element, act as if an end tag + with the tag name "option" had been seen. */ + if(end($this->stack)->nodeName === 'option') { + $this->inSelect(array( + 'name' => 'option', + 'type' => HTML5::ENDTAG + )); + } + + /* If the current node is an optgroup element, act as if an end tag + with the tag name "optgroup" had been seen. */ + if(end($this->stack)->nodeName === 'optgroup') { + $this->inSelect(array( + 'name' => 'optgroup', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. 
*/ + $this->insertElement($token); + + /* An end tag token whose tag name is "optgroup" */ + } elseif($token['type'] === HTML5::ENDTAG && + $token['name'] === 'optgroup') { + /* First, if the current node is an option element, and the node + immediately before it in the stack of open elements is an optgroup + element, then act as if an end tag with the tag name "option" had + been seen. */ + $elements_in_stack = count($this->stack); + + if($this->stack[$elements_in_stack - 1]->nodeName === 'option' && + $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup') { + $this->inSelect(array( + 'name' => 'option', + 'type' => HTML5::ENDTAG + )); + } + + /* If the current node is an optgroup element, then pop that node + from the stack of open elements. Otherwise, this is a parse error, + ignore the token. */ + if($this->stack[$elements_in_stack - 1] === 'optgroup') { + array_pop($this->stack); + } + + /* An end tag token whose tag name is "option" */ + } elseif($token['type'] === HTML5::ENDTAG && + $token['name'] === 'option') { + /* If the current node is an option element, then pop that node + from the stack of open elements. Otherwise, this is a parse error, + ignore the token. */ + if(end($this->stack)->nodeName === 'option') { + array_pop($this->stack); + } + + /* An end tag whose tag name is "select" */ + } elseif($token['type'] === HTML5::ENDTAG && + $token['name'] === 'select') { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. (innerHTML case) */ + if(!$this->elementInScope($token['name'], true)) { + // w/e + + /* Otherwise: */ + } else { + /* Pop elements from the stack of open elements until a select + element has been popped from the stack. */ + while(true) { + $current = end($this->stack)->nodeName; + array_pop($this->stack); + + if($current === 'select') { + break; + } + } + + /* Reset the insertion mode appropriately. 
*/ + $this->resetInsertionMode(); + } + + /* A start tag whose tag name is "select" */ + } elseif($token['name'] === 'select' && + $token['type'] === HTML5::STARTTAG) { + /* Parse error. Act as if the token had been an end tag with the + tag name "select" instead. */ + $this->inSelect(array( + 'name' => 'select', + 'type' => HTML5::ENDTAG + )); + + /* An end tag whose tag name is one of: "caption", "table", "tbody", + "tfoot", "thead", "tr", "td", "th" */ + } elseif(in_array($token['name'], array('caption', 'table', 'tbody', + 'tfoot', 'thead', 'tr', 'td', 'th')) && $token['type'] === HTML5::ENDTAG) { + /* Parse error. */ + // w/e + + /* If the stack of open elements has an element in table scope with + the same tag name as that of the token, then act as if an end tag + with the tag name "select" had been seen, and reprocess the token. + Otherwise, ignore the token. */ + if($this->elementInScope($token['name'], true)) { + $this->inSelect(array( + 'name' => 'select', + 'type' => HTML5::ENDTAG + )); + + $this->mainPhase($token); + } + + /* Anything else */ + } else { + /* Parse error. Ignore the token. */ + } + } + + private function afterBody($token) + { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + if($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Process the token as it would be processed if the insertion mode + was "in body". */ + $this->inBody($token); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the first element in the stack of open + elements (the html element), with the data attribute set to the + data given in the comment token. 
*/ + $comment = $this->dom->createComment($token['data']); + $this->stack[0]->appendChild($comment); + + /* An end tag with the tag name "html" */ + } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') { + /* If the parser was originally created in order to handle the + setting of an element's innerHTML attribute, this is a parse error; + ignore the token. (The element will be an html element in this + case.) (innerHTML case) */ + + /* Otherwise, switch to the trailing end phase. */ + $this->phase = self::END_PHASE; + + /* Anything else */ + } else { + /* Parse error. Set the insertion mode to "in body" and reprocess + the token. */ + $this->mode = self::IN_BODY; + return $this->inBody($token); + } + } + + private function inFrameset($token) + { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */ + if($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append the character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $this->insertComment($token['data']); + + /* A start tag with the tag name "frameset" */ + } elseif($token['name'] === 'frameset' && + $token['type'] === HTML5::STARTTAG) { + $this->insertElement($token); + + /* An end tag with the tag name "frameset" */ + } elseif($token['name'] === 'frameset' && + $token['type'] === HTML5::ENDTAG) { + /* If the current node is the root html element, then this is a + parse error; ignore the token. (innerHTML case) */ + if(end($this->stack)->nodeName === 'html') { + // Ignore + + } else { + /* Otherwise, pop the current node from the stack of open + elements. 
*/ + array_pop($this->stack); + + /* If the parser was not originally created in order to handle + the setting of an element's innerHTML attribute (innerHTML case), + and the current node is no longer a frameset element, then change + the insertion mode to "after frameset". */ + $this->mode = self::AFTR_FRAME; + } + + /* A start tag with the tag name "frame" */ + } elseif($token['name'] === 'frame' && + $token['type'] === HTML5::STARTTAG) { + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Immediately pop the current node off the stack of open elements. */ + array_pop($this->stack); + + /* A start tag with the tag name "noframes" */ + } elseif($token['name'] === 'noframes' && + $token['type'] === HTML5::STARTTAG) { + /* Process the token as if the insertion mode had been "in body". */ + $this->inBody($token); + + /* Anything else */ + } else { + /* Parse error. Ignore the token. */ + } + } + + private function afterFrameset($token) + { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */ + if($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append the character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $this->insertComment($token['data']); + + /* An end tag with the tag name "html" */ + } elseif($token['name'] === 'html' && + $token['type'] === HTML5::ENDTAG) { + /* Switch to the trailing end phase. 
*/ + $this->phase = self::END_PHASE; + + /* A start tag with the tag name "noframes" */ + } elseif($token['name'] === 'noframes' && + $token['type'] === HTML5::STARTTAG) { + /* Process the token as if the insertion mode had been "in body". */ + $this->inBody($token); + + /* Anything else */ + } else { + /* Parse error. Ignore the token. */ + } + } + + private function trailingEndPhase($token) + { + /* After the main phase, as each token is emitted from the tokenisation + stage, it must be processed as described in this section. */ + + /* A DOCTYPE token */ + if($token['type'] === HTML5::DOCTYPE) { + // Parse error. Ignore the token. + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the Document object with the data + attribute set to the data given in the comment token. */ + $comment = $this->dom->createComment($token['data']); + $this->dom->appendChild($comment); + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + } elseif($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Process the token as it would be processed in the main phase. */ + $this->mainPhase($token); + + /* A character token that is not one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE. Or a start tag token. Or an end tag token. */ + } elseif(($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || + $token['type'] === HTML5::STARTTAG || $token['type'] === HTML5::ENDTAG) { + /* Parse error. Switch back to the main phase and reprocess the + token. */ + $this->phase = self::MAIN_PHASE; + return $this->mainPhase($token); + + /* An end-of-file token */ + } elseif($token['type'] === HTML5::EOF) { + /* OMG DONE!! 
*/ + } + } + + private function insertElement($token, $append = true) + { + $el = $this->dom->createElement($token['name']); + + foreach($token['attr'] as $attr) { + if(!$el->hasAttribute($attr['name'])) { + $el->setAttribute($attr['name'], $attr['value']); + } + } + + $this->appendToRealParent($el); + $this->stack[] = $el; + + return $el; + } + + private function insertText($data) + { + $text = $this->dom->createTextNode($data); + $this->appendToRealParent($text); + } + + private function insertComment($data) + { + $comment = $this->dom->createComment($data); + $this->appendToRealParent($comment); + } + + private function appendToRealParent($node) + { + if($this->foster_parent === null) { + end($this->stack)->appendChild($node); + + } elseif($this->foster_parent !== null) { + /* If the foster parent element is the parent element of the + last table element in the stack of open elements, then the new + node must be inserted immediately before the last table element + in the stack of open elements in the foster parent element; + otherwise, the new node must be appended to the foster parent + element. */ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->stack[$n]->nodeName === 'table' && + $this->stack[$n]->parentNode !== null) { + $table = $this->stack[$n]; + break; + } + } + + if(isset($table) && $this->foster_parent->isSameNode($table->parentNode)) + $this->foster_parent->insertBefore($node, $table); + else + $this->foster_parent->appendChild($node); + + $this->foster_parent = null; + } + } + + private function elementInScope($el, $table = false) + { + if(is_array($el)) { + foreach($el as $element) { + if($this->elementInScope($element, $table)) { + return true; + } + } + + return false; + } + + $leng = count($this->stack); + + for($n = 0; $n < $leng; $n++) { + /* 1. Initialise node to be the current node (the bottommost node of + the stack). */ + $node = $this->stack[$leng - 1 - $n]; + + if($node->tagName === $el) { + /* 2. 
If node is the target node, terminate in a match state. */ + return true; + + } elseif($node->tagName === 'table') { + /* 3. Otherwise, if node is a table element, terminate in a failure + state. */ + return false; + + } elseif($table === true && in_array($node->tagName, array('caption', 'td', + 'th', 'button', 'marquee', 'object'))) { + /* 4. Otherwise, if the algorithm is the "has an element in scope" + variant (rather than the "has an element in table scope" variant), + and node is one of the following, terminate in a failure state. */ + return false; + + } elseif($node === $node->ownerDocument->documentElement) { + /* 5. Otherwise, if node is an html element (root element), terminate + in a failure state. (This can only happen if the node is the topmost + node of the stack of open elements, and prevents the next step from + being invoked if there are no more elements in the stack.) */ + return false; + } + + /* Otherwise, set node to the previous entry in the stack of open + elements and return to step 2. (This will never fail, since the loop + will always terminate in the previous step if the top of the stack + is reached.) */ + } + } + + private function reconstructActiveFormattingElements() + { + /* 1. If there are no entries in the list of active formatting elements, + then there is nothing to reconstruct; stop this algorithm. */ + $formatting_elements = count($this->a_formatting); + + if($formatting_elements === 0) { + return false; + } + + /* 3. Let entry be the last (most recently added) element in the list + of active formatting elements. */ + $entry = end($this->a_formatting); + + /* 2. If the last (most recently added) entry in the list of active + formatting elements is a marker, or if it is an element that is in the + stack of open elements, then there is nothing to reconstruct; stop this + algorithm. 
*/ + if($entry === self::MARKER || in_array($entry, $this->stack, true)) { + return false; + } + + for($a = $formatting_elements - 1; $a >= 0; true) { + /* 4. If there are no entries before entry in the list of active + formatting elements, then jump to step 8. */ + if($a === 0) { + $step_seven = false; + break; + } + + /* 5. Let entry be the entry one earlier than entry in the list of + active formatting elements. */ + $a--; + $entry = $this->a_formatting[$a]; + + /* 6. If entry is neither a marker nor an element that is also in + thetack of open elements, go to step 4. */ + if($entry === self::MARKER || in_array($entry, $this->stack, true)) { + break; + } + } + + while(true) { + /* 7. Let entry be the element one later than entry in the list of + active formatting elements. */ + if(isset($step_seven) && $step_seven === true) { + $a++; + $entry = $this->a_formatting[$a]; + } + + /* 8. Perform a shallow clone of the element entry to obtain clone. */ + $clone = $entry->cloneNode(); + + /* 9. Append clone to the current node and push it onto the stack + of open elements so that it is the new current node. */ + end($this->stack)->appendChild($clone); + $this->stack[] = $clone; + + /* 10. Replace the entry for entry in the list with an entry for + clone. */ + $this->a_formatting[$a] = $clone; + + /* 11. If the entry for clone in the list of active formatting + elements is not the last entry in the list, return to step 7. */ + if(end($this->a_formatting) !== $clone) { + $step_seven = true; + } else { + break; + } + } + } + + private function clearTheActiveFormattingElementsUpToTheLastMarker() + { + /* When the steps below require the UA to clear the list of active + formatting elements up to the last marker, the UA must perform the + following steps: */ + + while(true) { + /* 1. Let entry be the last (most recently added) entry in the list + of active formatting elements. */ + $entry = end($this->a_formatting); + + /* 2. 
Remove entry from the list of active formatting elements. */ + array_pop($this->a_formatting); + + /* 3. If entry was a marker, then stop the algorithm at this point. + The list has been cleared up to the last marker. */ + if($entry === self::MARKER) { + break; + } + } + } + + private function generateImpliedEndTags(array $exclude = array()) + { + /* When the steps below require the UA to generate implied end tags, + then, if the current node is a dd element, a dt element, an li element, + a p element, a td element, a th element, or a tr element, the UA must + act as if an end tag with the respective tag name had been seen and + then generate implied end tags again. */ + $node = end($this->stack); + $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude); + + while(in_array(end($this->stack)->nodeName, $elements)) { + array_pop($this->stack); + } + } + + private function getElementCategory($name) + { + if(in_array($name, $this->special)) + return self::SPECIAL; + + elseif(in_array($name, $this->scoping)) + return self::SCOPING; + + elseif(in_array($name, $this->formatting)) + return self::FORMATTING; + + else + return self::PHRASING; + } + + private function clearStackToTableContext($elements) + { + /* When the steps above require the UA to clear the stack back to a + table context, it means that the UA must, while the current node is not + a table element or an html element, pop elements from the stack of open + elements. If this causes any elements to be popped from the stack, then + this is a parse error. */ + while(true) { + $node = end($this->stack)->nodeName; + + if(in_array($node, $elements)) { + break; + } else { + array_pop($this->stack); + } + } + } + + private function resetInsertionMode() + { + /* 1. Let last be false. */ + $last = false; + $leng = count($this->stack); + + for($n = $leng - 1; $n >= 0; $n--) { + /* 2. Let node be the last node in the stack of open elements. */ + $node = $this->stack[$n]; + + /* 3. 
If node is the first node in the stack of open elements, then + set last to true. If the element whose innerHTML attribute is being + set is neither a td element nor a th element, then set node to the + element whose innerHTML attribute is being set. (innerHTML case) */ + if($this->stack[0]->isSameNode($node)) { + $last = true; + } + + /* 4. If node is a select element, then switch the insertion mode to + "in select" and abort these steps. (innerHTML case) */ + if($node->nodeName === 'select') { + $this->mode = self::IN_SELECT; + break; + + /* 5. If node is a td or th element, then switch the insertion mode + to "in cell" and abort these steps. */ + } elseif($node->nodeName === 'td' || $node->nodeName === 'th') { + $this->mode = self::IN_CELL; + break; + + /* 6. If node is a tr element, then switch the insertion mode to + "in row" and abort these steps. */ + } elseif($node->nodeName === 'tr') { + $this->mode = self::IN_ROW; + break; + + /* 7. If node is a tbody, thead, or tfoot element, then switch the + insertion mode to "in table body" and abort these steps. */ + } elseif(in_array($node->nodeName, array('tbody', 'thead', 'tfoot'))) { + $this->mode = self::IN_TBODY; + break; + + /* 8. If node is a caption element, then switch the insertion mode + to "in caption" and abort these steps. */ + } elseif($node->nodeName === 'caption') { + $this->mode = self::IN_CAPTION; + break; + + /* 9. If node is a colgroup element, then switch the insertion mode + to "in column group" and abort these steps. (innerHTML case) */ + } elseif($node->nodeName === 'colgroup') { + $this->mode = self::IN_CGROUP; + break; + + /* 10. If node is a table element, then switch the insertion mode + to "in table" and abort these steps. */ + } elseif($node->nodeName === 'table') { + $this->mode = self::IN_TABLE; + break; + + /* 11. If node is a head element, then switch the insertion mode + to "in body" ("in body"! not "in head"!) and abort these steps. 
+ (innerHTML case) */ + } elseif($node->nodeName === 'head') { + $this->mode = self::IN_BODY; + break; + + /* 12. If node is a body element, then switch the insertion mode to + "in body" and abort these steps. */ + } elseif($node->nodeName === 'body') { + $this->mode = self::IN_BODY; + break; + + /* 13. If node is a frameset element, then switch the insertion + mode to "in frameset" and abort these steps. (innerHTML case) */ + } elseif($node->nodeName === 'frameset') { + $this->mode = self::IN_FRAME; + break; + + /* 14. If node is an html element, then: if the head element + pointer is null, switch the insertion mode to "before head", + otherwise, switch the insertion mode to "after head". In either + case, abort these steps. (innerHTML case) */ + } elseif($node->nodeName === 'html') { + $this->mode = ($this->head_pointer === null) + ? self::BEFOR_HEAD + : self::AFTER_HEAD; + + break; + + /* 15. If last is true, then set the insertion mode to "in body" + and abort these steps. (innerHTML case) */ + } elseif($last) { + $this->mode = self::IN_BODY; + break; + } + } + } + + private function closeCell() + { + /* If the stack of open elements has a td or th element in table scope, + then act as if an end tag token with that tag name had been seen. 
*/ + foreach(array('td', 'th') as $cell) { + if($this->elementInScope($cell, true)) { + $this->inCell(array( + 'name' => $cell, + 'type' => HTML5::ENDTAG + )); + + break; + } + } + } + + public function save() + { + return $this->dom; + } +} diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/add-vimline.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/add-vimline.php new file mode 100644 index 0000000000..d6a8eb202a --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/add-vimline.php @@ -0,0 +1,130 @@ +#!/usr/bin/php +globr('.', '*'); +foreach ($files as $file) { + if ( + !is_file($file) || + prefix_is('./docs/doxygen', $file) || + prefix_is('./library/standalone', $file) || + prefix_is('./docs/specimens', $file) || + postfix_is('.ser', $file) || + postfix_is('.tgz', $file) || + postfix_is('.patch', $file) || + postfix_is('.dtd', $file) || + postfix_is('.ent', $file) || + postfix_is('.png', $file) || + postfix_is('.ico', $file) || + // wontfix + postfix_is('.vtest', $file) || + postfix_is('.svg', $file) || + postfix_is('.phpt', $file) || + postfix_is('VERSION', $file) || + postfix_is('WHATSNEW', $file) || + postfix_is('configdoc/usage.xml', $file) || + postfix_is('library/HTMLPurifier.includes.php', $file) || + postfix_is('library/HTMLPurifier.safe-includes.php', $file) || + postfix_is('smoketests/xssAttacks.xml', $file) || + // phpt files + postfix_is('.diff', $file) || + postfix_is('.exp', $file) || + postfix_is('.log', $file) || + postfix_is('.out', $file) || + + $file == './library/HTMLPurifier/Lexer/PH5P.php' || + $file == './maintenance/PH5P.php' + ) continue; + $ext = strrchr($file, '.'); + if ( + postfix_is('README', $file) || + postfix_is('LICENSE', $file) || + postfix_is('CREDITS', $file) || + postfix_is('INSTALL', $file) || + postfix_is('NEWS', $file) || + postfix_is('TODO', $file) || + postfix_is('WYSIWYG', $file) || + postfix_is('Changelog', $file) + ) $ext 
= '.txt'; + if (postfix_is('Doxyfile', $file)) $ext = 'Doxyfile'; + if (postfix_is('.php.in', $file)) $ext = '.php'; + $no_nl = false; + switch ($ext) { + case '.php': + case '.inc': + case '.js': + $line = '// %s'; + break; + case '.html': + case '.xsl': + case '.xml': + case '.htc': + $line = ""; + break; + case '.htmlt': + $no_nl = true; + $line = '--# %s'; + break; + case '.ini': + $line = '; %s'; + break; + case '.css': + $line = '/* %s */'; + break; + case '.bat': + $line = 'rem %s'; + break; + case '.txt': + case '.utf8': + if ( + prefix_is('./library/HTMLPurifier/ConfigSchema', $file) || + prefix_is('./smoketests/test-schema', $file) || + prefix_is('./tests/HTMLPurifier/StringHashParser', $file) + ) { + $no_nl = true; + $line = '--# %s'; + } else { + $line = ' %s'; + } + break; + case 'Doxyfile': + $line = '# %s'; + break; + default: + throw new Exception('Unknown file: ' . $file); + } + + echo "$file\n"; + $contents = file_get_contents($file); + + $regex = '~' . str_replace('%s', 'vim: .+', preg_quote($line, '~')) . '~m'; + $contents = preg_replace($regex, '', $contents); + + $contents = rtrim($contents); + + if (strpos($contents, "\r\n") !== false) $nl = "\r\n"; + elseif (strpos($contents, "\n") !== false) $nl = "\n"; + elseif (strpos($contents, "\r") !== false) $nl = "\r"; + else $nl = PHP_EOL; + + if (!$no_nl) $contents .= $nl; + $contents .= $nl . str_replace('%s', $vimline, $line) . $nl; + + file_put_contents($file, $contents); + +} + +// vim: et sw=4 sts=4 diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/common.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/common.php new file mode 100644 index 0000000000..342bc205ab --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/common.php @@ -0,0 +1,25 @@ +docs/doxygen/info.log 2>docs/doxygen/errors.log +if [ "$?" 
!= 0 ]; then + cat docs/doxygen/errors.log + exit +fi +cd docs +tar czf doxygen.tgz doxygen diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/config-scanner.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/config-scanner.php new file mode 100644 index 0000000000..c614d1fbc2 --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/config-scanner.php @@ -0,0 +1,155 @@ +#!/usr/bin/php +globr('.', '*.php'); +$files = array(); +foreach ($raw_files as $file) { + $file = substr($file, 2); // rm leading './' + if (strncmp('standalone/', $file, 11) === 0) continue; // rm generated files + if (substr_count($file, '.') > 1) continue; // rm meta files + $files[] = $file; +} + +/** + * Moves the $i cursor to the next non-whitespace token + */ +function consumeWhitespace($tokens, &$i) +{ + do {$i++;} while (is_array($tokens[$i]) && $tokens[$i][0] === T_WHITESPACE); +} + +/** + * Tests whether or not a token is a particular type. 
There are three run-cases: + * - ($token, $expect_token): tests if the token is $expect_token type; + * - ($token, $expect_value): tests if the token is the string $expect_value; + * - ($token, $expect_token, $expect_value): tests if token is $expect_token type, and + * its string representation is $expect_value + */ +function testToken($token, $value_or_token, $value = null) +{ + if (is_null($value)) { + if (is_int($value_or_token)) return is_array($token) && $token[0] === $value_or_token; + else return $token === $value_or_token; + } else { + return is_array($token) && $token[0] === $value_or_token && $token[1] === $value; + } +} + +$counter = 0; +$full_counter = 0; +$tracker = array(); + +foreach ($files as $file) { + $tokens = token_get_all(file_get_contents($file)); + $file = str_replace('\\', '/', $file); + for ($i = 0, $c = count($tokens); $i < $c; $i++) { + $ok = false; + // Match $config + if (!$ok && testToken($tokens[$i], T_VARIABLE, '$config')) $ok = true; + // Match $this->config + while (!$ok && testToken($tokens[$i], T_VARIABLE, '$this')) { + consumeWhitespace($tokens, $i); + if (!testToken($tokens[$i], T_OBJECT_OPERATOR)) break; + consumeWhitespace($tokens, $i); + if (testToken($tokens[$i], T_STRING, 'config')) $ok = true; + break; + } + if (!$ok) continue; + + $ok = false; + for($i++; $i < $c; $i++) { + if ($tokens[$i] === ',' || $tokens[$i] === ')' || $tokens[$i] === ';') { + break; + } + if (is_string($tokens[$i])) continue; + if ($tokens[$i][0] === T_OBJECT_OPERATOR) { + $ok = true; + break; + } + } + if (!$ok) continue; + + $line = $tokens[$i][2]; + + consumeWhitespace($tokens, $i); + if (!testToken($tokens[$i], T_STRING, 'get')) continue; + + consumeWhitespace($tokens, $i); + if (!testToken($tokens[$i], '(')) continue; + + $full_counter++; + + $matched = false; + do { + + // What we currently don't match are batch retrievals, and + // wildcard retrievals. 
This data might be useful in the future, + // which is why we have a do {} while loop that doesn't actually + // do anything. + + consumeWhitespace($tokens, $i); + if (!testToken($tokens[$i], T_CONSTANT_ENCAPSED_STRING)) continue; + $id = substr($tokens[$i][1], 1, -1); + + $counter++; + $matched = true; + + if (!isset($tracker[$id])) $tracker[$id] = array(); + if (!isset($tracker[$id][$file])) $tracker[$id][$file] = array(); + $tracker[$id][$file][] = $line; + + } while (0); + + //echo "$file:$line uses $namespace.$directive\n"; + } +} + +echo "\n$counter/$full_counter instances of \$config or \$this->config found in source code.\n"; + +echo "Generating XML... "; + +$xw = new XMLWriter(); +$xw->openURI('../configdoc/usage.xml'); +$xw->setIndent(true); +$xw->startDocument('1.0', 'UTF-8'); +$xw->startElement('usage'); +foreach ($tracker as $id => $files) { + $xw->startElement('directive'); + $xw->writeAttribute('id', $id); + foreach ($files as $file => $lines) { + $xw->startElement('file'); + $xw->writeAttribute('name', $file); + foreach ($lines as $line) { + $xw->writeElement('line', $line); + } + $xw->endElement(); + } + $xw->endElement(); +} +$xw->endElement(); +$xw->flush(); + +echo "done!\n"; + +// vim: et sw=4 sts=4 diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/flush-definition-cache.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/flush-definition-cache.php new file mode 100644 index 0000000000..138badb659 --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/flush-definition-cache.php @@ -0,0 +1,42 @@ +#!/usr/bin/php +flush($config); +} + +echo "Cache flushed successfully.\n"; + +// vim: et sw=4 sts=4 diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/flush.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/flush.php new file mode 100644 index 0000000000..c0853d230b --- /dev/null +++ 
b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/flush.php @@ -0,0 +1,30 @@ +#!/usr/bin/php +/'; + +foreach ( $entity_files as $file ) { + $contents = file_get_contents($entity_dir . $file); + $matches = array(); + preg_match_all($regexp, $contents, $matches, PREG_SET_ORDER); + foreach ($matches as $match) { + $entity_table[$match[1]] = unichr($match[2]); + } +} + +$output = serialize($entity_table); + +$fh = fopen($output_file, 'w'); +fwrite($fh, $output); +fclose($fh); + +echo "Completed successfully."; + +// vim: et sw=4 sts=4 diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/generate-includes.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/generate-includes.php new file mode 100644 index 0000000000..01e1c2abab --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/generate-includes.php @@ -0,0 +1,192 @@ +#!/usr/bin/php +globr('.', '*.php'); +if (!$raw_files) throw new Exception('Did not find any PHP source files'); +$files = array(); +foreach ($raw_files as $file) { + $file = substr($file, 2); // rm leading './' + if (strncmp('standalone/', $file, 11) === 0) continue; // rm generated files + if (substr_count($file, '.') > 1) continue; // rm meta files + $ok = true; + foreach ($exclude_dirs as $dir) { + if (strncmp($dir, $file, strlen($dir)) === 0) { + $ok = false; + break; + } + } + if (!$ok) continue; // rm excluded directories + if (in_array($file, $exclude_files)) continue; // rm excluded files + $files[] = $file; +} +echo "done!\n"; + +// Reorder list so that dependencies are included first: + +/** + * Returns a lookup array of dependencies for a file. + * + * @note This function expects that format $name extends $parent on one line + * + * @param string $file + * File to check dependencies of. + * @return array + * Lookup array of files the file is dependent on, sorted accordingly. 
+ */ +function get_dependency_lookup($file) +{ + static $cache = array(); + if (isset($cache[$file])) return $cache[$file]; + if (!file_exists($file)) { + echo "File doesn't exist: $file\n"; + return array(); + } + $fh = fopen($file, 'r'); + $deps = array(); + while (!feof($fh)) { + $line = fgets($fh); + if (strncmp('class', $line, 5) === 0) { + // The implementation here is fragile and will break if we attempt + // to use interfaces. Beware! + $arr = explode(' extends ', trim($line, ' {'."\n\r"), 2); + if (count($arr) < 2) break; + $parent = $arr[1]; + $dep_file = HTMLPurifier_Bootstrap::getPath($parent); + if (!$dep_file) break; + $deps[$dep_file] = true; + break; + } + } + fclose($fh); + foreach (array_keys($deps) as $file) { + // Extra dependencies must come *before* base dependencies + $deps = get_dependency_lookup($file) + $deps; + } + $cache[$file] = $deps; + return $deps; +} + +/** + * Sorts files based on dependencies. This function is lazy and will not + * group files with dependencies together; it will merely ensure that a file + * is never included before its dependencies are. + * + * @param $files + * Files array to sort. + * @return + * Sorted array ($files is not modified by reference!) 
+ */ +function dep_sort($files) +{ + $ret = array(); + $cache = array(); + foreach ($files as $file) { + if (isset($cache[$file])) continue; + $deps = get_dependency_lookup($file); + foreach (array_keys($deps) as $dep) { + if (!isset($cache[$dep])) { + $ret[] = $dep; + $cache[$dep] = true; + } + } + $cache[$file] = true; + $ret[] = $file; + } + return $ret; +} + +$files = dep_sort($files); + +// Build the actual include stub: + +$version = trim(file_get_contents('../VERSION')); + +// stub +$php = " PH5P.patch"); +unlink($newt); + +// vim: et sw=4 sts=4 diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/generate-schema-cache.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/generate-schema-cache.php new file mode 100644 index 0000000000..339ff12dae --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/generate-schema-cache.php @@ -0,0 +1,45 @@ +#!/usr/bin/php +buildDir($interchange); + +$loader = dirname(__FILE__) . '/../config-schema.php'; +if (file_exists($loader)) include $loader; +foreach ($_SERVER['argv'] as $i => $dir) { + if ($i === 0) continue; + $builder->buildDir($interchange, realpath($dir)); +} + +$interchange->validate(); + +$schema_builder = new HTMLPurifier_ConfigSchema_Builder_ConfigSchema(); +$schema = $schema_builder->build($interchange); + +echo "Saving schema... "; +file_put_contents($target, serialize($schema)); +echo "done!\n"; + +// vim: et sw=4 sts=4 diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/generate-standalone.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/generate-standalone.php new file mode 100644 index 0000000000..254d4d83bc --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/generate-standalone.php @@ -0,0 +1,159 @@ +#!/usr/bin/php +copyr($dir, 'standalone/' . 
$dir); +} + +/** + * Copies the contents of a file to the standalone directory + * @param string $file File to copy + */ +function make_file_standalone($file) +{ + global $FS; + $FS->mkdirr('standalone/' . dirname($file)); + copy_and_remove_includes($file, 'standalone/' . $file); + return true; +} + +/** + * Copies a file to another location recursively, if it is a PHP file + * remove includes + * @param string $file Original file + * @param string $sfile New location of file + */ +function copy_and_remove_includes($file, $sfile) +{ + $contents = file_get_contents($file); + if (strrchr($file, '.') === '.php') $contents = replace_includes($contents); + return file_put_contents($sfile, $contents); +} + +/** + * @param $matches preg_replace_callback matches array, where index 1 + * is the filename to include + */ +function replace_includes_callback($matches) +{ + $file = $matches[1]; + $preserve = array( + // PEAR (external) + 'XML/HTMLSax3.php' => 1 + ); + if (isset($preserve[$file])) { + return $matches[0]; + } + if (isset($GLOBALS['loaded'][$file])) return ''; + $GLOBALS['loaded'][$file] = true; + return replace_includes(remove_php_tags(file_get_contents($file))); +} + +echo 'Generating includes file... '; +shell_exec('php generate-includes.php'); +echo "done!\n"; + +chdir(dirname(__FILE__) . '/../library/'); + +echo 'Creating full file...'; +$contents = replace_includes(file_get_contents('HTMLPurifier.includes.php')); +$contents = str_replace( + // Note that bootstrap is now inside the standalone file + "define('HTMLPURIFIER_PREFIX', realpath(dirname(__FILE__) . '/..'));", + "define('HTMLPURIFIER_PREFIX', dirname(__FILE__) . '/standalone'); + set_include_path(HTMLPURIFIER_PREFIX . PATH_SEPARATOR . get_include_path());", + $contents +); +file_put_contents('HTMLPurifier.standalone.php', $contents); +echo ' done!' . 
PHP_EOL; + +echo 'Creating standalone directory...'; +$FS->rmdirr('standalone'); // ensure a clean copy + +// data files +$FS->mkdirr('standalone/HTMLPurifier/DefinitionCache/Serializer'); +make_file_standalone('HTMLPurifier/EntityLookup/entities.ser'); +make_file_standalone('HTMLPurifier/ConfigSchema/schema.ser'); + +// non-standard inclusion setup +make_dir_standalone('HTMLPurifier/ConfigSchema'); +make_dir_standalone('HTMLPurifier/Language'); +make_dir_standalone('HTMLPurifier/Filter'); +make_dir_standalone('HTMLPurifier/Printer'); +make_file_standalone('HTMLPurifier/Printer.php'); +make_file_standalone('HTMLPurifier/Lexer/PH5P.php'); + +echo ' done!' . PHP_EOL; + +// vim: et sw=4 sts=4 diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/merge-library.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/merge-library.php new file mode 100644 index 0000000000..de2eecdc08 --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/merge-library.php @@ -0,0 +1,11 @@ +#!/usr/bin/php +open('w'); + $multiline = false; + foreach ($hash as $key => $value) { + $multiline = $multiline || (strpos($value, "\n") !== false); + if ($multiline) { + $file->put("--$key--" . PHP_EOL); + $file->put(str_replace("\n", PHP_EOL, $value) . PHP_EOL); + } else { + if ($key == 'ID') { + $file->put("$value" . PHP_EOL); + } else { + $file->put("$key: $value" . 
PHP_EOL); + } + } + } + $file->close(); +} + +$schema = HTMLPurifier_ConfigSchema::instance(); +$adapter = new HTMLPurifier_ConfigSchema_StringHashReverseAdapter($schema); + +foreach ($schema->info as $ns => $ns_array) { + saveHash($adapter->get($ns)); + foreach ($ns_array as $dir => $x) { + saveHash($adapter->get($ns, $dir)); + } +} + +// vim: et sw=4 sts=4 diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/old-remove-require-once.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/old-remove-require-once.php new file mode 100644 index 0000000000..f47c7d0f1a --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/old-remove-require-once.php @@ -0,0 +1,32 @@ +#!/usr/bin/php +globr('.', '*.php'); +foreach ($files as $file) { + if (substr_count(basename($file), '.') > 1) continue; + $old_code = file_get_contents($file); + $new_code = preg_replace("#^require_once .+[\n\r]*#m", '', $old_code); + if ($old_code !== $new_code) { + file_put_contents($file, $new_code); + } +} + +// vim: et sw=4 sts=4 diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/old-remove-schema-def.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/old-remove-schema-def.php new file mode 100644 index 0000000000..5ae0319736 --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/old-remove-schema-def.php @@ -0,0 +1,32 @@ +#!/usr/bin/php +globr('.', '*.php'); +foreach ($files as $file) { + if (substr_count(basename($file), '.') > 1) continue; + $old_code = file_get_contents($file); + $new_code = preg_replace("#^HTMLPurifier_ConfigSchema::.+?\);[\n\r]*#ms", '', $old_code); + if ($old_code !== $new_code) { + file_put_contents($file, $new_code); + } + if (preg_match('#^\s+HTMLPurifier_ConfigSchema::#m', $new_code)) { + echo "Indented ConfigSchema call in $file\n"; + } +} + +// vim: et sw=4 sts=4 diff --git 
a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/regenerate-docs.sh b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/regenerate-docs.sh new file mode 100644 index 0000000000..6f4d720ff3 --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/regenerate-docs.sh @@ -0,0 +1,5 @@ +#!/bin/bash -e +./compile-doxygen.sh +cd ../docs +scp doxygen.tgz htmlpurifier.org:/home/ezyang/htmlpurifier.org +ssh htmlpurifier.org "cd /home/ezyang/htmlpurifier.org && ./reload-docs.sh" diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/remove-trailing-whitespace.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/remove-trailing-whitespace.php new file mode 100644 index 0000000000..857870546a --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/remove-trailing-whitespace.php @@ -0,0 +1,37 @@ +#!/usr/bin/php +globr('.', '{,.}*', GLOB_BRACE); +foreach ($files as $file) { + if ( + !is_file($file) || + prefix_is('./.git', $file) || + prefix_is('./docs/doxygen', $file) || + postfix_is('.ser', $file) || + postfix_is('.tgz', $file) || + postfix_is('.patch', $file) || + postfix_is('.dtd', $file) || + postfix_is('.ent', $file) || + $file == './library/HTMLPurifier/Lexer/PH5P.php' || + $file == './maintenance/PH5P.php' + ) continue; + $contents = file_get_contents($file); + $result = preg_replace('/^(.*?)[ \t]+(\r?)$/m', '\1\2', $contents, -1, $count); + if (!$count) continue; + echo "$file\n"; + file_put_contents($file, $result); +} + +// vim: et sw=4 sts=4 diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/rename-config.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/rename-config.php new file mode 100644 index 0000000000..6e59e2a791 --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/rename-config.php @@ -0,0 +1,84 @@ 
+#!/usr/bin/php +buildFile($interchange, $file); +$contents = file_get_contents($file); + +if (strpos($contents, "\r\n") !== false) { + $nl = "\r\n"; +} elseif (strpos($contents, "\r") !== false) { + $nl = "\r"; +} else { + $nl = "\n"; +} + +// replace name with new name +$contents = str_replace($old, $new, $contents); + +if ($interchange->directives[$old]->aliases) { + $pos_alias = strpos($contents, 'ALIASES:'); + $pos_ins = strpos($contents, $nl, $pos_alias); + if ($pos_ins === false) $pos_ins = strlen($contents); + $contents = + substr($contents, 0, $pos_ins) . ", $old" . substr($contents, $pos_ins); + file_put_contents($file, $contents); +} else { + $lines = explode($nl, $contents); + $insert = false; + foreach ($lines as $n => $line) { + if (strncmp($line, '--', 2) === 0) { + $insert = $n; + break; + } + } + if (!$insert) { + $lines[] = "ALIASES: $old"; + } else { + array_splice($lines, $insert, 0, "ALIASES: $old"); + } + file_put_contents($file, implode($nl, $lines)); +} + +rename("$old.txt", "$new.txt") || exit(1); diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/update-config.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/update-config.php new file mode 100644 index 0000000000..2d8a7a9c10 --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/maintenance/update-config.php @@ -0,0 +1,34 @@ +#!/usr/bin/php +set and $config->get to the new + * format, as described by docs/dev-config-bcbreaks.txt + */ + +$FS = new FSTools(); +chdir(dirname(__FILE__) . 
'/..'); +$raw_files = $FS->globr('.', '*.php'); +foreach ($raw_files as $file) { + $file = substr($file, 2); // rm leading './' + if (strpos($file, 'library/standalone/') === 0) continue; + if (strpos($file, 'maintenance/update-config.php') === 0) continue; + if (strpos($file, 'test-settings.php') === 0) continue; + if (substr_count($file, '.') > 1) continue; // rm meta files + // process the file + $contents = file_get_contents($file); + $contents = preg_replace( + "#config->(set|get)\('(.+?)', '(.+?)'#", + "config->\\1('\\2.\\3'", + $contents + ); + if ($contents === '') continue; + file_put_contents($file, $contents); +} + +// vim: et sw=4 sts=4 diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/test-settings.sample.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/test-settings.sample.php index 886b974867..480b66279b 100644 --- a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/test-settings.sample.php +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/test-settings.sample.php @@ -21,10 +21,8 @@ if ($data !== false && $data !== '') { // REQUIRED SETTINGS // Note on running SimpleTest: -// Because HTML Purifier is PHP5-only and E_STRICT compliant, SimpleTest -// 1.0.1 will not work; you need to run SimpleTest off its trunk using: -// -// $ svn co https://simpletest.svn.sourceforge.net/svnroot/simpletest/simpletest/trunk simpletest +// You want the Git copy of SimpleTest, found here: +// https://github.com/simpletest/simpletest/ // // If SimpleTest is borked with HTML Purifier, please contact me or // the SimpleTest devs; I am a developer for SimpleTest so I should be diff --git a/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/tests/path2class.func.php b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/tests/path2class.func.php new file mode 100644 index 0000000000..bf3aa735ab --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/ezyang/htmlpurifier/tests/path2class.func.php @@ -0,0 
+1,15 @@ + in the direct child selector + ([#322](https://github.com/jjriv/emogrifier/pull/322)) +- Ignore empty media queries + ([#307](https://github.com/jjriv/emogrifier/pull/307)) + ([#237](https://github.com/jjriv/emogrifier/issues/237)) +- Ignore pseudo-class when combined with pseudo-element + ([#308](https://github.com/jjriv/emogrifier/pull/308)) +- First-child and last-child selectors are broken + ([#293](https://github.com/jjriv/emogrifier/pull/293)) +- Second !important rule needs to overwrite the first one + ([#292](https://github.com/jjriv/emogrifier/pull/292)) + + + +## 1.0.0 (2015-10-15) ### Added - Add branch alias ([#231](https://github.com/jjriv/emogrifier/pull/231)) diff --git a/wcfsetup/install/files/lib/system/api/pelago/emogrifier/CODE_OF_CONDUCT.md b/wcfsetup/install/files/lib/system/api/pelago/emogrifier/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000..c7bc5d9e39 --- /dev/null +++ b/wcfsetup/install/files/lib/system/api/pelago/emogrifier/CODE_OF_CONDUCT.md @@ -0,0 +1,77 @@ +# Contributor Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, +body size, disability, ethnicity, gender identity and expression, level of +experience, nationality, personal appearance, race, religion, or sexual +identity and orientation. 
+ +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an +appointed representative at an online or offline event. Representation of a +project may be further defined and clarified by project maintainers. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at (emogrifier at myintervals dot com). +All complaints will be reviewed and investigated and will result in a response +that is deemed necessary and appropriate to the circumstances. The project team +is obligated to maintain confidentiality with regard to the reporter of an +incident. Further details of specific enforcement policies may be posted +separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 1.4, available at +[http://contributor-covenant.org/version/1/4/][version]. + +[homepage]: http://contributor-covenant.org +[version]: http://contributor-covenant.org/version/1/4/ diff --git a/wcfsetup/install/files/lib/system/api/pelago/emogrifier/CONTRIBUTING.md b/wcfsetup/install/files/lib/system/api/pelago/emogrifier/CONTRIBUTING.md index 95294bfb40..1bbe26ee82 100644 --- a/wcfsetup/install/files/lib/system/api/pelago/emogrifier/CONTRIBUTING.md +++ b/wcfsetup/install/files/lib/system/api/pelago/emogrifier/CONTRIBUTING.md @@ -5,6 +5,11 @@ clean-up to Emogrifier are more than welcome. When you contribute, please take the following things into account: +## Contributor Code of Conduct + +Please note that this project is released with a +[Contributor Code of Conduct](CODE_OF_CONDUCT.md). By participating in this +project, you agree to abide by its terms. 
## General workflow diff --git a/wcfsetup/install/files/lib/system/api/pelago/emogrifier/Classes/Emogrifier.php b/wcfsetup/install/files/lib/system/api/pelago/emogrifier/Classes/Emogrifier.php index ca82172314..73b0a66fe0 100644 --- a/wcfsetup/install/files/lib/system/api/pelago/emogrifier/Classes/Emogrifier.php +++ b/wcfsetup/install/files/lib/system/api/pelago/emogrifier/Classes/Emogrifier.php @@ -6,12 +6,13 @@ namespace Pelago; * * For more information, please see the README.md file. * - * @version 1.0.0 + * @version 1.1.0 * * @author Cameron Brooks * @author Jaime Prado * @author Oliver Klee * @author Roman Ožana + * @author Sander Kruger */ class Emogrifier { @@ -100,7 +101,7 @@ class Emogrifier private $allowedMediaTypes = ['all' => true, 'screen' => true, 'print' => true]; /** - * @var array[] + * @var mixed[] */ private $caches = [ self::CACHE_KEY_CSS => [], @@ -121,7 +122,7 @@ class Emogrifier * the styles to apply to the nodes with the XPath paths as array keys for the outer array * and the attribute names/values as key/value pairs for the inner array * - * @var array[] + * @var string[][] */ private $styleAttributesForNodes = []; @@ -153,6 +154,77 @@ class Emogrifier */ private $shouldKeepInvisibleNodes = true; + /** + * @var string[] + */ + private $xPathRules = [ + // child + '/\\s*>\\s*/' => '/', + // adjacent sibling + '/\\s+\\+\\s+/' => '/following-sibling::*[1]/self::', + // descendant + '/\\s+(?=.*[^\\]]{1}$)/' => '//', + // :first-child + '/([^\\/]+):first-child/i' => '*[1]/self::\\1', + // :last-child + '/([^\\/]+):last-child/i' => '*[last()]/self::\\1', + // attribute only + '/^\\[(\\w+|\\w+\\=[\'"]?\\w+[\'"]?)\\]/' => '*[@\\1]', + // attribute + '/(\\w)\\[(\\w+)\\]/' => '\\1[@\\2]', + // exact attribute + '/(\\w)\\[(\\w+)\\=[\'"]?([\\w\\s]+)[\'"]?\\]/' => '\\1[@\\2="\\3"]', + // element attribute~= + '/([\\w\\*]+)\\[(\\w+)[\\s]*\\~\\=[\\s]*[\'"]?([\\w-_\\/]+)[\'"]?\\]/' + => '\\1[contains(concat(" ", @\\2, " "), concat(" ", "\\3", " "))]', + 
// element attribute^= + '/([\\w\\*]+)\\[(\\w+)[\\s]*\\^\\=[\\s]*[\'"]?([\\w-_\\/]+)[\'"]?\\]/' => '\\1[starts-with(@\\2, "\\3")]', + // element attribute*= + '/([\\w\\*]+)\\[(\\w+)[\\s]*\\*\\=[\\s]*[\'"]?([\\w-_\\s\\/]+)[\'"]?\\]/' => '\\1[contains(@\\2, "\\3")]', + // element attribute$= + '/([\\w\\*]+)\\[(\\w+)[\\s]*\\$\\=[\\s]*[\'"]?([\\w-_\\s\\/]+)[\'"]?\\]/' + => '\\1[substring(@\\2, string-length(@\\2) - string-length("\\3") + 1) = "\\3"]', + // element attribute|= + '/([\\w\\*]+)\\[(\\w+)[\\s]*\\|\\=[\\s]*[\'"]?([\\w-_\\s\\/]+)[\'"]?\\]/' + => '\\1[@\\2="\\3" or starts-with(@\\2, concat("\\3", "-"))]', + ]; + + /** + * Determines whether CSS styles that have an equivalent HTML attribute + * should be mapped and attached to those elements. + * + * @var bool + */ + private $shouldMapCssToHtml = false; + + /** + * This multi-level array contains simple mappings of CSS properties to + * HTML attributes. If a mapping only applies to certain HTML nodes or + * only for certain values, the mapping is an object with a whitelist + * of nodes and values. + * + * @var mixed[][] + */ + private $cssToHtmlMap = [ + 'background-color' => [ + 'attribute' => 'bgcolor', + ], + 'text-align' => [ + 'attribute' => 'align', + 'nodes' => ['p', 'div', 'td'], + 'values' => ['left', 'right', 'center', 'justify'], + ], + 'float' => [ + 'attribute' => 'align', + 'nodes' => ['table', 'img'], + 'values' => ['left', 'right'], + ], + 'border-spacing' => [ + 'attribute' => 'cellspacing', + 'nodes' => ['table'], + ], + ]; + /** * The constructor. * @@ -257,7 +329,7 @@ class Emogrifier */ protected function process(\DOMDocument $xmlDocument) { - $xpath = new \DOMXPath($xmlDocument); + $xPath = new \DOMXPath($xmlDocument); $this->clearAllCaches(); // Before be begin processing the CSS file, parse the document and normalize all existing CSS attributes. @@ -266,7 +338,7 @@ class Emogrifier // Also store a reference of nodes with existing inline styles so we don't overwrite them. 
$this->purgeVisitedNodes(); - $nodesWithStyleAttributes = $xpath->query('//*[@style]'); + $nodesWithStyleAttributes = $xPath->query('//*[@style]'); if ($nodesWithStyleAttributes !== false) { /** @var \DOMElement $node */ foreach ($nodesWithStyleAttributes as $node) { @@ -283,15 +355,15 @@ class Emogrifier $allCss = $this->css; if ($this->isStyleBlocksParsingEnabled) { - $allCss .= $this->getCssFromAllStyleNodes($xpath); + $allCss .= $this->getCssFromAllStyleNodes($xPath); } $cssParts = $this->splitCssAndMediaQuery($allCss); - $excludedNodes = $this->getNodesToExclude($xpath); + $excludedNodes = $this->getNodesToExclude($xPath); $cssRules = $this->parseCssRules($cssParts['css']); foreach ($cssRules as $cssRule) { // query the body for the xpath selector - $nodesMatchingCssSelectors = $xpath->query($this->translateCssToXpath($cssRule['selector'])); + $nodesMatchingCssSelectors = $xPath->query($this->translateCssToXpath($cssRule['selector'])); // ignore invalid selectors if ($nodesMatchingCssSelectors === false) { continue; @@ -311,6 +383,9 @@ class Emogrifier $oldStyleDeclarations = []; } $newStyleDeclarations = $this->parseCssDeclarationsBlock($cssRule['declarationsBlock']); + if ($this->shouldMapCssToHtml) { + $this->mapCssToHtmlAttributes($newStyleDeclarations, $node); + } $node->setAttribute( 'style', $this->generateStyleStringFromDeclarationsArrays($oldStyleDeclarations, $newStyleDeclarations) @@ -323,10 +398,150 @@ class Emogrifier } if ($this->shouldKeepInvisibleNodes) { - $this->removeInvisibleNodes($xpath); + $this->removeInvisibleNodes($xPath); + } + + $this->copyCssWithMediaToStyleNode($xmlDocument, $xPath, $cssParts['media']); + } + + /** + * Applies $styles to $node. + * + * This method maps CSS styles to HTML attributes and adds those to the + * node. 
+ * + * @param string[] $styles the new CSS styles taken from the global styles to be applied to this node + * @param \DOMNode $node node to apply styles to + * + * @return void + */ + private function mapCssToHtmlAttributes(array $styles, \DOMNode $node) + { + foreach ($styles as $property => $value) { + // Strip !important indicator + $value = trim(str_replace('!important', '', $value)); + $this->mapCssToHtmlAttribute($property, $value, $node); + } + } + + /** + * Tries to apply the CSS style to $node as an attribute. + * + * This method maps a CSS rule to HTML attributes and adds those to the node. + * + * @param string $property the name of the CSS property to map + * @param string $value the value of the style rule to map + * @param \DOMNode $node node to apply styles to + * + * @return void + */ + private function mapCssToHtmlAttribute($property, $value, \DOMNode $node) + { + if (!$this->mapSimpleCssProperty($property, $value, $node)) { + $this->mapComplexCssProperty($property, $value, $node); + } + } + + /** + * Looks up the CSS property in the mapping table and maps it if it matches the conditions. + * + * @param string $property the name of the CSS property to map + * @param string $value the value of the style rule to map + * @param \DOMNode $node node to apply styles to + * + * @return bool true if the property cab be mapped using the simple mapping table + */ + private function mapSimpleCssProperty($property, $value, \DOMNode $node) + { + if (!isset($this->cssToHtmlMap[$property])) { + return false; + } + + $mapping = $this->cssToHtmlMap[$property]; + $nodesMatch = !isset($mapping['nodes']) || in_array($node->nodeName, $mapping['nodes'], true); + $valuesMatch = !isset($mapping['values']) || in_array($value, $mapping['values'], true); + if (!$nodesMatch || !$valuesMatch) { + return false; + } + + $node->setAttribute($mapping['attribute'], $value); + + return true; + } + + /** + * Maps CSS properties that need special transformation to an HTML attribute. 
+ * + * @param string $property the name of the CSS property to map + * @param string $value the value of the style rule to map + * @param \DOMNode $node node to apply styles to + * + * @return void + */ + private function mapComplexCssProperty($property, $value, \DOMNode $node) + { + $nodeName = $node->nodeName; + $isTable = $nodeName === 'table'; + $isImage = $nodeName === 'img'; + $isTableOrImage = $isTable || $isImage; + + switch ($property) { + case 'background': + // Parse out the color, if any + $styles = explode(' ', $value); + $first = $styles[0]; + if (!is_numeric(substr($first, 0, 1)) && substr($first, 0, 3) !== 'url') { + // This is not a position or image, assume it's a color + $node->setAttribute('bgcolor', $first); + } + break; + case 'width': + // intentional fall-through + case 'height': + // Remove 'px'. This regex only conserves numbers and % + $number = preg_replace('/[^0-9.%]/', '', $value); + $node->setAttribute($property, $number); + break; + case 'margin': + if ($isTableOrImage) { + $margins = $this->parseCssShorthandValue($value); + if ($margins['left'] === 'auto' && $margins['right'] === 'auto') { + $node->setAttribute('align', 'center'); + } + } + break; + case 'border': + if ($isTableOrImage) { + if ($value === 'none' || $value === '0') { + $node->setAttribute('border', '0'); + } + } + break; + default: } + } - $this->copyCssWithMediaToStyleNode($xmlDocument, $xpath, $cssParts['media']); + /** + * Parses a shorthand CSS value and splits it into individual values + * + * @param string $value a string of CSS value with 1, 2, 3 or 4 sizes + * For example: padding: 0 auto; + * '0 auto' is split into top: 0, left: auto, bottom: 0, + * right: auto. + * + * @return string[] an array of values for top, right, bottom and left (using these as associative array keys) + */ + private function parseCssShorthandValue($value) + { + $values = preg_split('/\\s+/', $value); + + $css = []; + $css['top'] = $values[0]; + $css['right'] = (count($values) > 1) ? 
$values[1] : $css['top']; + $css['bottom'] = (count($values) > 2) ? $values[2] : $css['top']; + $css['left'] = (count($values) > 3) ? $values[3] : $css['right']; + + return $css; } /** @@ -359,7 +574,10 @@ class Emogrifier foreach ($selectors as $selector) { // don't process pseudo-elements and behavioral (dynamic) pseudo-classes; // only allow structural pseudo-classes - if (strpos($selector, ':') !== false && !preg_match('/:\\S+\\-(child|type\\()/i', $selector)) { + $hasPseudoElement = strpos($selector, '::') !== false; + $hasAnyPseudoClass = (bool) preg_match('/:[a-zA-Z]/', $selector); + $hasSupportedPseudoClass = (bool) preg_match('/:\\S+\\-(child|type\\()/i', $selector); + if ($hasPseudoElement || ($hasAnyPseudoClass && !$hasSupportedPseudoClass)) { continue; } @@ -410,6 +628,17 @@ class Emogrifier $this->shouldKeepInvisibleNodes = false; } + /** + * Enables the attachment/override of HTML attributes for which a + * corresponding CSS property has been set. + * + * @return void + */ + public function enableCssToHtmlMapping() + { + $this->shouldMapCssToHtml = true; + } + /** * Clears all caches. * @@ -554,13 +783,13 @@ class Emogrifier * not attribute values. Consequently, we need to translate() the letters that would be in 'NONE' ("NOE") * to lowercase. 
* - * @param \DOMXPath $xpath + * @param \DOMXPath $xPath * * @return void */ - private function removeInvisibleNodes(\DOMXPath $xpath) + private function removeInvisibleNodes(\DOMXPath $xPath) { - $nodesWithStyleDisplayNone = $xpath->query( + $nodesWithStyleDisplayNone = $xPath->query( '//*[contains(translate(translate(@style," ",""),"NOE","noe"),"display:none")]' ); if ($nodesWithStyleDisplayNone->length === 0) { @@ -645,7 +874,14 @@ class Emogrifier } foreach ($oldStyles as $attributeName => $attributeValue) { - if (isset($newStyles[$attributeName]) && strtolower(substr($attributeValue, -10)) === '!important') { + if (!isset($newStyles[$attributeName])) { + continue; + } + + $newAttributeValue = $newStyles[$attributeName]; + if ($this->attributeValueIsImportant($attributeValue) + && !$this->attributeValueIsImportant($newAttributeValue) + ) { $combinedStyles[$attributeName] = $attributeValue; } } @@ -661,16 +897,28 @@ class Emogrifier return $trimmedStyle; } + /** + * Checks whether $attributeValue is marked as !important. + * + * @param string $attributeValue + * + * @return bool + */ + private function attributeValueIsImportant($attributeValue) + { + return strtolower(substr(trim($attributeValue), -10)) === '!important'; + } + /** * Applies $css to $xmlDocument, limited to the media queries that actually apply to the document. 
* * @param \DOMDocument $xmlDocument the document to match against - * @param \DOMXPath $xpath + * @param \DOMXPath $xPath * @param string $css a string of CSS * * @return void */ - private function copyCssWithMediaToStyleNode(\DOMDocument $xmlDocument, \DOMXPath $xpath, $css) + private function copyCssWithMediaToStyleNode(\DOMDocument $xmlDocument, \DOMXPath $xPath, $css) { if ($css === '') { return; @@ -680,7 +928,7 @@ class Emogrifier foreach ($this->extractMediaQueriesFromCss($css) as $mediaQuery) { foreach ($this->parseCssRules($mediaQuery['css']) as $selector) { - if ($this->existsMatchForCssSelector($xpath, $selector['selector'])) { + if ($this->existsMatchForCssSelector($xPath, $selector['selector'])) { $mediaQueriesRelevantForDocument[] = $mediaQuery['query']; break; } @@ -691,7 +939,7 @@ class Emogrifier } /** - * Extracts the media queries from $css. + * Extracts the media queries from $css while skipping empty media queries. * * @param string $css * @@ -699,28 +947,32 @@ class Emogrifier */ private function extractMediaQueriesFromCss($css) { - preg_match_all('#(?@media[^{]*\\{(?(.*?)\\})(\\s*)\\})#s', $css, $mediaQueries); - $result = []; - foreach (array_keys($mediaQueries['css']) as $key) { - $result[] = [ - 'css' => $mediaQueries['css'][$key], - 'query' => $mediaQueries['query'][$key], - ]; + preg_match_all('/@media\\b[^{]*({((?:[^{}]+|(?1))*)})/', $css, $rawMediaQueries, PREG_SET_ORDER); + $parsedQueries = []; + + foreach ($rawMediaQueries as $mediaQuery) { + if ($mediaQuery[2] !== '') { + $parsedQueries[] = [ + 'css' => $mediaQuery[2], + 'query' => $mediaQuery[0], + ]; + } } - return $result; + + return $parsedQueries; } /** * Checks whether there is at least one matching element for $cssSelector. 
* - * @param \DOMXPath $xpath + * @param \DOMXPath $xPath * @param string $cssSelector * * @return bool */ - private function existsMatchForCssSelector(\DOMXPath $xpath, $cssSelector) + private function existsMatchForCssSelector(\DOMXPath $xPath, $cssSelector) { - $nodesMatchingSelector = $xpath->query($this->translateCssToXpath($cssSelector)); + $nodesMatchingSelector = $xPath->query($this->translateCssToXpath($cssSelector)); return $nodesMatchingSelector !== false && $nodesMatchingSelector->length !== 0; } @@ -728,13 +980,13 @@ class Emogrifier /** * Returns CSS content. * - * @param \DOMXPath $xpath + * @param \DOMXPath $xPath * * @return string */ - private function getCssFromAllStyleNodes(\DOMXPath $xpath) + private function getCssFromAllStyleNodes(\DOMXPath $xPath) { - $styleNodes = $xpath->query('//style'); + $styleNodes = $xPath->query('//style'); if ($styleNodes === false) { return ''; @@ -930,7 +1182,6 @@ class Emogrifier $hasContentTypeMetaTag = stristr($html, 'Content-Type') !== false; if ($hasContentTypeMetaTag) { return $html; - } // We are trying to insert the meta tag to the right spot in the DOM. 
@@ -1020,58 +1271,40 @@ class Emogrifier $paddedSelector ); $trimmedLowercaseSelector = trim($lowercasePaddedSelector); - $xpathKey = md5($trimmedLowercaseSelector); - if (!isset($this->caches[self::CACHE_KEY_XPATH][$xpathKey])) { - $cssSelectorMatches = [ - 'child' => '/\\s+>\\s+/', - 'adjacent sibling' => '/\\s+\\+\\s+/', - 'descendant' => '/\\s+/', - ':first-child' => '/([^\\/]+):first-child/i', - ':last-child' => '/([^\\/]+):last-child/i', - 'attribute only' => '/^\\[(\\w+|\\w+\\=[\'"]?\\w+[\'"]?)\\]/', - 'attribute' => '/(\\w)\\[(\\w+)\\]/', - 'exact attribute' => '/(\\w)\\[(\\w+)\\=[\'"]?(\\w+)[\'"]?\\]/', - ]; - $xPathReplacements = [ - 'child' => '/', - 'adjacent sibling' => '/following-sibling::*[1]/self::', - 'descendant' => '//', - ':first-child' => '\\1/*[1]', - ':last-child' => '\\1/*[last()]', - 'attribute only' => '*[@\\1]', - 'attribute' => '\\1[@\\2]', - 'exact attribute' => '\\1[@\\2="\\3"]', - ]; - - $roughXpath = '//' . preg_replace($cssSelectorMatches, $xPathReplacements, $trimmedLowercaseSelector); - - $xpathWithIdAttributeMatchers = preg_replace_callback( + $xPathKey = md5($trimmedLowercaseSelector); + if (!isset($this->caches[self::CACHE_KEY_XPATH][$xPathKey])) { + $roughXpath = '//' . preg_replace( + array_keys($this->xPathRules), + $this->xPathRules, + $trimmedLowercaseSelector + ); + $xPathWithIdAttributeMatchers = preg_replace_callback( self::ID_ATTRIBUTE_MATCHER, [$this, 'matchIdAttributes'], $roughXpath ); - $xpathWithIdAttributeAndClassMatchers = preg_replace_callback( + $xPathWithIdAttributeAndClassMatchers = preg_replace_callback( self::CLASS_ATTRIBUTE_MATCHER, [$this, 'matchClassAttributes'], - $xpathWithIdAttributeMatchers + $xPathWithIdAttributeMatchers ); // Advanced selectors are going to require a bit more advanced emogrification. // When we required PHP 5.3, we could do this with closures. 
- $xpathWithIdAttributeAndClassMatchers = preg_replace_callback( + $xPathWithIdAttributeAndClassMatchers = preg_replace_callback( '/([^\\/]+):nth-child\\(\\s*(odd|even|[+\\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i', [$this, 'translateNthChild'], - $xpathWithIdAttributeAndClassMatchers + $xPathWithIdAttributeAndClassMatchers ); $finalXpath = preg_replace_callback( '/([^\\/]+):nth-of-type\\(\s*(odd|even|[+\\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i', [$this, 'translateNthOfType'], - $xpathWithIdAttributeAndClassMatchers + $xPathWithIdAttributeAndClassMatchers ); - $this->caches[self::CACHE_KEY_SELECTOR][$xpathKey] = $finalXpath; + $this->caches[self::CACHE_KEY_SELECTOR][$xPathKey] = $finalXpath; } - return $this->caches[self::CACHE_KEY_SELECTOR][$xpathKey]; + return $this->caches[self::CACHE_KEY_SELECTOR][$xPathKey]; } /** @@ -1171,7 +1404,7 @@ class Emogrifier */ private function parseNth(array $match) { - if (in_array(strtolower($match[2]), ['even','odd'], true)) { + if (in_array(strtolower($match[2]), ['even', 'odd'], true)) { // we have "even" or "odd" $index = strtolower($match[2]) === 'even' ? 0 : 1; return [self::MULTIPLIER => 2, self::INDEX => $index]; @@ -1253,15 +1486,15 @@ class Emogrifier /** * Find the nodes that are not to be emogrified. 
* - * @param \DOMXPath $xpath + * @param \DOMXPath $xPath * * @return \DOMElement[] */ - private function getNodesToExclude(\DOMXPath $xpath) + private function getNodesToExclude(\DOMXPath $xPath) { $excludedNodes = []; foreach (array_keys($this->excludedSelectors) as $selectorToExclude) { - foreach ($xpath->query($this->translateCssToXpath($selectorToExclude)) as $node) { + foreach ($xPath->query($this->translateCssToXpath($selectorToExclude)) as $node) { $excludedNodes[] = $node; } } diff --git a/wcfsetup/install/files/lib/system/api/pelago/emogrifier/Configuration/PhpCodeSniffer/Standards/Emogrifier/ruleset.xml b/wcfsetup/install/files/lib/system/api/pelago/emogrifier/Configuration/PhpCodeSniffer/Standards/Emogrifier/ruleset.xml index 83398ffe7b..0ca655f984 100644 --- a/wcfsetup/install/files/lib/system/api/pelago/emogrifier/Configuration/PhpCodeSniffer/Standards/Emogrifier/ruleset.xml +++ b/wcfsetup/install/files/lib/system/api/pelago/emogrifier/Configuration/PhpCodeSniffer/Standards/Emogrifier/ruleset.xml @@ -1,5 +1,5 @@ - + This is the coding standard used for the Emogrifier code. This standard has been tested with to work with PHP_CodeSniffer >= 2.3.0. @@ -36,29 +36,16 @@ - - - - - - - - - - - - - @@ -78,8 +65,6 @@ - - @@ -113,14 +98,9 @@ - - */Tests/* - - - @@ -130,7 +110,4 @@ - - - \ No newline at end of file diff --git a/wcfsetup/install/files/lib/system/api/pelago/emogrifier/README.md b/wcfsetup/install/files/lib/system/api/pelago/emogrifier/README.md index b3ea9f4769..a17c6f17e3 100644 --- a/wcfsetup/install/files/lib/system/api/pelago/emogrifier/README.md +++ b/wcfsetup/install/files/lib/system/api/pelago/emogrifier/README.md @@ -28,13 +28,14 @@ in `` elements. Emogrifier solves this problem by converting CSS styles into inline style attributes in your HTML code. 
- [How it works](#how-it-works) +- [Installation](#installation) - [Usage](#usage) +- [Options](#options) +- [Requirements](#requirements) - [Installing with Composer](#installing-with-composer) -- [Usage](#usage) - [Supported CSS selectors](#supported-css-selectors) - [Caveats](#caveats) - [Maintainer](#maintainer) -- [Contributing](#contributing) ## How it Works @@ -44,6 +45,16 @@ inserting your CSS definitions into tags within your HTML based on your CSS selectors. +## Installation + +For installing emogrifier, either add pelago/emogrifier to your +project's composer.json, or you can use composer as below: + +``` +composer require pelago/emogrifier +``` + + ## Usage First, you provide Emogrifier with the HTML and CSS you would like to merge. @@ -102,11 +113,15 @@ calling the `emogrify` method: method to remove media types that Emogrifier keeps. * `$emogrifier->addExcludedSelector(string $selector)` - Keeps elements from being affected by emogrification. +* `$emogrifier->enableCssToHtmlMapping()` - Some email clients don't support CSS + well, even if inline and prefer HTML attributes. This function allows you to + put properties such as height, width, background color and font color in your + CSS while the transformed content will have all the available HTML tags set. 
## Requirements -* PHP from 5.4 to 7.0 (with the mbstring extension) +* PHP from 5.4 to 7.0 * or HHVM @@ -148,6 +163,11 @@ Emogrifier currently support the following * adjacent * attribute presence * attribute value + * attribute value with | + * attribute value with ~ + * attribute value with ^ + * attribute value with * + * attribute value with $ * attribute only * first-child * last-child @@ -155,6 +175,7 @@ Emogrifier currently support the following The following selectors are not implemented yet: * universal + * pseudo-elements (will never be supported) ## Caveats diff --git a/wcfsetup/install/files/lib/system/api/pelago/emogrifier/Tests/Unit/EmogrifierTest.php b/wcfsetup/install/files/lib/system/api/pelago/emogrifier/Tests/Unit/EmogrifierTest.php index a535225061..bf63bbd33c 100644 --- a/wcfsetup/install/files/lib/system/api/pelago/emogrifier/Tests/Unit/EmogrifierTest.php +++ b/wcfsetup/install/files/lib/system/api/pelago/emogrifier/Tests/Unit/EmogrifierTest.php @@ -103,12 +103,9 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase { $this->subject->setHtml('

Hello

'); - $emogrifiedHtml = $this->subject->emogrify(); + $result = $this->subject->emogrify(); - self::assertContains( - '', - $emogrifiedHtml - ); + self::assertContains('', $result); } /** @@ -118,12 +115,9 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase { $this->subject->setHtml('Hello

World

'); - $emogrifiedHtml = $this->subject->emogrify(); + $result = $this->subject->emogrify(); - self::assertContains( - '', - $emogrifiedHtml - ); + self::assertContains('', $result); } /** @@ -133,12 +127,9 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase { $this->subject->setHtml('

Hello

'); - $emogrifiedHtml = $this->subject->emogrify(); + $result = $this->subject->emogrify(); - self::assertContains( - '', - $emogrifiedHtml - ); + self::assertContains('', $result); } /** @@ -148,12 +139,9 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase { $this->subject->setHtml('

World

'); - $emogrifiedHtml = $this->subject->emogrify(); + $result = $this->subject->emogrify(); - self::assertContains( - '', - $emogrifiedHtml - ); + self::assertContains('', $result); } /** @@ -162,14 +150,12 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase public function emogrifyKeepsDollarSignsAndSquareBrackets() { $templateMarker = '$[USER:NAME]$'; - $html = $this->html5DocumentType . '

' . $templateMarker . '

'; $this->subject->setHtml($html); - self::assertContains( - $templateMarker, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains($templateMarker, $result); } /** @@ -178,14 +164,12 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase public function emogrifyKeepsUtf8UmlautsInHtml5() { $umlautString = 'Küss die Hand, schöne Frau.'; - $html = $this->html5DocumentType . '

' . $umlautString . '

'; $this->subject->setHtml($html); - self::assertContains( - $umlautString, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains($umlautString, $result); } /** @@ -194,14 +178,12 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase public function emogrifyKeepsUtf8UmlautsInXhtml() { $umlautString = 'Öösel läks õunu täis ämber uhkelt ümber.'; - $html = $this->xhtml1StrictDocumentType . '' . $umlautString . '

'; $this->subject->setHtml($html); - self::assertContains( - $umlautString, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains($umlautString, $result); } /** @@ -210,15 +192,12 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase public function emogrifyKeepsUtf8UmlautsInHtml4() { $umlautString = 'Öösel läks õunu täis ämber uhkelt ümber.'; - $html = $this->html4TransitionalDocumentType . '

' . $umlautString . '

'; $this->subject->setHtml($html); - self::assertContains( - $umlautString, - $umlautString, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains($umlautString, $result); } /** @@ -227,14 +206,12 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase public function emogrifyKeepsHtmlEntities() { $entityString = 'a & b > c'; - $html = $this->html5DocumentType . '

' . $entityString . '

'; $this->subject->setHtml($html); - self::assertContains( - $entityString, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains($entityString, $result); } /** @@ -243,14 +220,12 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase public function emogrifyKeepsHtmlEntitiesInXhtml() { $entityString = 'a & b > c'; - $html = $this->xhtml1StrictDocumentType . '' . $entityString . '

'; $this->subject->setHtml($html); - self::assertContains( - $entityString, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains($entityString, $result); } /** @@ -259,15 +234,12 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase public function emogrifyKeepsHtmlEntitiesInHtml4() { $entityString = 'a & b > c'; - $html = $this->html4TransitionalDocumentType . '

' . $entityString . '

'; $this->subject->setHtml($html); - self::assertContains( - $entityString, - $entityString, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains($entityString, $result); } /** @@ -276,14 +248,12 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase public function emogrifyKeepsUtf8UmlautsWithoutDocumentType() { $umlautString = 'Küss die Hand, schöne Frau.'; - $html = '

' . $umlautString . '

'; $this->subject->setHtml($html); - self::assertContains( - $umlautString, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains($umlautString, $result); } /** @@ -292,14 +262,12 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase public function emogrifyKeepsUtf8UmlautsWithoutDocumentTypeAndWithoutHtmlAndWithoutHead() { $umlautString = 'Küss die Hand, schöne Frau.'; - $html = '

' . $umlautString . '

'; $this->subject->setHtml($html); - self::assertContains( - $umlautString, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains($umlautString, $result); } /** @@ -308,14 +276,12 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase public function emogrifyKeepsUtf8UmlautsWithoutDocumentTypeAndWithHtmlAndWithoutHead() { $umlautString = 'Küss die Hand, schöne Frau.'; - $html = '

' . $umlautString . '

'; $this->subject->setHtml($html); - self::assertContains( - $umlautString, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains($umlautString, $result); } /** @@ -324,14 +290,12 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase public function emogrifyKeepsUtf8UmlautsWithoutDocumentTypeAndWithoutHtmlAndWithHead() { $umlautString = 'Küss die Hand, schöne Frau.'; - $html = '

' . $umlautString . '

'; $this->subject->setHtml($html); - self::assertContains( - $umlautString, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains($umlautString, $result); } /** @@ -343,10 +307,9 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $this->subject->setHtml($html); $this->subject->setCss(''); - self::assertContains( - $this->html5DocumentType, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains($this->html5DocumentType, $result); } /** @@ -357,10 +320,9 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $html = $this->xhtml1StrictDocumentType . ''; $this->subject->setHtml($html); - self::assertContains( - $this->xhtml1StrictDocumentType, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains($this->xhtml1StrictDocumentType, $result); } /** @@ -371,10 +333,9 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $html = $this->html5DocumentType . ''; $this->subject->setHtml($html); - self::assertContains( - $this->html5DocumentType, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains($this->html5DocumentType, $result); } /** @@ -385,10 +346,9 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $html = $this->html5DocumentType . '

Hello

'; $this->subject->setHtml($html); - self::assertContains( - '', - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains('', $result); } /** @@ -402,10 +362,8 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $this->subject->setHtml($html); $numberOfContentTypeMetaTags = substr_count($this->subject->emogrify(), 'Content-Type'); - self::assertSame( - 1, - $numberOfContentTypeMetaTags - ); + + self::assertSame(1, $numberOfContentTypeMetaTags); } /** @@ -416,10 +374,9 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $html = $this->html5DocumentType . 'foobar'; $this->subject->setHtml($html); - self::assertContains( - 'foobar', - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains('foobar', $result); } /** @@ -427,15 +384,13 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase */ public function addUnprocessableTagCausesGivenEmptyTagToBeRemoved() { - $this->subject->addUnprocessableHtmlTag('p'); - $html = $this->html5DocumentType . '

'; $this->subject->setHtml($html); - self::assertNotContains( - '

', - $this->subject->emogrify() - ); + $this->subject->addUnprocessableHtmlTag('p'); + $result = $this->subject->emogrify(); + + self::assertNotContains('

', $result); } /** @@ -443,15 +398,13 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase */ public function addUnprocessableTagNotRemovesGivenTagWithContent() { - $this->subject->addUnprocessableHtmlTag('p'); - $html = $this->html5DocumentType . '

foobar

'; $this->subject->setHtml($html); - self::assertContains( - '

', - $this->subject->emogrify() - ); + $this->subject->addUnprocessableHtmlTag('p'); + $result = $this->subject->emogrify(); + + self::assertContains('

', $result); } /** @@ -459,16 +412,14 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase */ public function removeUnprocessableHtmlTagCausesTagToStayAgain() { - $this->subject->addUnprocessableHtmlTag('p'); - $this->subject->removeUnprocessableHtmlTag('p'); - $html = $this->html5DocumentType . '

foo
bar

'; $this->subject->setHtml($html); - self::assertContains( - '

', - $this->subject->emogrify() - ); + $this->subject->addUnprocessableHtmlTag('p'); + $this->subject->removeUnprocessableHtmlTag('p'); + $result = $this->subject->emogrify(); + + self::assertContains('

', $result); } /** @@ -481,9 +432,11 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $styleRule = 'color: #000;'; $this->subject->setCss('html {' . $styleRule . '}'); + $result = $this->subject->emogrify(); + self::assertContains( '', - $this->subject->emogrify() + $result ); } @@ -496,10 +449,9 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $this->subject->setHtml($html); $this->subject->setCss('p {color:#000;}'); - self::assertContains( - '', - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains('', $result); } /** @@ -512,9 +464,11 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $styleRule = 'color: #000;'; $this->subject->setCss('p {' . $styleRule . '}'); + $result = $this->subject->emogrify(); + self::assertSame( 2, - substr_count($this->subject->emogrify(), '

') + substr_count($result, '

') ); } @@ -526,12 +480,14 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $html = $this->html5DocumentType . '

'; $this->subject->setHtml($html); $styleRulesIn = 'color:#000; text-align:left;'; - $styleRulesOut = 'color: #000; text-align: left;'; $this->subject->setCss('p {' . $styleRulesIn . '}'); + $result = $this->subject->emogrify(); + + $styleRulesOut = 'color: #000; text-align: left;'; self::assertContains( '

', - $this->subject->emogrify() + $result ); } @@ -544,10 +500,9 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $this->subject->setHtml($html); $this->subject->setCss('[hidden] { color:red; }'); - self::assertContains( - '

', - $this->subject->emogrify() + $result ); } @@ -578,9 +535,11 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $styleRule2 = 'text-align: left;'; $this->subject->setCss('p {' . $styleRule1 . '} .x {' . $styleRule2 . '}'); + $result = $this->subject->emogrify(); + self::assertContains( '

', - $this->subject->emogrify() + $result ); } @@ -611,9 +570,13 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase => ['body span {' . $styleRule . '} ', '##'], 'child selector P > SPAN matches direct child' => ['p > span {' . $styleRule . '} ', '##'], - 'child selector BODY > SPAN not matches grandchild' + 'child selector P > SPAN matches direct child without space after >' + => ['p >span {' . $styleRule . '} ', '##'], + 'child selector P > SPAN matches direct child without space before >' + => ['p> span {' . $styleRule . '} ', '##'], + 'child selector BODY > SPAN does not match grandchild' => ['body > span {' . $styleRule . '} ', '##'], - 'adjacent selector P + P not matches first P' => ['p + p {' . $styleRule . '} ', '#

#'], + 'adjacent selector P + P does not match first P' => ['p + p {' . $styleRule . '} ', '#

#'], 'adjacent selector P + P matches second P' => ['p + p {' . $styleRule . '} ', '#

#'], 'adjacent selector P + P matches third P' @@ -626,7 +589,7 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase => ['p.p-1 {' . $styleRule . '} ', '#

#'], 'attribute presence selector SPAN[title] matches element with matching attribute' => ['span[title] {' . $styleRule . '} ', '##'], - 'attribute presence selector SPAN[title] not matches element without any attributes' + 'attribute presence selector SPAN[title] does not match element without any attributes' => ['span[title] {' . $styleRule . '} ', '##'], 'attribute value selector [id="html"] matches element with matching attribute value' => [ '[id="html"] {' . $styleRule . '} ', '##' @@ -634,21 +597,85 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase 'attribute value selector SPAN[title] matches element with matching attribute value' => [ 'span[title="bonjour"] {' . $styleRule . '} ', '##' ], - 'attribute value selector SPAN[title] not matches element with other attribute value' + 'attribute value selector SPAN[title] matches element with matching attribute value two words' => [ + 'span[title="buenas dias"] {' . $styleRule . '} ', '##' + ], + 'attribute value selector SPAN[title] matches element with matching attribute value four words' => [ + 'span[title="buenas dias bom dia"] {' . $styleRule . '} ', '##' + ], + 'attribute value selector SPAN[title~] matches element with an attribute value with just that word' => [ + 'span[title~="bonjour"] {' . $styleRule . '} ', '##' + ], + 'attribute value selector SPAN[title~] matches element with attribute value with that word as 2nd of 2' => [ + 'span[title~="dias"] {' . $styleRule . '} ', '##' + ], + 'attribute value selector SPAN[title~] matches element with attribute value with that word as 1st of 2' => [ + 'span[title~="buenas"] {' . $styleRule . '} ', '##' + ], + 'attribute value selector SPAN[title*] matches element with an attribute value with just that word' => [ + 'span[title*="bonjour"] {' . $styleRule . '} ', '##' + ], + 'attribute value selector SPAN[title*] matches element with attribute value with that word as 2nd of 2' => [ + 'span[title*="dias"] {' . $styleRule . 
'} ', '##' + ], + 'attribute value selector SPAN[title*] matches element with an attribute value with parts two words' => [ + 'span[title*="enas di"] {' . $styleRule . '} ', '##' + ], + 'attribute value selector SPAN[title^] matches element with attribute value that is exactly that word' => [ + 'span[title^="bonjour"] {' . $styleRule . '} ', '##' + ], + 'attribute value selector SPAN[title^] matches element with an attribute value that begins that word' => [ + 'span[title^="bonj"] {' . $styleRule . '} ', '##' + ], + 'attribute value selector SPAN[title^] matches element with an attribute value that begins that word ' + . 'and contains other words' => [ + 'span[title^="buenas"] {' . $styleRule . '} ', '##' + ], + 'attribute value selector SPAN[title$] matches element with attribute value that is exactly that word' => [ + 'span[title$="bonjour"] {' . $styleRule . '} ', '##' + ], + 'attribute value selector SPAN[title$] matches element with an attribute value with two words' => [ + 'span[title$="buenas dias"] {' . $styleRule . '} ', '##' + ], + 'attribute value selector SPAN[title$] matches element with an attribute value that end that word' => [ + 'span[title$="jour"] {' . $styleRule . '} ', '##' + ], + 'attribute value selector SPAN[title$] matches element with an attribute value that end that word ' + . 'and contains other words' => [ + 'span[title$="dias"] {' . $styleRule . '} ', '##' + ], + 'attribute value selector SPAN[title|] matches element with attribute value that is exactly that word' => [ + 'span[title|="bonjour"] {' . $styleRule . '} ', '##' + ], + 'attribute value selector SPAN[title|] matches element with an attribute value with two words' => [ + 'span[title|="buenas dias"] {' . $styleRule . '} ', '##' + ], + 'attribute value selector SPAN[title|] matches element with an attribute value with 2 words with hypen' => [ + 'span[title|="avez"] {' . $styleRule . 
'} ', '##' + ], + 'attribute value selector SPAN[title] does not match element with other attribute value' => ['span[title="bonjour"] {' . $styleRule . '} ', '##'], - 'attribute value selector SPAN[title] not matches element without any attributes' + 'attribute value selector SPAN[title] does not match element without any attributes' => ['span[title="bonjour"] {' . $styleRule . '} ', '##'], - 'BODY:first-child matches first child' - => ['body:first-child {' . $styleRule . '} ', '#

#'], - 'BODY:first-child not matches middle child' - => ['body:first-child {' . $styleRule . '} ', '#

#'], - 'BODY:first-child not matches last child' - => ['body:first-child {' . $styleRule . '} ', '#

#'], - 'BODY:last-child not matches first child' => ['body:last-child {' . $styleRule . '} ', '#

#'], - 'BODY:last-child not matches middle child' - => ['body:last-child {' . $styleRule . '} ', '#

#'], - 'BODY:last-child matches last child' - => ['body:last-child {' . $styleRule . '} ', '#

#'], + 'P:first-child matches first child with matching tag' + => ['p:first-child {' . $styleRule . '} ', '#

#'], + 'DIV:first-child does not match first child with mismatching tag' + => ['div:first-child {' . $styleRule . '} ', '#

#'], + 'P:first-child does not match middle child' + => ['p:first-child {' . $styleRule . '} ', '#

#'], + 'P:first-child does not match last child' + => ['p:first-child {' . $styleRule . '} ', '#

#'], + 'P:last-child does not match first child' => ['p:last-child {' . $styleRule . '} ', '#

#'], + 'P:last-child does not match middle child' + => ['p:last-child {' . $styleRule . '} ', '#

#'], + 'P:last-child matches last child' + => ['p:last-child {' . $styleRule . '} ', '#

#'], + 'DIV:last-child does not match last child with mismatching tag' + => ['div:last-child {' . $styleRule . '} ', '#

#'], ]; } @@ -668,6 +695,8 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase '

some text

' . '

some text

' . '

some more text

' . + '

some more text

' . + '

some more text

' . ' ' . ''; $this->subject->setHtml($html); @@ -675,10 +704,7 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $result = $this->subject->emogrify(); - self::assertRegExp( - $htmlRegularExpression, - $result - ); + self::assertRegExp($htmlRegularExpression, $result); } /** @@ -769,13 +795,14 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase { $html = $this->html5DocumentType . ''; $css = 'html {' . $cssDeclarationBlock . '}'; - $this->subject->setHtml($html); $this->subject->setCss($css); + $result = $this->subject->emogrify(); + self::assertContains( 'html style="' . $expectedStyleAttributeContent . '">', - $this->subject->emogrify() + $result ); } @@ -805,14 +832,12 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase { $html = $this->html5DocumentType . ''; $css = 'html {' . $cssDeclarationBlock . '}'; - $this->subject->setHtml($html); $this->subject->setCss($css); - self::assertContains( - '', - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains('', $result); } /** @@ -824,10 +849,9 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $html = $this->html5DocumentType . ''; $this->subject->setHtml($html); - self::assertContains( - $styleAttribute, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains($styleAttribute, $result); } /** @@ -838,14 +862,15 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $styleAttributeValue = 'color: #ccc;'; $html = $this->html5DocumentType . ''; $this->subject->setHtml($html); - $cssDeclarations = 'margin: 0 2px;'; $css = 'html {' . $cssDeclarations . '}'; $this->subject->setCss($css); + $result = $this->subject->emogrify(); + self::assertContains( 'style="' . $styleAttributeValue . ' ' . $cssDeclarations . 
'"', - $this->subject->emogrify() + $result ); } @@ -858,10 +883,9 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $this->subject->setHtml($html); $this->subject->setCss('p{color:blue;}html{color:red;}'); - self::assertContains( - '', - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains('', $result); } /** @@ -872,10 +896,9 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $html = $this->html5DocumentType . ''; $this->subject->setHtml($html); - self::assertContains( - 'style="color: #ccc;"', - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains('style="color: #ccc;"', $result); } /** @@ -886,13 +909,11 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $html = $this->html5DocumentType . ''; $this->subject->setHtml($html); $cssIn = 'html {mArGiN:0 2pX;}'; - $cssOut = 'margin: 0 2pX;'; $this->subject->setCss($cssIn); - self::assertContains( - 'style="' . $cssOut . '"', - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains('style="margin: 0 2pX;"', $result); } /** @@ -905,9 +926,11 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $this->subject->setHtml($html); $this->subject->setCss('p {' . $css . '}'); + $result = $this->subject->emogrify(); + self::assertContains( '

target

', - $this->subject->emogrify() + $result ); } @@ -921,9 +944,11 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $css . '}

target

'; $this->subject->setHtml($html); + $result = $this->subject->emogrify(); + self::assertContains( '

target

', - $this->subject->emogrify() + $result ); } @@ -935,10 +960,9 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $html = $this->html5DocumentType . ''; $this->subject->setHtml($html); - self::assertNotContains( - '

'; $this->subject->setHtml($html); - self::assertContains( - $css, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains($css, $result); } /** @@ -1209,16 +1220,15 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $this->subject->setHtml($html); $this->subject->setCss($css); - self::assertNotContains( - 'style="color:red"', - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertNotContains('style="color:red"', $result); } /** * Invalid media query which need to be strip * - * @return array[] + * @return string[][] */ public function invalidMediaPreserveDataProvider() { @@ -1240,16 +1250,15 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase * * @dataProvider invalidMediaPreserveDataProvider */ - public function emogrifyWithInvalidMediaQueryaNotContainsInnerCss($css) + public function emogrifyWithInvalidMediaQueryNotContainsInnerCss($css) { $html = $this->html5DocumentType . PHP_EOL . '

'; $this->subject->setHtml($html); $this->subject->setCss($css); - self::assertNotContains( - $css, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertNotContains($css, $result); } /** @@ -1265,10 +1274,9 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $this->subject->setHtml($html); $this->subject->setCss($css); - self::assertNotContains( - 'style="color: red"', - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertNotContains('style="color: red"', $result); } /** @@ -1284,11 +1292,10 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase '

'; $this->subject->setHtml($html); - self::assertNotContains( - $css, - $this->subject->emogrify() - ); - } + $result = $this->subject->emogrify(); + + self::assertNotContains($css, $result); + } /** * @test @@ -1303,10 +1310,41 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase '

'; $this->subject->setHtml($html); - self::assertNotContains( - 'style="color: red"', - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertNotContains('style="color: red"', $result); + } + + /** + * @test + */ + public function emogrifyIgnoresEmptyMediaQuery() + { + $css = '@media screen {} @media tv { h1 { color: red; } }'; + $html = $this->html5DocumentType . PHP_EOL . '

'; + $this->subject->setHtml($html); + $this->subject->setCss($css); + + $result = $this->subject->emogrify(); + + self::assertNotContains('style="color: red"', $result); + self::assertNotContains('@media screen', $result); + } + + /** + * @test + */ + public function emogrifyIgnoresMediaQueryWithWhitespaceOnly() + { + $css = '@media screen { } @media tv { h1 { color: red; } }'; + $html = $this->html5DocumentType . PHP_EOL . '

'; + $this->subject->setHtml($html); + $this->subject->setCss($css); + + $result = $this->subject->emogrify(); + + self::assertNotContains('style="color: red"', $result); + self::assertNotContains('@media screen', $result); } /** @@ -1319,9 +1357,11 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase ''; $this->subject->setHtml($html); + $result = $this->subject->emogrify(); + self::assertContains( '', - $this->subject->emogrify() + $result ); } @@ -1336,9 +1376,11 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $this->subject->setHtml($html); $this->subject->disableStyleBlocksParsing(); + $result = $this->subject->emogrify(); + self::assertNotContains( '', - $this->subject->emogrify() + $result ); } @@ -1350,13 +1392,14 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $styleAttributeValue = 'text-align: center;'; $html = $this->html5DocumentType . '' . '

paragraph

'; - $expected = '

'; $this->subject->setHtml($html); $this->subject->disableStyleBlocksParsing(); + $result = $this->subject->emogrify(); + self::assertContains( - $expected, - $this->subject->emogrify() + '

', + $result ); } @@ -1370,10 +1413,9 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $this->subject->setHtml($html); $this->subject->disableInlineStyleAttributesParsing(); - self::assertNotContains( - 'subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertNotContains('html5DocumentType . '' . '

paragraph

'; - $expected = '

'; $this->subject->setHtml($html); $this->subject->disableInlineStyleAttributesParsing(); + $result = $this->subject->emogrify(); + self::assertContains( - $expected, - $this->subject->emogrify() + '

', + $result ); } @@ -1402,13 +1445,11 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase { $html = $this->html5DocumentType . '

paragraph

'; - $expected = '

'; $this->subject->setHtml($html); - self::assertContains( - $expected, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains('

', $result); } /** @@ -1420,13 +1461,11 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $html = $this->html5DocumentType . '' . '

some content

'; - $expected = '

'; $this->subject->setHtml($html); - self::assertContains( - $expected, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains('

', $result); } /** @@ -1439,13 +1478,14 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $html = $this->html5DocumentType . '' . '

some content

'; - $expected = '

'; $this->subject->setHtml($html); $this->subject->setCss($css); + $result = $this->subject->emogrify(); + self::assertContains( - $expected, - $this->subject->emogrify() + '

', + $result ); } @@ -1458,14 +1498,12 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $html = $this->html5DocumentType . '' . '

some content

'; - $expected = '

'; $this->subject->setHtml($html); $this->subject->setCss($css); - self::assertContains( - $expected, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains('

', $result); } /** @@ -1475,16 +1513,12 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase { $css = 'div.foo { display: none; }'; $html = $this->html5DocumentType . '

'; - - $expected = '
'; - $this->subject->setHtml($html); $this->subject->setCss($css); - self::assertContains( - $expected, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains('
', $result); } /** @@ -1495,15 +1529,11 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase $html = $this->html5DocumentType . '
' . ''; - - $expected = '
'; - $this->subject->setHtml($html); - self::assertContains( - $expected, - $this->subject->emogrify() - ); + $result = $this->subject->emogrify(); + + self::assertContains('
', $result); } /** @@ -1513,17 +1543,13 @@ class EmogrifierTest extends \PHPUnit_Framework_TestCase { $css = 'div.foo { display: none; }'; $html = $this->html5DocumentType . '
'; - - $expected = '