"prepend-autoloader": false
},
"require": {
- "ezyang/htmlpurifier": "4.8.*",
+ "ezyang/htmlpurifier": "4.9.*",
"erusev/parsedown": "1.6.*",
"pelago/emogrifier": "1.1.*"
}
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
"This file is @generated automatically"
],
- "content-hash": "d9f12e8c782481ed844e535718c478c3",
+ "content-hash": "a16ac53b137b152868cb4414ceb081a4",
"packages": [
{
"name": "erusev/parsedown",
},
{
"name": "ezyang/htmlpurifier",
- "version": "v4.8.0",
+ "version": "v4.9.3",
"source": {
"type": "git",
"url": "https://github.com/ezyang/htmlpurifier.git",
- "reference": "d0c392f77d2f2a3dcf7fcb79e2a1e2b8804e75b2"
+ "reference": "95e1bae3182efc0f3422896a3236e991049dac69"
},
"dist": {
"type": "zip",
- "url": "https://api.github.com/repos/ezyang/htmlpurifier/zipball/d0c392f77d2f2a3dcf7fcb79e2a1e2b8804e75b2",
- "reference": "d0c392f77d2f2a3dcf7fcb79e2a1e2b8804e75b2",
+ "url": "https://api.github.com/repos/ezyang/htmlpurifier/zipball/95e1bae3182efc0f3422896a3236e991049dac69",
+ "reference": "95e1bae3182efc0f3422896a3236e991049dac69",
"shasum": ""
},
"require": {
"php": ">=5.2"
},
+ "require-dev": {
+ "simpletest/simpletest": "^1.1"
+ },
"type": "library",
"autoload": {
"psr-0": {
"keywords": [
"html"
],
- "time": "2016-07-16T12:58:58+00:00"
+ "time": "2017-06-03T02:28:16+00:00"
},
{
"name": "pelago/emogrifier",
"homepage": "http://www.pelagodesign.com/sidecar/emogrifier/"
},
{
- "name": "ezyang/htmlpurifier",
- "version": "v4.8.0",
- "version_normalized": "4.8.0.0",
+ "name": "erusev/parsedown",
+ "version": "1.6.2",
+ "version_normalized": "1.6.2.0",
"source": {
"type": "git",
- "url": "https://github.com/ezyang/htmlpurifier.git",
- "reference": "d0c392f77d2f2a3dcf7fcb79e2a1e2b8804e75b2"
+ "url": "https://github.com/erusev/parsedown.git",
+ "reference": "1bf24f7334fe16c88bf9d467863309ceaf285b01"
},
"dist": {
"type": "zip",
- "url": "https://api.github.com/repos/ezyang/htmlpurifier/zipball/d0c392f77d2f2a3dcf7fcb79e2a1e2b8804e75b2",
- "reference": "d0c392f77d2f2a3dcf7fcb79e2a1e2b8804e75b2",
+ "url": "https://api.github.com/repos/erusev/parsedown/zipball/1bf24f7334fe16c88bf9d467863309ceaf285b01",
+ "reference": "1bf24f7334fe16c88bf9d467863309ceaf285b01",
"shasum": ""
},
"require": {
- "php": ">=5.2"
+ "php": ">=5.3.0"
},
- "time": "2016-07-16T12:58:58+00:00",
+ "time": "2017-03-29T16:04:15+00:00",
"type": "library",
"installation-source": "dist",
"autoload": {
"psr-0": {
- "HTMLPurifier": "library/"
- },
- "files": [
- "library/HTMLPurifier.composer.php"
- ]
+ "Parsedown": ""
+ }
},
"notification-url": "https://packagist.org/downloads/",
"license": [
- "LGPL"
+ "MIT"
],
"authors": [
{
- "name": "Edward Z. Yang",
- "email": "admin@htmlpurifier.org",
- "homepage": "http://ezyang.com"
+ "name": "Emanuil Rusev",
+ "email": "hello@erusev.com",
+ "homepage": "http://erusev.com"
}
],
- "description": "Standards compliant HTML filter written in PHP",
- "homepage": "http://htmlpurifier.org/",
+ "description": "Parser for Markdown.",
+ "homepage": "http://parsedown.org",
"keywords": [
- "html"
+ "markdown",
+ "parser"
]
},
{
- "name": "erusev/parsedown",
- "version": "1.6.2",
- "version_normalized": "1.6.2.0",
+ "name": "ezyang/htmlpurifier",
+ "version": "v4.9.3",
+ "version_normalized": "4.9.3.0",
"source": {
"type": "git",
- "url": "https://github.com/erusev/parsedown.git",
- "reference": "1bf24f7334fe16c88bf9d467863309ceaf285b01"
+ "url": "https://github.com/ezyang/htmlpurifier.git",
+ "reference": "95e1bae3182efc0f3422896a3236e991049dac69"
},
"dist": {
"type": "zip",
- "url": "https://api.github.com/repos/erusev/parsedown/zipball/1bf24f7334fe16c88bf9d467863309ceaf285b01",
- "reference": "1bf24f7334fe16c88bf9d467863309ceaf285b01",
+ "url": "https://api.github.com/repos/ezyang/htmlpurifier/zipball/95e1bae3182efc0f3422896a3236e991049dac69",
+ "reference": "95e1bae3182efc0f3422896a3236e991049dac69",
"shasum": ""
},
"require": {
- "php": ">=5.3.0"
+ "php": ">=5.2"
},
- "time": "2017-03-29T16:04:15+00:00",
+ "require-dev": {
+ "simpletest/simpletest": "^1.1"
+ },
+ "time": "2017-06-03T02:28:16+00:00",
"type": "library",
"installation-source": "dist",
"autoload": {
"psr-0": {
- "Parsedown": ""
- }
+ "HTMLPurifier": "library/"
+ },
+ "files": [
+ "library/HTMLPurifier.composer.php"
+ ]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
- "MIT"
+ "LGPL"
],
"authors": [
{
- "name": "Emanuil Rusev",
- "email": "hello@erusev.com",
- "homepage": "http://erusev.com"
+ "name": "Edward Z. Yang",
+ "email": "admin@htmlpurifier.org",
+ "homepage": "http://ezyang.com"
}
],
- "description": "Parser for Markdown.",
- "homepage": "http://parsedown.org",
+ "description": "Standards compliant HTML filter written in PHP",
+ "homepage": "http://htmlpurifier.org/",
"keywords": [
- "markdown",
- "parser"
+ "html"
]
}
]
. Internal change
==========================
+4.9.3, released 2017-06-02
+- Workaround PHP 7.1 infinite loop when opcode cache is enabled.
+ Thanks @Xiphin (#134, #135)
+- Don't use autoloader when testing for DOMDocument. Hypothetically,
+ this could cause your install to start using DirectLex if you had
+ previously been monkeypatching in a custom, autoloaded implementation
+ of DOMDocument. Don't do that. Thanks @Izumi-kun (#130)
+
+4.9.2, released 2017-03-12
+- Fixes PHP 5.3 compatibility
+- Fix breakage when decoding decimal entities. Thanks @rybakit (#129)
+
+4.9.1, released 2017-03-08
+! %URI.DefaultScheme can now be set to null, in which case
+ all relative paths are removed.
+! New CSS properties: min-width, max-width, min-height, max-height (#94)
+! Transparency (rgba) and hsl/hsla supported where color CSS is present.
+ Thanks @fxbt for contributing the patch. (#118)
+- When idn_to_ascii is defined, we might accept malformed
+ hostnames. Apply validation to the result in such cases.
+- Close directory when done in Serializer DefinitionCache (#100)
+- Deleted some asserts to keep linters from choking (#97)
+- Rework Serializer cache behavior to avoid chmod'ing if possible (#32)
+- Embedded semicolons in strings in CSS are now handled correctly!
+- We accidentally dropped certain Unicode characters if there was
+ one or more invalid characters. This has been fixed, thanks
+ to mpyw <ryosuke_i_628@yahoo.co.jp>
+- Fix for "Don't truncate upon encountering </div> when using DOMLex"
+ caused a regression with HTML 4.01 Strict parsing with libxml 2.9.1
+ (and maybe later versions, but known OK with libxml 2.9.4). The
+ fix is to go about handling truncation a bit more cleverly so that
+ we can wrap with divs (sidestepping the bug) but slurping out the
+ rest of the text in case it ran off the end. (#78)
+- Fix PREG_BACKTRACK_LIMIT_ERROR in HTMLPurifier_Filter_ExtractStyle.
+ Thanks @breathbath for contributing the report and fix (#120)
+- Fix entity decoding algorithm to be more conservative about
+ decoding entities that are missing trailing semicolon.
+ To get old behavior, set %Core.LegacyEntityDecoder to true.
+ (#119)
+- Workaround libxml bug when HTML tags are embedded inside
+ script tags. To disable workaround set %Core.AggressivelyRemoveScript
+ to false. (#83)
+# By default, when a link has a target attribute associated
+ with it, we now also add rel="noopener" in order to
+ prevent the new window from being able to overwrite
+ the original frame. To disable this protection,
+ set %HTML.TargetNoopener to FALSE.
+
+4.9.0 was cut on Git but never properly released; when we did the
+real release we decided to skip this version number.
+
4.8.0, released 2016-07-16
# By default, when a link has a target attribute associated
with it, we now also add rel="noreferrer" in order to
-HTML Purifier
+HTML Purifier [![Build Status](https://secure.travis-ci.org/ezyang/htmlpurifier.svg?branch=master)](http://travis-ci.org/ezyang/htmlpurifier)
=============
HTML Purifier is an HTML filtering solution that uses a unique combination
-4.8.0
\ No newline at end of file
+4.9.3
\ No newline at end of file
-HTML Purifier 4.8.0 is a bugfix release, collecting a year
-of accumulated bug fixes. In particular, we fixed some minor
-bugs and now declare full PHP 7 compatibility. The primary
-backwards-incompatible change is that HTML Purifier will now
-add rel="noreferrer" to all links with target attributes
-(you can disable this with %HTML.TargetNoReferrer.) Other
-changes: new configuration options %CSS.AllowDuplicates and
-%Attr.ID.HTML5; border-radius is partially supported when
-%CSS.AllowProprietary, and tel URIs are supported by default.
+HTML Purifier 4.9.x is a maintenance release, collecting a year
+of accumulated bug fixes plus a few new features. New features
+include support for min/max-width/height CSS, and rgba/hsl/hsla
+in color specifications. Major bugfixes include improvements
+in the Serializer cache to avoid chmod'ing directories, better
+entity decoding (we won't accidentally decode entities that occur
+in URLs) and rel="noopener" on links with target attributes,
+to prevent them from overwriting the original frame.
+
+4.9.3 works around an infinite loop bug in PHP 7.1 with the opcode
+cache (and has one other, minor bugfix, avoiding using autoloading
+when testing for DOMDocument presence). If these bugs do not
+affect you, you do not need to upgrade.
"require": {
"php": ">=5.2"
},
+ "require-dev": {
+ "simpletest/simpletest": "^1.1"
+ },
"autoload": {
"psr-0": { "HTMLPurifier": "library/" },
"files": ["library/HTMLPurifier.composer.php"]
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run.
*
- * @version 4.8.0
+ * @version 4.9.3
*
* @warning
* You must *not* include any other HTML Purifier files before this file,
require 'HTMLPurifier/AttrTransform/SafeParam.php';
require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
require 'HTMLPurifier/AttrTransform/TargetBlank.php';
+require 'HTMLPurifier/AttrTransform/TargetNoopener.php';
require 'HTMLPurifier/AttrTransform/TargetNoreferrer.php';
require 'HTMLPurifier/AttrTransform/Textarea.php';
require 'HTMLPurifier/ChildDef/Chameleon.php';
require 'HTMLPurifier/HTMLModule/Tables.php';
require 'HTMLPurifier/HTMLModule/Target.php';
require 'HTMLPurifier/HTMLModule/TargetBlank.php';
+require 'HTMLPurifier/HTMLModule/TargetNoopener.php';
require 'HTMLPurifier/HTMLModule/TargetNoreferrer.php';
require 'HTMLPurifier/HTMLModule/Text.php';
require 'HTMLPurifier/HTMLModule/Tidy.php';
*/
/*
- HTML Purifier 4.8.0 - Standards Compliant HTML Filtering
+ HTML Purifier 4.9.3 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
* Version of HTML Purifier.
* @type string
*/
- public $version = '4.8.0';
+ public $version = '4.9.3';
/**
* Constant with version of HTML Purifier.
*/
- const VERSION = '4.8.0';
+ const VERSION = '4.9.3';
/**
* Global configuration object.
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/TargetBlank.php';
+require_once $__dir . '/HTMLPurifier/AttrTransform/TargetNoopener.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/TargetNoreferrer.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/TargetBlank.php';
+require_once $__dir . '/HTMLPurifier/HTMLModule/TargetNoopener.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/TargetNoreferrer.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php';
if ($token instanceof HTMLPurifier_Token_End) {
$token->start = null; // [MUT]
$r = array_pop($stack);
- assert($r->name === $token->name);
- assert(empty($token->attr));
+ //assert($r->name === $token->name);
+ //assert(empty($token->attr));
$r->endCol = $token->col;
$r->endLine = $token->line;
$r->endArmor = $token->armor;
$stack[] = $node;
}
}
- assert(count($stack) == 1);
+ //assert(count($stack) == 1);
return $stack[0];
}
*/
protected function mungeRgb($string)
{
- return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
+ $p = '\s*(\d+(\.\d+)?([%]?))\s*';
+
+ if (preg_match('/(rgba|hsla)\(/', $string)) {
+ return preg_replace('/(rgba|hsla)\('.$p.','.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8,\11)', $string);
+ }
+
+ return preg_replace('/(rgb|hsl)\('.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8)', $string);
}
/**
$definition = $config->getCSSDefinition();
$allow_duplicates = $config->get("CSS.AllowDuplicates");
- // we're going to break the spec and explode by semicolons.
- // This is because semicolon rarely appears in escaped form
- // Doing this is generally flaky but fast
- // IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI
- // for details
- $declarations = explode(';', $css);
+ // According to the CSS2.1 spec, the places where a
+ // non-delimiting semicolon can appear are in strings and
+ // escape sequences. So here is some dumb hack to
+ // handle quotes.
+ $len = strlen($css);
+ $accum = "";
+ $declarations = array();
+ $quoted = false;
+ for ($i = 0; $i < $len; $i++) {
+ $c = strcspn($css, ";'\"", $i);
+ $accum .= substr($css, $i, $c);
+ $i += $c;
+ if ($i == $len) break;
+ $d = $css[$i];
+ if ($quoted) {
+ $accum .= $d;
+ if ($d == $quoted) {
+ $quoted = false;
+ }
+ } else {
+ if ($d == ";") {
+ $declarations[] = $accum;
+ $accum = "";
+ } else {
+ $accum .= $d;
+ $quoted = $d;
+ }
+ }
+ }
+ if ($accum != "") $declarations[] = $accum;
+
$propvalues = array();
$new_declarations = '';
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
{
+ /**
+ * @type HTMLPurifier_AttrDef_CSS_AlphaValue
+ */
+ protected $alpha;
+
+ public function __construct()
+ {
+ $this->alpha = new HTMLPurifier_AttrDef_CSS_AlphaValue();
+ }
+
/**
* @param string $color
* @param HTMLPurifier_Config $config
return $colors[$lower];
}
- if (strpos($color, 'rgb(') !== false) {
- // rgb literal handling
+ if (preg_match('#(rgb|rgba|hsl|hsla)\(#', $color, $matches) === 1) {
$length = strlen($color);
if (strpos($color, ')') !== $length - 1) {
return false;
}
- $triad = substr($color, 4, $length - 4 - 1);
- $parts = explode(',', $triad);
- if (count($parts) !== 3) {
+
+ // get used function : rgb, rgba, hsl or hsla
+ $function = $matches[1];
+
+ $parameters_size = 3;
+ $alpha_channel = false;
+ if (substr($function, -1) === 'a') {
+ $parameters_size = 4;
+ $alpha_channel = true;
+ }
+
+ /*
+ * Allowed types for values :
+ * parameter_position => [type => max_value]
+ */
+ $allowed_types = array(
+ 1 => array('percentage' => 100, 'integer' => 255),
+ 2 => array('percentage' => 100, 'integer' => 255),
+ 3 => array('percentage' => 100, 'integer' => 255),
+ );
+ $allow_different_types = false;
+
+ if (strpos($function, 'hsl') !== false) {
+ $allowed_types = array(
+ 1 => array('integer' => 360),
+ 2 => array('percentage' => 100),
+ 3 => array('percentage' => 100),
+ );
+ $allow_different_types = true;
+ }
+
+ $values = trim(str_replace($function, '', $color), ' ()');
+
+ $parts = explode(',', $values);
+ if (count($parts) !== $parameters_size) {
return false;
}
- $type = false; // to ensure that they're all the same type
+
+ $type = false;
$new_parts = array();
+ $i = 0;
+
foreach ($parts as $part) {
+ $i++;
$part = trim($part);
+
if ($part === '') {
return false;
}
- $length = strlen($part);
- if ($part[$length - 1] === '%') {
- // handle percents
- if (!$type) {
- $type = 'percentage';
- } elseif ($type !== 'percentage') {
+
+ // different check for alpha channel
+ if ($alpha_channel === true && $i === count($parts)) {
+ $result = $this->alpha->validate($part, $config, $context);
+
+ if ($result === false) {
return false;
}
- $num = (float)substr($part, 0, $length - 1);
- if ($num < 0) {
- $num = 0;
- }
- if ($num > 100) {
- $num = 100;
- }
- $new_parts[] = "$num%";
+
+ $new_parts[] = (string)$result;
+ continue;
+ }
+
+ if (substr($part, -1) === '%') {
+ $current_type = 'percentage';
} else {
- // handle integers
- if (!$type) {
- $type = 'integer';
- } elseif ($type !== 'integer') {
- return false;
- }
- $num = (int)$part;
- if ($num < 0) {
- $num = 0;
- }
- if ($num > 255) {
- $num = 255;
- }
- $new_parts[] = (string)$num;
+ $current_type = 'integer';
+ }
+
+ if (!array_key_exists($current_type, $allowed_types[$i])) {
+ return false;
+ }
+
+ if (!$type) {
+ $type = $current_type;
+ }
+
+ if ($allow_different_types === false && $type != $current_type) {
+ return false;
+ }
+
+ $max_value = $allowed_types[$i][$current_type];
+
+ if ($current_type == 'integer') {
+ // Return value between range 0 -> $max_value
+ $new_parts[] = (int)max(min($part, $max_value), 0);
+ } elseif ($current_type == 'percentage') {
+ $new_parts[] = (float)max(min(rtrim($part, '%'), $max_value), 0) . '%';
}
}
- $new_triad = implode(',', $new_parts);
- $color = "rgb($new_triad)";
+
+ $new_values = implode(',', $new_parts);
+
+ $color = $function . '(' . $new_values . ')';
} else {
// hexadecimal handling
if ($color[0] === '#') {
}
return $color;
}
+
}
// vim: et sw=4 sts=4
// PHP 5.3 and later support this functionality natively
if (function_exists('idn_to_ascii')) {
- return idn_to_ascii($string);
+ $string = idn_to_ascii($string);
// If we have Net_IDNA2 support, we can support IRIs by
// punycoding them. (This is the most portable thing to do,
}
}
$string = implode('.', $new_parts);
- if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
- return $string;
- }
} catch (Exception $e) {
// XXX error reporting
}
}
+ // Try again
+ if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
+ return $string;
+ }
return false;
}
}
--- /dev/null
+<?php
+
+// must be called POST validation
+
+/**
+ * Adds rel="noopener" to any link that carries a target attribute,
+ * whatever the target's value. This is used to prevent malicious websites
+ * from silently replacing the original window, which could be used
+ * to do phishing.
+ * This transform is controlled by %HTML.TargetNoopener.
+ */
+class HTMLPurifier_AttrTransform_TargetNoopener extends HTMLPurifier_AttrTransform
+{
+ /**
+ * Appends "noopener" to the space-separated rel tokens when a target
+ * attribute is set and the token is not already listed. When a rel
+ * attribute already exists it is written back re-joined from its tokens.
+ *
+ * @param array $attr Attributes keyed by name; 'rel' and 'target' are read.
+ * @param HTMLPurifier_Config $config Not read by this transform.
+ * @param HTMLPurifier_Context $context Not read by this transform.
+ * @return array The attribute array, with 'rel' updated where needed.
+ */
+ public function transform($attr, $config, $context)
+ {
+ if (isset($attr['rel'])) {
+ $rels = explode(' ', $attr['rel']);
+ } else {
+ $rels = array();
+ }
+ if (isset($attr['target']) && !in_array('noopener', $rels)) {
+ $rels[] = 'noopener';
+ }
+ if (!empty($rels) || isset($attr['rel'])) {
+ $attr['rel'] = implode(' ', $rels);
+ }
+
+ return $attr;
+ }
+}
+
);
$max = $config->get('CSS.MaxImgLength');
+ $this->info['min-width'] =
+ $this->info['max-width'] =
+ $this->info['min-height'] =
+ $this->info['max-height'] =
$this->info['width'] =
$this->info['height'] =
$max === null ?
// a little sanity check to make sure it's not ALL whitespace
$all_whitespace = true;
- $current_li = false;
+ $current_li = null;
foreach ($children as $node) {
if (!empty($node->is_whitespace)) {
// to handle non-list elements; non-list elements should
// not be appended to an existing li; only li created
// for non-list. This distinction is not currently made.
- if ($current_li === false) {
+ if ($current_li === null) {
$current_li = new HTMLPurifier_Node_Element('li');
$result[] = $current_li;
}
$current_tr_tbody->children[] = $node;
break;
case '#PCDATA':
- assert($node->is_whitespace);
+ //assert($node->is_whitespace);
if ($current_tr_tbody === null) {
$ret[] = $node;
} else {
* HTML Purifier's version
* @type string
*/
- public $version = '4.8.0';
+ public $version = '4.9.3';
/**
* Whether or not to automatically finalize
}
// Raw type might be negative when using the fully optimized form
- // of stdclass, which indicates allow_null == true
+ // of stdClass, which indicates allow_null == true
$rtype = is_int($def) ? $def : $def->type;
if ($rtype < 0) {
$type = -$rtype;
*
* array(
* 'Namespace' => array(
- * 'Directive' => new stdclass(),
+ * 'Directive' => new stdClass(),
* )
* )
*
- * The stdclass may have the following properties:
+ * The stdClass may have the following properties:
*
* - If isAlias isn't set:
* - type: Integer type of directive, see HTMLPurifier_VarParser for definitions
* - namespace: Namespace this directive aliases to
* - name: Directive name this directive aliases to
*
- * In certain degenerate cases, stdclass will actually be an integer. In
- * that case, the value is equivalent to an stdclass with the type
+ * In certain degenerate cases, stdClass will actually be an integer. In
+ * that case, the value is equivalent to an stdClass with the type
* property set to the integer. If the integer is negative, type is
* equal to the absolute value of integer, and allow_null is true.
*
*/
public function add($key, $default, $type, $allow_null)
{
- $obj = new stdclass();
+ $obj = new stdClass();
$obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type];
if ($allow_null) {
$obj->allow_null = true;
*/
public function addAlias($key, $new_key)
{
- $obj = new stdclass;
+ $obj = new stdClass;
$obj->key = $new_key;
$obj->isAlias = true;
$this->info[$key] = $obj;
}
/**
- * Replaces any stdclass that only has the type property with type integer.
+ * Replaces any stdClass that only has the type property with type integer.
*/
public function postProcess()
{
--- /dev/null
+Core.AggressivelyRemoveScript
+TYPE: bool
+VERSION: 4.9.0
+DEFAULT: true
+--DESCRIPTION--
+<p>
+ This directive enables aggressive pre-filter removal of
+ script tags. This is not necessary for security,
+ but it can help work around a bug in libxml where embedded
+ HTML elements inside script sections cause the parser to
+ choke. To revert to pre-4.9.0 behavior, set this to false.
+ This directive has no effect if %Core.Trusted is true,
+ %Core.RemoveScriptContents is false, or %Core.HiddenElements
+ does not contain script.
+</p>
+--# vim: et sw=4 sts=4
--- /dev/null
+Core.LegacyEntityDecoder
+TYPE: bool
+VERSION: 4.9.0
+DEFAULT: false
+--DESCRIPTION--
+<p>
+ Prior to HTML Purifier 4.9.0, entities were decoded by performing
+ a global search replace for all entities whose decoded versions
+ did not have special meanings under HTML, and replaced them with
+ their decoded versions. We would match all entities, even if they did
+ not have a trailing semicolon, but only if there weren't any trailing
+ alphanumeric characters.
+</p>
+<table>
+<tr><th>Original</th><th>Text</th><th>Attribute</th></tr>
+<tr><td>&yen;</td><td>¥</td><td>¥</td></tr>
+<tr><td>&yen</td><td>¥</td><td>¥</td></tr>
+<tr><td>&yena</td><td>&yena</td><td>&yena</td></tr>
+<tr><td>&yen=</td><td>¥=</td><td>¥=</td></tr>
+</table>
+<p>
+ In HTML Purifier 4.9.0, we changed the behavior of entity parsing
+ to match entities that had missing trailing semicolons in fewer
+ cases, to more closely match HTML5 parsing behavior:
+</p>
+<table>
+<tr><th>Original</th><th>Text</th><th>Attribute</th></tr>
+<tr><td>&yen;</td><td>¥</td><td>¥</td></tr>
+<tr><td>&yen</td><td>¥</td><td>¥</td></tr>
+<tr><td>&yena</td><td>¥a</td><td>&yena</td></tr>
+<tr><td>&yen=</td><td>¥=</td><td>&yen=</td></tr>
+</table>
+<p>
+ This flag reverts back to pre-HTML Purifier 4.9.0 behavior.
+</p>
+--# vim: et sw=4 sts=4
--- /dev/null
+--# vim: et sw=4 sts=4
+HTML.TargetNoopener
+TYPE: bool
+VERSION: 4.8.0
+DEFAULT: TRUE
+--DESCRIPTION--
+If enabled, noopener rel attributes are added to links which have
+a target attribute associated with them. This prevents malicious
+destinations from overwriting the original window.
+--# vim: et sw=4 sts=4
URI.DefaultScheme
-TYPE: string
+TYPE: string/null
DEFAULT: 'http'
--DESCRIPTION--
Defines through what scheme the output will be served, in order to
select the proper object validator when no scheme information is present.
</p>
+
+<p>
+ Starting with HTML Purifier 4.9.0, the default scheme can be null, in
+ which case we reject all URIs which do not have explicit schemes.
+</p>
--# vim: et sw=4 sts=4
}
unlink($dir . '/' . $filename);
}
+ closedir($dh);
return true;
}
unlink($dir . '/' . $filename);
}
}
+ closedir($dh);
return true;
}
if ($result !== false) {
// set permissions of the new file (no execute)
$chmod = $config->get('Cache.SerializerPermissions');
- if ($chmod === null) {
- // don't do anything
- } else {
- $chmod = $chmod & 0666;
- chmod($file, $chmod);
+ if ($chmod !== null) {
+ chmod($file, $chmod & 0666);
}
}
return $result;
{
$directory = $this->generateDirectoryPath($config);
$chmod = $config->get('Cache.SerializerPermissions');
+ if ($chmod === null) {
+ // TODO: This races
+ if (is_dir($directory)) return true;
+ return mkdir($directory);
+ }
if (!is_dir($directory)) {
$base = $this->generateBaseDirectoryPath($config);
if (!is_dir($base)) {
} elseif (!$this->_testPermissions($base, $chmod)) {
return false;
}
- if ($chmod === null) {
+ if (!mkdir($directory, $chmod)) {
trigger_error(
- 'Base directory ' . $base . ' does not exist,
- please create or change using %Cache.SerializerPath',
+ 'Could not create directory ' . $directory . '',
E_USER_WARNING
);
return false;
}
- if ($chmod !== null) {
- mkdir($directory, $chmod);
- } else {
- mkdir($directory);
- }
if (!$this->_testPermissions($directory, $chmod)) {
- trigger_error(
- 'Base directory ' . $base . ' does not exist,
- please create or change using %Cache.SerializerPath',
- E_USER_WARNING
- );
return false;
}
} elseif (!$this->_testPermissions($directory, $chmod)) {
* It will parse according to UTF-8 and return a valid UTF8 string, with
* non-SGML codepoints excluded.
*
+ * Specifically, it will permit:
+ * \x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}
+ * Source: https://www.w3.org/TR/REC-xml/#NT-Char
+ * Arguably this function should be modernized to the HTML5 set
+ * of allowed characters:
+ * https://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
+ * which simultaneously expand and restrict the set of allowed characters.
+ *
* @param string $str The string to clean
* @param bool $force_php
* @return string
* function that needs to be able to understand UTF-8 characters.
* As of right now, only smart lossless character encoding converters
* would need that, and I'm probably not going to implement them.
- * Once again, PHP 6 should solve all our problems.
*/
public static function cleanUTF8($str, $force_php = false)
{
// UTF-8 validity is checked since PHP 4.3.5
// This is an optimization: if the string is already valid UTF-8, no
// need to do PHP stuff. 99% of the time, this will be the case.
- // The regexp matches the XML char production, as well as well as excluding
- // non-SGML codepoints U+007F to U+009F
if (preg_match(
'/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du',
$str
// 7F-9F is not strictly prohibited by XML,
// but it is non-SGML, and thus we don't allow it
(0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
+ (0xE000 <= $mUcs4 && 0xFFFD >= $mUcs4) ||
(0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
)
) {
*/
protected $_entity_lookup;
+ /**
+ * Callback regex string for entities in text.
+ * @type string
+ */
+ protected $_textEntitiesRegex;
+
+ /**
+ * Callback regex string for entities in attributes.
+ * @type string
+ */
+ protected $_attrEntitiesRegex;
+
+ /**
+ * Regex matching an ampersand followed by one of the entity names that
+ * may omit the trailing semicolon (the name is captured as group 4).
+ */
+ protected $_semiOptionalPrefixRegex;
+
+ public function __construct() {
+ // From
+ // http://stackoverflow.com/questions/15532252/why-is-reg-being-rendered-as-without-the-bounding-semicolon
+ $semi_optional = "quot|QUOT|lt|LT|gt|GT|amp|AMP|AElig|Aacute|Acirc|Agrave|Aring|Atilde|Auml|COPY|Ccedil|ETH|Eacute|Ecirc|Egrave|Euml|Iacute|Icirc|Igrave|Iuml|Ntilde|Oacute|Ocirc|Ograve|Oslash|Otilde|Ouml|REG|THORN|Uacute|Ucirc|Ugrave|Uuml|Yacute|aacute|acirc|acute|aelig|agrave|aring|atilde|auml|brvbar|ccedil|cedil|cent|copy|curren|deg|divide|eacute|ecirc|egrave|eth|euml|frac12|frac14|frac34|iacute|icirc|iexcl|igrave|iquest|iuml|laquo|macr|micro|middot|nbsp|not|ntilde|oacute|ocirc|ograve|ordf|ordm|oslash|otilde|ouml|para|plusmn|pound|raquo|reg|sect|shy|sup1|sup2|sup3|szlig|thorn|times|uacute|ucirc|ugrave|uml|uuml|yacute|yen|yuml";
+
+ // NB: three empty captures to put the fourth match in the right
+ // place
+ $this->_semiOptionalPrefixRegex = "/&()()()($semi_optional)/";
+
+ $this->_textEntitiesRegex =
+ '/&(?:'.
+ // hex
+ '[#]x([a-fA-F0-9]+);?|'.
+ // dec
+ '[#]0*(\d+);?|'.
+ // string (mandatory semicolon)
+ // NB: order matters: match semicolon preferentially
+ '([A-Za-z_:][A-Za-z0-9.\-_:]*);|'.
+ // string (optional semicolon)
+ "($semi_optional)".
+ ')/';
+
+ $this->_attrEntitiesRegex =
+ '/&(?:'.
+ // hex
+ '[#]x([a-fA-F0-9]+);?|'.
+ // dec
+ '[#]0*(\d+);?|'.
+ // string (mandatory semicolon)
+ // NB: order matters: match semicolon preferentially
+ '([A-Za-z_:][A-Za-z0-9.\-_:]*);|'.
+ // string (optional semicolon)
+ // don't match if trailing is equals or alphanumeric (URL
+ // like)
+ "($semi_optional)(?![=;A-Za-z0-9])".
+ ')/';
+
+ }
+
+ /**
+ * Substitute entities with the parsed equivalents. Use this on
+ * textual data in an HTML document (as opposed to attributes.)
+ *
+ * @param string $string String to have entities parsed.
+ * @return string Parsed string.
+ */
+ public function substituteTextEntities($string)
+ {
+ return preg_replace_callback(
+ $this->_textEntitiesRegex,
+ array($this, 'entityCallback'),
+ $string
+ );
+ }
+
+ /**
+ * Substitute entities with the parsed equivalents. Use this on
+ * attribute contents in documents.
+ *
+ * @param string $string String to have entities parsed.
+ * @return string Parsed string.
+ */
+ public function substituteAttrEntities($string)
+ {
+ return preg_replace_callback(
+ $this->_attrEntitiesRegex,
+ array($this, 'entityCallback'),
+ $string
+ );
+ }
+
+ /**
+ * Callback for substituteTextEntities() and substituteAttrEntities()
+ * that decodes a single matched entity.
+ *
+ * @param array $matches PCRE matches array, with 0 the entire match, and
+ * one of index 1 (hex value), 2 (decimal value),
+ * 3 (entity name terminated by a semicolon) or
+ * 4 (semicolon-optional entity name) set.
+ * @return string Replacement string; the matched text is returned
+ * unchanged when no entity can be resolved from it.
+ */
+
+ protected function entityCallback($matches)
+ {
+ $entity = $matches[0];
+ $hex_part = @$matches[1];
+ $dec_part = @$matches[2];
+ $named_part = empty($matches[3]) ? @$matches[4] : $matches[3];
+ if ($hex_part !== NULL && $hex_part !== "") {
+ return HTMLPurifier_Encoder::unichr(hexdec($hex_part));
+ } elseif ($dec_part !== NULL && $dec_part !== "") {
+ return HTMLPurifier_Encoder::unichr((int) $dec_part);
+ } else {
+ if (!$this->_entity_lookup) {
+ $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
+ }
+ if (isset($this->_entity_lookup->table[$named_part])) {
+ return $this->_entity_lookup->table[$named_part];
+ } else {
+ // The full name is not a known entity, so test whether
+ // one of the semicolon-optional names matches its prefix.
+ // Retrying only when group 3 (the semicolon-terminated
+ // name) matched is important to prevent an infinite loop.
+ if (!empty($matches[3])) {
+ return preg_replace_callback(
+ $this->_semiOptionalPrefixRegex,
+ array($this, 'entityCallback'),
+ $entity
+ );
+ }
+ return $entity;
+ }
+ }
+ }
+
+ // LEGACY CODE BELOW
+
/**
* Callback regex string for parsing entities.
* @type string
$entity;
} else {
return isset($this->_special_ent2dec[$matches[3]]) ?
- $this->_special_ent2dec[$matches[3]] :
+ $this->_special_dec2str[$this->_special_ent2dec[$matches[3]]] :
$entity;
}
}
if ($tidy !== null) {
$this->_tidy = $tidy;
}
- $html = preg_replace_callback('#<style(?:\s.*)?>(.+)</style>#isU', array($this, 'styleCallback'), $html);
+ // NB: this must be NON-greedy because if we have
+ // <style>foo</style> <style>bar</style>
+ // we must not grab foo</style> <style>bar
+ $html = preg_replace_callback('#<style(?:\s.*)?>(.*)<\/style>#isU', array($this, 'styleCallback'), $html);
$style_blocks = $this->_styleMatches;
$this->_styleMatches = array(); // reset
$context->register('StyleBlocks', $style_blocks); // $context must not be reused
$attr = $this->generateAttributes($token->attr, $token->name);
if ($this->_flashCompat) {
if ($token->name == "object") {
- $flash = new stdclass();
+ $flash = new stdClass();
$flash->attr = $token->attr;
$flash->param = array();
$this->_flashStack[] = $flash;
--- /dev/null
+<?php
+
+/**
+ * Module that attaches the target-based noopener attribute transformation
+ * (HTMLPurifier_AttrTransform_TargetNoopener) to a tags. It
+ * is enabled by %HTML.TargetNoopener.
+ */
+class HTMLPurifier_HTMLModule_TargetNoopener extends HTMLPurifier_HTMLModule
+{
+ /**
+ * Name of this module.
+ * @type string
+ */
+ public $name = 'TargetNoopener';
+
+ /**
+ * Registers the noopener transform as a post attribute transform on
+ * the a element.
+ *
+ * @param HTMLPurifier_Config $config Not read by this method.
+ */
+ public function setup($config) {
+ $a = $this->addBlankElement('a');
+ $a->attr_transform_post[] = new HTMLPurifier_AttrTransform_TargetNoopener();
+ }
+}
if ($config->get('HTML.TargetBlank')) {
$modules[] = 'TargetBlank';
}
- // NB: HTML.TargetNoreferrer must be AFTER HTML.TargetBlank
+ // NB: HTML.TargetNoreferrer and HTML.TargetNoopener must be AFTER HTML.TargetBlank
// so that its post-attr-transform gets run afterwards.
if ($config->get('HTML.TargetNoreferrer')) {
$modules[] = 'TargetNoreferrer';
}
+ if ($config->get('HTML.TargetNoopener')) {
+ $modules[] = 'TargetNoopener';
+ }
// merge in custom modules
$modules = array_merge($modules, $this->userModules);
break;
}
- if (class_exists('DOMDocument') &&
+ if (class_exists('DOMDocument', false) &&
method_exists('DOMDocument', 'loadHTML') &&
!extension_loaded('domxml')
) {
''' => "'"
);
+ public function parseText($string, $config) { // text-node variant: delegates to parseData() with $is_attr = false
+ return $this->parseData($string, false, $config);
+ }
+
+ public function parseAttr($string, $config) { // attribute-value variant: delegates to parseData() with $is_attr = true
+ return $this->parseData($string, true, $config);
+ }
+
/**
* Parses special entities into the proper characters.
*
* This string will translate escaped versions of the special characters
* into the correct ones.
*
- * @warning
- * You should be able to treat the output of this function as
- * completely parsed, but that's only because all other entities should
- * have been handled previously in substituteNonSpecialEntities()
- *
* @param string $string String character data to be parsed.
* @return string Parsed character data.
*/
- public function parseData($string)
+ public function parseData($string, $is_attr, $config)
{
// following functions require at least one character
if ($string === '') {
}
// hmm... now we have some uncommon entities. Use the callback.
- $string = $this->_entity_parser->substituteSpecialEntities($string);
+ if ($config->get('Core.LegacyEntityDecoder')) {
+ $string = $this->_entity_parser->substituteSpecialEntities($string);
+ } else {
+ if ($is_attr) {
+ $string = $this->_entity_parser->substituteAttrEntities($string);
+ } else {
+ $string = $this->_entity_parser->substituteTextEntities($string);
+ }
+ }
return $string;
}
}
// expand entities that aren't the big five
- $html = $this->_entity_parser->substituteNonSpecialEntities($html);
+ if ($config->get('Core.LegacyEntityDecoder')) {
+ $html = $this->_entity_parser->substituteNonSpecialEntities($html);
+ }
// clean into wellformed UTF-8 string for an SGML context: this has
// to be done after entity expansion because the entities sometimes
$html = preg_replace('#<\?.+?\?>#s', '', $html);
}
+ $hidden_elements = $config->get('Core.HiddenElements');
+ if ($config->get('Core.AggressivelyRemoveScript') &&
+ !($config->get('HTML.Trusted') || !$config->get('Core.RemoveScriptContents')
+ || empty($hidden_elements["script"]))) {
+ $html = preg_replace('#<script[^>]*>.*?</script>#i', '', $html);
+ }
+
return $html;
}
$doc->loadHTML($html);
restore_error_handler();
+ $body = $doc->getElementsByTagName('html')->item(0)-> // <html>
+ getElementsByTagName('body')->item(0); // <body>
+
+ $div = $body->getElementsByTagName('div')->item(0); // <div>
$tokens = array();
- $this->tokenizeDOM(
- $doc->getElementsByTagName('html')->item(0)-> // <html>
- getElementsByTagName('body')->item(0), // <body>
- $tokens
- );
+ $this->tokenizeDOM($div, $tokens, $config);
+ // If the div has a sibling, that means we tripped across
+ // a premature </div> tag. So remove the div we parsed,
+ // and then tokenize the rest of body. We can't tokenize
+ // the sibling directly as we'll lose the tags in that case.
+ if ($div->nextSibling) {
+ $body->removeChild($div);
+ $this->tokenizeDOM($body, $tokens, $config);
+ }
return $tokens;
}
* @param HTMLPurifier_Token[] $tokens Array-list of already tokenized tokens.
* @return HTMLPurifier_Token of node appended to previously passed tokens.
*/
- protected function tokenizeDOM($node, &$tokens)
+ protected function tokenizeDOM($node, &$tokens, $config)
{
$level = 0;
$nodes = array($level => new HTMLPurifier_Queue(array($node)));
while (!$nodes[$level]->isEmpty()) {
$node = $nodes[$level]->shift(); // FIFO
$collect = $level > 0 ? true : false;
- $needEndingTag = $this->createStartNode($node, $tokens, $collect);
+ $needEndingTag = $this->createStartNode($node, $tokens, $collect, $config);
if ($needEndingTag) {
$closingNodes[$level][] = $node;
}
* @return bool if the token needs an endtoken
* @todo data and tagName properties don't seem to exist in DOMNode?
*/
- protected function createStartNode($node, &$tokens, $collect)
+ protected function createStartNode($node, &$tokens, $collect, $config)
{
// intercept non element nodes. WE MUST catch all of them,
// but we're not getting the character reference nodes because
}
}
}
- $tokens[] = $this->factory->createText($this->parseData($data));
+ $tokens[] = $this->factory->createText($this->parseText($data, $config));
return false;
} elseif ($node->nodeType === XML_COMMENT_NODE) {
// this is code is only invoked for comments in script/style in versions
* @param HTMLPurifier_Context $context
* @return string
*/
- protected function wrapHTML($html, $config, $context)
+ protected function wrapHTML($html, $config, $context, $use_div = true)
{
$def = $config->getDefinition('HTML');
$ret = '';
$ret .= '<html><head>';
$ret .= '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
// No protection if $html contains a stray </div>!
- $ret .= '</head><body>' . $html . '</body></html>';
+ $ret .= '</head><body>';
+ if ($use_div) $ret .= '<div>';
+ $ret .= $html;
+ if ($use_div) $ret .= '</div>';
+ $ret .= '</body></html>';
return $ret;
}
}
// We are not inside tag and there still is another tag to parse
$token = new
HTMLPurifier_Token_Text(
- $this->parseData(
+ $this->parseText(
substr(
$html,
$cursor,
$position_next_lt - $cursor
- )
+ ), $config
)
);
if ($maintain_line_numbers) {
// Create Text of rest of string
$token = new
HTMLPurifier_Token_Text(
- $this->parseData(
+ $this->parseText(
substr(
$html,
$cursor
- )
+ ), $config
)
);
if ($maintain_line_numbers) {
$token = new
HTMLPurifier_Token_Text(
'<' .
- $this->parseData(
- substr($html, $cursor)
+ $this->parseText(
+ substr($html, $cursor), $config
)
);
if ($maintain_line_numbers) {
if ($value === false) {
$value = '';
}
- return array($key => $this->parseData($value));
+ return array($key => $this->parseAttr($value, $config));
}
// setup loop environment
if ($value === false) {
$value = '';
}
- $array[$key] = $this->parseData($value);
+ $array[$key] = $this->parseAttr($value, $config);
$cursor++;
} else {
// boolattr
public function tokenizeHTML($html, $config, $context)
{
$new_html = $this->normalize($html, $config, $context);
- $new_html = $this->wrapHTML($new_html, $config, $context);
+ $new_html = $this->wrapHTML($new_html, $config, $context, false /* no div */);
try {
$parser = new HTML5($new_html);
$doc = $parser->save();
$tokens = array();
$this->tokenizeDOM(
$doc->getElementsByTagName('html')->item(0)-> // <html>
- getElementsByTagName('body')->item(0) // <body>
+ getElementsByTagName('body')->item(0) // <body>
,
- $tokens
+ $tokens, $config
);
return $tokens;
}
// Consume the maximum number of characters possible, with the
// consumed characters case-sensitively matching one of the
// identifiers in the first column of the entities table.
+
$e_name = $this->characters('0-9A-Za-z;', $this->char + 1);
$len = strlen($e_name);
// Return a character token for the character corresponding to the
// entity name (as given by the second column of the entities table).
- return html_entity_decode('&' . $entity . ';', ENT_QUOTES, 'UTF-8');
+ return html_entity_decode('&' . rtrim($entity, ';') . ';', ENT_QUOTES, 'UTF-8');
}
private function emitToken($token)
if (empty($zipper->front)) break;
$token = $zipper->prev($token);
// indicate that other injectors should not process this token,
- // but we need to reprocess it
+ // but we need to reprocess it. See Note [Injector skips]
unset($token->skip[$i]);
$token->rewind = $i;
if ($token instanceof HTMLPurifier_Token_Start) {
if ($token instanceof HTMLPurifier_Token_Text) {
foreach ($this->injectors as $i => $injector) {
if (isset($token->skip[$i])) {
+ // See Note [Injector skips]
continue;
}
if ($token->rewind !== null && $token->rewind !== $i) {
if ($ok) {
foreach ($this->injectors as $i => $injector) {
if (isset($token->skip[$i])) {
+ // See Note [Injector skips]
continue;
}
if ($token->rewind !== null && $token->rewind !== $i) {
$token->start = $current_parent;
foreach ($this->injectors as $i => $injector) {
if (isset($token->skip[$i])) {
+ // See Note [Injector skips]
continue;
}
if ($token->rewind !== null && $token->rewind !== $i) {
*/
protected function processToken($token, $injector = -1)
{
+ // Zend OpCache miscompiles $token = array($token), so
+ // avoid this pattern. See: https://github.com/ezyang/htmlpurifier/issues/108
+
// normalize forms of token
if (is_object($token)) {
- $token = array(1, $token);
+ $tmp = $token;
+ $token = array(1, $tmp);
}
if (is_int($token)) {
- $token = array($token);
+ $tmp = $token;
+ $token = array($tmp);
}
if ($token === false) {
$token = array(1);
list($old, $r) = $this->zipper->splice($this->token, $delete, $token);
if ($injector > -1) {
- // determine appropriate skips
+ // See Note [Injector skips]
+ // Determine appropriate skips. Here's what the code does:
+ // *If* we deleted one or more tokens, copy the skips
+ // of those tokens into the skips of the new tokens (in $token).
+ // Also, mark the newly inserted tokens as having come from
+ // $injector.
$oldskip = isset($old[0]) ? $old[0]->skip : array();
foreach ($token as $object) {
$object->skip = $oldskip;
}
}
+// Note [Injector skips]
+// ~~~~~~~~~~~~~~~~~~~~~
+// When I originally designed this class, the idea behind the 'skip'
+// property of HTMLPurifier_Token was to help avoid infinite loops
+// in injector processing. For example, suppose you wrote an injector
+// that bolded swear words. Naively, you might write it so that
+// whenever you saw ****, you replaced it with <strong>****</strong>.
+//
+// When this happens, we will reprocess all of the tokens with the
+// other injectors. Now there is an opportunity for infinite loop:
+// if we rerun the swear-word injector on these tokens, we might
+// see **** and then reprocess again to get
+// <strong><strong>****</strong></strong> ad infinitum.
+//
+// Thus, the idea of a skip is that once we process a token with
+// an injector, we mark all of those tokens as having "come from"
+// the injector, and we never run the injector again on these
+// tokens.
+//
+// There were two more complications, however:
+//
+// - With HTMLPurifier_Injector_RemoveEmpty, we noticed that if
+// you had <b><i></i></b>, after you removed the <i></i>, you
+// really would like this injector to go back and reprocess
+// the <b> tag, discovering that it is now empty and can be
+// removed. So we reintroduced the possibility of infinite looping
+// by adding a "rewind" function, which let you go back to an
+// earlier point in the token stream and reprocess it with injectors.
+// Needless to say, we need to UN-skip the token so it gets
+// reprocessed.
+//
+// - Suppose that you successfully process a token, replace it with
+// one with your skip mark, but now another injector wants to
+// process the skipped token with another token. Should you continue
+// to skip that new token, or reprocess it? If you reprocess,
+// you can end up with an infinite loop where one injector converts
+// <a> to <b>, and then another injector converts it back. So
+// we inherit the skips, but for some reason, I thought that we
+// should inherit the skip from the first of the tokens
+// that we deleted. Why? Well, it seems to work OK.
+//
+// If I were to redesign this functionality, I would absolutely not
+// go about doing it this way: the semantics are just not very well
+// defined, and in any case you probably wanted to operate on trees,
+// not token streams.
+
// vim: et sw=4 sts=4
public $armor = array();
/**
- * Used during MakeWellFormed.
+ * Used during MakeWellFormed. See Note [Injector skips]
* @type
*/
public $skip;
$def = $config->getDefinition('URI');
$scheme_obj = $def->getDefaultScheme($config, $context);
if (!$scheme_obj) {
- // something funky happened to the default scheme object
- trigger_error(
- 'Default scheme object "' . $def->defaultScheme . '" was not readable',
- E_USER_WARNING
- );
+ if ($def->defaultScheme !== null) {
+ // something funky happened to the default scheme object
+ trigger_error(
+ 'Default scheme object "' . $def->defaultScheme . '" was not readable',
+ E_USER_WARNING
+ );
+ } // suppress error if it's null
return false;
}
}
--- /dev/null
+<?php
+
+// This file is the configuration for Travis testing.
+
+// Note: The only external library you *need* is SimpleTest; everything else
+// is optional.
+
+// We've got a lot of tests, so we recommend turning the execution time limit off.
+set_time_limit(0);
+
+// Turning off output buffering will prevent mysterious errors from core dumps.
+$data = @ob_get_clean(); // ob_get_clean() yields false when no output buffer is active
+if ($data !== false && $data !== '') { // a buffer was active and already held output
+ echo "Output buffer contains data [".urlencode($data)."]\n";
+ exit;
+}
+
+// -----------------------------------------------------------------------------
+// REQUIRED SETTINGS
+
+// Note on running SimpleTest:
+// You want the Git copy of SimpleTest, found here:
+// https://github.com/simpletest/simpletest/
+//
+// If SimpleTest is borked with HTML Purifier, please contact me or
+// the SimpleTest devs; I am a developer for SimpleTest so I should be
+// able to quickly assess a fix. SimpleTest's problem is my problem!
+
+// Where is SimpleTest located? Remember to include a trailing slash!
+$simpletest_location = dirname(__FILE__) . '/simpletest/';
+
+// -----------------------------------------------------------------------------
+// OPTIONAL SETTINGS
+
+// Note on running PHPT:
+// Vanilla PHPT from https://github.com/tswicegood/PHPT_Core should
+// work fine on Linux w/o multitest.
+//
+// To do multitest or Windows testing, you'll need some more
+// patches at https://github.com/ezyang/PHPT_Core
+//
+// I haven't tested the Windows setup in a while so I don't know if
+// it still works.
+
+// Should PHPT tests be enabled?
+$GLOBALS['HTMLPurifierTest']['PHPT'] = false;
+
+// If PHPT isn't in your Path via PEAR, set that here:
+// set_include_path('/path/to/phpt/Core/src' . PATH_SEPARATOR . get_include_path());
+
+// Where is CSSTidy located? (Include trailing slash. Leave false to disable.)
+$csstidy_location = false;
+
+// For tests/multitest.php, which versions to test?
+$versions_to_test = array();
+
+// Stable PHP binary to use when invoking maintenance scripts.
+$php = 'php';
+
+// For tests/multitest.php, what is the multi-version executable? It must
+// accept an extra parameter (version number) before all other arguments
+$phpv = false;
+
+// Should PEAR tests be run? If you've got a valid PEAR installation, set this
+// to true (or, if it's not in the include path, to its install directory).
+$GLOBALS['HTMLPurifierTest']['PEAR'] = false;
+
+// If PEAR is enabled, what PEAR tests should be run? (Note: you will
+// need to ensure these libraries are installed)
+$GLOBALS['HTMLPurifierTest']['Net_IDNA2'] = true;
+
+// vim: et sw=4 sts=4