26 $parser =
new HTML5($new_html);
27 $doc = $parser->save();
28 }
catch (DOMException $e) {
31 $context->register(
'PH5PError', $e);
36 $doc->getElementsByTagName(
'html')->item(0)->
37 getElementsByTagName(
'body')->item(0)
78 private $content_model;
79 private $escape =
false;
80 private $entities = array(
465 $this->
EOF = strlen($data);
469 $this->state =
'data';
471 while ($this->state !==
null) {
472 $this->{$this->state .
'State'}();
478 return $this->tree->save();
481 private function char()
483 return ($this->char < $this->
EOF)
484 ? $this->data[$this->char]
488 private function character(
$s, $l = 0)
490 if (
$s + $l < $this->
EOF) {
492 return $this->data[
$s];
494 return substr($this->data,
$s, $l);
499 private function characters($char_class, $start)
501 return preg_replace(
'#^([' . $char_class .
']+).*#s',
'\\1', substr($this->data, $start));
504 private function dataState()
508 $char = $this->char();
510 if ($char ===
'&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
515 $this->state =
'entityData';
517 } elseif ($char ===
'-') {
524 if (($this->content_model === self::RCDATA || $this->content_model ===
525 self::CDATA) && $this->escape ===
false &&
526 $this->
char >= 3 && $this->character($this->
char - 4, 4) ===
'<!--'
528 $this->escape =
true;
535 'type' => self::CHARACTR,
541 } elseif ($char ===
'<' && ($this->content_model === self::PCDATA ||
542 (($this->content_model === self::RCDATA ||
543 $this->content_model === self::CDATA) && $this->escape ===
false))
553 $this->state =
'tagOpen';
556 } elseif ($char ===
'>') {
562 if (($this->content_model === self::RCDATA ||
563 $this->content_model === self::CDATA) && $this->escape ===
true &&
564 $this->character($this->
char, 3) ===
'-->'
566 $this->escape =
false;
573 'type' => self::CHARACTR,
578 } elseif ($this->
char === $this->EOF) {
583 } elseif ($this->content_model === self::PLAINTEXT) {
589 'type' => self::CHARACTR,
590 'data' => substr($this->data, $this->
char)
601 $len = strcspn($this->data,
'<&', $this->
char);
602 $char = substr($this->data, $this->
char, $len);
603 $this->
char += $len - 1;
607 'type' => self::CHARACTR,
612 $this->state =
'data';
616 private function entityDataState()
619 $entity = $this->entity();
623 $char = (!$entity) ?
'&' : $entity;
626 'type' => self::CHARACTR,
632 $this->state =
'data';
635 private function tagOpenState()
637 switch ($this->content_model) {
645 if ($this->character($this->
char + 1) ===
'/') {
647 $this->state =
'closeTagOpen';
652 'type' => self::CHARACTR,
657 $this->state =
'data';
665 $char = $this->char();
670 $this->state =
'markupDeclarationOpen';
672 } elseif ($char ===
'/') {
675 $this->state =
'closeTagOpen';
677 } elseif (preg_match(
'/^[A-Za-z]$/', $char)) {
683 $this->token = array(
684 'name' => strtolower($char),
685 'type' => self::STARTTAG,
689 $this->state =
'tagName';
691 } elseif ($char ===
'>') {
697 'type' => self::CHARACTR,
702 $this->state =
'data';
704 } elseif ($char ===
'?') {
707 $this->state =
'bogusComment';
715 'type' => self::CHARACTR,
721 $this->state =
'data';
727 private function closeTagOpenState()
729 $next_node = strtolower($this->characters(
'A-Za-z', $this->
char + 1));
730 $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName;
732 if (($this->content_model === self::RCDATA || $this->content_model === self::CDATA) &&
733 (!$the_same || ($the_same && (!preg_match(
734 '/[\t\n\x0b\x0c >\/]/',
735 $this->character($this->
char + 1 + strlen($next_node))
736 ) || $this->EOF === $this->
char)))
755 'type' => self::CHARACTR,
760 $this->state =
'data';
767 $char = $this->char();
769 if (preg_match(
'/^[A-Za-z]$/', $char)) {
775 $this->token = array(
776 'name' => strtolower($char),
777 'type' => self::ENDTAG
780 $this->state =
'tagName';
782 } elseif ($char ===
'>') {
785 $this->state =
'data';
787 } elseif ($this->
char === $this->EOF) {
793 'type' => self::CHARACTR,
799 $this->state =
'data';
803 $this->state =
'bogusComment';
808 private function tagNameState()
812 $char = $this->character($this->
char);
814 if (preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
821 $this->state =
'beforeAttributeName';
823 } elseif ($char ===
'>') {
826 $this->emitToken($this->token);
827 $this->state =
'data';
829 } elseif ($this->
char === $this->EOF) {
833 $this->emitToken($this->token);
836 $this->state =
'data';
838 } elseif ($char ===
'/') {
842 $this->state =
'beforeAttributeName';
848 $this->token[
'name'] .= strtolower($char);
849 $this->state =
'tagName';
853 private function beforeAttributeNameState()
857 $char = $this->character($this->
char);
859 if (preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
866 $this->state =
'beforeAttributeName';
868 } elseif ($char ===
'>') {
871 $this->emitToken($this->token);
872 $this->state =
'data';
874 } elseif ($char ===
'/') {
878 $this->state =
'beforeAttributeName';
880 } elseif ($this->
char === $this->EOF) {
884 $this->emitToken($this->token);
887 $this->state =
'data';
894 $this->token[
'attr'][] = array(
895 'name' => strtolower($char),
899 $this->state =
'attributeName';
903 private function attributeNameState()
907 $char = $this->character($this->
char);
909 if (preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
916 $this->state =
'afterAttributeName';
918 } elseif ($char ===
'=') {
921 $this->state =
'beforeAttributeValue';
923 } elseif ($char ===
'>') {
926 $this->emitToken($this->token);
927 $this->state =
'data';
929 } elseif ($char ===
'/' && $this->character($this->
char + 1) !==
'>') {
933 $this->state =
'beforeAttributeName';
935 } elseif ($this->
char === $this->EOF) {
939 $this->emitToken($this->token);
942 $this->state =
'data';
948 $last = count($this->token[
'attr']) - 1;
949 $this->token[
'attr'][
$last][
'name'] .= strtolower($char);
951 $this->state =
'attributeName';
955 private function afterAttributeNameState()
959 $char = $this->character($this->
char);
961 if (preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
968 $this->state =
'afterAttributeName';
970 } elseif ($char ===
'=') {
973 $this->state =
'beforeAttributeValue';
975 } elseif ($char ===
'>') {
978 $this->emitToken($this->token);
979 $this->state =
'data';
981 } elseif ($char ===
'/' && $this->character($this->
char + 1) !==
'>') {
985 $this->state =
'beforeAttributeName';
987 } elseif ($this->
char === $this->EOF) {
991 $this->emitToken($this->token);
994 $this->state =
'data';
1001 $this->token[
'attr'][] = array(
1002 'name' => strtolower($char),
1006 $this->state =
'attributeName';
1010 private function beforeAttributeValueState()
1014 $char = $this->character($this->
char);
1016 if (preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
1023 $this->state =
'beforeAttributeValue';
1025 } elseif ($char ===
'"') {
1028 $this->state =
'attributeValueDoubleQuoted';
1030 } elseif ($char ===
'&') {
1035 $this->state =
'attributeValueUnquoted';
1037 } elseif ($char ===
'\'') {
1040 $this->state =
'attributeValueSingleQuoted';
1042 } elseif ($char ===
'>') {
1045 $this->emitToken($this->token);
1046 $this->state =
'data';
1052 $last = count($this->token[
'attr']) - 1;
1053 $this->token[
'attr'][
$last][
'value'] .= $char;
1055 $this->state =
'attributeValueUnquoted';
1059 private function attributeValueDoubleQuotedState()
1063 $char = $this->character($this->
char);
1065 if ($char ===
'"') {
1068 $this->state =
'beforeAttributeName';
1070 } elseif ($char ===
'&') {
1073 $this->entityInAttributeValueState(
'double');
1075 } elseif ($this->
char === $this->EOF) {
1079 $this->emitToken($this->token);
1082 $this->state =
'data';
1088 $last = count($this->token[
'attr']) - 1;
1089 $this->token[
'attr'][
$last][
'value'] .= $char;
1091 $this->state =
'attributeValueDoubleQuoted';
1095 private function attributeValueSingleQuotedState()
1099 $char = $this->character($this->
char);
1101 if ($char ===
'\'') {
1104 $this->state =
'beforeAttributeName';
1106 } elseif ($char ===
'&') {
1109 $this->entityInAttributeValueState(
'single');
1111 } elseif ($this->
char === $this->EOF) {
1115 $this->emitToken($this->token);
1118 $this->state =
'data';
1124 $last = count($this->token[
'attr']) - 1;
1125 $this->token[
'attr'][
$last][
'value'] .= $char;
1127 $this->state =
'attributeValueSingleQuoted';
1131 private function attributeValueUnquotedState()
1135 $char = $this->character($this->
char);
1137 if (preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
1144 $this->state =
'beforeAttributeName';
1146 } elseif ($char ===
'&') {
1149 $this->entityInAttributeValueState();
1151 } elseif ($char ===
'>') {
1154 $this->emitToken($this->token);
1155 $this->state =
'data';
1161 $last = count($this->token[
'attr']) - 1;
1162 $this->token[
'attr'][
$last][
'value'] .= $char;
1164 $this->state =
'attributeValueUnquoted';
1168 private function entityInAttributeValueState()
1171 $entity = $this->entity();
1180 $last = count($this->token[
'attr']) - 1;
1181 $this->token[
'attr'][
$last][
'value'] .= $char;
1184 private function bogusCommentState()
1194 $data = $this->characters(
'^>', $this->
char);
1198 'type' => self::COMMENT
1202 $this->
char += strlen($data);
1205 $this->state =
'data';
1208 if ($this->
char === $this->EOF) {
1209 $this->
char = $this->EOF - 1;
1213 private function markupDeclarationOpenState()
1218 if ($this->character($this->
char + 1, 2) ===
'--') {
1220 $this->state =
'comment';
1221 $this->token = array(
1223 'type' => self::COMMENT
1229 } elseif (strtolower($this->character($this->
char + 1, 7)) ===
'doctype') {
1231 $this->state =
'doctype';
1238 $this->state =
'bogusComment';
1242 private function commentState()
1246 $char = $this->char();
1249 if ($char ===
'-') {
1251 $this->state =
'commentDash';
1254 } elseif ($this->
char === $this->EOF) {
1257 $this->emitToken($this->token);
1259 $this->state =
'data';
1265 $this->token[
'data'] .= $char;
1269 private function commentDashState()
1273 $char = $this->char();
1276 if ($char ===
'-') {
1278 $this->state =
'commentEnd';
1281 } elseif ($this->
char === $this->EOF) {
1284 $this->emitToken($this->token);
1286 $this->state =
'data';
1292 $this->token[
'data'] .=
'-' . $char;
1293 $this->state =
'comment';
1297 private function commentEndState()
1301 $char = $this->char();
1303 if ($char ===
'>') {
1304 $this->emitToken($this->token);
1305 $this->state =
'data';
1307 } elseif ($char ===
'-') {
1308 $this->token[
'data'] .=
'-';
1310 } elseif ($this->
char === $this->EOF) {
1311 $this->emitToken($this->token);
1313 $this->state =
'data';
1316 $this->token[
'data'] .=
'--' . $char;
1317 $this->state =
'comment';
1321 private function doctypeState()
1325 $char = $this->char();
1327 if (preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
1328 $this->state =
'beforeDoctypeName';
1332 $this->state =
'beforeDoctypeName';
1336 private function beforeDoctypeNameState()
1340 $char = $this->char();
1342 if (preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
1345 } elseif (preg_match(
'/^[a-z]$/', $char)) {
1346 $this->token = array(
1347 'name' => strtoupper($char),
1348 'type' => self::DOCTYPE,
1352 $this->state =
'doctypeName';
1354 } elseif ($char ===
'>') {
1358 'type' => self::DOCTYPE,
1363 $this->state =
'data';
1365 } elseif ($this->
char === $this->EOF) {
1369 'type' => self::DOCTYPE,
1375 $this->state =
'data';
1378 $this->token = array(
1380 'type' => self::DOCTYPE,
1384 $this->state =
'doctypeName';
1388 private function doctypeNameState()
1392 $char = $this->char();
1394 if (preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
1395 $this->state =
'AfterDoctypeName';
1397 } elseif ($char ===
'>') {
1398 $this->emitToken($this->token);
1399 $this->state =
'data';
1401 } elseif (preg_match(
'/^[a-z]$/', $char)) {
1402 $this->token[
'name'] .= strtoupper($char);
1404 } elseif ($this->
char === $this->EOF) {
1405 $this->emitToken($this->token);
1407 $this->state =
'data';
1410 $this->token[
'name'] .= $char;
1413 $this->token[
'error'] = ($this->token[
'name'] ===
'HTML')
1418 private function afterDoctypeNameState()
1422 $char = $this->char();
1424 if (preg_match(
'/^[\t\n\x0b\x0c ]$/', $char)) {
1427 } elseif ($char ===
'>') {
1428 $this->emitToken($this->token);
1429 $this->state =
'data';
1431 } elseif ($this->
char === $this->EOF) {
1432 $this->emitToken($this->token);
1434 $this->state =
'data';
1437 $this->token[
'error'] =
true;
1438 $this->state =
'bogusDoctype';
1442 private function bogusDoctypeState()
1446 $char = $this->char();
1448 if ($char ===
'>') {
1449 $this->emitToken($this->token);
1450 $this->state =
'data';
1452 } elseif ($this->
char === $this->EOF) {
1453 $this->emitToken($this->token);
1455 $this->state =
'data';
1462 private function entity()
1464 $start = $this->char;
1472 switch ($this->character($this->
char + 1)) {
1478 switch ($this->character($this->
char + 1)) {
1490 $char_class =
'0-9A-Fa-f';
1499 $char_class =
'0-9';
1506 $e_name = $this->characters($char_class, $this->
char + $char + 1);
1507 $entity = $this->character($start, $this->
char);
1508 $cond = strlen($e_name) > 0;
1519 $e_name = $this->characters(
'0-9A-Za-z;', $this->
char + 1);
1520 $len = strlen($e_name);
1522 for ($c = 1; $c <= $len; $c++) {
1523 $id = substr($e_name, 0, $c);
1526 if (in_array($id, $this->
entities)) {
1527 if ($e_name[$c - 1] !==
';') {
1528 if ($c < $len && $e_name[$c] ==
';') {
1537 $cond = isset($entity);
1545 $this->
char = $start;
1551 return html_entity_decode(
'&' . rtrim($entity,
';') .
';', ENT_QUOTES,
'UTF-8');
1554 private function emitToken($token)
1556 $emit = $this->tree->emitToken($token);
1558 if (is_int($emit)) {
1559 $this->content_model = $emit;
1561 } elseif ($token[
'type'] === self::ENDTAG) {
1566 private function EOF()
1568 $this->state =
null;
1569 $this->tree->emitToken(
1584 private $foster_parent =
null;
1585 private $a_formatting = array();
1587 private $head_pointer =
null;
1588 private $form_pointer =
null;
1590 private $scoping = array(
'button',
'caption',
'html',
'marquee',
'object',
'table',
'td',
'th');
1591 private $formatting = array(
1606 private $special = array(
1704 $this->dom =
new DOMDocument;
1706 $this->dom->encoding =
'UTF-8';
1707 $this->dom->preserveWhiteSpace =
true;
1708 $this->dom->substituteEntities =
true;
1709 $this->dom->strictErrorChecking =
false;
1715 switch ($this->phase) {
1717 return $this->initPhase($token);
1720 return $this->rootElementPhase($token);
1723 return $this->mainPhase($token);
1726 return $this->trailingEndPhase($token);
1731 private function initPhase($token)
1744 if ((isset($token[
'error']) && $token[
'error']) ||
1750 !preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data']))
1758 return $this->rootElementPhase($token);
1761 } elseif (isset($token[
'error']) && !$token[
'error']) {
1766 $doctype =
new DOMDocumentType(
null,
null,
'HTML');
1775 } elseif (isset($token[
'data']) && preg_match(
1776 '/^[\t\n\x0b\x0c ]+$/',
1781 $text = $this->dom->createTextNode($token[
'data']);
1782 $this->dom->appendChild($text);
1786 private function rootElementPhase($token)
1799 $comment = $this->dom->createComment($token[
'data']);
1800 $this->dom->appendChild($comment);
1806 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
1809 $text = $this->dom->createTextNode($token[
'data']);
1810 $this->dom->appendChild($text);
1819 !preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) ||
1827 $html = $this->dom->createElement(
'html');
1828 $this->dom->appendChild(
$html);
1829 $this->stack[] =
$html;
1832 return $this->mainPhase($token);
1836 private function mainPhase($token)
1845 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'html') {
1853 foreach ($token[
'attr']
as $attr) {
1854 if (!$this->stack[0]->hasAttribute($attr[
'name'])) {
1855 $this->stack[0]->setAttribute($attr[
'name'], $attr[
'value']);
1862 $this->generateImpliedEndTags();
1867 switch ($this->mode) {
1869 return $this->beforeHead($token);
1872 return $this->inHead($token);
1875 return $this->afterHead($token);
1878 return $this->inBody($token);
1881 return $this->inTable($token);
1884 return $this->inCaption($token);
1887 return $this->inColumnGroup($token);
1890 return $this->inTableBody($token);
1893 return $this->inRow($token);
1896 return $this->inCell($token);
1899 return $this->inSelect($token);
1902 return $this->afterBody($token);
1905 return $this->inFrameset($token);
1908 return $this->afterFrameset($token);
1911 return $this->trailingEndPhase($token);
1917 private function beforeHead($token)
1925 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
1928 $this->insertText($token[
'data']);
1934 $this->insertComment($token[
'data']);
1937 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'head') {
1940 $element = $this->insertElement($token);
1943 $this->head_pointer = $element;
1954 ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'html') ||
1956 '/^[\t\n\x0b\x0c ]$/',
1970 return $this->inHead($token);
1978 private function inHead($token)
1990 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) || (
1992 end($this->stack)->nodeName,
1993 array(
'title',
'style',
'script')
1997 $this->insertText($token[
'data']);
2003 $this->insertComment($token[
'data']);
2006 in_array($token[
'name'], array(
'title',
'style',
'script'))
2008 array_pop($this->stack);
2012 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'title') {
2016 if ($this->head_pointer !==
null) {
2017 $element = $this->insertElement($token,
false);
2018 $this->head_pointer->appendChild($element);
2021 $element = $this->insertElement($token);
2028 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'style') {
2032 if ($this->head_pointer !==
null) {
2033 $element = $this->insertElement($token,
false);
2034 $this->head_pointer->appendChild($element);
2037 $this->insertElement($token);
2044 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'script') {
2046 $element = $this->insertElement($token,
false);
2047 $this->head_pointer->appendChild($element);
2055 array(
'base',
'link',
'meta')
2061 if ($this->head_pointer !==
null) {
2062 $element = $this->insertElement($token,
false);
2063 $this->head_pointer->appendChild($element);
2064 array_pop($this->stack);
2067 $this->insertElement($token);
2071 } elseif ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'head') {
2074 if ($this->head_pointer->isSameNode(end($this->stack))) {
2075 array_pop($this->stack);
2086 } elseif (($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'head') ||
2087 ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] !==
'html')
2095 if ($this->head_pointer->isSameNode(end($this->stack))) {
2109 return $this->afterHead($token);
2113 private function afterHead($token)
2121 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
2124 $this->insertText($token[
'data']);
2130 $this->insertComment($token[
'data']);
2133 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'body') {
2135 $this->insertElement($token);
2141 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'frameset') {
2143 $this->insertElement($token);
2152 array(
'base',
'link',
'meta',
'script',
'style',
'title')
2158 return $this->inHead($token);
2172 return $this->inBody($token);
2176 private function inBody($token)
2180 switch ($token[
'type']) {
2184 $this->reconstructActiveFormattingElements();
2187 $this->insertText($token[
'data']);
2194 $this->insertComment($token[
'data']);
2198 switch ($token[
'name']) {
2205 return $this->inHead($token);
2216 return $this->inHead($token);
2225 if (count($this->stack) === 1 || $this->stack[1]->nodeName !==
'body') {
2234 foreach ($token[
'attr']
as $attr) {
2235 if (!$this->stack[1]->hasAttribute($attr[
'name'])) {
2236 $this->stack[1]->setAttribute($attr[
'name'], $attr[
'value']);
2260 if ($this->elementInScope(
'p')) {
2270 $this->insertElement($token);
2277 if ($this->form_pointer !==
null) {
2285 if ($this->elementInScope(
'p')) {
2296 $element = $this->insertElement($token);
2297 $this->form_pointer = $element;
2308 if ($this->elementInScope(
'p')) {
2317 $stack_length = count($this->stack) - 1;
2319 for ($n = $stack_length; 0 <= $n; $n--) {
2323 $node = $this->stack[$n];
2324 $cat = $this->getElementCategory($node->tagName);
2329 if ($token[
'name'] === $node->tagName || ($token[
'name'] !==
'li'
2330 && ($node->tagName ===
'dd' || $node->tagName ===
'dt'))
2332 for ($x = $stack_length; $x >= $n; $x--) {
2333 array_pop($this->stack);
2342 if ($cat !== self::FORMATTING && $cat !== self::PHRASING &&
2343 $node->tagName !==
'address' && $node->tagName !==
'div'
2351 $this->insertElement($token);
2359 if ($this->elementInScope(
'p')) {
2369 $this->insertElement($token);
2384 if ($this->elementInScope(
'p')) {
2398 while ($this->elementInScope(array(
'h1',
'h2',
'h3',
'h4',
'h5',
'h6'))) {
2399 array_pop($this->stack);
2403 $this->insertElement($token);
2417 $leng = count($this->a_formatting);
2419 for ($n = $leng - 1; $n >= 0; $n--) {
2420 if ($this->a_formatting[$n] === self::MARKER) {
2423 } elseif ($this->a_formatting[$n]->nodeName ===
'a') {
2435 $this->reconstructActiveFormattingElements();
2438 $el = $this->insertElement($token);
2442 $this->a_formatting[] = $el;
2460 $this->reconstructActiveFormattingElements();
2463 $el = $this->insertElement($token);
2467 $this->a_formatting[] = $el;
2476 if ($this->elementInScope(
'button')) {
2486 $this->reconstructActiveFormattingElements();
2489 $this->insertElement($token);
2500 $this->reconstructActiveFormattingElements();
2503 $this->insertElement($token);
2513 $this->reconstructActiveFormattingElements();
2516 $this->insertElement($token);
2526 if ($this->elementInScope(
'p')) {
2536 $this->insertElement($token);
2554 $this->reconstructActiveFormattingElements();
2557 $this->insertElement($token);
2560 array_pop($this->stack);
2567 if ($this->elementInScope(
'p')) {
2577 $this->insertElement($token);
2580 array_pop($this->stack);
2587 $token[
'name'] =
'img';
2588 return $this->inBody($token);
2594 $this->reconstructActiveFormattingElements();
2597 $element = $this->insertElement($token,
false);
2602 $this->form_pointer !==
null
2603 ? $this->form_pointer->appendChild($element)
2604 : end($this->stack)->appendChild($element);
2607 array_pop($this->stack);
2617 if ($this->form_pointer ===
null) {
2660 'This is a searchable index. ' .
2661 'Insert your search keywords here: '
2668 $attr = $token[
'attr'];
2669 $attr[] = array(
'name' =>
'name',
'value' =>
'isindex');
2682 'This is a searchable index. ' .
2683 'Insert your search keywords here: '
2726 $this->insertElement($token);
2738 $this->insertElement($token);
2747 $this->reconstructActiveFormattingElements();
2750 $this->insertElement($token);
2779 case 'event-source':
2794 $this->reconstructActiveFormattingElements();
2796 $this->insertElement($token,
true,
true);
2802 switch ($token[
'name']) {
2808 if (count($this->stack) < 2 || $this->stack[1]->nodeName !==
'body') {
2813 } elseif (end($this->stack)->nodeName !==
'body') {
2833 return $this->afterBody($token);
2854 if ($this->elementInScope($token[
'name'])) {
2855 $this->generateImpliedEndTags();
2866 for ($n = count($this->stack) - 1; $n >= 0; $n--) {
2867 if ($this->stack[$n]->nodeName === $token[
'name']) {
2871 array_pop($this->stack);
2881 if ($this->elementInScope($token[
'name'])) {
2882 $this->generateImpliedEndTags();
2886 if (end($this->stack)->nodeName !== $token[
'name']) {
2896 array_pop($this->stack);
2900 $this->form_pointer =
null;
2907 if ($this->elementInScope(
'p')) {
2908 $this->generateImpliedEndTags(array(
'p'));
2917 for ($n = count($this->stack) - 1; $n >= 0; $n--) {
2918 if ($this->elementInScope(
'p')) {
2919 array_pop($this->stack);
2936 if ($this->elementInScope($token[
'name'])) {
2937 $this->generateImpliedEndTags(array($token[
'name']));
2947 for ($n = count($this->stack) - 1; $n >= 0; $n--) {
2948 if ($this->stack[$n]->nodeName === $token[
'name']) {
2952 array_pop($this->stack);
2965 $elements = array(
'h1',
'h2',
'h3',
'h4',
'h5',
'h6');
2970 if ($this->elementInScope($elements)) {
2971 $this->generateImpliedEndTags();
2981 while ($this->elementInScope($elements)) {
2982 array_pop($this->stack);
3010 for ($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
3011 if ($this->a_formatting[$a] === self::MARKER) {
3014 } elseif ($this->a_formatting[$a]->tagName === $token[
'name']) {
3015 $formatting_element = $this->a_formatting[$a];
3016 $in_stack = in_array($formatting_element, $this->stack,
true);
3026 if (!isset($formatting_element) || ($in_stack &&
3027 !$this->elementInScope($token[
'name']))
3035 } elseif (isset($formatting_element) && !$in_stack) {
3036 unset($this->a_formatting[$fe_af_pos]);
3037 $this->a_formatting = array_merge($this->a_formatting);
3046 $fe_s_pos = array_search($formatting_element, $this->stack,
true);
3047 $length = count($this->stack);
3049 for (
$s = $fe_s_pos + 1;
$s < $length;
$s++) {
3050 $category = $this->getElementCategory($this->stack[
$s]->nodeName);
3052 if ($category !== self::PHRASING && $category !== self::FORMATTING) {
3053 $furthest_block = $this->stack[
$s];
3063 if (!isset($furthest_block)) {
3064 for ($n = $length - 1; $n >= $fe_s_pos; $n--) {
3065 array_pop($this->stack);
3068 unset($this->a_formatting[$fe_af_pos]);
3069 $this->a_formatting = array_merge($this->a_formatting);
3076 $common_ancestor = $this->stack[$fe_s_pos - 1];
3080 if ($furthest_block->parentNode !==
null) {
3081 $furthest_block->parentNode->removeChild($furthest_block);
3088 $bookmark = $fe_af_pos;
3092 $node = $furthest_block;
3093 $last_node = $furthest_block;
3096 for ($n = array_search($node, $this->stack,
true) - 1; $n >= 0; $n--) {
3099 $node = $this->stack[$n];
3105 if (!in_array($node, $this->a_formatting,
true)) {
3106 unset($this->stack[$n]);
3107 $this->stack = array_merge($this->stack);
3117 if ($node === $formatting_element) {
3124 } elseif ($last_node === $furthest_block) {
3125 $bookmark = array_search($node, $this->a_formatting,
true) + 1;
3134 if ($node->hasChildNodes()) {
3135 $clone = $node->cloneNode();
3136 $s_pos = array_search($node, $this->stack,
true);
3137 $a_pos = array_search($node, $this->a_formatting,
true);
3139 $this->stack[$s_pos] = $clone;
3140 $this->a_formatting[$a_pos] = $clone;
3146 if ($last_node->parentNode !==
null) {
3147 $last_node->parentNode->removeChild($last_node);
3150 $node->appendChild($last_node);
3160 if ($last_node->parentNode !==
null) {
3161 $last_node->parentNode->removeChild($last_node);
3164 $common_ancestor->appendChild($last_node);
3168 $clone = $formatting_element->cloneNode();
3173 while ($furthest_block->hasChildNodes()) {
3174 $child = $furthest_block->firstChild;
3175 $furthest_block->removeChild($child);
3176 $clone->appendChild($child);
3180 $furthest_block->appendChild($clone);
3186 $fe_af_pos = array_search($formatting_element, $this->a_formatting,
true);
3187 unset($this->a_formatting[$fe_af_pos]);
3188 $this->a_formatting = array_merge($this->a_formatting);
3190 $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
3191 $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting));
3192 $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
3199 $fe_s_pos = array_search($formatting_element, $this->stack,
true);
3200 $fb_s_pos = array_search($furthest_block, $this->stack,
true);
3201 unset($this->stack[$fe_s_pos]);
3203 $s_part1 = array_slice($this->stack, 0, $fb_s_pos);
3204 $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack));
3205 $this->stack = array_merge($s_part1, array($clone), $s_part2);
3208 unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
3220 if ($this->elementInScope($token[
'name'])) {
3221 $this->generateImpliedEndTags();
3232 for ($n = count($this->stack) - 1; $n >= 0; $n--) {
3233 if ($this->stack[$n]->nodeName === $token[
'name']) {
3237 array_pop($this->stack);
3240 $marker = end(array_keys($this->a_formatting, self::MARKER,
true));
3242 for ($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
3243 array_pop($this->a_formatting);
3276 for ($n = count($this->stack) - 1; $n >= 0; $n--) {
3279 $node = end($this->stack);
3283 if ($token[
'name'] === $node->nodeName) {
3285 $this->generateImpliedEndTags();
3294 for ($x = count($this->stack) - $n; $x >= $n; $x--) {
3295 array_pop($this->stack);
3299 $category = $this->getElementCategory($node);
3301 if ($category !==
self::SPECIAL && $category !== self::SCOPING) {
3316 private function inTable($token)
3318 $clear = array(
'html',
'table');
3324 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
3327 $text = $this->dom->createTextNode($token[
'data']);
3328 end($this->stack)->appendChild($text);
3334 $comment = $this->dom->createComment($token[
'data']);
3335 end($this->stack)->appendChild($comment);
3339 $token[
'name'] ===
'caption'
3342 $this->clearStackToTableContext($clear);
3350 $this->insertElement($token);
3355 $token[
'name'] ===
'colgroup'
3358 $this->clearStackToTableContext($clear);
3362 $this->insertElement($token);
3367 $token[
'name'] ===
'col'
3371 'name' =>
'colgroup',
3377 $this->inColumnGroup($token);
3382 array(
'tbody',
'tfoot',
'thead')
3386 $this->clearStackToTableContext($clear);
3390 $this->insertElement($token);
3395 in_array($token[
'name'], array(
'td',
'th',
'tr'))
3407 return $this->inTableBody($token);
3411 $token[
'name'] ===
'table'
3423 return $this->mainPhase($token);
3427 $token[
'name'] ===
'table'
3432 if (!$this->elementInScope($token[
'name'],
true)) {
3438 $this->generateImpliedEndTags();
3447 $current = end($this->stack)->nodeName;
3448 array_pop($this->stack);
3450 if ($current ===
'table') {
3456 $this->resetInsertionMode();
3489 end($this->stack)->nodeName,
3490 array(
'table',
'tbody',
'tfoot',
'thead',
'tr')
3504 for ($n = count($this->stack) - 1; $n >= 0; $n--) {
3505 if ($this->stack[$n]->nodeName ===
'table') {
3506 $table = $this->stack[$n];
3511 if (isset($table) && $table->parentNode !==
null) {
3512 $this->foster_parent = $table->parentNode;
3514 } elseif (!isset($table)) {
3515 $this->foster_parent = $this->stack[0];
3517 } elseif (isset($table) && ($table->parentNode ===
null ||
3518 $table->parentNode->nodeType !== XML_ELEMENT_NODE)
3520 $this->foster_parent = $this->stack[$n - 1];
3524 $this->inBody($token);
3528 private function inCaption($token)
3531 if ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'caption') {
3535 if (!$this->elementInScope($token[
'name'],
true)) {
3541 $this->generateImpliedEndTags();
3550 $node = end($this->stack)->nodeName;
3551 array_pop($this->stack);
3553 if ($node ===
'caption') {
3560 $this->clearTheActiveFormattingElementsUpToTheLastMarker();
3583 $token[
'name'] ===
'table')
3590 'name' =>
'caption',
3595 return $this->inTable($token);
3619 $this->inBody($token);
3623 private function inColumnGroup($token)
3629 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
3632 $text = $this->dom->createTextNode($token[
'data']);
3633 end($this->stack)->appendChild($text);
3639 $comment = $this->dom->createComment($token[
'data']);
3640 end($this->stack)->appendChild($comment);
3643 } elseif ($token[
'type'] ===
HTML5::STARTTAG && $token[
'name'] ===
'col') {
3646 $this->insertElement($token);
3647 array_pop($this->stack);
3651 $token[
'name'] ===
'colgroup'
3655 if (end($this->stack)->nodeName ===
'html') {
3662 array_pop($this->stack);
3667 } elseif ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'col') {
3674 $this->inColumnGroup(
3676 'name' =>
'colgroup',
3681 return $this->inTable($token);
/**
 * Token handler for content inside a table section (per the method name).
 *
 * NOTE(review): only some statements of this method are visible here; the
 * comments below describe the visible statements only.
 *
 * @param array $token Token array; the visible code reads its 'name' key.
 */
3685 private function inTableBody($token)
// Element names at which clearStackToTableContext() is asked to stop.
3687 $clear = array(
'tbody',
'tfoot',
'thead',
'html');
3692 $this->clearStackToTableContext($clear);
3696 $this->insertElement($token);
3701 ($token[
'name'] ===
'th' || $token[
'name'] ===
'td')
// Hand the token to the row handler and return its result.
3713 return $this->inRow($token);
3717 in_array($token[
'name'], array(
'tbody',
'tfoot',
'thead'))
// Guard: taken when no element with the token's name is in table scope.
3722 if (!$this->elementInScope($token[
'name'],
true)) {
3728 $this->clearStackToTableContext($clear);
3732 array_pop($this->stack);
// Tag names tested by this branch. The footer section is spelled 'tfoot',
// as in every other list in this class, so that tfoot tokens can match.
3740 array(
'caption',
'col',
'colgroup',
'tbody',
'tfoot',
'thead')
// Guard: taken when none of tbody/thead/tfoot is in table scope.
3747 if (!$this->elementInScope(array(
'tbody',
'thead',
'tfoot'),
true)) {
3753 $this->clearStackToTableContext($clear);
// Part of a synthetic token named after the current node.
3760 'name' => end($this->stack)->nodeName,
3765 return $this->mainPhase($token);
3772 array(
'body',
'caption',
'col',
'colgroup',
'html',
'td',
'th',
'tr')
// Hand the token to the table handler.
3780 $this->inTable($token);
/**
 * Token handler for content inside a table row (per the method name).
 *
 * NOTE(review): only some statements of this method are visible here; the
 * comments below describe the visible statements only.
 *
 * @param array $token Token array; the visible code reads its 'type' and 'name' keys.
 */
3784 private function inRow($token)
// Element names at which clearStackToTableContext() is asked to stop.
3786 $clear = array(
'tr',
'html');
3790 ($token[
'name'] ===
'th' || $token[
'name'] ===
'td')
3793 $this->clearStackToTableContext($clear);
3797 $this->insertElement($token);
// End tag "tr".
3805 } elseif ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'tr') {
// Guard: taken when no element with the token's name is in table scope.
3809 if (!$this->elementInScope($token[
'name'],
true)) {
3815 $this->clearStackToTableContext($clear);
3820 array_pop($this->stack);
3828 array(
'caption',
'col',
'colgroup',
'tbody',
'tfoot',
'thead',
'tr')
// NOTE(review): this branch hands the token to inCell(); confirm that is the
// intended handler for reprocessing here.
3840 return $this->inCell($token);
3844 in_array($token[
'name'], array(
'tbody',
'tfoot',
'thead'))
3849 if (!$this->elementInScope($token[
'name'],
true)) {
3863 return $this->inCell($token);
3870 array(
'body',
'caption',
'col',
'colgroup',
'html',
'td',
'th',
'tr')
// Hand the token to the table handler.
3878 $this->inTable($token);
/**
 * Token handler for content inside a table cell (per the method name).
 *
 * NOTE(review): only some statements of this method are visible here; the
 * comments below describe the visible statements only.
 *
 * @param array $token Token array; the visible code reads its 'name' key.
 */
3882 private function inCell($token)
3886 ($token[
'name'] ===
'td' || $token[
'name'] ===
'th')
// Guard: taken when no element with the token's name is in table scope.
3891 if (!$this->elementInScope($token[
'name'],
true)) {
// Generate implied end tags, excluding the token's own tag name.
3898 $this->generateImpliedEndTags(array($token[
'name']));
// Remember the current node's name, pop it, then test whether it matched the token.
// presumably repeated in a loop; loop header not visible here.
3907 $node = end($this->stack)->nodeName;
3908 array_pop($this->stack);
3910 if ($node === $token[
'name']) {
3917 $this->clearTheActiveFormattingElementsUpToTheLastMarker();
// Guard: taken when neither td nor th is in table scope.
3944 if (!$this->elementInScope(array(
'td',
'th'),
true)) {
3951 return $this->inRow($token);
3974 if (!$this->elementInScope(array(
'td',
'th'),
true)) {
3981 return $this->inRow($token);
3988 array(
'body',
'caption',
'col',
'colgroup',
'html')
3997 array(
'table',
'tbody',
'tfoot',
'thead',
'tr')
4004 if (!$this->elementInScope($token[
'name'],
true)) {
4011 return $this->inRow($token);
// Hand the token to the body handler.
4017 $this->inBody($token);
/**
 * Token handler for content inside a select element (per the method name).
 *
 * NOTE(review): only some statements of this method are visible here; the
 * comments below describe the visible statements only.
 *
 * @param array $token Token array; the visible code reads its 'name' and 'data' keys.
 */
4021 private function inSelect($token)
4028 $this->insertText($token[
'data']);
4034 $this->insertComment($token[
'data']);
4038 $token[
'name'] ===
'option'
// Checks whether the current node is an option before the new element is inserted.
4042 if (end($this->stack)->nodeName ===
'option') {
4052 $this->insertElement($token);
4056 $token[
'name'] ===
'optgroup'
4060 if (end($this->stack)->nodeName ===
'option') {
4071 if (end($this->stack)->nodeName ===
'optgroup') {
4074 'name' =>
'optgroup',
4081 $this->insertElement($token);
4085 $token[
'name'] ===
'optgroup'
// Stack depth at entry to this branch, used to look at the top two entries.
4091 $elements_in_stack = count($this->stack);
4093 if ($this->stack[$elements_in_stack - 1]->nodeName ===
'option' &&
4094 $this->stack[$elements_in_stack - 2]->nodeName ===
'optgroup'
// Pop the current node when it is an optgroup. The node's name is compared,
// because comparing the node object itself to a string with === is never true.
// end() is used so the check looks at whatever is on top of the stack now.
4107 if (end($this->stack)->nodeName ===
'optgroup') {
4108 array_pop($this->stack);
4113 $token[
'name'] ===
'option'
// Pop the current node when it is an option.
4118 if (end($this->stack)->nodeName ===
'option') {
4119 array_pop($this->stack);
4124 $token[
'name'] ===
'select'
// Guard: taken when no element with the token's name is in table scope.
4129 if (!$this->elementInScope($token[
'name'],
true)) {
// Remember the current node's name, pop it, then test whether it was the select.
// presumably repeated in a loop; loop header not visible here.
4137 $current = end($this->stack)->nodeName;
4138 array_pop($this->stack);
4140 if ($current ===
'select') {
4146 $this->resetInsertionMode();
4150 } elseif ($token[
'name'] ===
'select' &&
4185 if ($this->elementInScope($token[
'name'],
true)) {
4193 $this->mainPhase($token);
/**
 * Token handler used after the body element (per the method name).
 *
 * NOTE(review): only some statements of this method are visible here; the
 * comments below describe the visible statements only.
 *
 * @param array $token Token array; the visible code reads 'type', 'name' and 'data'.
 */
4202 private function afterBody($token)
// True when the token data consists solely of tab, LF, VT, FF or space characters.
4210 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
4214 $this->inBody($token);
// Append the data as a comment to the first element on the stack.
4221 $comment = $this->dom->createComment($token[
'data']);
4222 $this->stack[0]->appendChild($comment);
// End tag "html".
4225 } elseif ($token[
'type'] ===
HTML5::ENDTAG && $token[
'name'] ===
'html') {
// Hand the token to the body handler and return its result.
4239 return $this->inBody($token);
/**
 * Token handler for content inside a frameset element (per the method name).
 *
 * NOTE(review): only some statements of this method are visible here; the
 * second half of each 'name' condition (after &&) is not visible.
 *
 * @param array $token Token array; the visible code reads its 'name' and 'data' keys.
 */
4243 private function inFrameset($token)
// True when the token data consists solely of tab, LF, VT, FF or space characters.
4251 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
4254 $this->insertText($token[
'data']);
4260 $this->insertComment($token[
'data']);
// First "frameset" branch: inserts an element for the token.
4263 } elseif ($token[
'name'] ===
'frameset' &&
4266 $this->insertElement($token);
// Second "frameset" branch: checks whether the current node is html, and pops.
4269 } elseif ($token[
'name'] ===
'frameset' &&
4274 if (end($this->stack)->nodeName ===
'html') {
4280 array_pop($this->stack);
// "frame" branch: insert the element, then pop it straight off the stack again.
4290 } elseif ($token[
'name'] ===
'frame' &&
4294 $this->insertElement($token);
4297 array_pop($this->stack);
// "noframes" branch: hand the token to the body handler.
4300 } elseif ($token[
'name'] ===
'noframes' &&
4304 $this->inBody($token);
/**
 * Token handler used after a frameset element (per the method name).
 *
 * NOTE(review): only some statements of this method are visible here; the
 * second half of each 'name' condition (after &&) is not visible.
 *
 * @param array $token Token array; the visible code reads its 'name' and 'data' keys.
 */
4312 private function afterFrameset($token)
// True when the token data consists solely of tab, LF, VT, FF or space characters.
4320 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
4323 $this->insertText($token[
'data']);
4329 $this->insertComment($token[
'data']);
// "html" branch; its body is not visible here.
4332 } elseif ($token[
'name'] ===
'html' &&
// "noframes" branch: hand the token to the body handler.
4339 } elseif ($token[
'name'] ===
'noframes' &&
4343 $this->inBody($token);
/**
 * Token handler for the trailing end phase (per the method name).
 *
 * NOTE(review): only some statements of this method are visible here; the
 * comments below describe the visible statements only.
 *
 * @param array $token Token array; the visible code reads its 'data' key.
 */
4351 private function trailingEndPhase($token)
// Append the data as a comment directly to the document object.
4364 $comment = $this->dom->createComment($token[
'data']);
4365 $this->dom->appendChild($comment);
// True when the token data consists solely of tab, LF, VT, FF or space characters.
4371 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])
4374 $this->mainPhase($token);
// Same whitespace-only test, here as one alternative of a larger condition.
4380 preg_match(
'/^[\t\n\x0b\x0c ]+$/', $token[
'data'])) ||
// Hand the token to mainPhase() and return its result.
4386 return $this->mainPhase($token);
/**
 * Creates a DOM element for a token, attaches it via appendToRealParent()
 * and pushes it onto the stack of open elements.
 *
 * NOTE(review): $append and $check are not read by any visible line of this
 * method; confirm whether callers rely on them. The return value, if any, is
 * not visible here.
 *
 * @param array $token  Token array; 'name' is the tag name, 'attr' a list of
 *                      arrays with 'name' and 'value' keys.
 * @param bool  $append Defaults to true.
 * @param bool  $check  Defaults to false.
 */
4394 private function insertElement($token, $append =
true, $check =
false)
// Strip every character that is not an ASCII letter, digit or hyphen.
4400 $token[
'name'] = preg_replace(
'/[^a-z0-9-]/i',
'', $token[
'name']);
// Strip leading hyphens and digits ('0..9' is ltrim()'s range syntax).
4402 $token[
'name'] = ltrim($token[
'name'],
'-0..9');
// Fall back to "span" when nothing usable is left of the name.
4404 if ($token[
'name'] ===
'') {
4405 $token[
'name'] =
'span';
4409 $el = $this->dom->createElement($token[
'name']);
// Copy attributes; the first occurrence of a repeated attribute name wins.
4411 foreach ($token[
'attr']
as $attr) {
4412 if (!$el->hasAttribute($attr[
'name'])) {
4413 $el->setAttribute($attr[
'name'], $attr[
'value']);
4417 $this->appendToRealParent($el);
4418 $this->stack[] = $el;
/**
 * Wraps $data in a new DOM text node and hands that node to
 * appendToRealParent() for placement in the tree.
 *
 * @param string $data Character data for the new text node.
 */
private function insertText($data)
{
    $this->appendToRealParent($this->dom->createTextNode($data));
}
/**
 * Wraps $data in a new DOM comment node and hands that node to
 * appendToRealParent() for placement in the tree.
 *
 * @param string $data Contents of the new comment node.
 */
private function insertComment($data)
{
    $this->appendToRealParent($this->dom->createComment($data));
}
/**
 * Attaches $node either to the current node or, when a foster parent is
 * set, to that foster parent.
 *
 * NOTE(review): brace, else and loop-exit lines are not visible here; the
 * comments below describe the visible statements only.
 *
 * @param DOMNode $node Node to attach (elements, text and comment nodes are passed by callers in this file).
 */
4435 private function appendToRealParent($node)
// No foster parent: append to the current node (top of the stack).
4437 if ($this->foster_parent ===
null) {
4438 end($this->stack)->appendChild($node);
4440 } elseif ($this->foster_parent !==
null) {
// Walk the stack from the top down looking for a table element that has a parent.
4447 for ($n = count($this->stack) - 1; $n >= 0; $n--) {
4448 if ($this->stack[$n]->nodeName ===
'table' &&
4449 $this->stack[$n]->parentNode !==
null
4451 $table = $this->stack[$n];
// When the foster parent is that table's parent, insert just before the table.
4456 if (isset($table) && $this->foster_parent->isSameNode($table->parentNode)) {
4457 $this->foster_parent->insertBefore($node, $table);
// Append to the foster parent (presumably the else case; the else line is not visible).
4459 $this->foster_parent->appendChild($node);
// The foster parent is cleared after use.
4462 $this->foster_parent =
null;
/**
 * Scope test over the stack of open elements.
 *
 * NOTE(review): the return statements are not visible here; the comments
 * below describe the visible conditions only.
 *
 * @param string|array $el    Tag name, or a list of tag names that are each
 *                            checked through a recursive call.
 * @param bool         $table Forwarded unchanged on recursion and tested
 *                            against true in one branch; defaults to false.
 */
4466 private function elementInScope($el, $table =
false)
// A list of names: check each name individually.
4468 if (is_array($el)) {
4469 foreach ($el
as $element) {
4470 if ($this->elementInScope($element, $table)) {
4478 $leng = count($this->stack);
// Visit the stack from the top (most recently pushed) towards the bottom.
4480 for ($n = 0; $n < $leng; $n++) {
4483 $node = $this->stack[$leng - 1 - $n];
// The node is the element being searched for.
4485 if ($node->tagName === $el) {
// The node is a table element.
4489 } elseif ($node->tagName ===
'table') {
// Extra test that applies only when $table is true (the tested list is not visible here).
4494 } elseif ($table ===
true && in_array(
// The node is the document's root element.
4511 } elseif ($node === $node->ownerDocument->documentElement) {
/**
 * Re-creates entries of the active formatting list ($this->a_formatting)
 * by cloning them onto the stack of open elements.
 *
 * NOTE(review): the loop is driven by hand (its third for-expression is the
 * constant true) and the statements that change $a and $step_seven are not
 * visible here; the comments below describe the visible statements only.
 */
4526 private function reconstructActiveFormattingElements()
4530 $formatting_elements = count($this->a_formatting);
// Test for an empty list.
4532 if ($formatting_elements === 0) {
// Start from the last entry of the list.
4538 $entry = end($this->a_formatting);
// Test: the entry is a marker or is already on the stack (strict identity).
4544 if ($entry === self::MARKER || in_array($entry, $this->stack,
true)) {
// No automatic step: $a is presumably adjusted inside the body.
4548 for ($a = $formatting_elements - 1; $a >= 0;
true) {
4552 $step_seven =
false;
4559 $entry = $this->a_formatting[$a];
4563 if ($entry === self::MARKER || in_array($entry, $this->stack,
true)) {
4571 if (isset($step_seven) && $step_seven ===
true) {
4573 $entry = $this->a_formatting[$a];
// Clone the entry (cloneNode() is called without arguments),
// append the clone to the current node and push it onto the stack.
4577 $clone = $entry->cloneNode();
4581 end($this->stack)->appendChild($clone);
4582 $this->stack[] = $clone;
// Replace the list entry with its clone.
4586 $this->a_formatting[$a] = $clone;
// Test: the clone is not the last entry of the list.
4590 if (end($this->a_formatting) !== $clone) {
/**
 * Removes entries from the end of the active formatting list
 * ($this->a_formatting) and checks each removed entry against the marker.
 *
 * NOTE(review): the loop header and exit are not visible here; presumably
 * removal repeats until a marker has been removed.
 */
4598 private function clearTheActiveFormattingElementsUpToTheLastMarker()
// Read the last entry, then remove it from the list.
4607 $entry = end($this->a_formatting);
4610 array_pop($this->a_formatting);
// Test whether the removed entry was a marker.
4614 if ($entry === self::MARKER) {
/**
 * Pops elements off the stack of open elements while the current node is
 * one of dd, dt, li, p, td, th or tr, skipping any names listed in $exclude.
 *
 * @param array $exclude Tag names that must not be popped; defaults to none.
 */
4620 private function generateImpliedEndTags($exclude = array())
// NOTE(review): $node is not read by any visible line; it looks unused.
4627 $node = end($this->stack);
// Candidate tag names minus the excluded ones.
4628 $elements = array_diff(array(
'dd',
'dt',
'li',
'p',
'td',
'th',
'tr'), $exclude);
// Keep popping while the current node's name is still a candidate.
4630 while (in_array(end($this->stack)->nodeName, $elements)) {
4631 array_pop($this->stack);
/**
 * Classifies an element by looking its tag name up in the $special,
 * $scoping and $formatting lists, in that order.
 *
 * NOTE(review): the value returned for each category is not visible here.
 *
 * @param DOMElement $node Element whose tagName is looked up.
 */
4635 private function getElementCategory($node)
4637 $name = $node->tagName;
// The lists are checked in order; the first list containing the name decides.
4638 if (in_array($name, $this->
special)) {
4640 } elseif (in_array($name, $this->scoping)) {
4642 } elseif (in_array($name, $this->formatting)) {
/**
 * Pops elements off the stack of open elements, using $elements as the set
 * of names that is tested against the current node.
 *
 * NOTE(review): the loop header and the branch bodies are not visible here;
 * presumably popping stops once the current node's name is in $elements.
 *
 * @param array $elements Tag names tested against the current node's name.
 */
4649 private function clearStackToTableContext($elements)
// Name of the current node (top of the stack).
4657 $node = end($this->stack)->nodeName;
4659 if (in_array($node, $elements)) {
4662 array_pop($this->stack);
/**
 * Walks the stack of open elements from the top down and branches on each
 * node's name to choose a value for $this->mode.
 *
 * NOTE(review): apart from the html case, the assignments made in each
 * branch are not visible here.
 */
4667 private function resetInsertionMode()
4671 $leng = count($this->stack);
// Iterate from the most recently pushed element to the first one.
4673 for ($n = $leng - 1; $n >= 0; $n--) {
4675 $node = $this->stack[$n];
// Test: this node is the first (bottom-most) element on the stack.
4681 if ($this->stack[0]->isSameNode($node)) {
// One branch per element name, tested in this order.
4687 if ($node->nodeName ===
'select') {
4693 } elseif ($node->nodeName ===
'td' || $node->nodeName ===
'th') {
4699 } elseif ($node->nodeName ===
'tr') {
4705 } elseif (in_array($node->nodeName, array(
'tbody',
'thead',
'tfoot'))) {
4711 } elseif ($node->nodeName ===
'caption') {
4717 } elseif ($node->nodeName ===
'colgroup') {
4723 } elseif ($node->nodeName ===
'table') {
4730 } elseif ($node->nodeName ===
'head') {
4736 } elseif ($node->nodeName ===
'body') {
4742 } elseif ($node->nodeName ===
'frameset') {
// For html the mode depends on whether a head element has been recorded yet.
4750 } elseif ($node->nodeName ===
'html') {
4751 $this->mode = ($this->head_pointer ===
null)
4766 private function closeCell()
4770 foreach (array(
'td',
'th')
as $cell) {
4771 if ($this->elementInScope($cell,
true)) {