73 public function loadHTML($html, $options = 0): bool
75 $tmpDOM = new \DOMDocument();
77 $encoding = mb_http_input();
78 if ($encoding ==
'') {
83 $success = @$tmpDOM->loadHTML(
"<meta http-equiv=\"content-type\" content=\"text/html; charset=$encoding\">$html");
85 $xpath = new \DOMXPath($tmpDOM);
86 $nodelist = $xpath->query(
"//body/*");
88 $this->resolveExternals =
true;
89 $this->loadXML(
'<?xml version="1.0" encoding="utf-8"?>
91 <!ENTITY nbsp " ">
94 if ($tmpDOM->encoding !=
'') {
95 $this->encoding = $tmpDOM->encoding;
97 $rootnode = $this->documentElement;
99 foreach ($nodelist as $node) {
101 $newnode = $this->importNode($node,
true);
102 $rootnode->appendChild($newnode);
119 $xpath = new \DOMXPath($this);
132 preg_match(
"/([a-zA-Z0-9]*)(\.(.*))?/", $t, $matches);
134 $tag = $matches[1] ??
"";
135 $class = $matches[3] ??
"";
137 if (!isset($classByTag[$tag])) {
138 $classByTag[$tag] = [];
140 if (!empty($class)) {
141 $classByTag[$tag][] = $class;
143 $classByTag[$tag][] =
"";
147 $nodelist = $xpath->query(
"//body//*");
149 for ($i = $nodelist->length - 1; $i >= 0; $i--) {
150 $node = $nodelist->item($i);
152 if (!isset($tags[$node->nodeName])) {
154 while ($node->firstChild !=
null) {
155 if ($node->parentNode->nodeName ==
"body" && $node->firstChild->nodeType == XML_TEXT_NODE) {
157 $paragraph = $node->parentNode->insertBefore($this->createElement(
"p"), $node);
158 $paragraph->appendChild($node->firstChild);
160 $node->parentNode->insertBefore($node->firstChild, $node);
165 $node->parentNode->removeChild($node);
168 for ($j = $node->attributes->length - 1; $j >= 0; $j--) {
169 $attr = $node->attributes->item($j);
173 $node->removeAttribute($attr->name);
178 if ($node->getAttribute(
"class") !=
"") {
179 $attr = implode(
" ", array_intersect(
180 explode(
" ", $node->getAttribute(
"class")),
181 $classByTag[$node->nodeName],
184 $node->removeAttribute(
"class");
186 $node->setAttribute(
"class", $attr);
191 $nodelist = $xpath->query(
"//p[. = '' and count(br) = 0]");
193 foreach ($nodelist as $node) {
194 $node->appendChild($this->createElement(
"br"));
196 $nodelist = $xpath->query(
"//li");
197 $parentNodes = [
"ul",
"ol",
"menu"];
199 foreach ($nodelist as $node) {
200 if (!in_array($node->parentNode->nodeName, $parentNodes)) {
202 $previous = $node->previousSibling;
203 while (!is_null($previous) && $previous->nodeType != XML_ELEMENT_NODE) {
204 $previous = $previous->previousSibling;
207 if (!is_null($previous) && in_array($previous->nodeName, $parentNodes)) {
209 $listNode->appendChild($node);
212 $listNode = $node->parentNode->insertBefore($this->createElement(
"ul"), $node);
213 $listNode->appendChild($node);
217 $nodelist = $xpath->query(
"//b[not(node())] | //i[not(node())] | //strong[not(node())] | //span[not(node())] | //a[not(node())] | //u[not(node())]");
219 for ($i = $nodelist->length - 1; $i >= 0; $i--) {
220 $node = $nodelist->item($i);
222 $node->parentNode->removeChild($node);
225 if ($nodes->length == 1) {
226 $node = $nodes->item(0);
227 if ($node->nodeName ==
"p" && $node->childNodes->length == 1 && $node->childNodes->item(0)->nodeName ==
"br") {
228 $node->parentNode->removeChild($node);
240 $charsToRemove = mb_strlen($this->documentElement->textContent) - $max;
242 if ($charsToRemove <= 0) {
246 $xpath = new \DOMXPath($this);
247 $textNodes = $xpath->query(
"//text()");
248 $i = $textNodes->length - 1;
249 while ($charsToRemove > 0 && $i >= 0) {
250 $n = $textNodes->item($i);
251 $len = mb_strlen($n->textContent);
252 $parent = $n->parentNode;
254 if ($len <= $charsToRemove) {
255 $parent->removeChild($n);
256 $charsToRemove -= $len;
258 $restNode = $n->splitText($len - $charsToRemove);
259 $parent->removeChild($restNode);
264 if (mb_strlen($parent->textContent) == 0) {
265 $parent->parentNode->removeChild($parent);