/**
 * Loads an HTML string into this document by way of a temporary DOMDocument.
 *
 * The markup is first parsed by a scratch \DOMDocument; the element children
 * of its <body> are selected with XPath so that later lines of this method
 * can import them into this document.
 *
 * NOTE(review): this excerpt is incomplete. The handling of an empty
 * $encoding, the use (if any) of $options and the return value are not
 * visible here, so confirm them against the full file.
 *
 * @param string $html    HTML markup to parse; it is interpolated directly
 *                        after a generated <meta> charset tag.
 * @param mixed  $options Defaults to null; not referenced in the visible lines.
 */
94 public function loadHTML($html, $options =
null)
// Scratch document: the raw HTML is parsed here rather than in $this.
96 $tmpDOM = new \DOMDocument();
// Encoding reported by mbstring for HTTP input; checked for '' just below.
98 $encoding = mb_http_input();
99 if ($encoding ==
'') {
// A content-type <meta> tag carrying $encoding is prepended to the markup
// before parsing. The @ operator silences warnings raised while parsing.
104 $success = @$tmpDOM->loadHTML(
"<meta http-equiv=\"content-type\" content=\"text/html; charset=$encoding\">$html");
// Select every element that is a direct child of <body> in the scratch document.
106 $xpath = new \DOMXPath($tmpDOM);
107 $nodelist = $xpath->query(
"//body/*");
// Set before the loadXML() call that follows.
109 $this->resolveExternals =
true;
110 $this->loadXML(
'<?xml version="1.0" encoding="utf-8"?>
112 <!ENTITY nbsp " ">
115 if ($tmpDOM->encoding !=
'') {
116 $this->encoding = $tmpDOM->encoding;
118 $rootnode = $this->documentElement;
120 foreach ($nodelist as $node) {
122 $newnode = $this->importNode($node,
true);
123 $rootnode->appendChild($newnode);
140 $xpath = new \DOMXPath($this);
153 preg_match(
"/([a-zA-Z0-9]*)(\.(.*))?/", $t, $matches);
155 $tag = $matches[1] ??
"";
156 $class = $matches[3] ??
"";
158 if (!isset($classByTag[$tag])) {
159 $classByTag[$tag] = [];
161 if (!empty($class)) {
162 $classByTag[$tag][] = $class;
164 $classByTag[$tag][] =
"";
168 $nodelist = $xpath->query(
"//body//*");
170 for ($i = $nodelist->length - 1; $i >= 0; $i--) {
171 $node = $nodelist->item($i);
173 if (!isset($tags[$node->nodeName])) {
175 while ($node->firstChild !=
null) {
176 if ($node->parentNode->nodeName ==
"body" && $node->firstChild->nodeType == XML_TEXT_NODE) {
178 $paragraph = $node->parentNode->insertBefore($this->createElement(
"p"), $node);
179 $paragraph->appendChild($node->firstChild);
181 $node->parentNode->insertBefore($node->firstChild, $node);
186 $node->parentNode->removeChild($node);
189 for ($j = $node->attributes->length - 1; $j >= 0; $j--) {
190 $attr = $node->attributes->item($j);
194 $node->removeAttribute($attr->name);
199 if ($node->getAttribute(
"class") !=
"") {
200 $attr = implode(
" ", array_intersect(
201 explode(
" ", $node->getAttribute(
"class")),
202 $classByTag[$node->nodeName]
205 $node->removeAttribute(
"class");
207 $node->setAttribute(
"class", $attr);
213 $nodelist = $xpath->query(
"//p[. = '' and count(br) = 0]");
215 foreach ($nodelist as $node) {
216 $node->appendChild($this->createElement(
"br"));
218 $nodelist = $xpath->query(
"//li");
219 $parentNodes = array(
"ul",
"ol",
"menu");
221 foreach ($nodelist as $node) {
222 if (!in_array($node->parentNode->nodeName, $parentNodes)) {
224 $previous = $node->previousSibling;
225 while (!is_null($previous) && $previous->nodeType != XML_ELEMENT_NODE) {
226 $previous = $previous->previousSibling;
229 if (!is_null($previous) && in_array($previous->nodeName, $parentNodes)) {
231 $listNode->appendChild($node);
234 $listNode = $node->parentNode->insertBefore($this->createElement(
"ul"), $node);
235 $listNode->appendChild($node);
239 $nodelist = $xpath->query(
"//b[not(node())] | //i[not(node())] | //strong[not(node())] | //span[not(node())] | //a[not(node())]");
241 for ($i = $nodelist->length - 1; $i >= 0; $i--) {
242 $node = $nodelist->item($i);
244 $node->parentNode->removeChild($node);
247 if ($nodes->length == 1) {
248 $node = $nodes->item(0);
249 if ($node->nodeName ==
"p" && $node->childNodes->length == 1 && $node->childNodes->item(0)->nodeName ==
"br") {
250 $node->parentNode->removeChild($node);
262 $charsToRemove = mb_strlen($this->documentElement->textContent) - $max;
264 if ($charsToRemove <= 0) {
268 $xpath = new \DOMXPath($this);
269 $textNodes = $xpath->query(
"//text()");
270 $i = $textNodes->length - 1;
271 while ($charsToRemove > 0 && $i >= 0) {
272 $n = $textNodes->item($i);
273 $len = mb_strlen($n->textContent);
274 $parent = $n->parentNode;
276 if ($len <= $charsToRemove) {
277 $parent->removeChild($n);
278 $charsToRemove -= $len;
280 $restNode = $n->splitText($len - $charsToRemove);
281 $parent->removeChild($restNode);
286 if (mb_strlen($parent->textContent) == 0) {
287 $parent->parentNode->removeChild($parent);