Skip to content

Commit

Permalink
Fix encoding issues
Browse files (browse the repository at this point in the history)
  • Loading branch information
HedicGuibert committed Jun 4, 2022
1 parent a3269c8 commit 4b458ba
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 8 deletions.
21 changes: 14 additions & 7 deletions src/JoliTypo/Fixer.php
Original file line number Diff line number Diff line change
@@ -305,16 +305,13 @@ private function loadDOMDocument($content)
 $dom->substituteEntities = false;
 $dom->formatOutput = false;

-// Change mb and libxml config
+// Change libxml config
 $libxmlCurrent = libxml_use_internal_errors(true);
-$mbDetectCurrent = mb_detect_order();
-mb_detect_order('ASCII,UTF-8,ISO-8859-1,windows-1252,iso-8859-15');

 $loaded = $dom->loadHTML($this->fixContentEncoding($content));

-// Restore mb and libxml config
+// Restore libxml config
 libxml_use_internal_errors($libxmlCurrent);
-mb_detect_order(implode(',', $mbDetectCurrent));

 if (!$loaded) {
 throw new InvalidMarkupException("Can't load the given HTML via DomDocument");
@@ -345,7 +342,15 @@ private function fixContentEncoding($content)
 $content = $hack . $content;
 }

-$encoding = mb_detect_encoding($content);
+$encoding = '';
+
+foreach (['UTF-8', 'ASCII', 'ISO-8859-1', 'windows-1252', 'iso-8859-15'] as $testedEncoding) {
+if (mb_detect_encoding($content, $testedEncoding, true)) {
+$encoding = $testedEncoding;
+break;
+}
+}
+
 $headPos = mb_strpos($content, '<head>');

 // Add a meta to the <head> section
@@ -356,7 +361,9 @@ private function fixContentEncoding($content)
 mb_substr($content, $headPos);
 }

-$content = mb_convert_encoding($content, 'HTML-ENTITIES', $encoding);
+if ('UTF-8' !== $encoding) {
+$content = mb_convert_encoding($content, 'UTF-8', $encoding);
+}
 }

 return $content;
Expand Down
2 changes: 1 addition & 1 deletion tests/JoliTypo/Tests/Html5Test.php
Original file line number Diff line number Diff line change
@@ -46,7 +46,7 @@ public function testFullPageMarkup()
 HTML;

 $fixed = <<<'STRING'
-&#8220;Who Let the Dogs Out?&#8221; is a song written and originally recorded by Anslem Douglas (titled &#8220;Doggie&#8221;).
+“Who Let the Dogs Out?” is a song written and originally recorded by Anslem Douglas (titled “Doggie”).
 STRING;

 $this->assertEquals($fixed, $fixer->fix($html));
Expand Down

0 comments on commit 4b458ba

Please sign in to comment.