XPathHTMLParser.php in Feeds XPath Parser 8
File
lib/Drupal/feeds_xpathparser/Plugin/feeds/Parser/XPathHTMLParser.php
View source
<?php
namespace Drupal\feeds_xpathparser\Plugin\feeds\Parser;
use Drupal\Component\Annotation\Plugin;
use Drupal\Core\Annotation\Translation;
use Drupal\feeds\FetcherResultInterface;
use Drupal\feeds_xpathparser\ParserBase;
/**
 * Parser that loads fetched HTML into a DOMDocument.
 *
 * The fetched markup can optionally be passed through Tidy before it is
 * handed to DOMDocument::loadHTML().
 */
class XPathHTMLParser extends ParserBase {

  /**
   * Builds a DOMDocument from the raw content of a fetcher result.
   *
   * @param array $feed_config
   *   Parser configuration. Keys read here:
   *   - tidy: when non-empty, the raw content is repaired with Tidy first.
   *   - tidy_encoding: encoding passed to Tidy (only read when 'tidy' is set).
   *   - errors: forwarded as the second argument of errorStop().
   * @param \Drupal\feeds\FetcherResultInterface $fetcher_result
   *   The fetcher result whose raw content is parsed.
   *
   * @return \DOMDocument
   *   The document loaded from the (optionally tidied) HTML.
   *
   * @throws \RuntimeException
   *   Thrown when the content is empty, Tidy returns FALSE, or
   *   DOMDocument::loadHTML() reports failure.
   */
  protected function setup(array $feed_config, FetcherResultInterface $fetcher_result) {
    $raw = $fetcher_result->getRaw();

    if (!empty($feed_config['tidy'])) {
      $config = array(
        'merge-divs' => FALSE,
        'merge-spans' => FALSE,
        'join-styles' => FALSE,
        'drop-empty-paras' => FALSE,
        'wrap' => 0,
        'tidy-mark' => FALSE,
        'escape-cdata' => TRUE,
        'word-2000' => TRUE,
      );
      $encoding = $feed_config['tidy_encoding'];
      // tidy_repair_string() returns FALSE on failure; that case is rejected
      // below instead of being handed to loadHTML().
      $raw = tidy_repair_string(trim($raw), $config, $encoding);
    }

    $doc = new \DOMDocument();

    // errorStart()/errorStop() are inherited from ParserBase and bracket the
    // parse; NOTE(review): presumably they toggle libxml error capturing --
    // confirm against ParserBase.
    $use = $this->errorStart();

    // DOMDocument::loadHTML() does not accept an empty source: it raises a
    // PHP-level warning on older PHP versions and throws ValueError on PHP 8+,
    // which would skip errorStop() below. Treat an empty or failed payload as
    // a parse failure without calling loadHTML() at all.
    $has_content = $raw !== FALSE && $raw !== NULL && $raw !== '';
    $success = $has_content && $doc->loadHTML($raw);

    // Release the (potentially large) source string as early as possible.
    unset($raw);

    $this->errorStop($use, $feed_config['errors']);

    if (!$success) {
      throw new \RuntimeException(t('There was an error parsing the HTML document.'));
    }

    return $doc;
  }

  /**
   * Serializes a node to its HTML markup.
   *
   * Uses $this->doc, which is not assigned in this class; NOTE(review):
   * presumably ParserBase stores the document returned by setup() there --
   * confirm.
   *
   * @param \DOMNode $node
   *   The node to serialize.
   *
   * @return string
   *   The result of DOMDocument::saveHTML() for the given node.
   */
  protected function getRaw(\DOMNode $node) {
    return $this->doc->saveHTML($node);
  }

}