View source
<?php
/**
 * Feeds parser that extracts items from a fetched document using QueryPath.
 *
 * A "context" CSS selector marks the element that starts each item. For every
 * mapping whose source begins with "querypathparser:", a CSS selector and an
 * optional attribute name decide which value is extracted inside that context.
 */
class FeedsQueryPathParser extends FeedsParser {

  /**
   * Configuration in effect for the current parse() run.
   *
   * Declared explicitly instead of being created on assignment: dynamic
   * properties are deprecated as of PHP 8.2. Kept public because that is the
   * visibility the dynamically created property had.
   *
   * @var array
   */
  public $source_config = array();

  /**
   * Source keys whose value is returned as markup (html()) instead of text().
   *
   * @var array
   */
  public $rawXML = array();

  /**
   * Keys ("context" or a source key) for which debug output is enabled.
   *
   * @var array
   */
  public $debug = array();

  /**
   * Parses the fetched document into a FeedsParserResult.
   *
   * @param FeedsSource $source
   *   The feed source being imported.
   * @param FeedsFetcherResult $fetcher_result
   *   The fetcher result providing the raw document.
   *
   * @return FeedsParserResult
   *   The result with title, link (when the fetcher config has a 'source'
   *   entry) and one item per context match that produced at least one value.
   *
   * @throws Exception
   *   When the fetched document is empty.
   */
  public function parse(FeedsSource $source, FeedsFetcherResult $fetcher_result) {
    $mappings = $this->getOwnMappings();

    // Prefer the per-source configuration, fall back to the importer's.
    $this->source_config = $source->getConfigFor($this);
    if (empty($this->source_config)) {
      $this->source_config = $this->getConfig();
    }

    // The default configuration has 'debug' => array() without an 'options'
    // entry, so both checkbox groups must be guarded before array_filter().
    $raw_xml = isset($this->source_config['rawXML']) ? (array) $this->source_config['rawXML'] : array();
    $this->rawXML = array_keys(array_filter($raw_xml));
    $debug_options = isset($this->source_config['debug']['options']) ? (array) $this->source_config['debug']['options'] : array();
    $this->debug = array_keys(array_filter($debug_options));

    $raw = trim($fetcher_result->getRaw());
    if (empty($raw)) {
      throw new Exception(t('Feeds QueryPath parser: The document is empty.'));
    }

    $opts = array(
      'ignore_parser_warnings' => TRUE,
    );

    $result = new FeedsParserResult();
    $fetcher_config = $source->getConfigFor($source->importer->fetcher);
    if (isset($fetcher_config['source'])) {
      $result->link = $fetcher_config['source'];
    }

    $this->includeQueryPath();
    // Error suppression is deliberate here and kept from the original code.
    $doc = @qp($raw, NULL, $opts);

    // If the document declares a charset in a content-type meta tag, rewrite
    // the declaration to utf-8, move it to the top of <head> and re-parse the
    // document after converting it from the declared charset.
    // NOTE(review): utf8_decode() is deprecated as of PHP 8.2; replacing it
    // needs testing against real non-UTF-8 feeds, so it is left unchanged.
    $content_type = qp($doc, 'meta[http-equiv="content-type"]');
    if ($content_type->hasAttr('content') && preg_match('/charset=([-\\w]*)/i', $content_type->attr('content'), $matches)) {
      $content_type->attr('content', preg_replace('/charset=([-\\w]*)/i', 'charset=utf-8', $content_type->attr('content')));
      qp($doc, 'meta[http-equiv="content-type"]')->remove();
      qp($doc, 'head')->prepend($content_type->html());
      $doc = qp(drupal_convert_to_utf8(utf8_decode($doc->html()), $matches[1]), NULL, $opts);
    }

    $result->title = qp($doc, 'title', $opts)->text();

    $context_selector = isset($this->source_config['context']) ? $this->source_config['context'] : NULL;
    $context = qp($doc, $context_selector, $opts);
    $this->debug($context, 'context');

    $sources = !empty($this->source_config['sources']) ? $this->source_config['sources'] : array();

    foreach ($context as $item) {
      $parsed_item = $variables = array();
      // A distinct loop variable keeps the $source parameter intact.
      foreach ($sources as $source_key => $query) {
        // Substitute {target} placeholders with values parsed earlier for
        // this item.
        $query = strtr($query, $variables);
        $parsed = $this->parseSourceElement($item, $query, $source_key);
        if (isset($parsed)) {
          // Only sources that still have a mapping can act as a variable;
          // multi-valued results are substituted as an empty string.
          if (isset($mappings[$source_key])) {
            $variables['{' . $mappings[$source_key] . '}'] = is_array($parsed) ? '' : $parsed;
          }
          $parsed_item[$source_key] = $parsed;
        }
      }
      if (!empty($parsed_item)) {
        $result->items[] = $parsed_item;
      }
    }

    return $result;
  }

  /**
   * Extracts the value(s) of one source from a context item.
   *
   * @param mixed $item
   *   The QueryPath object for the current context element.
   * @param string $query
   *   CSS selector to run inside the item; empty to use the item itself.
   * @param string $source
   *   The source key, used to look up the attribute and raw-markup settings.
   *
   * @return mixed
   *   NULL when neither selector nor attribute is configured or nothing
   *   matched, a single value for exactly one match, otherwise an array of
   *   values.
   */
  protected function parseSourceElement($item, $query, $source) {
    $attr = isset($this->source_config['attrs'][$source]) ? $this->source_config['attrs'][$source] : '';

    if ($query == '' && $attr == '') {
      return NULL;
    }
    if ($query != '') {
      $item = qp($item, $query);
    }

    $as_markup = in_array($source, $this->rawXML, TRUE);
    $results = array();
    foreach ($item as $element) {
      if ($attr != '') {
        $results[] = $element->attr($attr);
      }
      elseif ($as_markup) {
        $results[] = $element->html();
      }
      else {
        $results[] = $element->text();
      }
    }

    $this->debug($results, $source);

    $count = count($results);
    if ($count === 0) {
      return NULL;
    }
    if ($count === 1) {
      return $results[0];
    }
    return $results;
  }

  /**
   * Builds the parser settings form shown on the source (import) form.
   *
   * @param array $source_config
   *   The stored source configuration; the importer configuration is used
   *   when this is empty.
   *
   * @return array
   *   A form array with a 'querypath' fieldset. When no "querypathparser:"
   *   mappings exist it only contains a help message.
   */
  public function sourceForm($source_config) {
    $form = array();
    if (empty($source_config)) {
      $source_config = $this->config;
    }

    $processor_config = feeds_importer($this->id)->processor->config;
    $all_mappings = isset($processor_config['mappings']) ? $processor_config['mappings'] : array();

    // Collect our own mappings and note which targets are flagged unique.
    $uniques = $mappings = array();
    foreach ($all_mappings as $mapping) {
      if (strpos($mapping['source'], 'querypathparser:') === 0) {
        $mappings[$mapping['source']] = $mapping['target'];
        if (!empty($mapping['unique'])) {
          $uniques[] = $mapping['target'];
        }
      }
    }

    $form['querypath'] = array(
      '#type' => 'fieldset',
      '#title' => t('QueryPath Parser Settings'),
      '#tree' => TRUE,
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
    );

    if (empty($mappings)) {
      $form['querypath']['error_message']['#markup'] = '<div class="help">' . t('FeedsQueryPathParser: No mappings were defined. Define mappings !link.', array(
        '!link' => l(t('here'), 'admin/structure/feeds/' . $this->id . '/mapping'),
      )) . '</div>';
      return $form;
    }

    $form['querypath']['context'] = array(
      '#type' => 'textfield',
      '#title' => t('Context'),
      '#required' => TRUE,
      '#description' => t('The element that represents the beginning of a new item, like h1 or body. If you identify a context that occurs more than once in a feed, a new node or item will be created each time it is encountered.'),
      '#default_value' => isset($source_config['context']) ? $source_config['context'] : '',
      '#maxlength' => 1024,
    );
    $form['querypath']['sources'] = array(
      '#title' => t('Selectors'),
      '#type' => 'fieldset',
      '#description' => t('Indicate the CSS selector that marks where each field is located within the context, like div#content or h2:first.'),
    );
    $form['querypath']['attrs'] = array(
      '#title' => t('Attributes'),
      '#type' => 'fieldset',
      '#description' => t('Identify the attribute value to use for a field, if desired, like src or title. The element text will be used if no attribute is identified.'),
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
    );

    if (!empty($uniques)) {
      // format_plural() translates its own arguments, so the strings are
      // passed untranslated together with their placeholders.
      $unique_list = implode(', ', $uniques);
      $items = array(
        format_plural(
          count($uniques),
          'Field <strong>!column</strong> is mandatory and considered unique: only one item per !column value will be created.',
          'Fields <strong>!columns</strong> are mandatory and values in these columns are considered unique: only one entry per value in one of these columns will be created.',
          array(
            '!column' => $unique_list,
            '!columns' => $unique_list,
          )
        ),
      );
      $form['querypath']['sources']['help']['#markup'] = '<div class="help">' . theme('item_list', array(
        'items' => $items,
      )) . '</div>';
    }

    // Each selector may reference the targets of the mappings listed before
    // it as {target} variables.
    $variables = array();
    foreach ($mappings as $source => $target) {
      $form['querypath']['sources'][$source] = array(
        '#type' => 'textfield',
        '#title' => $target,
        '#description' => t('The CSS selector for this field.'),
        '#default_value' => isset($source_config['sources'][$source]) ? $source_config['sources'][$source] : '',
        '#maxlength' => 1024,
      );
      if (!empty($variables)) {
        // Runtime values go through a placeholder so the source string stays
        // translatable and the values are escaped.
        $form['querypath']['sources'][$source]['#description'] .= '<br>' . t('The variables @variables are available for replacement.', array(
          '@variables' => implode(', ', $variables),
        ));
      }
      $variables[] = '{' . $target . '}';

      $form['querypath']['attrs'][$source] = array(
        '#type' => 'textfield',
        '#title' => $target,
        '#description' => t('The attribute to return.'),
        '#default_value' => isset($source_config['attrs'][$source]) ? $source_config['attrs'][$source] : '',
        '#maxlength' => 1024,
      );
    }

    $form['querypath']['rawXML'] = array(
      '#type' => 'checkboxes',
      '#options' => $mappings,
      '#default_value' => isset($source_config['rawXML']) ? $source_config['rawXML'] : array(),
    );
    $form['querypath']['debug'] = array(
      '#type' => 'fieldset',
      '#title' => t('Debug'),
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
    );
    $form['querypath']['debug']['options'] = array(
      '#type' => 'checkboxes',
      '#title' => t('Debug query'),
      '#options' => array_merge(array(
        'context' => 'context',
      ), $mappings),
      '#default_value' => isset($source_config['debug']['options']) ? $source_config['debug']['options'] : array(),
    );

    return $form;
  }

  /**
   * Builds the importer-level configuration form.
   *
   * Reuses the source form, but makes the context optional and shows the
   * fieldset expanded.
   *
   * @param array $form_state
   *   The form state (unused).
   *
   * @return array
   *   The form array.
   */
  public function configForm(&$form_state) {
    $form = $this->sourceForm($this->config);
    $form['querypath']['context']['#required'] = FALSE;
    $form['querypath']['#collapsed'] = FALSE;
    return $form;
  }

  /**
   * Returns the default source configuration.
   *
   * @return array
   *   An empty array.
   */
  public function sourceDefaults() {
    return array();
  }

  /**
   * Returns the default importer configuration.
   *
   * @return array
   *   Defaults for context, sources, debug, attrs and rawXML.
   */
  public function configDefaults() {
    return array(
      'context' => '',
      'sources' => array(),
      'debug' => array(),
      'attrs' => array(),
      'rawXML' => array(),
    );
  }

  /**
   * Validates the source form values.
   *
   * Replaces $values with its 'querypath' sub-array. When that is identical
   * to the importer configuration, $values is emptied; otherwise the values
   * are validated like the configuration form.
   *
   * @param array $values
   *   The submitted values, modified in place.
   */
  public function sourceFormValidate(&$values) {
    // 'querypath' is missing when the form was built without any mappings.
    $values = isset($values['querypath']) ? $values['querypath'] : array();
    ksort($values);
    ksort($this->config);
    if ($values === $this->config) {
      $values = array();
      return;
    }
    $this->configFormValidate($values);
  }

  /**
   * Validates the context and per-source CSS selectors.
   *
   * Accepts either the full form values (with a 'querypath' key, as submitted
   * by the configuration form) or the already flattened sub-array passed on
   * by sourceFormValidate(). Selectors are trimmed in place.
   *
   * @param array $values
   *   The submitted values, modified in place.
   */
  public function configFormValidate(&$values) {
    $config = FALSE;
    $mappings = $this->getOwnMappings();
    $doc = '<html></html>';

    if (isset($values['querypath'])) {
      $values = $values['querypath'];
      $config = TRUE;
    }
    // Form element names differ between the config form and the source form.
    $prefix = $config ? 'querypath][' : 'feeds][FeedsQueryPathParser][querypath][';

    $values['context'] = isset($values['context']) ? trim($values['context']) : '';
    try {
      $this->includeQueryPath();
      qp($doc, $values['context']);
    }
    catch (CSSParseException $e) {
      form_set_error($prefix . 'context', $e->getMessage());
    }

    // No selectors are submitted when the form was built without mappings.
    if (empty($values['sources']) || !is_array($values['sources'])) {
      return;
    }

    foreach ($values['sources'] as $key => &$query) {
      $query = trim($query);
      try {
        qp($doc, $query);
      }
      catch (CSSParseException $e) {
        // A selector containing a {target} variable cannot be checked before
        // substitution, so a parse error is only reported without one.
        $variable_present = FALSE;
        foreach ($mappings as $target) {
          if (strpos($query, '{' . $target . '}') !== FALSE) {
            $variable_present = TRUE;
            break;
          }
        }
        if (!$variable_present) {
          form_set_error($prefix . 'sources][' . $key, $e->getMessage());
        }
      }
    }
    // Drop the loop reference so it cannot alias the last element.
    unset($query);
  }

  /**
   * Offers the next free "querypathparser:N" source plus the parent's sources.
   *
   * N is one more than the highest index among the existing mappings, so a
   * key already in use is not offered again even when the mappings are not
   * stored in index order.
   *
   * @return array
   *   The mapping sources keyed by source name.
   */
  public function getMappingSources() {
    $next = 0;
    foreach (array_keys($this->getOwnMappings()) as $source_key) {
      $parts = explode(':', $source_key);
      if (isset($parts[1]) && is_numeric($parts[1])) {
        $next = max($next, (int) $parts[1] + 1);
      }
    }

    return array(
      'querypathparser:' . $next => array(
        'name' => t('QueryPath Expression'),
        'description' => t('Allows you to configure a CSS selector expression that will populate this field.'),
      ),
    ) + parent::getMappingSources();
  }

  /**
   * Returns the importer's mappings that belong to this parser.
   *
   * @return array
   *   Mapping targets keyed by their "querypathparser:N" source.
   */
  protected function getOwnMappings() {
    $importer_config = feeds_importer($this->id)->getConfig();
    $mappings = isset($importer_config['processor']['config']['mappings'])
      ? $importer_config['processor']['config']['mappings']
      : array();
    return $this->filterMappings($mappings);
  }

  /**
   * Keeps only mappings whose source starts with "querypathparser:".
   *
   * @param array $mappings
   *   Mapping definitions, each with 'source' and 'target' entries.
   *
   * @return array
   *   Mapping targets keyed by source.
   */
  protected function filterMappings($mappings) {
    $our_mappings = array();
    foreach ($mappings as $mapping) {
      if (strpos($mapping['source'], 'querypathparser:') === 0) {
        $our_mappings[$mapping['source']] = $mapping['target'];
      }
    }
    return $our_mappings;
  }

  /**
   * Shows the values found for a source as a message, if debugging is on.
   *
   * @param mixed $item
   *   The iterable of values; objects are rendered through their html()
   *   method.
   * @param string $source
   *   'context' or the source key the values belong to.
   */
  protected function debug($item, $source) {
    if (!in_array($source, $this->debug, TRUE)) {
      return;
    }

    $output = '<ul>';
    foreach ($item as $value) {
      if (is_object($value)) {
        $value = $value->html();
      }
      $output .= '<li>' . check_plain(var_export($value, TRUE)) . '</li>';
    }
    $output .= '</ul>';

    drupal_set_message($source . ':' . $output);
  }

  /**
   * Loads the QueryPath code when querypath_include_code() is available.
   */
  protected function includeQueryPath() {
    if (function_exists('querypath_include_code')) {
      querypath_include_code();
    }
  }

}
/**
 * Appends QueryPath help text to the form's existing help markup.
 *
 * NOTE(review): the function name suggests this is a hook_form_FORM_ID_alter()
 * implementation for the Feeds UI mapping form - confirm against the module.
 *
 * @param array $form
 *   The form array; $form['help']['#markup'] is extended in place.
 * @param array $form_state
 *   The form state (unused).
 */
function feeds_querypath_parser_form_feeds_ui_mapping_form_alter(&$form, &$form_state) {
  $help_text = t('The QueryPath Expression source allows you to use QueryPath to populate each field. Add a new QueryPath Expression source for each target you want to map.');
  $paragraph = '<p>' . $help_text . '</p>';
  $form['help']['#markup'] = $form['help']['#markup'] . $paragraph;
}