View source
<?php
/**
 * Form builder for the Feeds Crawler administration form.
 *
 * Offers every enabled importer as a crawl target. An importer whose config
 * has a non-empty 'content_type' is listed once per node of that type, keyed
 * as "<importer id>$$$<nid>" and labelled with the node title; any other
 * importer is listed once, keyed and labelled by its own id.
 *
 * @param $form_state
 *   The current form state (not used by this builder).
 *
 * @return
 *   The form structure array.
 */
function feeds_crawler_admin_form($form_state) {
  // Remove the 'feeds_source_class' variable when it still names
  // FeedsSourceCrawler.
  // NOTE(review): presumably cleanup of a setting left behind by an earlier
  // version of this module -- confirm.
  if (variable_get('feeds_source_class', NULL) == 'FeedsSourceCrawler') {
    variable_del('feeds_source_class');
  }

  // Collect the choices for the importer select list.
  $choices = array();
  foreach (feeds_enabled_importers() as $importer_id) {
    $importer = feeds_importer($importer_id);
    $node_type = $importer->config['content_type'];

    if ($node_type == '') {
      // No content type configured: the importer itself is the choice.
      $choices += array($importer_id => $importer_id);
      continue;
    }

    // One choice per node of the importer's content type.
    $nodes = db_query("SELECT nid, title FROM {node} WHERE type='%s'", $node_type);
    while ($node = db_fetch_object($nodes)) {
      $choices += array($importer_id . '$$$' . $node->nid => $node->title);
    }
  }

  // Element order below is the order in which the form is rendered.
  return array(
    'importer' => array(
      '#type' => 'select',
      '#title' => t('Importer'),
      '#options' => $choices,
      '#default_value' => variable_get('feeds_crawler_importer', 0),
    ),
    // The key is spelled 'offest_url' on purpose: the submit handler reads
    // the value back under exactly this name.
    'offest_url' => array(
      '#type' => 'textfield',
      '#title' => t('Offset URL'),
      '#description' => t('Enter a URL here if you want to start at a page other than the first one.'),
      '#default_value' => variable_get('feeds_crawler_offset_url', ''),
    ),
    'autodetect' => array(
      '#type' => 'checkbox',
      '#title' => t('Autodetect'),
      '#description' => t('Feeds Crawler can attempt to autodetect the next link for RSS and ATOM feeds.'),
      '#default_value' => variable_get('feeds_crawler_autodetect', FALSE),
    ),
    'html' => array(
      '#type' => 'radios',
      '#title' => t('HTML or XML'),
      '#description' => t('Select whether the content is HTML or XML.'),
      '#options' => array(
        'xml' => 'XML',
        'html' => 'HTML',
      ),
      '#default_value' => variable_get('feeds_crawler_html', 'xml'),
    ),
    'xpath' => array(
      '#type' => 'textfield',
      '#title' => t('XPath'),
      '#description' => t('This is the XPath query that points to the \'Next\' button on the pager.'),
      '#default_value' => variable_get('feeds_crawler_xpath', ''),
    ),
    'count' => array(
      '#type' => 'textfield',
      '#title' => t('Number of pages'),
      '#required' => TRUE,
      '#description' => t('This is how many pages you would like to crawl. 0 means crawl all of them.'),
      '#default_value' => variable_get('feeds_crawler_count', 10),
    ),
    'next' => array(
      '#type' => 'submit',
      '#value' => t('Crawl'),
    ),
  );
}
/**
 * Validation handler for feeds_crawler_admin_form().
 *
 * Ensures the "Number of pages" value is a non-negative whole number, so the
 * crawl is never started with a count such as "abc", "-3" or "1.5".
 *
 * @param $form
 *   The form structure array (not used).
 * @param $form_state
 *   The form state; only $form_state['values']['count'] is inspected.
 */
function feeds_crawler_admin_form_validate($form, &$form_state) {
  $count = (string) $form_state['values']['count'];

  // An empty value is skipped here: the element is marked #required in the
  // form definition, so it is reported separately.
  if ($count !== '' && !preg_match('/^[0-9]+\z/', $count)) {
    form_set_error('count', t('Number of pages must be a whole number, 0 or greater.'));
  }
}
/**
 * Submit handler for feeds_crawler_admin_form().
 *
 * Stores the submitted settings as variables so they can serve as defaults
 * next time, then queues a batch with a single 'feeds_crawler_batch'
 * operation.
 *
 * @param $form
 *   The form structure array (not used).
 * @param $form_state
 *   The form state; the submitted values are read from
 *   $form_state['values'].
 */
function feeds_crawler_admin_form_submit($form, &$form_state) {
  $values = $form_state['values'];

  // The selection is either a bare importer id or "<importer id>$$$<nid>".
  $selection = $values['importer'];
  variable_set('feeds_crawler_importer', $selection);
  if (strpos($selection, '$$$') !== FALSE) {
    list($importer_id, $feed_nid) = explode('$$$', $selection);
  }
  else {
    $importer_id = $selection;
    $feed_nid = 0;
  }

  $xpath = trim($values['xpath']);
  $mode = $values['html'];
  $page_count = $values['count'];
  $autodetect = $values['autodetect'];

  variable_set('feeds_crawler_xpath', $xpath);
  variable_set('feeds_crawler_html', $mode);
  variable_set('feeds_crawler_count', $page_count);
  variable_set('feeds_crawler_autodetect', $autodetect);

  // An empty offset is handed to the batch operation as NULL.
  // NOTE(review): the form builder reads 'feeds_crawler_offset_url' for this
  // field's default, but nothing in this handler writes that variable --
  // confirm whether that is intentional.
  $offest_url = ($values['offest_url'] != '') ? $values['offest_url'] : NULL;

  $operation_args = array(
    $importer_id,
    $feed_nid,
    $xpath,
    $offest_url,
    $page_count,
    $autodetect,
    $mode,
  );

  batch_set(array(
    'title' => t('Crawling %count pages.', array('%count' => $page_count)),
    'progress_message' => '',
    'operations' => array(
      array('feeds_crawler_batch', $operation_args),
    ),
  ));
}