biblio_advanced_import.module in Biblio Advanced Import 6
Biblio add-on.
Instead of creating duplicate biblio records, existing ones can be updated or the import can be skipped, depending on a configurable duplicate detection strategy.
@author Markus Kalkbrenner | Cocomore AG
File
biblio_advanced_import.module
<?php
/**
* @file
* Biblio add-on.
*
* Instead of creating duplicate biblio records,
* existing ones can be updated or the import can
* be skipped, depending on a configurable duplicate
* detection strategy.
*
* @see biblio.module
*
* @author Markus Kalkbrenner | Cocomore AG
* @see http://drupal.org/user/124705
*/
/**
* Implements hook_menu().
*/
function biblio_advanced_import_menu() {
$items['admin/settings/biblio/advanced_import'] = array(
'title' => 'Advanced Import',
'page callback' => 'drupal_get_form',
'page arguments' => array(
'biblio_advanced_import_settings_form',
),
'access arguments' => array(
'administer biblio',
),
'file' => 'biblio_advanced_import.admin.inc',
'type' => MENU_LOCAL_TASK,
'weight' => 2,
);
return $items;
}
/**
* Implements hook_nodeapi().
*
* Instead of creating duplicate biblio records,
* existing ones could be updated or the import could
* be skipped.
*
* @see biblio_advanced_import_settings_form()
*/
function biblio_advanced_import_nodeapi(&$node, $op, $a1, $a2) {
switch ($op) {
case 'presave':
if ('biblio' == $node->type && !$node->nid) {
biblio_advanced_import_pitfall_workarounds($node);
if (variable_get('biblio_auto_citekey', 1)) {
// On new entries, override citekeys generated by parsers, depending on the settings.
$citekey = biblio_advanced_import_create_citekey($node);
if ($citekey) {
$node->biblio_citekey = $citekey;
}
}
$order = '';
$limit = '';
$skip = FALSE;
$revision = FALSE;
switch (variable_get('biblio_advanced_import_duplicate_strategy', 'create duplicate')) {
case 'create duplicate':
return;
case 'skip import':
// There's no way to stop an already running node_save()
// in a safe way without breaking a batch process.
// So we do a little trick to realize the 'skip import':
// We simply replace the current node to be saved by the
// unmodified oldest duplicate and save this one instead
$skip = TRUE;
$order = 'ORDER BY nid DESC';
$limit = 'LIMIT 1';
break;
case 'new rev latest':
$revision = TRUE;
case 'update latest':
$order = 'ORDER BY nid DESC';
$limit = 'LIMIT 1';
break;
case 'new rev oldest':
$revision = TRUE;
case 'update oldest':
$order = 'ORDER BY nid ASC';
$limit = 'LIMIT 1';
break;
case 'new rev all':
$revision = TRUE;
case 'update all':
break;
}
$or_fields = array();
$joins = array();
foreach (variable_get('biblio_advanced_import_detect_duplicate_strategy', array(
'md5' => 'md5',
)) as $field) {
switch ((string) $field) {
case 'md5':
$or_fields[] = "biblio_md5 = '" . db_escape_string(biblio_advanced_import_hash($node)) . "'";
break;
case 'isbn':
case 'issn':
case 'doi':
$field_property = 'biblio_' . $field;
if (!empty($node->{$field_property})) {
$or_fields[] = $field_property . " = '" . db_escape_string($node->{$field_property}) . "'";
}
break;
case 'pubmed':
if (module_exists('biblio_pm') && !empty($node->biblio_pubmed_id)) {
$joins[] = 'JOIN {biblio_pubmed} USING (nid)';
$or_fields[] = "biblio_pubmed_id = '" . db_escape_string($node->biblio_pubmed_id) . "'";
}
break;
}
}
if (!empty($or_fields)) {
$biblio_advanced_import_recursion = FALSE;
$query = "SELECT nid FROM {biblio} JOIN {node} USING (nid, vid) " . implode(' ', $joins) . " WHERE " . implode(' OR ', $or_fields) . " {$order} {$limit}";
if ($result = db_query($query)) {
$node_new = (array) $node;
while ($row = db_fetch_object($result)) {
// There are duplicates:
$node_old = node_load($row->nid);
if (!$skip) {
// update an existing biblio node with new data
$merge = FALSE;
foreach ($node_new as $key => $value) {
if (strpos($key, 'biblio') === 0 && 'biblio_citekey' != $key || strpos($key, 'contributors') === 0 || 'title' == $key) {
$strategy = variable_get('biblio_advanced_import_merge_strategy', 'override');
if ('override' == $strategy
|| ('override but keep additional' == $strategy && !empty($value))
|| ('add new' == $strategy && !empty($value) && empty($node_old->{$key}))
|| ('override existing non empty' == $strategy && !empty($node_old->{$key}))
|| ('override existing non empty with non empty' == $strategy && !empty($value) && !empty($node_old->{$key}))) {
if ($node_old->{$key} != $value) {
$node_old->{$key} = $value;
$merge = TRUE;
}
}
}
}
if ($revision && $merge) {
$node_old->revision = TRUE;
$node_old->log = t('New revision created automatically by Biblio Advanced Import.');
}
}
else {
// There's no way to stop an already running node_save()
// in a safe way without breaking a batch process.
// So we do a little trick to realize the 'skip import':
// We simply replace the current node to be saved by the
// unmodified oldest duplicate and save this one instead
}
if ($biblio_advanced_import_recursion) {
node_save($node_old);
}
else {
$biblio_advanced_import_recursion = TRUE;
$node = $node_old;
}
}
}
}
}
break;
case 'insert':
case 'update':
if ('biblio' == $node->type) {
biblio_advanced_import_update_hash($node);
}
break;
}
}
/**
* Helper function to create a hash from a biblio node
* depending on a configurable duplicate detection
* strategy.
*
* @see biblio_advanced_import_settings_form()
*
* @param $node
* a biblio node
*
* @return
* a md5 hash
*/
function biblio_advanced_import_hash($node) {
$hash_string = '';
$duplicate_criteria = variable_get('biblio_advanced_import_duplicate_criteria', array(
'title' => 'title',
'biblio_year' => 'biblio_year',
));
foreach ($duplicate_criteria as $field) {
if ($field) {
$field_value = '';
if (isset($node->{$field})) {
$field_value = $node->{$field};
}
if ('biblio_year' == $field && empty($field_value)) {
// If the year is empty, it will be set to 9999 by biblio on save.
// 9999 => "Submitted"
// Therefore we have to do the same here to not break duplicate detection.
$field_value = 9999;
}
if ($field_value) {
if (is_array($field_value) || is_object($field_value)) {
$hash_string .= serialize($field_value);
}
else {
$hash_string .= preg_replace("/\\s+/", '', mb_strtolower(mb_substr($field_value, 0, 256)));
}
}
}
}
return md5(strtolower($hash_string));
}
/**
* Helper function to update the hash of a biblio node.
*
* @see biblio_advanced_import_settings_form()
*
* @param $node
* a biblio node
*/
function biblio_advanced_import_update_hash(&$node) {
$node->biblio_md5 = biblio_advanced_import_hash($node);
db_query("UPDATE {biblio} SET biblio_md5 = '%s' WHERE nid = %d AND vid = %d", $node->biblio_md5, $node->nid, $node->vid);
}
/**
* Helper function to create a configurable biblio node citekey.
*
* @see biblio_advanced_import_settings_form()
* @see biblio_advanced_import_settings_form_submit()
*
* @param $node
* a biblio node
*/
function biblio_advanced_import_create_citekey($node) {
$citekey = '';
switch (variable_get('biblio_advanced_import_citekey_creation_strategy', 'biblio')) {
case 'fields':
$citekey_parts = array();
$prefix = variable_get('biblio_citekey_prefix', '');
if (!empty($prefix)) {
$citekey_parts[] = $prefix;
}
foreach (variable_get('biblio_advanced_import_citekey_creation', array(
'title' => 'title',
'biblio_year' => 'biblio_year',
)) as $field) {
if (!empty($field) && !empty($node->{$field})) {
$citekey_parts[] = $node->{$field};
}
}
$citekey = implode('|', $citekey_parts);
// biblio stores citekey as varchar(255), we make sure it fits
$citekey = mb_substr($citekey, 0, 255);
// strip trailing pipe symbol, if any
$citekey = preg_replace('@\\|+$@', '', $citekey);
$citekey = trim($citekey);
break;
}
if ($citekey) {
if (db_result(db_query("SELECT 1 FROM {biblio} WHERE biblio_citekey = '%s'", $citekey))) {
switch (variable_get('biblio_advanced_import_duplicate_citekey_strategy', 'skip')) {
case 'skip':
$citekey = '';
break;
case 'append counter':
$counter = variable_get('biblio_advanced_import_citekey_creation_counter', 0) + 1;
variable_set('biblio_advanced_import_citekey_creation_counter', $counter);
// biblio stores citekey as varchar(255), so we have to ensure that the counter is saved
$citekey = mb_substr($citekey, 0, 254 - strlen($counter)) . '|' . $counter;
}
}
}
return $citekey;
}
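/**
* Implements hook_form_FORM_ID_alter() for biblio_admin_settings.
*
* Locks biblio's own citekey settings while citekeys are generated
* from fields by this module.
*/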
function biblio_advanced_import_form_biblio_admin_settings_alter(&$form, &$form_state) {
if ('fields' == variable_get('biblio_advanced_import_citekey_creation_strategy', 'biblio')) {
$form['citekey']['biblio_citekey_field1']['#type'] = 'value';
$form['citekey']['biblio_citekey_field1']['#value'] = $form['citekey']['biblio_citekey_field1']['#default_value'];
$form['citekey']['biblio_citekey_field2']['#type'] = 'value';
$form['citekey']['biblio_citekey_field2']['#value'] = $form['citekey']['biblio_citekey_field2']['#default_value'];
$form['citekey']['biblio_citekey_phpcode']['#type'] = 'value';
$form['citekey']['biblio_citekey_phpcode']['#value'] = $form['citekey']['biblio_citekey_phpcode']['#default_value'];
}
}
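/**
* Helper function to work around pitfalls of imported data.
*
* Depending on the module's settings, ISSN, ISBN, DOI, title and URL
* values are normalized or removed before the node gets saved.
*
* @see biblio_advanced_import_settings_form()
*
* @param $node
* a biblio node
*/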
function biblio_advanced_import_pitfall_workarounds(&$node) {
switch (variable_get('biblio_advanced_import_fix_issn', 'as is')) {
case 'as is':
break;
case 'normalize from isbn':
if (empty($node->biblio_issn) || !empty($node->biblio_isbn)) {
// RIS format does not distinguish between ISBN and ISSN
$node->biblio_issn = $node->biblio_isbn;
}
// no break
case 'normalize':
// @see http://en.wikipedia.org/wiki/International_Standard_Serial_Number
if (!empty($node->biblio_issn)) {
if (preg_match("@\\b([0-9]{4})-?([0-9X]{4})\\b@i", $node->biblio_issn, $matches)) {
$issn = strtoupper($matches[1] . $matches[2]);
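// Example: for ISSN 0378-5955 the weighted sum of the first seven digits
// is 0*8 + 3*7 + 7*6 + 8*5 + 5*4 + 9*3 + 5*2 = 160, 160 mod 11 = 6,
// and 11 - 6 = 5 matches the check digit, so the ISSN is kept.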
$sum = 0;
for ($i = 0; $i < 7; $i++) {
$sum += $issn[$i] * (8 - $i);
}
$checksum = 11 - $sum % 11;
if ($checksum == $issn[7] || 10 == $checksum && 'X' == $issn[7]) {
$node->biblio_issn = $issn;
}
else {
unset($node->biblio_issn);
}
}
else {
unset($node->biblio_issn);
}
}
break;
}
switch (variable_get('biblio_advanced_import_fix_isbn', 'as is')) {
case 'as is':
break;
case 'remove':
// @see http://en.wikipedia.org/wiki/International_Standard_Book_Number
if (!empty($node->biblio_isbn)) {
if (preg_match("@[0-9\\-]{10,}@", $node->biblio_isbn, $matches)) {
module_load_include('class.php', 'biblio_advanced_import', 'lib/isbntest');
$currISBN = new ISBNtest();
$currISBN->set_isbn($matches[0]);
if ($currISBN->valid_isbn10() || $currISBN->valid_isbn13() || $currISBN->valid_gtin14()) {
$node->biblio_isbn = $currISBN->get_gtin14();
}
else {
unset($node->biblio_isbn);
}
}
else {
unset($node->biblio_isbn);
}
}
break;
case 'convert 13':
// @see http://en.wikipedia.org/wiki/International_Standard_Book_Number
if (!empty($node->biblio_isbn)) {
if (preg_match("@[0-9\\-]{10,}@", $node->biblio_isbn, $matches)) {
module_load_include('class.php', 'biblio_advanced_import', 'lib/isbntest');
$currISBN = new ISBNtest();
$currISBN->set_isbn($matches[0]);
if ($currISBN->valid_isbn13()) {
$node->biblio_isbn = $currISBN->get_isbn13();
}
elseif ($currISBN->valid_gtin14()) {
$node->biblio_isbn = $currISBN->get_gtin14();
}
else {
unset($node->biblio_isbn);
}
}
else {
unset($node->biblio_isbn);
}
}
break;
}
switch (variable_get('biblio_advanced_import_fix_doi', 'as is')) {
case 'as is':
break;
case 'one valid':
// @see http://en.wikipedia.org/wiki/Digital_object_identifier
if (!empty($node->biblio_doi)) {
if (preg_match("@10\\.\\d{4,}/[^\\s]+@i", $node->biblio_doi, $matches)) {
$node->biblio_doi = $matches[0];
}
else {
unset($node->biblio_doi);
}
}
break;
}
switch (variable_get('biblio_advanced_import_fix_title', 'as is')) {
case 'as is':
break;
case 'mendeley bibtex':
if (!empty($node->title)) {
// strip off enclosing curly braces, but only a matching pair
$node->title = preg_replace('@^\\{(.*)\\}$@', '$1', $node->title);
}
break;
}
switch (variable_get('biblio_advanced_import_fix_url', 'as is')) {
case 'as is':
break;
case 'one valid':
if (!empty($node->biblio_url)) {
if (preg_match("@(http|https)://[^\\s]+@i", $node->biblio_url, $matches)) {
// ris import runs together lists of urls without a delimiter
$urls = explode('http', str_replace(array(
'HTTP:',
'HTTPS:',
), array(
'http:',
'https:',
), $matches[0]));
$node->biblio_url = 'http' . $urls[1];
}
else {
unset($node->biblio_url);
}
}
break;
}
}
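All of the behavior above is driven by Drupal variables that are normally set through the settings form at admin/settings/biblio/advanced_import. As a minimal illustrative sketch only (not part of the module; the variable names and strategy strings are taken from the switch statements above, and applying them programmatically, e.g. from an install or update hook, is an assumption):
// Update the latest duplicate instead of creating a new node, and
// create a new revision whenever the imported data actually changes.
variable_set('biblio_advanced_import_duplicate_strategy', 'new rev latest');
// Treat records as duplicates when the stored MD5 hash or the DOI matches.
variable_set('biblio_advanced_import_detect_duplicate_strategy', array(
  'md5' => 'md5',
  'doi' => 'doi',
));
// Build the MD5 hash from the normalized title and the publication year.
variable_set('biblio_advanced_import_duplicate_criteria', array(
  'title' => 'title',
  'biblio_year' => 'biblio_year',
));
// When merging, only fill in fields that are still empty on the existing node.
variable_set('biblio_advanced_import_merge_strategy', 'add new');
// Generate citekeys from fields and append a counter on citekey collisions.
variable_set('biblio_advanced_import_citekey_creation_strategy', 'fields');
variable_set('biblio_advanced_import_duplicate_citekey_strategy', 'append counter');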
Functions
Name | Description |
---|---|
biblio_advanced_import_create_citekey | Helper function to create a configurable biblio node citekey. |
biblio_advanced_import_form_biblio_admin_settings_alter | Implements hook_form_FORM_ID_alter() for biblio_admin_settings. |
biblio_advanced_import_hash | Helper function to create a hash from a biblio node depending on a configurable duplicate detection strategy. |
biblio_advanced_import_menu | Implements hook_menu(). |
biblio_advanced_import_nodeapi | Implements hook_nodeapi(). |
biblio_advanced_import_pitfall_workarounds | Helper function to work around pitfalls of imported data. |
biblio_advanced_import_update_hash | Helper function to update the hash of a biblio node. |
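For illustration only (not part of the module), the duplicate hash produced by biblio_advanced_import_hash() ignores letter case and whitespace in the configured criteria fields, so two imports of the same reference that differ only in formatting hash to the same value. A minimal sketch, assuming the default criteria (title and biblio_year):
$a = new stdClass();
$a->title = 'On the Origin of Species';
$a->biblio_year = 1859;

$b = new stdClass();
$b->title = '  on the   ORIGIN of species';
$b->biblio_year = 1859;

// Both titles normalize to "ontheoriginofspecies", so the hashes match
// and the configured duplicate strategy decides what happens on import.
$is_duplicate = (biblio_advanced_import_hash($a) === biblio_advanced_import_hash($b));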