 * @file
 * Biblio add-on.
 * Instead of creating duplicate biblio records,
 * existing ones could be updated or the import could
 * be skipped depending on a configurable duplicate
 * detection strategy.
 * @see biblio.module
 * @author Markus Kalkbrenner | Cocomore AG
 *   @see

/**
 * Implements hook_menu().
 */
function biblio_advanced_import_menu() {
  // Registers a single router item: a local task (tab) titled
  // "Advanced Import" at admin/config/content/biblio/advanced_import. The
  // page is rendered through drupal_get_form() and is only accessible to
  // users holding the 'administer biblio' permission.
  $items['admin/config/content/biblio/advanced_import'] = array(
    'title' => 'Advanced Import',
    'page callback' => 'drupal_get_form',
    // NOTE(review): the form ID handed to drupal_get_form() is not visible in
    // this excerpt; presumably biblio_advanced_import_settings_form, which
    // other docblocks in this file refer to - confirm.
    'page arguments' => array(
    'access arguments' => array(
      'administer biblio',
    // NOTE(review): 'file' is an empty string here. If the form builder lives
    // in a separate include file, its name has to be given - confirm.
    'file' => '',
    'type' => MENU_LOCAL_TASK,
    // Positions this tab relative to its sibling local tasks.
    'weight' => 2,
  return $items;

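/**
 * Implements hook_node_presave().
 *
 * Runs the configurable duplicate detection for biblio nodes that are about
 * to be inserted.
 */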
function biblio_advanced_import_node_presave($node) {
  // NOTE(review): this excerpt appears to be missing lines (closing braces,
  // `break;` statements and the receivers of several `->` call chains). The
  // comments below describe only what is visible - confirm against the
  // complete file.
  //
  // Only nodes of type 'biblio' without a nid (i.e. about to be inserted) are
  // processed; saves of existing nodes are left alone.
  if ('biblio' == $node->type && empty($node->nid)) {
    $verbose_messages = (bool) variable_get('biblio_advanced_import_verbose_messages', '0');
    if (variable_get('biblio_auto_citekey', 1)) {

      // on new entries, override citekeys generated by parsers, depending on settings
      // (an empty result from the helper leaves the existing citekey untouched)
      $citekey = biblio_advanced_import_create_citekey($node);
      if ($citekey) {
        $node->biblio_citekey = $citekey;
    // Base query for the duplicate lookup: biblio rows joined to their node
    // on both nid and vid.
    // NOTE(review): presumably this restricts the lookup to current
    // revisions - confirm.
    $query = db_select('biblio', 'b');
    $alias = $query
      ->innerJoin('node', 'n', 'b.nid = n.nid AND b.vid = n.vid');
      ->fields('b', array(
    // $skip: keep the existing duplicate unmodified instead of merging.
    // $revision: create a new revision when merged data changed something.
    $skip = FALSE;
    $revision = FALSE;
    // The duplicate strategy decides which duplicates are selected (ordering
    // and range) and how they are treated.
    switch (variable_get('biblio_advanced_import_duplicate_strategy', 'create duplicate')) {
      case 'create duplicate':
        if ($verbose_messages) {
          drupal_set_message(t('Creating duplicate of node %node_title', array(
            '%node_title' => $node->title,
      case 'skip import':

        // There's no way to stop an already running node_save()
        // in a safe way without breaking a batch process.
        // So we do a little trick to realize the 'skip import':
        // We simply replace the current node to be saved by the
        // unmodified oldest duplicate and save this one instead
        //
        // NOTE(review): the comment above says "oldest", but the visible sort
        // is b.nid DESC, which selects the highest nid. 'update oldest' below
        // uses ASC - confirm which order is intended here.
        $skip = TRUE;
          ->orderBy('b.nid', 'DESC')
          ->range(0, 1);
      // Each 'new rev ...' case only sets $revision.
      // NOTE(review): presumably it then falls through to the matching
      // 'update ...' case for the ordering - confirm that no break is intended.
      case 'new rev latest':
        $revision = TRUE;
      case 'update latest':
          ->orderBy('b.nid', 'DESC')
          ->range(0, 1);
      case 'new rev oldest':
        $revision = TRUE;
      case 'update oldest':
          ->orderBy('b.nid', 'ASC')
          ->range(0, 1);
      case 'new rev all':
        $revision = TRUE;
      case 'update all':
    // Collect the detection criteria. $condition_exists records whether at
    // least one criterion contributed a condition; the lookup below only runs
    // in that case.
    // NOTE(review): the receivers of the ->condition() calls are not visible;
    // presumably they are added to $or_condition so that any single criterion
    // is enough for a match - confirm.
    $condition_exists = FALSE;
    $or_condition = db_or();
    foreach (variable_get('biblio_advanced_import_detect_duplicate_strategy', array(
      'md5' => 'md5',
    )) as $field) {
      switch ((string) $field) {
        case 'md5':
          // Compare the stored hash with the hash of the incoming node.
            ->condition('b.biblio_md5', biblio_advanced_import_hash($node));
          $condition_exists = TRUE;
        case 'isbn':
        case 'issn':
        case 'doi':
          // These three map directly to a biblio column named biblio_<field>
          // and are only used when the incoming node carries a value.
          $field_property = 'biblio_' . $field;
          if (!empty($node->{$field_property})) {
              ->condition('b.' . $field_property, $node->{$field_property});
            $condition_exists = TRUE;
        case 'pubmed':
          // The PubMed ID is matched against the biblio_pubmed table and is
          // only considered when the biblio_pm module is enabled.
          // NOTE(review): if this inner join is applied to $query itself,
          // records without a biblio_pubmed row can no longer match any of
          // the other criteria - confirm that a left join is not needed.
          if (module_exists('biblio_pm') && !empty($node->biblio_pubmed_id)) {
              ->innerJoin('biblio_pubmed', 'bp', 'b.nid = bp.nid');
              ->condition('bp.biblio_pubmed_id', $node->biblio_pubmed_id);
            $condition_exists = TRUE;
    if ($condition_exists) {
      $result = $query
      $is_first_duplicate = TRUE;
      // Snapshot of the incoming values as an array: $node itself gets
      // overwritten with the first duplicate further down, while further
      // duplicates still need the imported data.
      $node_new = (array) $node;
      while ($row = $result
        ->fetchObject()) {

        // there are duplicates:
        $node_old = node_load($row->nid);

        // we need to set this or the node module will throw notices
        // (if this node becomes the one to be really saved instead of the original one)
        $node_old->is_new = FALSE;
        if (!$skip) {

          // update an existing biblio node with new data
          if ($verbose_messages) {
            drupal_set_message(t('Updating node %node_title (node %nid)', array(
              '%nid' => $node_old->nid,
              '%node_title' => $node_old->title,
          // $merge becomes TRUE as soon as one property of the existing node
          // was actually changed.
          $merge = FALSE;
          foreach ($node_new as $key => $value) {
            // Only properties starting with 'biblio' (except biblio_citekey)
            // or with 'contributors', plus 'title', take part in the merge.
            if (strpos($key, 'biblio') === 0 && 'biblio_citekey' != $key || strpos($key, 'contributors') === 0 || 'title' == $key) {
              $strategy = variable_get('biblio_advanced_import_merge_strategy', 'override');
              // Merge strategies:
              // - 'override': always take the imported value.
              // - 'override but keep additional': take it when it is non-empty.
              // - 'add new': take it when it is non-empty and the existing
              //   value is empty.
              // - 'override existing non empty': take it (even if empty) when
              //   the existing value is non-empty.
              // - 'override existing non empty with non empty': take it when
              //   both values are non-empty.
              if ('override' == $strategy || 'override but keep additional' == $strategy && !empty($value) || 'add new' == $strategy && !empty($value) && empty($node_old->{$key}) || 'override existing non empty' == $strategy && !empty($node_old->{$key}) || 'override existing non empty with non empty' == $strategy && !empty($value) && !empty($node_old->{$key})) {
                // Write only when the property is new or differs (loose
                // comparison), so unchanged nodes do not count as merged.
                if (!property_exists($node_old, $key) || $node_old->{$key} != $value) {
                  $node_old->{$key} = $value;
                  $merge = TRUE;
          // A new revision is requested only if the strategy asks for one and
          // the merge changed something.
          if ($revision && $merge) {
            $node_old->revision = TRUE;
            $node_old->log = t('New revision created automatically by Biblio Advanced Import.');
        else {

          // There's no way to stop an already running node_save()
          // in a safe way without breaking the batch process.
          // So we use a little trick to implement the 'skip import':
          // We replace the current node to be saved with the
          // unmodified first duplicate and let drupal save that one instead.
          if ($verbose_messages) {
            drupal_set_message(t('Skipping update of node %node_title (node %nid)', array(
              '%nid' => $node_old->nid,
              '%node_title' => $node_old->title,
        if ($is_first_duplicate) {

          // the content of the node being saved gets replaced with the values from the first duplicate node
          // (replacing the whole object with the loaded node did not seem to work
          // so we do it property by property ...)
          $is_first_duplicate = FALSE;

          // clear existing object
          // NOTE(review): the loop body is not visible in this excerpt;
          // presumably every property of $node is unset - confirm.
          foreach (get_object_vars($node) as $key => $value) {

          // copy values over
          foreach (get_object_vars($node_old) as $key => $value) {
            $node->{$key} = $value;
        else {

          // save any other existing duplicates, with values updated

/**
 * Implements hook_node_insert().
 */
function biblio_advanced_import_node_insert($node) {
  // Reacts to nodes of type 'biblio' only.
  // NOTE(review): the statements guarded by this condition are not visible in
  // this excerpt; presumably the stored hash is refreshed through
  // biblio_advanced_import_update_hash() - confirm.
  if ('biblio' == $node->type) {

/**
 * Implements hook_node_update().
 */
function biblio_advanced_import_node_update($node) {
  // Reacts to nodes of type 'biblio' only.
  // NOTE(review): the statements guarded by this condition are not visible in
  // this excerpt; presumably the stored hash is refreshed through
  // biblio_advanced_import_update_hash() - confirm.
  if ('biblio' == $node->type) {

/**
 * Helper function to create a hash from a biblio node.
 *
 * Which node fields contribute to the hash depends on the configurable
 * duplicate detection criteria.
 *
 * @see biblio_advanced_import_settings_form()
 *
 * @param $node
 *   A biblio node.
 *
 * @return
 *   An md5 hash.
 */
function biblio_advanced_import_hash($node) {
  // Fields contributing to the hash; falsy entries in the stored setting are
  // skipped below.
  $duplicate_criteria = variable_get('biblio_advanced_import_duplicate_criteria', array(
    'title' => 'title',
    'biblio_year' => 'biblio_year',
  ));

  $hash_string = '';
  foreach ($duplicate_criteria as $field) {
    if (!$field) {
      continue;
    }

    $field_value = isset($node->{$field}) ? $node->{$field} : '';

    if ('biblio_year' == $field && (empty($field_value) || $field_value == t('Submitted'))) {
      // If the year is empty, it will be set to 9999 by biblio on save.
      // 9999 => "Submitted"
      // Therefore we have to do the same here to not break duplicate
      // detection. Furthermore, on load this magic value is replaced with a
      // translated version of the string "Submitted"; we hit this case when
      // we rehash after a configuration change. All these cases are
      // standardized on 9999 to arrive at consistent hash values.
      $field_value = 9999;
    }

    // Fields without a usable value do not contribute to the hash.
    if (!$field_value) {
      continue;
    }

    if (is_array($field_value) || is_object($field_value)) {
      // Structured values are hashed by their serialized representation.
      $hash_string .= serialize($field_value);
    }
    else {
      // Scalars: only the first 256 characters count, compared
      // case-insensitively and with all whitespace removed.
      $hash_string .= preg_replace("/\\s+/", '', mb_strtolower(mb_substr((string) $field_value, 0, 256)));
    }
  }

  // The final strtolower() also lowercases serialized parts. It is kept as is
  // because changing it would alter hashes that are already stored.
  return md5(strtolower($hash_string));
}

/**
 * Helper function to update the hash of a biblio node.
 *
 * @see biblio_advanced_import_settings_form()
 *
 * @param $node
 *   A biblio node; its biblio_md5 property is overwritten.
 */
function biblio_advanced_import_update_hash(&$node) {
  // Recompute the hash from the node's current field values and keep it on
  // the node object.
  $node->biblio_md5 = biblio_advanced_import_hash($node);
  // NOTE(review): the statement owning the lines below is not visible in this
  // excerpt; presumably a db_update() on the biblio table that stores the new
  // hash for exactly this nid/vid pair and is then executed - confirm.
    'biblio_md5' => $node->biblio_md5,
    ->condition('nid', $node->nid)
    ->condition('vid', $node->vid)

/**
 * Helper function to create a configurable biblio node citekey.
 *
 * @see biblio_advanced_import_settings_form()
 * @see biblio_advanced_import_settings_form_submit()
 *
 * @param $node
 *   A biblio node.
 *
 * @return
 *   The citekey, or an empty string if none was created.
 */
function biblio_advanced_import_create_citekey($node) {
  // An empty string means "no citekey created"; the caller then keeps
  // whatever citekey the node already has.
  $citekey = '';

  switch (variable_get('biblio_advanced_import_citekey_creation_strategy', 'biblio')) {
    case 'fields':
      // Join the optional prefix and the configured, non-empty node fields
      // with a pipe symbol.
      $citekey_parts = array();
      $prefix = variable_get('biblio_citekey_prefix', '');
      if (!empty($prefix)) {
        $citekey_parts[] = $prefix;
      }
      foreach (variable_get('biblio_advanced_import_citekey_creation', array(
        'title' => 'title',
        'biblio_year' => 'biblio_year',
      )) as $field) {
        if (!empty($field) && !empty($node->{$field})) {
          $citekey_parts[] = $node->{$field};
        }
      }
      $citekey = implode('|', $citekey_parts);

      // biblio stores citekey as varchar(255), we need to make sure it fits
      // or a PDO Exception is thrown
      $citekey = mb_substr($citekey, 0, 255);

      // strip trailing pipe symbol, if any (truncation may have cut right
      // behind a separator)
      $citekey = preg_replace('@\\|+$@', '', $citekey);
      $citekey = trim($citekey);
      break;
  }

  if ($citekey) {
    $citekey_exists = db_query("SELECT 1 FROM {biblio} WHERE biblio_citekey = :biblio_citekey", array(
      ':biblio_citekey' => $citekey,
    ))->fetchField();

    if ($citekey_exists) {
      switch (variable_get('biblio_advanced_import_duplicate_citekey_strategy', 'skip')) {
        case 'skip':
          // Must not fall through: otherwise the emptied key would get a
          // counter appended and come back as '|<n>'.
          $citekey = '';
          break;

        case 'append counter':
          $counter = variable_get('biblio_advanced_import_citekey_creation_counter', 0) + 1;
          variable_set('biblio_advanced_import_citekey_creation_counter', $counter);

          // biblio stores citekey as varchar(255), so the base key is
          // shortened as far as needed for the '|<counter>' suffix to fit.
          $suffix = '|' . $counter;
          $citekey = mb_substr($citekey, 0, 255 - strlen($suffix)) . $suffix;
          break;
      }
    }
  }

  return $citekey;
}

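/**
 * Implements hook_form_FORM_ID_alter() for biblio_admin_settings.
 *
 * When citekeys are created from fields by this module, biblio's own citekey
 * settings are turned into fixed 'value' elements carrying their defaults.
 */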
function biblio_advanced_import_form_biblio_admin_settings_alter(&$form, &$form_state) {
  // Nothing to do unless this module builds citekeys from fields.
  if ('fields' != variable_get('biblio_advanced_import_citekey_creation_strategy', 'biblio')) {
    return;
  }

  // biblio's own citekey settings have no effect in that mode: turn them into
  // 'value' elements that simply carry their current default through the
  // form submission.
  $citekey_elements = array(
    'biblio_citekey_field1',
    'biblio_citekey_field2',
    'biblio_citekey_phpcode',
  );
  foreach ($citekey_elements as $element) {
    // Skip elements the form does not define instead of creating phantom
    // ones (and triggering undefined index notices).
    if (!isset($form['citekey'][$element])) {
      continue;
    }
    $form['citekey'][$element]['#type'] = 'value';
    $form['citekey'][$element]['#value'] = isset($form['citekey'][$element]['#default_value']) ? $form['citekey'][$element]['#default_value'] : NULL;
  }
}

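/**
 * Implements optional data cleanup / normalization for imported biblio nodes.
 *
 * Each workaround can be activated separately on the "advanced import" tab.
 *
 * @param $node
 *   A biblio node; its ISSN, ISBN, DOI, title and URL may be modified.
 */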
function biblio_advanced_import_pitfall_workarounds(&$node) {
  // NOTE(review): this excerpt appears to be missing lines (closing braces,
  // `break;` statements, else bodies and the method calls on the ISBNtest
  // object). The comments below describe only what is visible - confirm
  // against the complete file.
  //
  // Five independent settings are evaluated in turn, one each for ISSN, ISBN,
  // DOI, title and URL. 'as is' is the default for every one of them.

  // --- ISSN ---
  switch (variable_get('biblio_advanced_import_fix_issn', 'as is')) {
    case 'as is':
    case 'normalize from isbn':
      // NOTE(review): with `||` this also overwrites a non-empty ISSN whenever
      // an ISBN is present, and copies an empty/unset ISBN when both are
      // empty. "ISSN empty AND ISBN present" (`&&`) looks like the intended
      // condition - confirm.
      if (empty($node->biblio_issn) || !empty($node->biblio_isbn)) {

        // RIS format does not distinguish between ISBN and ISSN
        $node->biblio_issn = $node->biblio_isbn;

    // no break
    case 'normalize':

      // @see
      if (!empty($node->biblio_issn)) {
        // Pick the first token that looks like an ISSN: four digits, an
        // optional hyphen, then four characters out of 0-9/X.
        if (preg_match("@\\b([0-9]{4})-?([0-9X]{4})\\b@i", $node->biblio_issn, $matches)) {
          // Normalized form: eight characters, no hyphen, upper case.
          $issn = strtoupper($matches[1] . $matches[2]);
          // Weighted sum of the first seven characters with weights 8 down
          // to 2.
          // NOTE(review): the pattern allows 'X' in positions 5-7 as well,
          // which would reach this arithmetic as a non-numeric string -
          // confirm that this is acceptable.
          $sum = 0;
          for ($i = 0; $i < 7; $i++) {
            $sum += $issn[$i] * (8 - $i);
          // NOTE(review): when $sum % 11 is 0 this yields 11, which matches
          // neither a digit nor 'X', so an ISSN with check digit 0 would be
          // treated as invalid - confirm whether a final "% 11" is missing.
          $checksum = 11 - $sum % 11;
          // A check value of 10 is written as 'X'.
          if ($checksum == $issn[7] || 10 == $checksum && 'X' == $issn[7]) {
            $node->biblio_issn = $issn;
          else {
        else {
  // --- ISBN ---
  switch (variable_get('biblio_advanced_import_fix_isbn', 'as is')) {
    case 'as is':
    case 'remove':

      // @see
      if (!empty($node->biblio_isbn)) {
        // Validation is delegated to the ISBNtest class shipped in
        // lib/isbntest.class.php of this module.
        module_load_include('class.php', 'biblio_advanced_import', 'lib/isbntest');
        $currISBN = new ISBNtest();
        // Accepted formats: ISBN-10, ISBN-13 and GTIN-14.
        if ($currISBN
          ->valid_isbn10() || $currISBN
          ->valid_isbn13() || $currISBN
          ->valid_gtin14()) {
          $node->biblio_isbn = $currISBN
        else {
    case 'convert 13':

      // @see
      if (!empty($node->biblio_isbn)) {
        // Only values containing a run of at least ten digits/hyphens are
        // handed to the validator.
        if (preg_match("@[0-9\\-]{10,}@", $node->biblio_isbn, $matches)) {
          module_load_include('class.php', 'biblio_advanced_import', 'lib/isbntest');
          $currISBN = new ISBNtest();
          if ($currISBN
            ->valid_isbn13()) {
            $node->biblio_isbn = $currISBN
          elseif ($currISBN
            ->valid_gtin14()) {
            $node->biblio_isbn = $currISBN
          else {
        else {
  // --- DOI ---
  switch (variable_get('biblio_advanced_import_fix_doi', 'as is')) {
    case 'as is':
    case 'one valid':

      // @see
      if (!empty($node->biblio_doi)) {
        // Keep only the first DOI-shaped token: "10.", at least four digits,
        // a slash, then everything up to the next whitespace.
        if (preg_match("@10\\.\\d{4,}/[^\\s]+@i", $node->biblio_doi, $matches)) {
          $node->biblio_doi = $matches[0];
        else {
  // --- Title ---
  switch (variable_get('biblio_advanced_import_fix_title', 'as is')) {
    case 'as is':
    case 'mendeley bibtex':
      if (!empty($node->title)) {

        // strip off enclosing curly braces, but only a matching pair
        $node->title = preg_replace('@^\\{(.*)\\}$@', '$1', $node->title);
  // --- URL ---
  switch (variable_get('biblio_advanced_import_fix_url', 'as is')) {
    case 'as is':
    case 'one valid':
      if (!empty($node->biblio_url)) {
        // Find the first http(s) URL up to the next whitespace.
        if (preg_match("@(http|https)://[^\\s]+@i", $node->biblio_url, $matches)) {

          // ris import runs together lists of urls without a delimiter
          // (the match is split at every 'http' and only the first piece is
          // kept; the str_replace() arguments are not visible in this excerpt)
          $urls = explode('http', str_replace(array(
          ), array(
          ), $matches[0]));
          $node->biblio_url = 'http' . $urls[1];
        else {


