You are here

class PARSECREATORS in Bibliography Module 6

Same name and namespace in other branches
  1. 5 bibtexParse/PARSECREATORS.php \PARSECREATORS
  2. 6.2 modules/bibtexParse/PARSECREATORS.php \PARSECREATORS
  3. 7.3 plugins/biblio_style/bibtex/PARSECREATORS.php \PARSECREATORS
  4. 7 modules/bibtexParse/PARSECREATORS.php \PARSECREATORS
  5. 7.2 modules/bibtexParse/PARSECREATORS.php \PARSECREATORS

Hierarchy

Expanded class hierarchy of PARSECREATORS

File

bibtexParse/PARSECREATORS.php, line 178

View source
/**
 * Splits BibTeX creator (author/editor) strings into structured name parts.
 *
 * Each parsed creator is an associative array with the keys 'firstname',
 * 'initials', 'lastname', 'prefix', 'type' and 'md5'.
 */
class PARSECREATORS {

  /**
   * Every creator parsed so far; parseArray() appends to this list.
   *
   * @var array
   */
  public $authors = array();

  /**
   * Lowercase-initial tokens (e.g. 'von', 'de') collected by
   * grabFirstnameInitials() while parsing the current creator.
   *
   * @var array
   */
  public $prefix = array();

  /**
   * Lookup used by parseAuthor() to translate $type into the 'type' value.
   *
   * Nothing in this class populates it; presumably the caller assigns it
   * before parsing (TODO confirm against callers).
   *
   * @var array
   */
  public $typeMap = array();

  /**
   * Constructor; nothing to initialise beyond the property defaults.
   */
  public function __construct() {
  }

  /**
   * Legacy PHP 4 style constructor, kept so explicit calls keep working.
   *
   * Declared after __construct() so that __construct() is the constructor on
   * every PHP version.
   */
  public function PARSECREATORS() {
  }

  /**
   * Parses a full creator field such as "A. Smith and Jones, Bob".
   *
   * @param string $input
   *   Raw field value; creators are delimited by ' and ' or ' & '.
   * @param string $type
   *   Key looked up in $this->typeMap for every parsed creator.
   *
   * @return array
   *   The creators parsed from $input (also appended to $this->authors).
   */
  public function parse($input, $type = 'author') {
    // Split on ' and ' / ' & ' (case-insensitive, any whitespace around it).
    $authorArray = preg_split("/\\s(and|&)\\s/i", trim($input));
    return $this->parseArray($authorArray, $type);
  }

  /**
   * Parses a list of single-creator strings.
   *
   * @param array $authorArray
   *   One raw creator string per element.
   * @param string $type
   *   Key looked up in $this->typeMap for every parsed creator.
   *
   * @return array
   *   The creators parsed by this call, in input order. They are also
   *   appended to $this->authors, which keeps accumulating across calls.
   */
  public function parseArray($authorArray, $type = 'author') {
    $parsed = array();
    foreach ($authorArray as $author) {
      // A blank entry would only yield an empty creator record; skip it.
      if (trim($author) === '') {
        continue;
      }
      $creator = $this->parseAuthor($author, $type);
      $this->authors[] = $creator;
      $parsed[] = $creator;
    }
    return $parsed;
  }

  /**
   * Creates a creator array from a single BibTeX name.
   *
   * The name can take one of these forms:
   *   1. <first-tokens> <von-tokens> <last-tokens>
   *   2. <von-tokens> <last-tokens>, <first-tokens>
   *   3. <von-tokens> <last-tokens>, <jr-tokens>, <first-tokens>
   *
   * @param string $value
   *   A single creator name.
   * @param string $type
   *   Key looked up in $this->typeMap; the result is stored under 'type'
   *   (NULL when the map has no such key).
   *
   * @return array
   *   Associative array with 'firstname', 'initials', 'lastname', 'prefix',
   *   'type' and 'md5'.
   */
  public function parseAuthor($value, $type = 'author') {
    $appellation = $prefix = $surname = '';
    $this->prefix = array();

    // Trim and collapse runs of whitespace once, for every name form.
    $normalized = preg_replace("/\\s{2,}/", ' ', trim($value));
    $author = explode(",", $normalized);
    $size = sizeof($author);

    if ($size == 1) {
      // No commas, so something like "Mark Grimshaw", "M N Grimshaw" or
      // "Mark N. Grimshaw".
      //
      // A surname may be wrapped in {...}. A brace group that starts with a
      // backslash is more likely a LaTeX special character, and the second
      // pattern also rules out nested forms such as a{\"{o}}.
      if (preg_match("/(.*){([^\\\\].*)}/", $normalized, $matches) && !preg_match("/(.*){\\\\.{.*}.*}/", $normalized)) {
        $author = explode(" ", $matches[1]);
        $surname = $matches[2];
      }
      else {
        $author = explode(" ", $normalized);
        // The last token is the surname (no prefix if entered correctly).
        $surname = array_pop($author);
      }
    }
    else {
      if ($size > 2) {
        // The second comma-separated part is 'Jr.', 'IV' etc.
        $appellation = join(' ', array_splice($author, 1, 1));
      }
      // The first part is the surname, perhaps with a prefix.
      list($surname, $prefix) = $this->grabSurname(array_shift($author));
    }

    // Whatever is left holds the first names and/or initials.
    list($firstname, $initials) = $this->grabFirstnameInitials(join(" ", $author));

    // Lowercase tokens found among the first names override the prefix
    // taken from the surname part.
    if (!empty($this->prefix)) {
      $prefix = join(' ', $this->prefix);
    }

    // NOTE(review): utf8_encode() treats its input as ISO-8859-1 and is
    // deprecated since PHP 8.2; kept so the produced values (and therefore
    // the md5 keys) stay unchanged. Confirm the real input encoding before
    // replacing it.
    $creator = array(
      'firstname' => utf8_encode(trim($firstname)),
      'initials' => utf8_encode(trim($initials)),
      'lastname' => utf8_encode(trim($surname . ' ' . $appellation)),
      'prefix' => trim($prefix),
    );
    $creator['type'] = isset($this->typeMap[$type]) ? $this->typeMap[$type] : NULL;
    $creator['md5'] = $this->md5sum($creator);
    return $creator;
  }

  /**
   * Builds a hash from a creator's first name, initials and last name.
   *
   * @param array $creator
   *   Creator array with 'firstname', 'initials' and 'lastname' keys.
   *
   * @return string
   *   md5 of the three parts concatenated, lowercased via
   *   drupal_strtolower() and with all spaces removed.
   */
  public function md5sum($creator) {
    $string = $creator['firstname'] . $creator['initials'] . $creator['lastname'];
    $string = str_replace(' ', '', drupal_strtolower($string));
    return md5($string);
  }

  /**
   * Separates first names from initials.
   *
   * Input may look like "A.B.C.", "A. B. C. " or " A B C ". Tokens whose
   * first character is a lowercase ASCII letter are not returned; they are
   * appended to $this->prefix instead.
   *
   * @param string $remainder
   *   Space-separated name tokens.
   *
   * @return array
   *   array($firstname, $initials). $initials keeps a leading space before
   *   each initial group; callers are expected to trim it.
   */
  public function grabFirstnameInitials($remainder) {
    $firstnameArray = array();
    $initials = '';
    foreach (explode(" ", $remainder) as $value) {
      if ($this->startsWithLowercaseAscii($value)) {
        $this->prefix[] = $value;
        continue;
      }
      $token = trim($value);
      if (preg_match("/[a-zA-Z]{2,}/", $token)) {
        // Two or more consecutive letters: treat as a full first name.
        $firstnameArray[] = $token;
      }
      else {
        // Otherwise an initial group; dots become spaces ("A.B." -> "A B").
        $initials .= ' ' . trim(str_replace(".", " ", $token));
      }
    }
    return array(
      join(" ", $firstnameArray),
      $initials,
    );
  }

  /**
   * Separates a leading lowercase prefix from a surname.
   *
   * A surname may carry a title such as 'den', 'von' or 'de la',
   * characterised by a lowercase first character. Once a token that does
   * not start lowercase has been seen, every following token belongs to the
   * surname (e.g. "Van den Bussche").
   *
   * @param string $input
   *   The surname part of a name.
   *
   * @return array
   *   array($surname, $prefix). $surname is FALSE when no surname token was
   *   found; $prefix is FALSE when there is no prefix.
   */
  public function grabSurname($input) {
    $surnameParts = array();
    $prefixParts = array();
    foreach (explode(" ", $input) as $value) {
      if (empty($surnameParts) && $this->startsWithLowercaseAscii($value)) {
        $prefixParts[] = $value;
      }
      else {
        $surnameParts[] = $value;
      }
    }
    return array(
      empty($surnameParts) ? FALSE : join(" ", $surnameParts),
      empty($prefixParts) ? FALSE : join(" ", $prefixParts),
    );
  }

  /**
   * Tells whether the first byte of $value is an ASCII letter a-z.
   */
  private function startsWithLowercaseAscii($value) {
    $code = ord(substr($value, 0, 1));
    return $code >= 97 && $code <= 122;
  }

}

Members