PARSECREATORS.php in Bibliography Module 7
Same filename and directory in other branches
Classes Creators and PARSECREATORS.
File
modules/bibtexParse/PARSECREATORS.php (View source)
<?php
/**
* @file
* Classes Creators and PARSECREATORS.
*/
/**
 * Collection of contributors ("creators") with database persistence.
 *
 * Builds on PARSECREATORS to turn free-form name strings into structured
 * creator arrays, and adds loading from and saving to the
 * {biblio_contributor}, {biblio_contributor_data} and
 * {biblio_contributor_type} tables using the Drupal 7 database API.
 */
class Creators extends PARSECREATORS {

  /**
   * Creator arrays, in the order they were parsed or loaded.
   *
   * @var array
   */
  protected $authors = array();

  /**
   * Not referenced by any method of this class; kept for compatibility.
   *
   * @var array
   */
  private $existing_authors = array();

  /**
   * Map of {biblio_contributor_type}.type => ctid, see buildTypeMap().
   *
   * @var array
   */
  protected $typeMap = array();

  /**
   * Map of cid => md5 for every row of {biblio_contributor_data}.
   *
   * @var array
   */
  private $md5 = array();

  /**
   * Builds the type map and optionally seeds the collection.
   *
   * @param mixed $init
   *   An array is handed to setCreators(); a numeric value is treated as a
   *   revision id and handed to loadCreators(); anything else is ignored.
   */
  public function __construct($init = NULL) {
    $this->buildTypeMap();
    if (is_array($init)) {
      $this->setCreators($init);
    }
    elseif (is_numeric($init)) {
      $this->loadCreators($init);
    }
  }

  /**
   * Fills $this->typeMap from the {biblio_contributor_type} table.
   */
  public function buildTypeMap() {
    $result = db_query("SELECT * FROM {biblio_contributor_type} ;");
    // Drupal 7 statements are iterable and yield one object per row;
    // db_fetch_object() no longer exists.
    foreach ($result as $type) {
      $this->typeMap[$type->type] = $type->ctid;
    }
  }

  /**
   * Looks up contributor records whose last name contains $name as a word.
   *
   * @param string $name
   *   The (partial) last name to search for.
   *
   * @return array
   *   The matching {biblio_contributor_data} rows as objects; empty when
   *   nothing matches. Previously the query result was discarded.
   */
  public function getCreatorByName($name) {
    // [[:<:]], [[:>:]] These stand for word boundaries; see biblio.pages.inc.
    // NOTE(review): preg_quote() escapes for PCRE, not for the database's own
    // regex dialect; kept as in the original.
    $result = db_query('SELECT *
      FROM {biblio_contributor_data}
      WHERE lastname RLIKE :lastname', array(
      ':lastname' => '[[:<:]]' . preg_quote($name) . '[[:>:]]',
    ));
    return $result->fetchAll();
  }

  /**
   * Returns the number of creators currently held.
   *
   * @return int
   */
  public function getCreatorCount() {
    return count($this->authors);
  }

  /**
   * Formats all creators as "firstname initials lastname", comma separated.
   *
   * Entries are ordered by their 'rank' value. Creators that were parsed
   * rather than loaded carry no 'rank', so their position in the collection
   * is used instead.
   *
   * @return string
   *   The joined names, or an empty string when there are no creators.
   */
  public function getCreatorString() {
    // Start from an empty list so ksort()/implode() never see an undefined
    // variable when the collection is empty.
    $names = array();
    foreach ($this->authors as $key => $author) {
      $rank = isset($author['rank']) ? $author['rank'] : $key;
      $names[$rank] = $author['firstname'] . ' ' . $author['initials'] . ' ' . $author['lastname'];
    }
    ksort($names);
    return implode(', ', $names);
  }

  /**
   * Loads the cid => md5 lookup table from {biblio_contributor_data}.
   */
  private function loadMD5() {
    // fetchAllKeyed() keys the result by the first column (cid) and uses the
    // second column (md5) as the value.
    $this->md5 = db_query('SELECT cid, md5 FROM {biblio_contributor_data} ')->fetchAllKeyed();
  }

  /**
   * Appends the contributors stored for a revision to the collection.
   *
   * Each appended entry is an associative array with the keys lastname,
   * firstname, initials, affiliation, type and rank.
   *
   * @param int $vid
   *   The revision id to load contributors for.
   */
  public function loadCreators($vid) {
    $query = 'SELECT bcd.lastname, bcd.firstname, bcd.initials,
      bcd.affiliation, bct.type, bc.rank
      FROM {biblio_contributor} bc,
      {biblio_contributor_data} bcd,
      {biblio_contributor_type} bct
      WHERE bc.vid = :vid
      AND bc.cid = bcd.cid
      AND bc.ctid = bct.ctid
      ORDER BY bc.ctid ASC, bc.rank ASC;';
    // Fetch rows as arrays because the rest of the class indexes creators
    // with array syntax.
    $result = db_query($query, array(':vid' => $vid), array('fetch' => PDO::FETCH_ASSOC));
    foreach ($result as $creator) {
      $this->authors[] = $creator;
    }
  }

  /**
   * Writes the collection to the database for a node revision.
   *
   * Existing links for the revision are removed first. A creator is matched
   * to an existing {biblio_contributor_data} row by its md5 hash; when no
   * match exists a new row is written. Does nothing when the collection is
   * empty.
   *
   * @param int $nid
   *   The node id.
   * @param int $vid
   *   The revision id.
   */
  public function saveCreators($nid, $vid) {
    if (empty($this->authors)) {
      return;
    }
    $this->loadMD5();
    db_delete('biblio_contributor')
      ->condition('nid', $nid)
      ->condition('vid', $vid)
      ->execute();
    foreach ($this->authors as $rank => $author) {
      if (empty($author['cid']) && !empty($author['md5']) && !empty($this->md5)) {
        // Strict comparison: two different hashes of the form "0e123..."
        // would otherwise compare equal as numeric strings.
        $known_cid = array_search($author['md5'], $this->md5, TRUE);
        if ($known_cid !== FALSE) {
          $author['cid'] = $known_cid;
        }
      }
      if (empty($author['cid'])) {
        // drupal_write_record() takes the record by reference and fills in
        // the serial key of a newly inserted row, which replaces the removed
        // db_last_insert_id().
        drupal_write_record('biblio_contributor_data', $author);
      }
      $link_array = array(
        'nid' => $nid,
        'vid' => $vid,
        'cid' => isset($author['cid']) ? $author['cid'] : NULL,
        'rank' => $rank,
        'ctid' => $author['type'],
      );
      drupal_write_record('biblio_contributor', $link_array);
    }
  }

  /**
   * Returns every creator in the collection.
   *
   * @return array
   */
  public function getAuthorArray() {
    return $this->authors;
  }

  /**
   * Returns the creator stored at the given position.
   *
   * @param int $rank
   *   Index into the collection.
   *
   * @return array|null
   *   The creator array, or NULL when no creator exists at that position.
   */
  public function getAuthor($rank) {
    return isset($this->authors[$rank]) ? $this->authors[$rank] : NULL;
  }

  /**
   * Update object with an array of authors.
   *
   * @param $authors
   *   an array containing two keys "name" and "type"
   *   the name is the full name of the contributor which will be parsed into
   *   component pieces, and type contains a string indicating the author type.
   *   Entries without a non-blank name are skipped; a missing type falls back
   *   to 'author', the default used by setCreator().
   */
  public function setCreators($authors) {
    foreach ($authors as $author) {
      if (!isset($author['name']) || !strlen(trim($author['name']))) {
        continue;
      }
      $type = isset($author['type']) ? $author['type'] : 'author';
      $this->authors[] = $this->parseAuthor($author['name'], $type);
    }
  }

  /**
   * Parses a single name and appends it to the collection.
   *
   * @param string $author
   *   The full name of the contributor.
   * @param string $type
   *   The contributor type, a key of the type map.
   */
  public function setCreator($author, $type = 'author') {
    $this->authors[] = $this->parseAuthor($author, $type);
  }

}
/**
* Released through http://bibliophile.sourceforge.net under the GPL licence.
* Do whatever you like with this -- some credit to the author(s) would be appreciated.
*
* A collection of PHP classes to manipulate bibtex files.
*
* If you make improvements, please consider contacting the administrators at bibliophile.sourceforge.net so that your improvements can be added to the release package.
*
* Mark Grimshaw 2004/2005
* http://bibliophile.sourceforge.net
*
* 28/04/2005 - Mark Grimshaw.
* Efficiency improvements.
*
* 11/02/2006 - Daniel Reidsma.
* Changes to preg_matching to account for Latex characters in names such as {\"{o}}
*
* For a quick command-line test (php -f PARSECREATORS.php) after installation, uncomment these lines:
*
* $authors = "Mark \~N. Grimshaw and Bush III, G.W. & M. C. H{\\'a}mmer Jr. and von Frankenstein, Ferdinand Cecil, P.H. & Charles Louis Xavier Joseph de la Vallee P{\\\"{o}}ussin";
* $creator = new PARSECREATORS();
* $creatorArray = $creator->parse($authors);
* print_r($creatorArray);
*/
#[\AllowDynamicProperties]
class PARSECREATORS {

  // The parser stores results in $this->authors and reads $this->typeMap
  // without declaring either: the Creators subclass declares both as
  // protected, while stand-alone use has always relied on dynamic public
  // properties. The attribute above (a plain comment before PHP 8) keeps that
  // behaviour free of deprecation notices.

  /**
   * Lower-case tokens found among the first names of the last parsed author.
   *
   * Reset by parseAuthor() and filled by grabFirstnameInitials().
   *
   * @var array
   */
  public $prefix = array();

  /**
   * Parses a string holding one or more names joined by ' and ' or ' & '.
   *
   * @param string $input
   *   The raw author string.
   * @param string $type
   *   The contributor type passed on to parseAuthor().
   *
   * @return array
   *   The creator arrays parsed from $input, see parseAuthor().
   */
  public function parse($input, $type = 'author') {
    $input = trim($input);
    // Split on ' and '.
    $authorArray = preg_split("/\\s(and|&)\\s/i", $input);
    return $this->parseArray($authorArray, $type);
  }

  /**
   * Parses a list of single-author strings.
   *
   * Every parsed creator is appended to $this->authors and also returned, so
   * the result of parse() can be used directly. Blank entries are skipped.
   *
   * @param array $authorArray
   *   One name per element.
   * @param string $type
   *   The contributor type passed on to parseAuthor().
   *
   * @return array
   *   The creator arrays parsed by this call.
   */
  public function parseArray($authorArray, $type = 'author') {
    $parsed = array();
    foreach ($authorArray as $author) {
      if (trim($author) === '') {
        continue;
      }
      $creator = $this->parseAuthor($author, $type);
      $this->authors[] = $creator;
      $parsed[] = $creator;
    }
    return $parsed;
  }

  /**
   * Create writer arrays from bibtex input.
   *
   * 'author field can be (delimiters between authors are 'and' or '&'):
   * 1. <first-tokens> <von-tokens> <last-tokens>
   * 2. <von-tokens> <last-tokens>, <first-tokens>
   * 3. <von-tokens> <last-tokens>, <jr-tokens>, <first-tokens>
   *
   * @param string $value
   *   A single author name.
   * @param string $type
   *   Key looked up in $this->typeMap to fill the 'type' element.
   *
   * @return array
   *   Array with the keys firstname, initials, lastname, prefix, type and
   *   md5. 'type' is NULL when $type is not present in the type map.
   */
  public function parseAuthor($value, $type = 'author') {
    $appellation = $prefix = $surname = '';
    $this->prefix = array();
    // Work on the normalised string throughout; the original split the raw
    // value, so stray outer whitespace produced an empty surname.
    $value = preg_replace("/\\s{2,}/", ' ', trim($value));
    $author = explode(",", $value);
    $size = count($author);
    // No commas therefore something like Mark Grimshaw, Mark Nicholas Grimshaw, M N Grimshaw, Mark N. Grimshaw.
    if ($size == 1) {
      // Is complete surname enclosed in {...}, unless the string starts with a backslash (\) because then it is
      // probably a special latex-sign..
      // 2006.02.11 DR: in the last case, any NESTED curly braces should also be taken into account! so second
      // clause rules out things such as author="a{\"{o}}".
      if (preg_match("/(.*) {([^\\\\].*)}/", $value, $matches) && !preg_match("/(.*) {\\\\.{.*}.*}/", $value)) {
        $author = $this->splitOnWhitespace($matches[1]);
        $surname = $matches[2];
      }
      else {
        $author = $this->splitOnWhitespace($value);
        // Last of array is surname (no prefix if entered correctly)
        $surname = (string) array_pop($author);
      }
    }
    elseif ($size == 2) {
      // First of array is surname (perhaps with prefix)
      list($surname, $prefix) = $this->grabSurname(array_shift($author));
    }
    else {
      // Middle of array is 'Jr.', 'IV' etc.
      $appellation = join(' ', array_splice($author, 1, 1));
      // First of array is surname (perhaps with prefix)
      list($surname, $prefix) = $this->grabSurname(array_shift($author));
    }
    $remainder = join(" ", $author);
    list($firstname, $initials) = $this->grabFirstnameInitials($remainder);
    // Lower-case tokens found among the first names take precedence over a
    // prefix found in front of the surname.
    if (!empty($this->prefix)) {
      $prefix = join(' ', $this->prefix);
    }
    $surname = $surname . ' ' . $appellation;
    $creator = array(
      'firstname' => $this->toUtf8(trim($firstname)),
      'initials' => $this->toUtf8(trim($initials)),
      'lastname' => $this->toUtf8(trim($surname)),
      'prefix' => trim((string) $prefix),
    );
    // The type map is only populated by subclasses; avoid an undefined
    // property/index notice when the parser is used on its own.
    $creator['type'] = isset($this->typeMap[$type]) ? $this->typeMap[$type] : NULL;
    $creator['md5'] = $this->md5sum($creator);
    return $creator;
  }

  /**
   * Computes the hash used to recognise an already known creator.
   *
   * @param array $creator
   *   Array with the keys firstname, initials and lastname.
   *
   * @return string
   *   md5 of the lower-cased concatenation of the three name parts with all
   *   spaces removed.
   */
  public function md5sum($creator) {
    $string = $creator['firstname'] . $creator['initials'] . $creator['lastname'];
    // Fall back to PHP's own functions so the class also runs outside of
    // Drupal, as the command-line example in the file header expects.
    if (function_exists('drupal_strtolower')) {
      $string = drupal_strtolower($string);
    }
    elseif (function_exists('mb_strtolower')) {
      $string = mb_strtolower($string, 'UTF-8');
    }
    else {
      $string = strtolower($string);
    }
    return md5(str_replace(' ', '', $string));
  }

  /**
   * Grab firstname and initials which may be of form "A.B.C." or "A. B. C. " or " A B C " etc.
   *
   * Tokens starting with a lower-case ASCII letter are collected in
   * $this->prefix; tokens containing at least two consecutive ASCII letters
   * count as first names; everything else is treated as initials, with dots
   * replaced by spaces.
   *
   * @param string $remainder
   *   The part of the name that is neither surname nor appellation.
   *
   * @return array
   *   array($firstname, $initials); $initials keeps a leading space when it
   *   is not empty.
   */
  public function grabFirstnameInitials($remainder) {
    $firstnames = array();
    $initials = '';
    foreach ($this->splitOnWhitespace($remainder) as $token) {
      if ($this->startsLowercase($token)) {
        $this->prefix[] = $token;
      }
      elseif (preg_match("/[a-zA-Z]{2,}/", $token)) {
        $firstnames[] = $token;
      }
      else {
        $initials .= ' ' . trim(str_replace(".", " ", $token));
      }
    }
    return array(
      join(" ", $firstnames),
      $initials,
    );
  }

  /**
   * Surname may have title such as 'den', 'von', 'de la' etc. - characterised by first character lowercased. Any
   * uppercased part means lowercased parts following are part of the surname (e.g. Van den Bussche)
   *
   * @param string $input
   *   The surname part of a name, possibly with a leading prefix.
   *
   * @return array
   *   array($surname, $prefix); either element is FALSE when it is absent.
   */
  public function grabSurname($input) {
    $prefixParts = array();
    $surnameParts = array();
    foreach ($this->splitOnWhitespace($input) as $token) {
      // Once a surname token has been seen, everything that follows belongs
      // to the surname regardless of case.
      if (!$surnameParts && $this->startsLowercase($token)) {
        $prefixParts[] = $token;
      }
      else {
        $surnameParts[] = $token;
      }
    }
    return array(
      $surnameParts ? join(" ", $surnameParts) : FALSE,
      $prefixParts ? join(" ", $prefixParts) : FALSE,
    );
  }

  /**
   * Splits a string into its whitespace separated, non-empty tokens.
   *
   * Replaces the former preg_split(" ", ...) calls, whose pattern is an empty
   * regular expression and therefore made preg_split() fail.
   *
   * @param string $string
   *   The string to split.
   *
   * @return array
   *   The tokens; empty when $string is blank.
   */
  protected function splitOnWhitespace($string) {
    return preg_split('/\s+/', trim((string) $string), -1, PREG_SPLIT_NO_EMPTY);
  }

  /**
   * Tells whether a token starts with a lower-case ASCII letter (a-z).
   *
   * @param string $token
   *   The token to inspect.
   *
   * @return bool
   */
  protected function startsLowercase($token) {
    return preg_match('/^[a-z]/', $token) === 1;
  }

  /**
   * Returns $string as UTF-8 without double-encoding valid UTF-8 input.
   *
   * Only strings that are not valid UTF-8 are converted, and they are assumed
   * to be ISO-8859-1, which is what the former unconditional utf8_encode()
   * assumed for all input.
   *
   * @param string $string
   *   The string to normalise.
   *
   * @return string
   */
  protected function toUtf8($string) {
    // An empty pattern with the /u modifier matches exactly when the subject
    // is valid UTF-8.
    if ($string === '' || preg_match('//u', $string)) {
      return $string;
    }
    if (function_exists('mb_convert_encoding')) {
      return mb_convert_encoding($string, 'UTF-8', 'ISO-8859-1');
    }
    return utf8_encode($string);
  }

}
Classes
Name | Description |
---|---|
Creators | |
PARSECREATORS | Released through http://bibliophile.sourceforge.net under the GPL licence. Do whatever you like with this -- some credit to the author(s) would be appreciated. |