class HumanNameParser_Parser in Bibliography Module 7
Same name and namespace in other branches
- 6.2 includes/Parser.php \HumanNameParser_Parser
Works with a Name object to parse out the parts of a name.
Example usage: $parser = new HumanNameParser_Parser("John Q. Smith"); echo $parser->getLast() . ", " . $parser->getFirst(); // prints "Smith, John"
Hierarchy
- class \HumanNameParser_Parser
Expanded class hierarchy of HumanNameParser_Parser
File
- includes/
Parser.php, line 11
View source
class HumanNameParser_Parser {

  /**
   * The HumanNameParser_Name object currently being parsed.
   */
  private $name;

  /**
   * Caller-supplied name array; getArray('assoc') merges the parsed parts
   * over it.
   */
  private $nameParts = array();

  /**
   * Leading initial (e.g. the "J." in "J. Edgar Hoover").
   */
  private $leadingInit;

  /**
   * First name.
   */
  private $first;

  /**
   * Nickname(s) found in quotes or parentheses.
   */
  private $nicknames;

  /**
   * Whatever remains once every other part has been chopped off.
   */
  private $middle;

  /**
   * Last name (or the entire name when it is treated as a literal).
   */
  private $last;

  /**
   * Name suffix (one of $suffixes).
   */
  private $suffix;

  /**
   * Author category; the value 5 marks the name as a literal (not parsed).
   */
  private $category;

  /**
   * Author type; the value 5 marks the name as a literal (not parsed).
   */
  private $type;

  /**
   * Truthy when the name was stored verbatim instead of being parsed.
   */
  private $literal;

  /**
   * Recognised name suffixes (lower case).
   */
  private $suffixes;

  /**
   * Recognised last-name prefixes.
   */
  private $prefixes;

  /**
   * Constructor.
   *
   * Sets up the suffix/prefix lists and, when a name is given, parses it.
   *
   * @param mixed $name
   *   Either a name as a string or as a Name object.
   */
  public function __construct($name = NULL) {
    $this->suffixes = array(
      'esq',
      'esquire',
      'jr',
      'sr',
      '2',
      'ii',
      'iii',
      'iv',
    );
    $this->prefixes = array(
      'bar',
      'ben',
      'bin',
      'da',
      'dal',
      'de la',
      'de la Rue du',
      'de',
      'del',
      'der',
      'di',
      'ibn',
      'la',
      'le',
      'san',
      'st',
      'ste',
      'van',
      'van der',
      'van den',
      'vel',
      'von',
    );
    $this->setName($name);
  }

  /**
   * Parses a name and returns its parts as an associative array.
   *
   * Resets category and type to 1 and the literal flag to 0 before parsing.
   *
   * @param mixed $name
   *   Either a name string, or an array with a 'name' key and optional
   *   'auth_category' / 'auth_type' keys.
   * @param mixed $category
   *   Optional category. When non-empty it takes precedence over
   *   $name['auth_category'].
   *
   * @return array
   *   The result of getArray(): the input array (or array('name' => $name))
   *   merged with the parsed parts.
   */
  public function parseName($name = NULL, $category = NULL) {
    $this->literal = 0;
    $this->category = 1;
    $this->type = 1;
    if (is_array($name) && isset($name['name'])) {
      if (isset($name['auth_category']) && !empty($name['auth_category']) && empty($category)) {
        $this->category = $name['auth_category'];
      }
      elseif (!empty($category)) {
        $this->category = $category;
      }
      if (isset($name['auth_type']) && !empty($name['auth_type'])) {
        $this->type = $name['auth_type'];
      }
      $this->nameParts = $name;
      $this->setName($name['name'], $category);
    }
    else {
      $this->nameParts['name'] = $name;
      $this->setName($name, $category);
    }
    return $this->getArray();
  }

  /**
   * Sets name string and parses it.
   *
   * Takes a Name object, an array with a 'name' key, or a simple string
   * (strings are converted into a Name object), then parses it and loads its
   * constituent parts. Does nothing when $name is empty.
   *
   * @param mixed $name
   *   Either a name as a string or as a Name object.
   * @param mixed $category
   *   Optional category. When non-empty it replaces the current category;
   *   when empty the current category is left untouched.
   */
  public function setName($name = NULL, $category = NULL) {
    if (!$name) {
      return;
    }

    // Only overwrite the category when one was actually supplied, so that a
    // category already picked up by parseName() from 'auth_category' is not
    // clobbered by a NULL argument.
    if (!empty($category)) {
      $this->category = $category;
    }

    // This is mostly for testing.
    if (is_object($name) && get_class($name) == "HumanNameParser_Name") {
      $this->name = $name;
    }
    elseif (is_array($name) && isset($name['name'])) {
      $this->name = new HumanNameParser_Name($name['name']);
      $this->nameParts = $name;
    }
    else {
      $this->name = new HumanNameParser_Name($name);
    }

    $this->leadingInit = "";
    $this->first = "";
    $this->nicknames = "";
    $this->middle = "";
    $this->last = "";
    $this->suffix = "";

    // Loose comparison on purpose: the values may arrive as numeric strings.
    if ($this->category == 5 || $this->type == 5) {
      // Literal name: keep the whole name string as the last name. Resolve
      // the string from whichever input form was given rather than storing
      // the raw array/object.
      if (is_object($name)) {
        $this->last = $this->name->getStr();
      }
      elseif (is_array($name) && isset($name['name'])) {
        $this->last = $name['name'];
      }
      else {
        $this->last = $name;
      }
      $this->literal = TRUE;
    }
    else {
      $this->parse();
    }
  }

  /**
   * Returns the leading initial.
   */
  public function getleadingInit() {
    return $this->leadingInit;
  }

  /**
   * Returns the first name.
   */
  public function getFirst() {
    return $this->first;
  }

  /**
   * Returns the nickname(s).
   */
  public function getNicknames() {
    return $this->nicknames;
  }

  /**
   * Returns the middle name(s).
   */
  public function getMiddle() {
    return $this->middle;
  }

  /**
   * Returns the last name.
   */
  public function getLast() {
    return $this->last;
  }

  /**
   * Returns the suffix.
   */
  public function getSuffix() {
    return $this->suffix;
  }

  /**
   * Returns the Name object being parsed.
   */
  public function getName() {
    return $this->name;
  }

  /**
   * Returns all the parts of the name as an array.
   *
   * @param string $arrType
   *   Pass 'int' to get an integer-indexed array (default is associative).
   *
   * @return array
   *   An array of the name-parts. In 'assoc' mode the parts are merged over
   *   the caller-supplied name array, so parsed values win on key clashes.
   *
   * @throws Exception
   *   When $arrType is neither 'assoc' nor 'int'.
   */
  public function getArray($arrType = 'assoc') {
    $arr = array();
    $arr['prefix'] = $this->leadingInit;
    $arr['firstname'] = $this->first;
    $arr['nicknames'] = $this->nicknames;
    // Explicit cast: $this->middle is NULL until a name has been parsed.
    $arr['initials'] = substr((string) $this->middle, 0, 10);
    $arr['lastname'] = $this->last;
    $arr['suffix'] = $this->suffix;
    // The hash covers only the parts assigned above; 'literal' is added
    // afterwards and is therefore not part of it.
    $arr['md5'] = md5(json_encode($arr));
    $arr['literal'] = $this->literal;
    if ($arrType == 'assoc') {
      return array_merge($this->nameParts, $arr);
    }
    elseif ($arrType == 'int') {
      return array_values($arr);
    }
    else {
      throw new Exception("Array must be associative ('assoc') or numeric ('int').");
    }
  }

  /**
   * Parse the name into its constituent parts.
   *
   * Sequentially captures each name-part, working in from the ends and
   * trimming the namestring as it goes.
   *
   * @return boolean
   *   TRUE on success.
   *
   * @throws Exception
   *   When no last name can be found, or when no first name can be found and
   *   the category is not 5.
   */
  private function parse() {
    // Build the suffix alternation: every suffix is followed by "\.*" and
    // every suffix except the first is preceded by "\s".
    $suffixes = implode("\\.*|\\s", $this->suffixes) . "\\.*";
    // Each prefix gets a " " behind it.
    $prefixes = implode(" |", $this->prefixes) . " ";
    // The regex use is a bit tricky. *Everything* matched by the regex will
    // be replaced, but you can select a particular parenthesized submatch to
    // be returned. Also, note that each regex requires that the preceding
    // ones have been run, and matches chopped out.
    // Names that start or end with an apostrophe break this.
    $nicknamesRegex = "/ ('|\"|\\(\"*'*)(.+?)('|\"|\"*'*\\)) /";
    $suffixRegex = "/,* *({$suffixes})\$/";
    $lastRegex = "/(?!^)\\b([^ ]+ y |{$prefixes})*[^ ]+\$/u";
    // Note the lookahead, which isn't returned or replaced.
    $leadingInitRegex = "/^(.\\.*)(?= \\p{L}{2})/";
    $firstRegex = "/^[^ ]+/";

    // Short circuit for a simple single string that would otherwise cause an
    // Exception; we take this as the last name and everything else will be
    // empty (the default).
    if (preg_match('@^\\s*(\\p{L}+)\\s*$@u', $this->name->getStr(), $matches)) {
      $this->last = $matches[1];
      return TRUE;
    }

    // Get nickname, if there is one.
    $this->nicknames = $this->name->chopWithRegex($nicknamesRegex, 2);

    // Get suffix, if there is one.
    $this->suffix = $this->name->chopWithRegex($suffixRegex, 1);

    // Flip the before-comma and after-comma parts of the name.
    $this->name->flip(",");

    // Get the last name.
    $this->last = $this->name->chopWithRegex($lastRegex, 0);
    if (!$this->last) {
      throw new Exception("Couldn't find a last name in '{$this->name->getStr()}'.");
    }

    // Get the first initial, if there is one.
    $this->leadingInit = $this->name->chopWithRegex($leadingInitRegex, 1);

    // Get the first name.
    $this->first = $this->name->chopWithRegex($firstRegex, 0);
    if (!$this->first && $this->category != 5) {
      throw new Exception("Couldn't find a first name in '{$this->name->getStr()}'");
    }

    // If anything's left, that's the middle name.
    $this->middle = $this->name->getStr();
    return TRUE;
  }

}