You are here

protected function ContributorNames::setupPatterns in Bibliography Module 7.2

Set up the patterns.

1 call to ContributorNames::setupPatterns()
ContributorNames::__construct in lib/msrc-authortool/src/Analyzer/ContributorNames.php
Normalizer

File

lib/msrc-authortool/src/Analyzer/ContributorNames.php, line 237

Class

ContributorNames
Match Contributor Names with Predefined Regex

Namespace

Analyzer

Code

/**
 * Registers the contributor-name regular expressions with the normalizer.
 *
 * Each appendPattern() call passes one PCRE pattern plus an array of field
 * names. Presumably the names map positionally onto the pattern's capture
 * groups and patterns are tried in registration order -- verify against the
 * normalizer implementation.
 *
 * Notes on the shared fragments used below:
 * - "[ +?]" is a character class matching ONE space, "+" or "?" character;
 *   it is not "one or more spaces".
 * - $suffixPattern starts with an inline "(?i)", which switches on
 *   case-insensitive matching for the remainder of the enclosing pattern,
 *   not only for the suffix alternation.
 */
protected function setupPatterns() {
  // Last name that may contain spaces (e.g. multi-word surnames).
  // The hyphen is placed last in the class so it is always a literal:
  // PCRE2 (PHP >= 7.3) rejects "\p{Ll}-'" with "invalid range in character
  // class" because a hyphen may not follow a character-type escape unless it
  // ends the class.
  $ppLastNamePattern = "([\\p{L}\\p{Ll}' -]+)";

  // ASCII form: ([a-zA-Z' -]+)
  // Last name without embedded spaces; hyphen last for the same reason.
  $lastNamePattern = "([\\p{L}\\p{Ll}'-]+)";

  // ASCII form: ([a-zA-Z'-]+)
  $suffixPattern = "(?i)(Jr|Sr|Esq|Ph\\.?D|2nd|3rd|Psy\\.D|M\\.S|II|III|IV)\\.?";

  //Where we have initials only, it's pretty clear

  //Lastname, FMS, Suf.
  $this->normalizer
    ->appendPattern("/^{$ppLastNamePattern},[ +?]?([A-Z])\\.?[ +?]?([A-Z])\\.?[ +?]?([A-Z])\\.?,[ +?]?{$suffixPattern}/u", array(
    'lastName',
    'firstInitial',
    'middleInitial',
    'secondMiddleInitial',
    'suffix',
  ));

  //Lastname, FM, Suf.
  $this->normalizer
    ->appendPattern("/^{$ppLastNamePattern},[ +?]?([A-Z])\\.?[ +?]?([A-Z])\\.?,[ +?]?{$suffixPattern}/u", array(
    'lastName',
    'firstInitial',
    'middleInitial',
    'suffix',
  ));

  //Lastname, F, Suf.
  // NOTE(review): unlike its siblings this pattern has no leading "^"
  // anchor, so it can match starting mid-string -- confirm this is intended.
  $this->normalizer
    ->appendPattern("/{$ppLastNamePattern},[ +?]?([A-Z])\\.?(?![a-z'-]),[ +?]?{$suffixPattern}/u", array(
    'lastName',
    'firstInitial',
    'suffix',
  ));

  // NOTE(review): in the three "Lastname Suf., ..." patterns below, the "?"
  // after {$suffixPattern} attaches to the suffix's trailing "\.?" (making
  // it lazy); it does not make the suffix itself optional. The suffix's
  // "(?i)" also makes the following [A-Z] initials case-insensitive.

  //Lastname Suf., FMS
  $this->normalizer
    ->appendPattern("/^{$ppLastNamePattern} {$suffixPattern}?,[ +?]?([A-Z])\\.?[ +?]?([A-Z])\\.?[ +?]?([A-Z])\\.?\$/u", array(
    'lastName',
    'suffix',
    'firstInitial',
    'middleInitial',
    'secondMiddleInitial',
  ));

  //Lastname Suf., FM
  $this->normalizer
    ->appendPattern("/^{$ppLastNamePattern} {$suffixPattern}?,[ +?]?([A-Z])\\.?[ +?]?([A-Z])\\.?(?![a-z'-])\\.?\$/u", array(
    'lastName',
    'suffix',
    'firstInitial',
    'middleInitial',
  ));

  //Lastname Suf., F
  $this->normalizer
    ->appendPattern("/^{$ppLastNamePattern} {$suffixPattern}?,[ +?]?([A-Z])\\.?(?![a-z'-])\\.?\$/u", array(
    'lastName',
    'suffix',
    'firstInitial',
  ));

  //Lastname, FMS
  $this->normalizer
    ->appendPattern("/^{$ppLastNamePattern},[ +?]?([A-Z])\\.?[ +?]?([A-Z])\\.?[ +?]?([A-Z])\\.?\$/u", array(
    'lastName',
    'firstInitial',
    'middleInitial',
    'secondMiddleInitial',
  ));

  //Lastname, FM
  $this->normalizer
    ->appendPattern("/^{$ppLastNamePattern},[ +?]?([A-Z])\\.?[ +?]?([A-Z])\\.?(?![a-zA-Z'-])\\.?\$/u", array(
    'lastName',
    'firstInitial',
    'middleInitial',
  ));

  //Lastname, F
  $this->normalizer
    ->appendPattern("/^{$ppLastNamePattern},[ +?]?([A-Z])\\.?(?![a-z'-])\\.?\$/u", array(
    'lastName',
    'firstInitial',
  ));

  //All FirstName Patterns are incorrect below this line!

  //?? Add ??: Lastname, Firstname, M[\.| ]?S[\.]?

  //Lastname, FirstName, MiddleName S, Suf.
  $this->normalizer
    ->appendPattern("/^{$ppLastNamePattern},[ +?]?([\\p{L}-]+)\\.?[ +?]([\\p{L}]+)\\.?[ +?]([A-Z])\\.?,[ +?]?{$suffixPattern}/ui", array(
    'lastName',
    'firstName',
    'middleName',
    'secondMiddleInitial',
    'suffix',
  ));

  //Lastname, Firstname, MiddleName, Suf.
  $this->normalizer
    ->appendPattern("/^{$ppLastNamePattern},[ +?]?([\\p{L}-]+)\\.?[ +?]([\\p{L}]+)\\.?,[ +?]?{$suffixPattern}/ui", array(
    'lastName',
    'firstName',
    'middleName',
    'suffix',
  ));

  //Lastname, Firstname, Suf.
  $this->normalizer
    ->appendPattern("/^{$ppLastNamePattern},[ +?]?([\\p{L}-]+)\\.?,[ +?]?{$suffixPattern}/ui", array(
    'lastName',
    'firstName',
    'suffix',
  ));

  //Lastname, FirstName MiddleName S
  $this->normalizer
    ->appendPattern("/^{$ppLastNamePattern},[ +?]?([\\p{L}-]+)\\.?[ +?]([\\p{L}]+)\\.?[ +?]([A-Z])\\.?/ui", array(
    'lastName',
    'firstName',
    'middleName',
    'secondMiddleInitial',
  ));

  //Lastname, Firstname, MiddleName
  $this->normalizer
    ->appendPattern("/^{$ppLastNamePattern},[ +?]?([\\p{L}-]+)\\.?[ +?]([\\p{L}]+)\\.?/ui", array(
    'lastName',
    'firstName',
    'middleName',
  ));

  //Lastname, Firstname
  $this->normalizer
    ->appendPattern("/^{$ppLastNamePattern},[ +?]?([\\p{L}-]+)\\.?/ui", array(
    'lastName',
    'firstName',
  ));

  //F.M.S. Lastname
  $this->normalizer
    ->appendPattern("/^([a-zA-Z])[\\. ]([a-zA-Z])[\\. ]([a-zA-Z])[\\.]? {$lastNamePattern}/u", array(
    'firstInitial',
    'middleInitial',
    'secondMiddleInitial',
    'lastName',
  ));

  //F.M. Lastname
  $this->normalizer
    ->appendPattern("/^([a-zA-Z])[\\. ]([a-zA-Z])[\\.]? {$lastNamePattern}/u", array(
    'firstInitial',
    'middleInitial',
    'lastName',
  ));

  //Firstname MS Lastname, Suf.
  // The second capture is a single [A-Z] letter but is stored under
  // 'middleName' (not 'middleInitial').
  $this->normalizer
    ->appendPattern("/^([\\p{L}-]+)\\.? ([A-Z])[\\.| ]?([A-Z])\\.? {$lastNamePattern},[ +?]?{$suffixPattern}/u", array(
    'firstName',
    'middleName',
    'secondMiddleInitial',
    'lastName',
    'suffix',
  ));

  //Firstname Middle S Lastname, Suf.
  $this->normalizer
    ->appendPattern("/^([\\p{L}-]+)\\.?[ +?]([a-z][a-z]+)[ +?]([A-Z])\\.?[ +?]{$lastNamePattern},[ +?]?{$suffixPattern}/ui", array(
    'firstName',
    'middleName',
    'secondMiddleInitial',
    'lastName',
    'suffix',
  ));

  //Firstname Middle Lastname, Suf.
  $this->normalizer
    ->appendPattern("/^([\\p{L}-]+)\\.?[ +?]([a-z][a-z]+)[ +?]{$lastNamePattern},[ +?]?{$suffixPattern}/ui", array(
    'firstName',
    'middleName',
    'lastName',
    'suffix',
  ));

  //Firstname Lastname, Suf.
  $this->normalizer
    ->appendPattern("/^([\\p{L}-]+)\\.?[ +?]{$lastNamePattern},[ +?]?{$suffixPattern}/ui", array(
    'firstName',
    'lastName',
    'suffix',
  ));

  //Firstname MS Lastname
  // As above, the single-letter second capture is stored as 'middleName'.
  $this->normalizer
    ->appendPattern("/^([\\p{L}-]+)\\.?[ +?]([A-Z])[\\.| ]?([A-Z])\\.?[ +?]{$lastNamePattern}/u", array(
    'firstName',
    'middleName',
    'secondMiddleInitial',
    'lastName',
  ));

  //Firstname Middle S Lastname
  $this->normalizer
    ->appendPattern("/^([\\p{L}-]+)\\.?[ +?]([a-z][a-z]+)[ +?]([A-Z])\\.?[ +?]{$lastNamePattern}/ui", array(
    'firstName',
    'middleName',
    'secondMiddleInitial',
    'lastName',
  ));

  //Firstname Middle Lastname
  $this->normalizer
    ->appendPattern("/^([\\p{L}-]+)\\.?[ +?]([a-z][a-z]+)[ +?]{$lastNamePattern}/ui", array(
    'firstName',
    'middleName',
    'lastName',
  ));

  //Firstname Lastname
  $this->normalizer
    ->appendPattern("/^([\\p{L}-]+)\\.?[ +?]{$lastNamePattern}/ui", array(
    'firstName',
    'lastName',
  ));

  //Suf.,Firstname M. Lastname
  // The middle capture is "[a-z]+" (one or more letters, case-insensitive
  // via the "i" flag) yet is stored under 'middleInitial'.
  $this->normalizer
    ->appendPattern("/^{$suffixPattern},\\s?([\\p{L}-]+)\\.?[ +?]([a-z]+)\\.?[ +?]{$lastNamePattern}/ui", array(
    'suffix',
    'firstName',
    'middleInitial',
    'lastName',
  ));
}