class PoStreamReader in Localization update 7.2
Implements Gettext PO stream reader.
The PO file format parsing is implemented according to the documentation at http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files.
Hierarchy
- class \PoStreamReader implements PoReaderInterface, PoStreamInterface
Expanded class hierarchy of PoStreamReader
File
- includes/gettext/PoStreamReader.php, line 14
- Contains \Drupal\Component\Gettext\PoStreamReader.
View source
class PoStreamReader implements PoStreamInterface, PoReaderInterface {
/**
 * Source line number of the stream being parsed.
 *
 * Starts at 0 and is incremented by readLine() for every line that is read.
 * It is referenced in the error messages recorded while parsing.
 *
 * @var int
 */
private $_line_number = 0;
/**
 * Parser context for the stream reader state machine.
 *
 * Names the kind of PO token that was read most recently. Possible contexts
 * are:
 * - 'COMMENT' (#)
 * - 'MSGID' (msgid)
 * - 'MSGID_PLURAL' (msgid_plural)
 * - 'MSGCTXT' (msgctxt)
 * - 'MSGSTR' (msgstr)
 * - 'MSGSTR_ARR' (msgstr[n])
 *
 * @var string
 */
private $_context = 'COMMENT';
/**
 * Current entry being read. Incomplete.
 *
 * Raw parse data collected by readLine(), using the keys '#' (list of
 * comment lines), 'msgctxt', 'msgid' (string, or array for plural sources)
 * and 'msgstr' (string, or array keyed by plural index).
 *
 * @var array
 */
private $_current_item = array();
/**
 * Current plural index for plural translations.
 *
 * Taken from the most recent "msgstr[n]" line so that continuation lines
 * are appended to the right plural variant.
 *
 * @var int
 */
private $_current_plural_index = 0;
/**
 * URI of the PO stream that is being read.
 *
 * @var string
 */
private $_uri = '';
/**
 * Language code for the PO stream being read.
 *
 * NULL until setLangcode() is called; copied onto every item created by
 * setItemFromArray().
 *
 * @var string
 */
private $_langcode = NULL;
/**
 * Size of the current PO stream.
 *
 * Assigned by open(); not read anywhere else in this class.
 *
 * @var int
 */
private $_size;
/**
 * File handle of the current PO stream.
 *
 * Assigned by open() from fopen().
 *
 * @var resource
 */
private $_fd;
/**
 * The PO stream header.
 *
 * Populated by readHeader(); stays unset when the stream yields no item.
 *
 * @var PoHeader
 */
private $_header;
/**
 * Object wrapper for the last read source/translation pair.
 *
 * Reset to NULL at the start of every readItem() call and filled by
 * setItemFromArray() once an entry is complete.
 *
 * @var PoItem
 */
private $_last_item;
/**
 * Indicator of whether the stream reading is finished.
 *
 * Has no initial value; readLine() sets it to TRUE once fgets() returns
 * FALSE.
 *
 * @var bool
 */
private $_finished;
/**
 * Array of translated error strings recorded on reading this stream so far.
 *
 * Has no initial value; readLine() appends one message per parse error.
 *
 * @var array
 */
private $_errors;
/**
 * Implements PoMetadataInterface::getLangcode().
 *
 * @return string|null
 *   The language code set through setLangcode(), or NULL if none was set.
 */
public function getLangcode() {
return $this->_langcode;
}
/**
 * Implements PoMetadataInterface::setLangcode().
 *
 * @param string $langcode
 *   Language code stored on the reader and copied onto every item that
 *   setItemFromArray() creates afterwards.
 */
public function setLangcode($langcode) {
$this->_langcode = $langcode;
}
/**
 * Implements PoMetadataInterface::getHeader().
 *
 * @return PoHeader|null
 *   The header object built by readHeader(), or NULL when no header has been
 *   read.
 */
public function getHeader() {
return $this->_header;
}
/**
 * Implements PoMetadataInterface::setHeader().
 *
 * Not applicable to stream reading and therefore not implemented.
 *
 * @param PoHeader $header
 *   Ignored.
 */
public function setHeader(PoHeader $header) {
// Intentionally empty: the header of a read stream comes from the stream.
}
/**
 * Implements PoStreamInterface::getURI().
 *
 * @return string
 *   The URI set through setURI(), or an empty string if none was set.
 */
public function getURI() {
return $this->_uri;
}
/**
 * Implements PoStreamInterface::setURI().
 *
 * @param string $uri
 *   URI that open() passes to fopen().
 */
public function setURI($uri) {
$this->_uri = $uri;
}
/**
 * Implements PoStreamInterface::open().
 *
 * Opens the stream and reads the header. The stream is ready for reading
 * items after.
 *
 * @throws Exception
 *   If the URI is not yet set, or if the stream cannot be opened for
 *   reading.
 */
public function open() {
  if (empty($this->_uri)) {
    throw new \Exception('Cannot open stream without URI set.');
  }

  $handle = fopen($this->_uri, 'rb');
  if ($handle === FALSE) {
    // Without this guard FALSE would be handed to ftell() and fgets(), which
    // only produces warnings (or a TypeError on PHP 8) far from the cause.
    throw new \Exception('Cannot open stream ' . $this->_uri . ' for reading.');
  }
  $this->_fd = $handle;

  // ftell() on a freshly opened handle is always 0, so ask the stream for
  // its size instead. Wrappers that cannot stat report a size of 0.
  $stat = fstat($handle);
  $this->_size = ($stat !== FALSE && isset($stat['size'])) ? $stat['size'] : 0;

  $this->readHeader();
}
/**
 * Implements PoStreamInterface::close().
 *
 * @throws Exception
 *   If the stream is not open, including when it was already closed.
 */
public function close() {
  // A closed resource is still truthy, so test with is_resource() to make a
  // repeated close() raise the documented exception instead of calling
  // fclose() on a dead handle.
  if (!is_resource($this->_fd)) {
    throw new \Exception('Cannot close stream that is not open.');
  }
  fclose($this->_fd);
  // Drop the stale handle so the reader no longer holds a closed resource.
  $this->_fd = NULL;
}
/**
 * Implements PoReaderInterface::readItem().
 *
 * Feeds lines to the parser until either one complete entry has been
 * assembled or the stream is exhausted.
 *
 * @return PoItem|null
 *   The entry that was completed, or NULL when the stream ended first.
 */
public function readItem() {
  // Forget whatever the previous call produced.
  $this->_last_item = NULL;

  // Keep consuming lines while the stream has data and no entry is ready.
  while (!($this->_finished || isset($this->_last_item))) {
    $this->readLine();
  }

  return $this->_last_item;
}
/**
 * Moves the read pointer of the current PO stream.
 *
 * @param int $seek
 *   Byte offset, counted from the start of the stream, to move to.
 */
public function setSeek($seek) {
  fseek($this->_fd, $seek, SEEK_SET);
}
/**
 * Reports the read pointer of the current PO stream.
 *
 * @return int|false
 *   Whatever ftell() reports for the stream handle.
 */
public function getSeek() {
  $position = ftell($this->_fd);
  return $position;
}
/**
 * Reads the header from the PO stream.
 *
 * A PO header is stored as an ordinary entry whose source is the empty
 * string and whose translation holds the key-value pairs, so the regular
 * item reader is reused to fetch it.
 */
private function readHeader() {
  $first_item = $this->readItem();

  // An empty (0 byte) .po file yields no item; leave the header unset then.
  if ($first_item) {
    $parsed = new PoHeader();
    $raw_header = trim($first_item->getTranslation());
    $parsed->setFromString($raw_header);
    $this->_header = $parsed;
  }
}
/**
 * Reads a line from the PO stream and stores data internally.
 *
 * Expands $this->_current_item based on new data for the current item. If
 * this line ends the current item, it is saved with setItemFromArray() with
 * data from $this->_current_item.
 *
 * An internal state machine is maintained in this reader using
 * $this->_context as the reading state. PO items are either separated by
 * COMMENT states (when items have at least one empty line or comment in
 * between them) or indicated by MSGSTR or MSGSTR_ARR followed immediately by
 * an MSGID or MSGCTXT (when items closely follow each other).
 *
 * A UTF-8 byte order mark on the first line is removed before the line is
 * tokenized.
 *
 * @return FALSE|NULL
 *   FALSE if an error was logged, NULL otherwise. The errors are considered
 *   non-blocking, so reading can continue, while the errors are collected
 *   for later presentation.
 */
private function readLine() {
  // Read a line and set the stream finished indicator if it was not
  // possible anymore.
  $line = fgets($this->_fd);
  $this->_finished = $line === FALSE;

  // Initialize common values for error logging. The line number is bound by
  // reference so that messages formatted below see the incremented value.
  $log_vars = array(
    '%uri' => $this->getURI(),
    '%line' => &$this->_line_number,
  );

  if (!$this->_finished) {
    if ($this->_line_number == 0) {
      // The first line might come with a UTF-8 BOM, which should be removed.
      // The bytes are spelled out as escapes so the search string cannot be
      // lost when the source file is re-encoded.
      $line = str_replace("\xEF\xBB\xBF", '', $line);
      // Current plurality for 'msgstr[]'.
      $this->_current_plural_index = 0;
    }

    // Track the line number for error reporting.
    $this->_line_number++;

    // Trim away the linefeed. A backslash followed by a newline might appear
    // at the end of the string if another line continuing the same string
    // follows. We can remove that.
    $line = trim(strtr($line, array(
      "\\\n" => "",
    )));

    if (!strncmp('#', $line, 1)) {
      // Lines starting with '#' are comments.
      if ($this->_context == 'COMMENT') {
        // Already in comment context, add to current comment.
        $this->_current_item['#'][] = substr($line, 1);
      }
      elseif ($this->_context == 'MSGSTR' || $this->_context == 'MSGSTR_ARR') {
        // We are currently in string context, save current item.
        $this->setItemFromArray($this->_current_item);
        // Start a new entry for the comment.
        $this->_current_item = array();
        $this->_current_item['#'][] = substr($line, 1);
        $this->_context = 'COMMENT';
        return NULL;
      }
      else {
        // A comment following any other context is a syntax error.
        $this->_errors[] = format_string('The translation stream %uri contains an error: "msgstr" was expected but not found on line %line.', $log_vars);
        return FALSE;
      }
      return NULL;
    }
    elseif (!strncmp('msgid_plural', $line, 12)) {
      // A plural form for the current source string. This test must precede
      // the plain 'msgid' test, which shares the same prefix.
      if ($this->_context != 'MSGID') {
        // A plural form can only be added to an msgid directly.
        $this->_errors[] = format_string('The translation stream %uri contains an error: "msgid_plural" was expected but not found on line %line.', $log_vars);
        return FALSE;
      }
      // Remove 'msgid_plural' and trim away whitespace.
      $line = trim(substr($line, 12));
      // Only the plural source string is left, parse it.
      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // The plural form must be wrapped in quotes.
        $this->_errors[] = format_string('The translation stream %uri contains a syntax error on line %line.', $log_vars);
        return FALSE;
      }
      // Append the plural source to the current entry.
      if (is_string($this->_current_item['msgid'])) {
        // The first value was stored as string. Now we know the context is
        // plural, it is converted to array.
        $this->_current_item['msgid'] = array(
          $this->_current_item['msgid'],
        );
      }
      $this->_current_item['msgid'][] = $quoted;
      $this->_context = 'MSGID_PLURAL';
      return NULL;
    }
    elseif (!strncmp('msgid', $line, 5)) {
      // Starting a new message.
      if ($this->_context == 'MSGSTR' || $this->_context == 'MSGSTR_ARR') {
        // We are currently in string context, save current item.
        $this->setItemFromArray($this->_current_item);
        // Start a new context for the msgid.
        $this->_current_item = array();
      }
      elseif ($this->_context == 'MSGID') {
        // We are currently already in the context, meaning we passed an id
        // with no data.
        $this->_errors[] = format_string('The translation stream %uri contains an error: "msgid" is unexpected on line %line.', $log_vars);
        return FALSE;
      }
      // Remove 'msgid' and trim away whitespace.
      $line = trim(substr($line, 5));
      // Only the message id string is left, parse it.
      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // The message id must be wrapped in quotes.
        $this->_errors[] = format_string('The translation stream %uri contains an error: invalid format for "msgid" on line %line.', $log_vars);
        return FALSE;
      }
      $this->_current_item['msgid'] = $quoted;
      $this->_context = 'MSGID';
      return NULL;
    }
    elseif (!strncmp('msgctxt', $line, 7)) {
      // Starting a new context.
      if ($this->_context == 'MSGSTR' || $this->_context == 'MSGSTR_ARR') {
        // We are currently in string context, save current item.
        $this->setItemFromArray($this->_current_item);
        $this->_current_item = array();
      }
      elseif (!empty($this->_current_item['msgctxt'])) {
        // A context cannot apply to another context.
        $this->_errors[] = format_string('The translation stream %uri contains an error: "msgctxt" is unexpected on line %line.', $log_vars);
        return FALSE;
      }
      // Remove 'msgctxt' and trim away whitespace.
      $line = trim(substr($line, 7));
      // Only the msgctxt string is left, parse it.
      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // The context string must be quoted.
        $this->_errors[] = format_string('The translation stream %uri contains an error: invalid format for "msgctxt" on line %line.', $log_vars);
        return FALSE;
      }
      $this->_current_item['msgctxt'] = $quoted;
      $this->_context = 'MSGCTXT';
      return NULL;
    }
    elseif (!strncmp('msgstr[', $line, 7)) {
      // A message string for a specific plurality. This test must precede
      // the plain 'msgstr' test, which shares the same prefix.
      if ($this->_context != 'MSGID' && $this->_context != 'MSGCTXT' && $this->_context != 'MSGID_PLURAL' && $this->_context != 'MSGSTR_ARR') {
        // Plural message strings must come after msgid, msgctxt,
        // msgid_plural, or other msgstr[] entries.
        $this->_errors[] = format_string('The translation stream %uri contains an error: "msgstr[]" is unexpected on line %line.', $log_vars);
        return FALSE;
      }
      // Ensure the plurality is terminated.
      if (strpos($line, ']') === FALSE) {
        $this->_errors[] = format_string('The translation stream %uri contains an error: invalid format for "msgstr[]" on line %line.', $log_vars);
        return FALSE;
      }
      // Extract the plurality, i.e. the text between '[' and ']'.
      $frombracket = strstr($line, '[');
      $this->_current_plural_index = substr($frombracket, 1, strpos($frombracket, ']') - 1);
      // Skip to the next whitespace and trim away any further whitespace,
      // bringing $line to the message text only.
      $line = trim(strstr($line, " "));
      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // The string must be quoted.
        $this->_errors[] = format_string('The translation stream %uri contains an error: invalid format for "msgstr[]" on line %line.', $log_vars);
        return FALSE;
      }
      if (!isset($this->_current_item['msgstr']) || !is_array($this->_current_item['msgstr'])) {
        $this->_current_item['msgstr'] = array();
      }
      $this->_current_item['msgstr'][$this->_current_plural_index] = $quoted;
      $this->_context = 'MSGSTR_ARR';
      return NULL;
    }
    elseif (!strncmp("msgstr", $line, 6)) {
      // A string pair for an msgid (with optional context).
      if ($this->_context != 'MSGID' && $this->_context != 'MSGCTXT') {
        // Strings are only valid within an id or context scope.
        $this->_errors[] = format_string('The translation stream %uri contains an error: "msgstr" is unexpected on line %line.', $log_vars);
        return FALSE;
      }
      // Remove 'msgstr' and trim away whitespace.
      $line = trim(substr($line, 6));
      // Only the msgstr string is left, parse it.
      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // The string must be quoted.
        $this->_errors[] = format_string('The translation stream %uri contains an error: invalid format for "msgstr" on line %line.', $log_vars);
        return FALSE;
      }
      $this->_current_item['msgstr'] = $quoted;
      $this->_context = 'MSGSTR';
      return NULL;
    }
    elseif ($line != '') {
      // Anything that is not a token may be a continuation of a previous
      // token.
      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // This string must be quoted.
        $this->_errors[] = format_string('The translation stream %uri contains an error: string continuation expected on line %line.', $log_vars);
        return FALSE;
      }
      // Append the string to the current item.
      if ($this->_context == 'MSGID' || $this->_context == 'MSGID_PLURAL') {
        if (is_array($this->_current_item['msgid'])) {
          // Add string to last array element for plural sources.
          $last_index = count($this->_current_item['msgid']) - 1;
          $this->_current_item['msgid'][$last_index] .= $quoted;
        }
        else {
          // Singular source, just append the string.
          $this->_current_item['msgid'] .= $quoted;
        }
      }
      elseif ($this->_context == 'MSGCTXT') {
        // Multiline context name.
        $this->_current_item['msgctxt'] .= $quoted;
      }
      elseif ($this->_context == 'MSGSTR') {
        // Multiline translation string.
        $this->_current_item['msgstr'] .= $quoted;
      }
      elseif ($this->_context == 'MSGSTR_ARR') {
        // Multiline plural translation string.
        $this->_current_item['msgstr'][$this->_current_plural_index] .= $quoted;
      }
      else {
        // No valid context to append to.
        $this->_errors[] = format_string('The translation stream %uri contains an error: unexpected string on line %line.', $log_vars);
        return FALSE;
      }
      return NULL;
    }
  }

  // Empty line read or EOF of PO stream, close out the last entry.
  if ($this->_context == 'MSGSTR' || $this->_context == 'MSGSTR_ARR') {
    $this->setItemFromArray($this->_current_item);
    $this->_current_item = array();
  }
  elseif ($this->_context != 'COMMENT') {
    // The entry was cut off before any translation string was seen.
    $this->_errors[] = format_string('The translation stream %uri ended unexpectedly at line %line.', $log_vars);
    return FALSE;
  }
  return NULL;
}
/**
 * Stores the parsed values as a PoItem object.
 *
 * The resulting object becomes the last read item and the parser context is
 * reset to 'COMMENT'.
 *
 * @param array $value
 *   Raw entry data as collected by readLine(). 'msgid' and 'msgstr' are read
 *   unconditionally; '#' (comment lines) and 'msgctxt' are optional.
 */
public function setItemFromArray($value) {
  // Defaults that apply when the entry carries no comments.
  $comments = '';
  $textgroup = 'default';
  if (isset($value['#'])) {
    $comments = $this->shortenComments($value['#']);
    $textgroup = $this->fetchGroupFromComment($comments);
  }

  // An array of translations marks a plural entry; order its variants by
  // plural form index.
  $plural = is_array($value['msgstr']);
  if ($plural) {
    ksort($value['msgstr']);
  }

  $context = isset($value['msgctxt']) ? $value['msgctxt'] : '';

  $entry = new PoItem();
  $entry->setContext($context);
  $entry->setSource($value['msgid']);
  $entry->setTranslation($value['msgstr']);
  $entry->setPlural($plural);
  $entry->setComment($comments);
  $entry->setLangcode($this->_langcode);
  $entry->setTextgroup($textgroup);

  $this->_last_item = $entry;
  $this->_context = 'COMMENT';
}
/**
 * Parses a string in quotes.
 *
 * Double-quoted strings have their C-style escape sequences resolved;
 * single-quoted strings are returned verbatim.
 *
 * @param string $string
 *   A string specified with enclosing quotes.
 *
 * @return string|FALSE
 *   The string parsed from inside the quotes, or FALSE if $string is not
 *   enclosed in a matching pair of single or double quotes.
 */
public function parseQuoted($string) {
  // An opening and a closing quote are both required. Without this check a
  // lone quote character would match itself and pass as an empty string.
  if (strlen($string) < 2) {
    return FALSE;
  }

  $quote = substr($string, 0, 1);
  if ($quote !== substr($string, -1, 1)) {
    // Start and end quotes must be the same.
    return FALSE;
  }

  // Before PHP 8, substr() returns FALSE rather than '' for an empty result.
  // Cast so that an empty quoted string is not mistaken for a parse failure.
  $inner = (string) substr($string, 1, -1);

  if ($quote === '"') {
    // Double quotes: strip slashes.
    return stripcslashes($inner);
  }
  if ($quote === "'") {
    // Simple quote: return as-is.
    return $inner;
  }

  // Unrecognized quote.
  return FALSE;
}
/**
 * Condenses a list of comment lines into one short string.
 *
 * The first character of every line is dropped and the remainders are joined
 * with ', '. No further lines are appended once the joined text has reached
 * 130 characters, so the result can exceed that length by one line.
 *
 * @param string|array $comment
 *   An array of strings containing a comment.
 *
 * @return string
 *   Short one-string version of the comment.
 */
private function shortenComments($comment) {
  $summary = '';
  while (count($comment) > 0) {
    // Consume the next line even when it ends up not being used.
    $piece = substr(array_shift($comment), 1);
    if (strlen($summary) >= 130) {
      break;
    }
    $summary = $summary . $piece . ', ';
  }
  // Cut the trailing separator before trimming the surrounding whitespace.
  $without_separator = substr($summary, 0, -2);
  return trim($without_separator);
}
/**
 * Derives a translation text group from a source's comment string.
 *
 * The text before the first ':' in the comment is used as the group when the
 * i18n_string module is enabled and knows a group by that name.
 *
 * @param string $comment
 *   Comment string.
 *
 * @return string
 *   The comment's text group, or 'default' when none can be determined.
 */
private function fetchGroupFromComment($comment) {
  // Text groups only exist with i18n_string, and only comments containing a
  // colon can name one.
  if (!module_exists('i18n_string') || strpos($comment, ':') === FALSE) {
    return 'default';
  }

  // Names of all text groups known to i18n_string.
  $known_groups = array_keys(i18n_string_group_info());

  // Default Drupal exports put the group name before the first colon.
  $segments = explode(':', $comment);
  $candidate = $segments[0];

  return in_array($candidate, $known_groups) ? $candidate : 'default';
}
}
Members
| Name | Modifiers | Type | Description | Overrides |
|---|---|---|---|---|
| PoStreamReader::$_context | private | property | Parser context for the stream reader state machine. | |
| PoStreamReader::$_current_item | private | property | Current entry being read. Incomplete. | |
| PoStreamReader::$_current_plural_index | private | property | Current plural index for plural translations. | |
| PoStreamReader::$_errors | private | property | Array of translated error strings recorded on reading this stream so far. | |
| PoStreamReader::$_fd | private | property | File handle of the current PO stream. | |
| PoStreamReader::$_finished | private | property | Indicator of whether the stream reading is finished. | |
| PoStreamReader::$_header | private | property | The PO stream header. | |
| PoStreamReader::$_langcode | private | property | Language code for the PO stream being read. | |
| PoStreamReader::$_last_item | private | property | Object wrapper for the last read source/translation pair. | |
| PoStreamReader::$_line_number | private | property | Source line number of the stream being parsed. | |
| PoStreamReader::$_size | private | property | Size of the current PO stream. | |
| PoStreamReader::$_uri | private | property | URI of the PO stream that is being read. | |
| PoStreamReader::close | public | function | Implements PoStreamInterface::close(). | Overrides PoStreamInterface::close |
| PoStreamReader::fetchGroupFromComment | private | function | Determine a translation text group using a source's comment-string. | |
| PoStreamReader::getHeader | public | function | Implements PoMetadataInterface::getHeader(). | Overrides PoMetadataInterface::getHeader |
| PoStreamReader::getLangcode | public | function | Implements PoMetadataInterface::getLangcode(). | Overrides PoMetadataInterface::getLangcode |
| PoStreamReader::getSeek | public | function | Returns the pointer position of the current PO stream. | |
| PoStreamReader::getURI | public | function | Implements PoStreamInterface::getURI(). | Overrides PoStreamInterface::getURI |
| PoStreamReader::open | public | function | Implements PoStreamInterface::open(). | Overrides PoStreamInterface::open |
| PoStreamReader::parseQuoted | public | function | Parses a string in quotes. | |
| PoStreamReader::readHeader | private | function | Read the header from the PO stream. | |
| PoStreamReader::readItem | public | function | Implements PoReaderInterface::readItem(). | Overrides PoReaderInterface::readItem |
| PoStreamReader::readLine | private | function | Reads a line from the PO stream and stores data internally. | |
| PoStreamReader::setHeader | public | function | Implements PoMetadataInterface::setHeader(). | Overrides PoMetadataInterface::setHeader |
| PoStreamReader::setItemFromArray | public | function | Store the parsed values as a PoItem object. | |
| PoStreamReader::setLangcode | public | function | Implements PoMetadataInterface::setLangcode(). | Overrides PoMetadataInterface::setLangcode |
| PoStreamReader::setSeek | public | function | Sets the seek position for the current PO stream. | |
| PoStreamReader::setURI | public | function | Implements PoStreamInterface::setURI(). | Overrides PoStreamInterface::setURI |
| PoStreamReader::shortenComments | private | function | Generates a short, one-string version of the passed comment array. | |