You are here

function remove_invalid_xml_characters in Lingotek Translation 7.7

Replaces invalid XML characters with the Unicode replacement character (U+FFFD).

Parameters

string $element: The string to be checked. It is passed by reference and modified in place when invalid characters are replaced.

Return value

bool TRUE if string contained invalid XML characters, FALSE otherwise

1 call to remove_invalid_xml_characters()
lingotek_xml_fields in ./lingotek.util.inc

File

./lingotek.util.inc, line 521
Utility functions.

Code

/**
 * Replaces invalid XML characters with the Unicode replacement character.
 *
 * Every run of characters that falls outside the XML 1.0 "Char" production is
 * collapsed into a single U+FFFD. When the input is not valid UTF-8, the
 * invalid byte sequences are first replaced through remove_invalid_sequences()
 * and the result is then filtered the same way.
 *
 * @param string $element
 *   The string to be checked. Passed by reference and overwritten with the
 *   cleaned text whenever something had to be replaced.
 *
 * @return bool
 *   TRUE if the string contained invalid XML characters (or invalid UTF-8),
 *   FALSE otherwise.
 */
function remove_invalid_xml_characters(&$element) {
  // NULL and non-string scalars (int, float, bool) stringify to plain ASCII,
  // so they can never hold an invalid character. Bail out early: otherwise
  // the strict comparison against preg_replace()'s string result below would
  // always differ, reporting a false positive and silently turning the
  // caller's value into a string.
  if ($element === NULL || (is_scalar($element) && !is_string($element))) {
    return FALSE;
  }

  // U+FFFD REPLACEMENT CHARACTER, spelled out as UTF-8 bytes so the value
  // does not depend on the encoding this source file is saved in.
  $replacement = "\xEF\xBF\xBD";

  // Valid XML Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
  $pattern = '/[^\\x{0009}\\x{000a}\\x{000d}\\x{0020}-\\x{D7FF}\\x{E000}-\\x{FFFD}\\x{10000}-\\x{10FFFF}]+/u';

  $result = preg_replace($pattern, $replacement, $element);

  if ($result === NULL) {
    if (preg_last_error() !== PREG_BAD_UTF8_ERROR) {
      // Some other PCRE failure: nothing could be determined, so leave the
      // input untouched and report it as clean.
      return FALSE;
    }

    // The /u modifier rejects malformed UTF-8 outright. Replace the broken
    // byte sequences with U+FFFD directly instead of going through a textual
    // placeholder, so that text which legitimately contains the old
    // placeholder ';--;' is no longer rewritten.
    // NOTE(review): assumes remove_invalid_sequences() inserts its second
    // argument verbatim in place of each invalid sequence - confirm.
    $repaired = remove_invalid_sequences($element, $replacement);
    $result = preg_replace($pattern, $replacement, $repaired);
    if ($result !== NULL) {
      $element = $result;
    }
    return TRUE;
  }

  if ($result !== $element) {
    $element = $result;
    return TRUE;
  }

  return FALSE;
}