function remove_invalid_xml_characters in Lingotek Translation 7.7
Replaces invalid XML characters with the unicode replacement character
Parameters
string $element: The string to be checked. It is passed by reference and modified in place when invalid characters are found.
Return value
bool TRUE if string contained invalid XML characters, FALSE otherwise
1 call to remove_invalid_xml_characters()
File
- ./
lingotek.util.inc, line 521 - Utility functions.
Code
/**
 * Substitutes the Unicode replacement character for XML-invalid characters.
 *
 * Each run of consecutive characters outside the XML "Char" production is
 * collapsed into a single replacement character. The argument is updated in
 * place only when a cleaned version could be produced.
 *
 * @param string $element
 *   The text to sanitize; taken by reference and overwritten with the cleaned
 *   text when a substitution succeeded.
 *
 * @return bool
 *   TRUE when the text was changed by the substitution, or when the regex
 *   engine rejected the input as malformed UTF-8 (even if the fallback pass
 *   could not produce a cleaned string). FALSE otherwise, including when the
 *   first pass fails for any reason other than malformed UTF-8.
 */
function remove_invalid_xml_characters(&$element) {
  // Valid XML Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
  // First pattern: any run of characters outside that set.
  $invalid_run = '/[^\\x{0009}\\x{000a}\\x{000d}\\x{0020}-\\x{D7FF}\\x{E000}-\\x{FFFD}\\x{10000}-\\x{10FFFF}]+/u';
  // Second pattern: the same run, or the ';--;' marker used by the fallback.
  $invalid_run_or_marker = '/[^\\x{0009}\\x{000a}\\x{000d}\\x{0020}-\\x{D7FF}\\x{E000}-\\x{FFFD}\\x{10000}-\\x{10FFFF}]+|(;--;)/u';
  $substitute = '�';

  $cleaned = preg_replace($invalid_run, $substitute, $element);

  if ($cleaned === NULL) {
    // The regex engine gave up. Only malformed UTF-8 has a fallback; any
    // other failure leaves the text untouched and is reported as "no change".
    if (preg_last_error() !== PREG_BAD_UTF8_ERROR) {
      return FALSE;
    }

    // Presumably remove_invalid_sequences() swaps each malformed byte sequence
    // for the ';--;' marker so the 'u' pattern can run -- confirm against its
    // definition.
    // NOTE(review): a literal ';--;' that survives into $marked from the
    // caller's text would also be replaced here -- verify this is acceptable.
    $marked = remove_invalid_sequences($element, ';--;');
    $cleaned = preg_replace($invalid_run_or_marker, $substitute, $marked);
    if ($cleaned !== NULL) {
      $element = $cleaned;
    }
    // Malformed UTF-8 counts as invalid whether or not the second pass worked.
    return TRUE;
  }

  // The first pass succeeded; an identical result means nothing was invalid.
  if ($cleaned === $element) {
    return FALSE;
  }

  $element = $cleaned;
  return TRUE;
}