function _remove_duplicates_get_nodes in Remove Duplicates 7
Get all nodes with selected type / field.
Parameters
string $node_type_machine_name: The node type to fetch.
string $node_field_machine_name: The {field} to join with the fetched node type.
bool $case_sensitive: If TRUE, duplicates detection is case sensitive Otherwise, duplicates detection is case insensitive.
Return value
array An array of node objects with 3 properties : nid, status, {field}.
1 call to _remove_duplicates_get_nodes()
- _remove_duplicates_get_duplicate_node_groups in ./
remove_duplicates.module - Get all duplicate node grouped according to selected field.
File
- ./
remove_duplicates.module, line 747 - Remove duplicate nodes according to node fields or Custom fields.
Code
function _remove_duplicates_get_nodes($node_type_machine_name = NULL, $node_field_machine_name = NULL, $case_sensitive = TRUE) {
$records = array();
// EntityFieldQuery does not support GROUP BY nor DISTINCT.
// See : https://drupal.org/node/1565708
// Using raw database calls instead.
if (in_array(strtolower($node_field_machine_name), array(
'title',
))) {
// For basic title field.
$field = $node_field_machine_name;
if (Database::getConnection()
->databaseType() == 'pgsql') {
// In PostgreSQL string comparisons are case sensitive by default.
// Nested Query pattern (case sensitive) :
// @code
// SELECT s.{field} AS {field},
// COUNT( * ) AS duplicate
// FROM node s
// WHERE (
// s.type = {node_type_machine_name}
// )
// GROUP BY {$field}
// @endcode
// Nested Query pattern (case insensitive) :
// @code
// SELECT s.lowered as {field},
// SUM(s.duplicate) AS duplicate
// FROM (
// SELECT s.{field} AS {field}, lower(s.{field}) AS lowered,
// COUNT( * ) AS duplicate
// FROM node s
// WHERE (
// s.type = {node_type_machine_name}
// )
// GROUP BY lowered, {field}
// ) s
// GROUP BY lowered
// @endcode
$nested_query = db_select('node', 's');
$nested_query
->fields('s', array(
$field,
));
$nested_query
->addExpression('COUNT(*)', 'duplicate');
$nested_query
->condition('s.type', $node_type_machine_name, '=');
if ($case_sensitive) {
$nested_query
->groupBy($field);
}
else {
$nested_query
->addExpression('lower(s.' . $field . ')', 'lowered');
$nested_query
->groupBy('lowered');
$nested_query
->groupBy($field);
$nested_query = db_select($nested_query, 's');
$nested_query
->addExpression('s.lowered', $field);
$nested_query
->addExpression('SUM(s.duplicate)', 'duplicate');
$nested_query
->groupBy('lowered');
}
// Sub Query pattern (both cases) :
// @code
// SELECT n.{field} AS {field}
// FROM {$nested_query} n
// WHERE duplicates > 1
// @endcode
$sub_query = db_select($nested_query, 'n');
$sub_query
->fields('n', array(
$field,
));
$sub_query
->condition('duplicate', 1, '>');
}
else {
// In MySQL nonbinary string comparisons are case insensitive by default.
// See : https://dev.mysql.com/doc/refman/5.0/en/case-sensitivity.html
// Sub Query pattern (case sensitive) :
// @code
// SELECT n.{field} AS {field}, md5(n.{field}) AS checksum,
// COUNT( * ) AS duplicate
// FROM node n
// WHERE (
// n.type = {node_type_machine_name}
// )
// GROUP BY checksum
// HAVING count( duplicate ) >1
// @endcode
// Sub Query pattern (case insensitive) :
// @code
// SELECT n.{field} AS {field},
// COUNT( * ) AS duplicate
// FROM node n
// WHERE (
// n.type = {node_type_machine_name}
// )
// GROUP BY {field}
// HAVING count( duplicate ) > 1
// @endcode
$sub_query = db_select('node', 'n');
$sub_query
->fields('n', array(
$field,
));
$sub_query
->addExpression('COUNT(*)', 'duplicate');
if ($case_sensitive) {
$sub_query
->addExpression('md5(n.' . $field . ')', 'checksum');
$sub_query
->groupBy('checksum');
}
else {
$sub_query
->groupBy($field);
}
$sub_query
->condition('n.type', $node_type_machine_name, '=');
$sub_query
->havingCondition('duplicate', 1, '>');
}
// Main Query pattern (case sensitive) :
// @code
// SELECT n.nid AS nid, n.uid AS uid, n.status AS status,
// n.created AS created, n.changed AS changed,
// n.{field} AS {field}, u.name AS name
// FROM node n
// INNER JOIN users u ON n.uid = u.uid
// INNER JOIN (
// {sub_query}
// ) nn ON md5(n.{field}) = md5(nn.{field})
// WHERE (
// n.type = {node_type_machine_name}
// )
// ORDER BY {field} ASC
// @endcode
// Main Query pattern (case insensitive) :
// @code
// SELECT n.nid AS nid, n.uid AS uid, n.status AS status,
// n.created AS created, n.changed AS changed,
// n.{field} AS {field}, u.name AS name
// FROM node n
// INNER JOIN users u ON n.uid = u.uid
// INNER JOIN (
// {sub_query}
// ) nn ON n.{field} = nn.{field}
// WHERE (
// n.type = {node_type_machine_name}
// )
// ORDER BY {field} ASC
// @endcode
$main_query = db_select('node', 'n');
$main_query
->fields('n', array(
'nid',
'uid',
'status',
'created',
'changed',
$field,
));
$main_query
->fields('u', array(
'name',
));
$main_query
->join('users', 'u', 'n.uid = u.uid');
if ($case_sensitive) {
$main_query
->join($sub_query, 'nn', 'md5(n.' . $field . ') = md5(nn.' . $field . ')');
}
else {
if (Database::getConnection()
->databaseType() == 'pgsql') {
$main_query
->join($sub_query, 'nn', 'lower(n.' . $field . ') = lower(nn.' . $field . ')');
}
else {
$main_query
->join($sub_query, 'nn', 'n.' . $field . ' = nn.' . $field);
}
}
$main_query
->condition('n.type', $node_type_machine_name, '=');
$main_query
->orderBy($field, 'ASC');
$records = $main_query
->execute()
->fetchall();
}
else {
// For Custom Fields.
$field_info = field_info_field($node_field_machine_name);
if (!empty($field_info['storage']['details']['sql']['FIELD_LOAD_CURRENT'])) {
$table = key($field_info['storage']['details']['sql']['FIELD_LOAD_CURRENT']);
$field = current($field_info['storage']['details']['sql']['FIELD_LOAD_CURRENT'][$table]);
if (db_table_exists($table) && db_field_exists($table, $field)) {
if (Database::getConnection()
->databaseType() == 'pgsql') {
// Nested Query pattern (case sensitive) :
// @code
// SELECT cf.{field} AS {field},
// COUNT( * ) AS duplicate
// FROM node s
// INNER JOIN {table} cf ON cf.bundle = {node_type_machine_name}
// AND cf.entity_type = 'node' AND cf.entity_id = s.nid
// WHERE (
// s.type = {node_type_machine_name}
// )
// GROUP BY {$field}
// @endcode
// Nested Query pattern (case insensitive) :
// @code
// SELECT s.lowered as {field},
// SUM(s.duplicate) AS duplicate
// FROM (
// SELECT cf.{field} AS {field}, lower(cf.{field}) AS lowered,
// COUNT( * ) AS duplicate
// FROM node s
// INNER JOIN {table} cf ON cf.bundle = {node_type_machine_name}
// AND cf.entity_type = 'node' AND cf.entity_id = s.nid
// WHERE (
// s.type = {node_type_machine_name}
// )
// GROUP BY lowered, {field}
// ) s
// GROUP BY lowered
// @endcode
$nested_query = db_select('node', 's');
$nested_query
->addJoin('INNER', $table, 'cf', 'cf.bundle = :bundle AND cf.entity_type = :entity_type AND s.nid = cf.entity_id ', array(
':bundle' => $node_type_machine_name,
':entity_type' => 'node',
));
$nested_query
->fields('cf', array(
$field,
));
$nested_query
->addExpression('COUNT(*)', 'duplicate');
$nested_query
->condition('s.type', $node_type_machine_name, '=');
if ($case_sensitive) {
$nested_query
->groupBy($field);
}
else {
$nested_query
->addExpression('lower(cf.' . $field . ')', 'lowered');
$nested_query
->groupBy('lowered');
$nested_query
->groupBy($field);
$nested_query = db_select($nested_query, 's');
$nested_query
->addExpression('s.lowered', $field);
$nested_query
->addExpression('SUM(s.duplicate)', 'duplicate');
$nested_query
->groupBy('lowered');
}
// Sub Query pattern (both cases) :
// @code
// SELECT cf.{field} AS {field}
// FROM {$nested_query} cf
// WHERE duplicates > 1
// @endcode
$sub_query = db_select($nested_query, 'cf');
$sub_query
->fields('cf', array(
$field,
));
$sub_query
->condition('duplicate', 1, '>');
}
else {
// Sub Query pattern (case sensitive) :
// @code
// SELECT cf.{field} AS {field}, md5(cf.{field}) AS checksum,
// COUNT( * ) AS duplicate
// FROM node n
// INNER JOIN {table} cf ON cf.bundle = {node_type_machine_name}
// AND cf.entity_type = 'node' AND cf.entity_id = n.nid
// WHERE (
// n.type = {node_type_machine_name}
// )
// GROUP BY checksum
// HAVING count( duplicate ) > 1
// @endcode
// Sub Query pattern (case insensitive) :
// @code
// SELECT cf.{field} AS {field},
// COUNT( * ) AS duplicate
// FROM node n
// INNER JOIN {table} cf ON cf.bundle = {node_type_machine_name}
// AND cf.entity_type = 'node' AND cf.entity_id = n.nid
// WHERE (
// n.type = {node_type_machine_name}
// )
// GROUP BY {field}
// HAVING count( duplicate ) > 1
// @endcode
$sub_query = db_select('node', 'n');
$sub_query
->addJoin('INNER', $table, 'cf', 'cf.bundle = :bundle AND cf.entity_type = :entity_type AND n.nid = cf.entity_id ', array(
':bundle' => $node_type_machine_name,
':entity_type' => 'node',
));
$sub_query
->condition('n.type', $node_type_machine_name, '=');
$sub_query
->fields('cf', array(
$field,
));
$sub_query
->addExpression('COUNT(*)', 'duplicate');
if ($case_sensitive) {
$sub_query
->addExpression('md5(cf.' . $field . ')', 'checksum');
$sub_query
->groupBy('checksum');
}
else {
$sub_query
->groupBy($field);
}
$sub_query
->havingCondition('duplicate', 1, '>');
}
// Main Query pattern (case sensitive) :
// @code
// SELECT n.nid AS nid, n.uid AS uid, n.status AS status,
// n.created AS created, n.changed AS changed, n.title AS title,
// u.name AS name, cf.{field} AS {field}
// FROM node n
// INNER JOIN users u ON n.uid = u.uid
// INNER JOIN {table} cf ON cf.bundle = {node_type_machine_name}
// AND cf.entity_type = 'node' AND cf.entity_id = n.nid
// INNER JOIN (
// {sub_query}
// ) nn ON md5(cf.{field}) = md5(nn.{field})
// WHERE (
// n.type = {node_type_machine_name}
// )
// ORDER BY {field} ASC
// @endcode
// Main Query pattern (case insensitive) :
// @code
// SELECT n.nid AS nid, n.uid AS uid, n.status AS status,
// n.created AS created, n.changed AS changed, n.title AS title,
// u.name AS name, cf.{field} AS {field}
// FROM node n
// INNER JOIN users u ON n.uid = u.uid
// INNER JOIN {table} cf ON cf.bundle = {node_type_machine_name}
// AND cf.entity_type = 'node' AND cf.entity_id = n.nid
// INNER JOIN (
// {sub_query}
// ) nn ON cf.{field} = nn.{field}
// WHERE (
// n.type = {node_type_machine_name}
// )
// ORDER BY {field} ASC
// @endcode
$main_query = db_select('node', 'n');
$main_query
->fields('n', array(
'nid',
'uid',
'status',
'created',
'changed',
'title',
));
$main_query
->fields('u', array(
'name',
));
$main_query
->fields('cf', array(
$field,
));
$main_query
->addJoin('INNER', $table, 'cf', 'cf.bundle = :bundle AND cf.entity_type = :entity_type AND n.nid = cf.entity_id ', array(
':bundle' => $node_type_machine_name,
':entity_type' => 'node',
));
$main_query
->join('users', 'u', 'n.uid = u.uid');
if ($case_sensitive) {
$main_query
->join($sub_query, 'nn', 'md5(cf.' . $field . ') = md5(nn.' . $field . ')');
}
else {
if (Database::getConnection()
->databaseType() == 'pgsql') {
$main_query
->join($sub_query, 'nn', 'lower(cf.' . $field . ') = lower(nn.' . $field . ')');
}
else {
$main_query
->join($sub_query, 'nn', 'cf.' . $field . ' = nn.' . $field);
}
}
$main_query
->condition('n.type', $node_type_machine_name, '=');
$main_query
->orderBy($field, 'ASC');
$records = $main_query
->execute()
->fetchall();
}
}
}
return $records;
}