View source
<?php
require_once 'Matrix.php';
// Row count requested from the database by Recommender::topSimilarity() and
// Recommender::topPrediction() (passed as the count argument of db_query_range()).
define('TOP_N_LIMIT', 1000);
// Number of value tuples Recommender::batchInsert() puts into one INSERT statement.
define('INSERT_LIMIT', 5000);
/**
 * Base class shared by every recommender algorithm in this file.
 *
 * The input is a table (or a SELECT statement) with three columns: a "mouse"
 * id, a "cheese" id and a weight. Similarity scores are stored per pair of
 * mice in {recommender_similarity}; predictions are stored per (mouse, cheese)
 * pair in {recommender_prediction}.
 *
 * The compute*Memory/Database/Java() methods of this class only log an error
 * and throw; subclasses override the ones they actually implement.
 */
class Recommender {

  // Application name given by the caller and its numeric id from {recommender_app_map}.
  protected $appName;
  protected $appId;

  // Source table name, or a complete SQL statement starting with "SELECT ".
  protected $tableName;

  // Column names of the mouse id, cheese id and weight in the source.
  protected $fieldMouse;
  protected $fieldCheese;
  protected $fieldWeight;

  // Raw options array as passed to the constructor.
  protected $options;

  // Validated option values: see __construct().
  protected $performance;
  protected $missing;

  // Timestamp of construction; written to every stored row and used by
  // purgeOutdatedRecords() to tell fresh rows from stale ones.
  protected $created;

  protected $duplicate;

  // In-memory matrices and the id => matrix-index maps that go with them.
  protected $directMatrix;
  protected $mouseMap;
  protected $cheeseMap;
  protected $similarityMatrix;
  protected $predictionMatrix;

  // Cached results of getMouseNum() / getCheeseNum(); NULL until first computed.
  protected $mouseNum;
  protected $cheeseNum;

  /**
   * @param string $appName
   *   Application name; mapped to an id through convertAppId().
   * @param string $tableName
   *   Table name, or an SQL statement beginning with "SELECT ".
   * @param string $fieldMouse
   * @param string $fieldCheese
   * @param string $fieldWeight
   * @param array $options
   *   Recognised keys in this class:
   *   - 'performance': 'database', 'memory' or 'java'; anything else => 'auto'.
   *   - 'missing': 'none' or 'zero'; anything else => 'none'.
   *   - 'duplicate': 'keep' or 'remove'; anything else => 'remove'.
   *   Subclasses read further keys in initialize().
   */
  public function __construct($appName, $tableName, $fieldMouse, $fieldCheese, $fieldWeight, $options = array()) {
    $this->appName = $appName;
    $this->appId = self::convertAppId($appName);
    $this->tableName = $tableName;
    $this->fieldMouse = $fieldMouse;
    $this->fieldCheese = $fieldCheese;
    $this->fieldWeight = $fieldWeight;
    $this->options = $options;
    $this->created = time();
    $this->mouseNum = NULL;
    $this->cheeseNum = NULL;

    $this->performance = self::pickOption($options, 'performance', array('database', 'memory', 'java'), 'auto');
    $this->missing = self::pickOption($options, 'missing', array('none', 'zero'), 'none');
    $this->duplicate = self::pickOption($options, 'duplicate', array('keep', 'remove'), 'remove');

    $this->initialize();
  }

  /**
   * Returns $options[$key] when it is one of $allowed, otherwise $default.
   *
   * The comparison is strict: with loose comparison values such as 0 or TRUE
   * compare equal to the option strings on older PHP versions and would slip
   * through validation.
   */
  private static function pickOption($options, $key, $allowed, $default) {
    if (isset($options[$key]) && in_array($options[$key], $allowed, TRUE)) {
      return $options[$key];
    }
    return $default;
  }

  /**
   * Hook called at the end of the constructor; subclasses read their own options here.
   */
  protected function initialize() {
  }

  /**
   * Tells whether the configured source is an SQL statement rather than a table name.
   */
  private function tableIsQuery() {
    return stripos($this->tableName, 'SELECT ') === 0;
  }

  /**
   * Gets the input data ready for the chosen computation mode.
   *
   * 'database' stages the source through processTable(); 'memory' loads it
   * into $directMatrix (sparse unless $missing is 'zero'). Any other value
   * does nothing.
   */
  protected function prepareData($performance, $missing = 'none') {
    if ($performance == 'database') {
      // $missing is not acted upon in database mode.
      $this->processTable();
    }
    elseif ($performance == 'memory') {
      $this->loadDirectMatrix($missing != 'zero');
    }
  }

  /**
   * Copies a SELECT-type source into {recommender_helper_staging}.
   *
   * Afterwards the table and field properties point at the staging table, so
   * later SQL can treat the source as an ordinary table. Plain table names are
   * left alone.
   */
  protected function processTable() {
    if (!$this->tableIsQuery()) {
      return;
    }
    // Without a weight column (fieldWeight is NULL) the original statement
    // degenerated into "SELECT a, b,  FROM ...". Stage a constant 1 instead
    // and keep fieldWeight NULL so callers that test for NULL still see it.
    $hasWeight = $this->fieldWeight !== NULL;
    $weight = $hasWeight ? $this->fieldWeight : '1';
    db_query("TRUNCATE {recommender_helper_staging}");
    db_query("INSERT INTO {recommender_helper_staging} SELECT {$this->fieldMouse}, {$this->fieldCheese}, {$weight} FROM ({$this->tableName}) sql_table");
    $this->tableName = "recommender_helper_staging";
    $this->fieldMouse = "mouse_id";
    $this->fieldCheese = "cheese_id";
    if ($hasWeight) {
      $this->fieldWeight = "weight";
    }
  }

  /**
   * Returns the matrix index of $id in $map, assigning the next free index to unseen ids.
   */
  private static function indexFor(&$map, $id) {
    if (!array_key_exists($id, $map)) {
      $map[$id] = count($map);
    }
    return $map[$id];
  }

  /**
   * Loads the mouse x cheese weights into $directMatrix and rebuilds both id maps.
   *
   * @param bool $sparse
   *   TRUE creates a 'SparseMatrix', FALSE a 'RealMatrix'.
   */
  protected function loadDirectMatrix($sparse = FALSE) {
    watchdog('recommender', "Please be patient while loading data into memory. This step may fail if you don't have enough memory");
    if ($this->tableIsQuery()) {
      $sql = $this->tableName;
    }
    else {
      // Weights of repeated (mouse, cheese) rows are summed.
      $sql = "SELECT {$this->fieldMouse}, {$this->fieldCheese}, SUM({$this->fieldWeight}) {$this->fieldWeight}\n FROM {{$this->tableName}} GROUP BY {$this->fieldMouse}, {$this->fieldCheese}";
    }
    $result = db_query($sql);
    $type = $sparse ? 'SparseMatrix' : 'RealMatrix';
    $this->directMatrix = Matrix::create($type, $this->getMouseNum(), $this->getCheeseNum());
    $this->mouseMap = array();
    $this->cheeseMap = array();
    while ($line = db_fetch_array($result)) {
      $row = self::indexFor($this->mouseMap, $line[$this->fieldMouse]);
      $column = self::indexFor($this->cheeseMap, $line[$this->fieldCheese]);
      $this->directMatrix->set($row, $column, $line[$this->fieldWeight]);
    }
  }

  /**
   * Writes every similarity score that is not NaN and is >= $lowerbound to {recommender_similarity}.
   */
  protected function saveSimilarityMatrix($lowerbound = 0) {
    watchdog('recommender', "Saving similarity result to database. Please wait.");
    // index => mouse id
    $idByIndex = array_flip($this->mouseMap);
    $values = $this->similarityMatrix->raw_values();
    $data = array();
    foreach ($idByIndex as $v1 => $mouse1) {
      foreach ($idByIndex as $v2 => $mouse2) {
        if (!isset($values[$v1][$v2])) {
          continue;
        }
        $score = $values[$v1][$v2];
        if (!is_nan($score) && $score >= $lowerbound) {
          $data[] = "({$this->appId}, {$mouse1}, {$mouse2}, {$score}, {$this->created})";
        }
      }
    }
    $this->batchInsert("INSERT INTO {recommender_similarity}(app_id, mouse1_id, mouse2_id, similarity, created) VALUES", $data);
  }

  /**
   * Loads this application's rows of {recommender_similarity} into $similarityMatrix.
   *
   * Each stored pair is written symmetrically. An already populated $mouseMap
   * (typically built by loadDirectMatrix()) is reused, so that both matrices
   * share one index space; previously the map was rebuilt from the order of
   * the similarity rows, which need not match the direct matrix.
   */
  protected function loadSimilarityMatrix() {
    watchdog('recommender', "Please be patient while loading similarity data into memory. This step may fail if you don't have enough memory");
    $sql = "SELECT mouse1_id, mouse2_id, similarity FROM {recommender_similarity} WHERE app_id={$this->appId}";
    $result = db_query($sql);
    $m = $this->getMouseNum();
    $this->similarityMatrix = Matrix::create('SparseMatrix', $m, $m);
    if (!is_array($this->mouseMap)) {
      $this->mouseMap = array();
    }
    while ($line = db_fetch_array($result)) {
      $first = self::indexFor($this->mouseMap, $line["mouse1_id"]);
      $second = self::indexFor($this->mouseMap, $line["mouse2_id"]);
      $weight = $line["similarity"];
      $this->similarityMatrix->set($first, $second, $weight);
      $this->similarityMatrix->set($second, $first, $weight);
    }
  }

  /**
   * Number of distinct mouse ids in the source; cached unless $may_cache is FALSE.
   */
  protected function getMouseNum($may_cache = TRUE) {
    if (!$may_cache || $this->mouseNum === NULL) {
      $this->mouseNum = $this->getEntityNum($this->fieldMouse);
    }
    return $this->mouseNum;
  }

  /**
   * Number of distinct cheese ids in the source; cached unless $may_cache is FALSE.
   */
  protected function getCheeseNum($may_cache = TRUE) {
    if (!$may_cache || $this->cheeseNum === NULL) {
      $this->cheeseNum = $this->getEntityNum($this->fieldCheese);
    }
    return $this->cheeseNum;
  }

  /**
   * Counts the distinct values of $field in the source table or query.
   *
   * @return int
   */
  protected function getEntityNum($field) {
    $source = $this->tableIsQuery() ? "({$this->tableName}) sql_table" : "{{$this->tableName}}";
    // db_result() hands back the count as a string (or FALSE); normalise it.
    return (int) db_result(db_query("SELECT COUNT(DISTINCT {$field}) FROM " . $source));
  }

  /**
   * Drops the three in-memory matrices.
   *
   * The properties are set to NULL rather than unset() so they stay declared.
   */
  protected function cleanupMemory() {
    $this->directMatrix = NULL;
    $this->similarityMatrix = NULL;
    $this->predictionMatrix = NULL;
  }

  /**
   * Computes similarity scores using the mode chosen by the 'performance' option.
   *
   * 'auto' and unknown values are treated like 'memory'.
   */
  public function computeSimilarity() {
    watchdog("recommender", "Computing similarity. Might take a long time. Please be patient.");
    switch ($this->performance) {
      case 'database':
        $this->prepareData('database', $this->missing);
        $this->computeSimilarityDatabase();
        break;

      case 'java':
        $this->computeSimilarityJava();
        break;

      case 'memory':
      case 'auto':
      default:
        $this->prepareData('memory', $this->missing);
        $this->computeSimilarityMemory();
    }
  }

  /**
   * Logs $msg as a watchdog error and throws it as an Exception.
   */
  private function unsupported($msg) {
    watchdog("recommender", $msg, array(), WATCHDOG_ERROR);
    throw new Exception($msg);
  }

  /**
   * Not implemented in the base class: always throws.
   */
  protected function computeSimilarityMemory() {
    $this->unsupported("ERROR: computing in memory is not support. Exit.");
  }

  /**
   * Not implemented in the base class: always throws.
   */
  protected function computeSimilarityDatabase() {
    $this->unsupported("ERROR: computing in database is not support. Exit.");
  }

  /**
   * Not implemented in the base class: always throws.
   */
  protected function computeSimilarityJava() {
    $this->unsupported("ERROR: computing using java is not support. Exit.");
  }

  /**
   * Computes predictions using the mode chosen by the 'performance' option.
   *
   * In memory mode the stored similarity scores are loaded first.
   */
  public function computePrediction() {
    watchdog("recommender", "Computing prediction. Might take a long time. Please be patient.");
    switch ($this->performance) {
      case 'database':
        $this->prepareData('database', $this->missing);
        $this->computePredictionDatabase();
        break;

      case 'java':
        $this->computePredictionJava();
        break;

      case 'memory':
      case 'auto':
      default:
        $this->prepareData('memory', $this->missing);
        $this->loadSimilarityMatrix();
        $this->computePredictionMemory();
    }
  }

  /**
   * Not implemented in the base class: always throws.
   */
  protected function computePredictionMemory() {
    $this->unsupported("ERROR: computing in memory is not support. Exit.");
  }

  /**
   * Not implemented in the base class: always throws.
   */
  protected function computePredictionDatabase() {
    $this->unsupported("ERROR: computing in database is not support. Exit.");
  }

  /**
   * Not implemented in the base class: always throws.
   */
  protected function computePredictionJava() {
    $this->unsupported("ERROR: computing using java is not support. Exit.");
  }

  /**
   * Looks up the id of $appName in {recommender_app_map}, registering the name first if needed.
   */
  public static function convertAppId($appName) {
    $lookup = "SELECT app_id FROM {recommender_app_map} WHERE app_name='%s'";
    $id = db_result(db_query($lookup, $appName));
    if (empty($id)) {
      // Standard VALUES keyword (the original used the MySQL-only VALUE spelling).
      db_query("INSERT INTO {recommender_app_map}(app_name) VALUES('%s')", $appName);
      $id = db_result(db_query($lookup, $appName));
    }
    return $id;
  }

  /**
   * Returns the numeric application id.
   */
  public function getAppId() {
    return $this->appId;
  }

  /**
   * Deletes every stored row of the application, including its name mapping.
   */
  public static function purgeApp($appName) {
    $app_id = self::convertAppId($appName);
    db_query("DELETE FROM {recommender_similarity} WHERE app_id=%d", $app_id);
    db_query("DELETE FROM {recommender_prediction} WHERE app_id=%d", $app_id);
    db_query("DELETE FROM {recommender_slopeone_dev} WHERE app_id=%d", $app_id);
    // The mapping goes last so the id stays resolvable while the data is removed.
    db_query("DELETE FROM {recommender_app_map} WHERE app_id=%d", $app_id);
  }

  /**
   * Removes this application's rows of {recommender_<$table>} not written by the current run.
   *
   * A row counts as outdated when its 'created' value differs from $this->created.
   */
  protected function purgeOutdatedRecords($table) {
    update_sql("DELETE FROM {recommender_{$table}} WHERE app_id={$this->appId} AND created<>{$this->created}");
  }

  /**
   * Runs $insert_sql with the tuples in $data, INSERT_LIMIT tuples per statement.
   *
   * @param string $insert_sql
   *   Statement prefix ending in "VALUES".
   * @param array $data
   *   Ready-made "(...)" tuples. Taken by reference but not modified here.
   */
  protected function batchInsert($insert_sql, &$data) {
    foreach (array_chunk($data, INSERT_LIMIT, TRUE) as $chunk) {
      update_sql($insert_sql . implode(',', $chunk));
    }
  }

  /**
   * Returns the stored similarity of two mice, or NAN when no row exists.
   */
  public function retrieveSimilarity($mouse1, $mouse2) {
    $result = @db_query("SELECT similarity FROM {recommender_similarity}\n WHERE app_id=%d AND mouse1_id=%d AND mouse2_id=%d", $this->appId, $mouse1, $mouse2);
    $similarity = db_result($result);
    return $similarity !== FALSE ? $similarity : NAN;
  }

  /**
   * Returns the stored prediction for a (mouse, cheese) pair, or NAN when no row exists.
   */
  public function retrievePrediction($mouse, $cheese) {
    $result = @db_query("SELECT prediction FROM {recommender_prediction}\n WHERE app_id=%d AND mouse_id=%d AND cheese_id=%d", $this->appId, $mouse, $cheese);
    $prediction = db_result($result);
    return $prediction !== FALSE ? $prediction : NAN;
  }

  /**
   * Collects up to $topN rows of $result, keeping only rows accepted by $testFunc.
   *
   * @param mixed $testFunc
   *   NULL to accept every row, or a callback receiving the row array and
   *   returning a truthy value for rows to keep.
   */
  private function collectTop($result, $topN, $testFunc) {
    $list = array();
    while (count($list) < $topN && ($item = db_fetch_array($result))) {
      if ($testFunc === NULL || call_user_func($testFunc, $item)) {
        $list[] = $item;
      }
    }
    return $list;
  }

  /**
   * Returns the mice most similar to $mouse, best first.
   *
   * At most TOP_N_LIMIT rows are examined, so fewer than $topN items may come
   * back when $testFunc rejects many of them.
   *
   * @return array
   *   List of arrays with the keys 'id' and 'score'.
   */
  public function topSimilarity($mouse, $topN, $testFunc = NULL) {
    $result = db_query_range("SELECT mouse2_id id, similarity score FROM {recommender_similarity}\n WHERE app_id=%d AND mouse1_id=%d AND mouse2_id<>mouse1_id\n ORDER BY similarity DESC, created DESC, mouse2_id ASC", $this->appId, $mouse, 0, TOP_N_LIMIT);
    return $this->collectTop($result, $topN, $testFunc);
  }

  /**
   * Returns the cheeses with the highest predicted score for $mouse, best first.
   *
   * At most TOP_N_LIMIT rows are examined, so fewer than $topN items may come
   * back when $testFunc rejects many of them.
   *
   * @return array
   *   List of arrays with the keys 'id' and 'score'.
   */
  public function topPrediction($mouse, $topN, $testFunc = NULL) {
    // The final tie-breaker is cheese_id: mouse_id is fixed by the WHERE
    // clause, so ordering by it left ties in arbitrary order.
    $result = db_query_range("SELECT cheese_id id, prediction score FROM {recommender_prediction}\n WHERE app_id=%d AND mouse_id=%d\n ORDER BY prediction DESC, created DESC, cheese_id ASC", $this->appId, $mouse, 0, TOP_N_LIMIT);
    return $this->collectTop($result, $topN, $testFunc);
  }

}
/**
 * Correlation-based recommender computed in memory.
 *
 * Similarity between mice is Matrix::correlation() of the direct matrix.
 * A prediction is the mouse's mean weight plus the similarity-weighted
 * average of the other mice's deviations from their own means.
 *
 * Options read in addition to those of Recommender:
 * - 'lowerbound': similarities below it and predictions not above it are not
 *   stored. Defaults to -INF.
 * - 'sim_pred': truthy when computePrediction() directly follows
 *   computeSimilarity() on the same object, so the matrices kept in memory
 *   are reused instead of being reloaded.
 * - 'knn': when > 0, only the knn most similar mice contribute to a prediction.
 */
class CorrelationRecommender extends Recommender {

  private $lowerbound;
  private $sim_pred;

  // Number of nearest neighbours to use; 0 means "use every mouse".
  // Declared explicitly: it used to be created as a dynamic property.
  protected $knn;

  // Row vectors of the direct matrix, set by computePredictionMemory().
  protected $mouseVectors;

  /**
   * Reads the 'lowerbound', 'sim_pred' and 'knn' options.
   */
  protected function initialize() {
    $lowerbound = isset($this->options['lowerbound']) ? $this->options['lowerbound'] : NULL;
    $this->lowerbound = ($lowerbound === NULL) ? -INF : floatval($lowerbound);

    $this->sim_pred = !empty($this->options['sim_pred']);

    // Normalised to int because the value is used as an array_slice() length.
    $this->knn = isset($this->options['knn']) ? (int) $this->options['knn'] : 0;
  }

  /**
   * Computes the correlation matrix, stores it and purges older similarity rows.
   *
   * The matrices are released afterwards unless 'sim_pred' asks to keep them.
   */
  protected function computeSimilarityMemory() {
    watchdog('recommender', "Computing similarity scores in memory. Could be CPU resource intensive. Please be patient");
    $this->similarityMatrix = Matrix::correlation($this->directMatrix);
    $this->saveSimilarityMatrix($this->lowerbound);
    $this->purgeOutdatedRecords('similarity');
    if ($this->sim_pred == FALSE) {
      $this->cleanupMemory();
    }
  }

  /**
   * Computes predictions in memory, regardless of the 'performance' option.
   */
  public function computePrediction() {
    watchdog('recommender', "Only support prediction in-memory computation.");
    if ($this->sim_pred == FALSE) {
      $this->prepareData('memory', $this->missing);
      $this->loadSimilarityMatrix();
    }
    $this->computePredictionMemory();
  }

  /**
   * Unfinished earlier variant of computePredictionMemory().
   *
   * It only allocates an empty prediction matrix. Nothing in this file calls
   * it; it is kept because it is publicly callable.
   */
  function _computePredictionMemory() {
    $m = $this->getMouseNum();
    // $n was never defined here, so the matrix was created with a NULL dimension.
    $n = $this->getCheeseNum();
    $this->predictionMatrix = Matrix::create('SparseMatrix', $m, $n);
    // Fetched as in computePredictionMemory(), but this stub never uses the result.
    $mouseVectors = $this->directMatrix->row_vectors();
  }

  /**
   * Computes and stores the predictions, then purges older rows and frees memory.
   *
   * Expects $directMatrix, $similarityMatrix and the id maps to be loaded.
   */
  protected function computePredictionMemory() {
    $this->mouseVectors = $this->directMatrix->row_vectors();
    $m = $this->getMouseNum();
    $n = $this->getCheeseNum();
    $nan = ($this->missing == 'none');

    // Deviation of every known weight from its mouse's mean, indexed
    // [mouse index][cheese index]; NaN cells are left out.
    $aux_matrix = array();
    foreach ($this->mouseVectors as $mouse_index => $mouse_vec) {
      $mean = $mouse_vec->mean(TRUE);
      for ($cheese_index = 0; $cheese_index < $n; $cheese_index++) {
        $weight = $mouse_vec->get($cheese_index);
        if (!is_nan($weight)) {
          $aux_matrix[$mouse_index][$cheese_index] = $weight - $mean;
        }
      }
    }

    $values = $this->similarityMatrix->raw_values();
    $this->predictionMatrix = Matrix::create('SparseMatrix', $m, $n);
    $data = array();

    foreach ($this->mouseMap as $mouse_id => $mouse_index) {
      // NULL: every mouse may contribute. Otherwise: index => similarity of
      // the knn best neighbours.
      $neighbors = NULL;
      if ($this->knn > 0) {
        $sim_scores = isset($values[$mouse_index]) ? $values[$mouse_index] : array();
        if (empty($sim_scores)) {
          continue;
        }
        arsort($sim_scores);
        // preserve_keys must be TRUE: by default array_slice() renumbers
        // integer keys, which turned the neighbour list into 0..knn-1.
        $neighbors = array_slice($sim_scores, 0, $this->knn, TRUE);
      }

      foreach ($this->cheeseMap as $cheese_id => $cheese_index) {
        if ($this->duplicate == 'remove' && $this->recordExists($mouse_id, $cheese_id, $nan)) {
          continue;
        }
        $numerator = 0;
        $denominator = 0;
        for ($j = 0; $j < $m; $j++) {
          if ($j == $mouse_index) {
            continue;
          }
          if ($neighbors !== NULL && !array_key_exists($j, $neighbors)) {
            continue;
          }
          // Also covers mice that have no known weight at all; the earlier
          // array_key_exists() call was handed NULL for those rows.
          if (!isset($aux_matrix[$j][$cheese_index])) {
            continue;
          }
          $sim = $this->similarityMatrix->get($j, $mouse_index);
          if (is_nan($sim)) {
            continue;
          }
          $numerator += $aux_matrix[$j][$cheese_index] * $sim;
          $denominator += abs($sim);
        }
        if ($denominator != 0) {
          // NOTE(review): the mean is taken with ($nan) here but without it
          // when building $aux_matrix - confirm against Matrix.php that this
          // is intended.
          $prediction = $this->mouseVectors[$mouse_index]->mean(TRUE, $nan) + $numerator / $denominator;
          if ($prediction > $this->lowerbound) {
            $data[] = "({$this->appId}, {$mouse_id}, {$cheese_id}, {$prediction}, {$this->created})";
          }
        }
      }
    }

    $this->batchInsert($this->savePredictionSql(), $data);
    $this->purgeOutdatedRecords('prediction');
    $this->cleanupMemory();
  }

  /**
   * INSERT prefix used to store predictions; the column order matches the generated tuples.
   */
  protected function savePredictionSql() {
    return "INSERT INTO {recommender_prediction}(app_id, mouse_id, cheese_id, prediction, created) VALUES";
  }

  /**
   * Reads the direct-matrix cell addressed by mouse id and cheese id.
   */
  private function getFromDirectMatrix($mouse_id, $cheese_id) {
    return $this->directMatrix->get($this->mouseMap[$mouse_id], $this->cheeseMap[$cheese_id]);
  }

  /**
   * Tells whether the input already holds a weight for the pair.
   *
   * @param bool $nan
   *   TRUE: a NaN cell means "no record". FALSE: a cell equal to 0 means "no record".
   */
  private function recordExists($mouse_id, $cheese_id, $nan) {
    $value = $this->getFromDirectMatrix($mouse_id, $cheese_id);
    return $nan ? !is_nan($value) : $value != 0;
  }

}
/**
 * Correlation recommender used as-is: nothing from CorrelationRecommender is
 * overridden, so the configured mouse and cheese fields keep their roles.
 */
class User2UserRecommender extends CorrelationRecommender {
}
/**
 * Correlation recommender with the mouse and cheese roles exchanged.
 *
 * The two field names are swapped once during initialisation, so every
 * inherited computation treats the configured cheese column as the mouse.
 * Predictions are written back with the two id columns swapped again.
 */
class Item2ItemRecommender extends CorrelationRecommender {

  /**
   * Runs the parent initialisation, then exchanges the two field names.
   */
  protected function initialize() {
    parent::initialize();
    $swapped = array($this->fieldCheese, $this->fieldMouse);
    $this->fieldMouse = $swapped[0];
    $this->fieldCheese = $swapped[1];
  }

  /**
   * Delegates unchanged to the parent implementation.
   */
  protected function computePredictionMemory() {
    parent::computePredictionMemory();
  }

  /**
   * INSERT prefix for predictions, listing cheese_id before mouse_id.
   */
  protected function savePredictionSql() {
    $columns = array('app_id', 'cheese_id', 'mouse_id', 'prediction', 'created');
    return "INSERT INTO {recommender_prediction}(" . implode(', ', $columns) . ") VALUES";
  }

}
/**
 * Similarity from co-occurrence, computed entirely in SQL.
 *
 * Two mice are related through every cheese they share. Their score is the
 * number of shared rows, or, when a weight field is configured, the sum of
 * the two rows' average weight.
 */
class CooccurrenceRecommender extends Recommender {

  /**
   * Same as the parent constructor, except that $fieldWeight may be omitted.
   */
  function __construct($appName, $tableName, $fieldMouse, $fieldCheese, $fieldWeight = NULL, $options = array()) {
    parent::__construct($appName, $tableName, $fieldMouse, $fieldCheese, $fieldWeight, $options);
  }

  /**
   * Always computes in the database, whatever the 'performance' option says.
   */
  function computeSimilarity() {
    $this->prepareData('database');
    $this->computeSimilarityDatabase();
  }

  /**
   * Inserts the scores with one self-join of the source table, then purges older rows.
   */
  protected function computeSimilarityDatabase() {
    watchdog("recommender", "Computing similarity in database. Might take a long time. Please be patient.");

    $mouse = $this->fieldMouse;
    $cheese = $this->fieldCheese;
    $weight = $this->fieldWeight;
    $table = $this->tableName;

    // Plain row count without a weight field, summed mean weight with one.
    $score = ($weight !== NULL) ? "SUM((n1.{$weight}+n2.{$weight})/2)" : "COUNT(*)";

    $insert = "INSERT INTO {recommender_similarity}(app_id, mouse1_id, mouse2_id, similarity, created)\n SELECT {$this->appId}, n1.{$mouse}, n2.{$mouse}, {$score}, {$this->created}\n FROM {{$table}} n1 INNER JOIN {{$table}} n2 ON n1.{$cheese}=n2.{$cheese}\n GROUP BY n1.{$mouse}, n2.{$mouse}";
    update_sql($insert);

    $this->purgeOutdatedRecords('similarity');
  }

}
/**
 * Slope One predictor computed entirely in SQL.
 *
 * The average weight difference of every cheese pair is stored in
 * {recommender_slopeone_dev}; predictions are derived from those differences.
 *
 * Option read in addition to those of Recommender:
 * - 'extension': 'basic' (plain average) or 'weighted' (average weighted by
 *   the number of mice behind each difference). Any other value is treated
 *   as 'weighted'.
 */
class SlopeOneRecommender extends Recommender {

  // Either 'basic' or 'weighted'. The property was declared under the
  // misspelled name $extention, so the value really lived in a dynamic property.
  protected $extension;

  /**
   * Reads and validates the 'extension' option.
   */
  protected function initialize() {
    $extension = isset($this->options['extension']) ? $this->options['extension'] : 'weighted';
    $this->extension = ($extension === 'basic') ? 'basic' : 'weighted';
  }

  /**
   * Always computes in the database, whatever the 'performance' option says.
   */
  public function computePrediction() {
    $this->prepareData('database');
    $this->computePredictionDatabase();
  }

  /**
   * Rebuilds the pairwise deviations, inserts the predictions and purges older rows.
   */
  protected function computePredictionDatabase() {
    $app_id = $this->appId;
    $table_name = $this->tableName;
    $field_mouse = $this->fieldMouse;
    $field_cheese = $this->fieldCheese;
    $field_weight = $this->fieldWeight;
    $created = $this->created;

    // Step 1: refresh count and average difference for every ordered pair of
    // distinct cheeses weighted by the same mouse.
    db_query("DELETE FROM {recommender_slopeone_dev} WHERE app_id=%d", $app_id);
    db_query("INSERT INTO {recommender_slopeone_dev}(app_id, cheese1_id, cheese2_id, count, dev)\n SELECT %d, n1.{$field_cheese}, n2.{$field_cheese},\n COUNT(*), AVG(n1.{$field_weight}-n2.{$field_weight}) FROM {{$table_name}} n1\n INNER JOIN {{$table_name}} n2 ON n1.{$field_mouse}=n2.{$field_mouse}\n AND n1.{$field_cheese} <> n2.{$field_cheese}\n GROUP BY n1.{$field_cheese}, n2.{$field_cheese}", $app_id);

    // Step 2: predictions. Anything that is not 'basic' uses the weighted
    // formula; an unknown value used to leave the expression undefined and
    // produce invalid SQL.
    if ($this->extension == 'basic') {
      $extension_sql = "AVG(t.{$field_weight}+p.dev)";
    }
    else {
      $extension_sql = "SUM((t.{$field_weight}+p.dev)*p.count)/SUM(p.count)";
    }
    db_query("INSERT INTO {recommender_prediction}(app_id, mouse_id, cheese_id, prediction, created)\n SELECT %d, t.{$field_mouse}, p.cheese1_id, {$extension_sql}, %d\n FROM {recommender_slopeone_dev} p INNER JOIN {{$table_name}} t ON p.cheese2_id=t.{$field_cheese}\n GROUP BY t.{$field_mouse}, p.cheese1_id", $app_id, $created);

    // Step 3: optionally drop predictions for pairs already present in the input.
    if ($this->duplicate == 'remove') {
      db_query("DELETE FROM {recommender_prediction} WHERE app_id=%d AND created=%d AND (mouse_id, cheese_id)\n IN (SELECT {$field_mouse}, {$field_cheese} FROM {{$table_name}})", $app_id, $created);
    }

    $this->purgeOutdatedRecords('prediction');
  }

}
/**
 * Placeholder with no implementation of its own: every compute*() method is
 * inherited from Recommender, whose Memory/Database/Java variants log an
 * error and throw an Exception.
 */
class SVDRecommender extends Recommender {
}