class CorrelationRecommender in Recommender API 6.2
The recommender implementation for the classical correlation-coefficient-based algorithm.
Hierarchy
- class \Recommender
- class \CorrelationRecommender
Expanded class hierarchy of CorrelationRecommender
File
- ./
Recommender.php, line 462
View source
/**
 * Recommender based on the classical correlation-coefficient algorithm.
 *
 * Similarity between mice is computed with Matrix::correlation() on the
 * in-memory direct matrix; predictions are the mouse's mean rating plus the
 * similarity-weighted average of the other mice's deviations from their own
 * means.
 *
 * Options read from $this->options in initialize():
 * - 'lowerbound': threshold handed to saveSimilarityMatrix(); a prediction is
 *   only saved when it is strictly greater than it. Defaults to -INF.
 * - 'sim_pred': when truthy, in-memory data is kept after the similarity
 *   computation so that computePrediction() does not have to reload it.
 * - 'knn': number of nearest neighbors used for a prediction; 0 (the default)
 *   means all neighbors are used.
 */
class CorrelationRecommender extends Recommender {

  // Threshold for saving similarity/prediction values (float, -INF = keep all).
  private $lowerbound;

  // Whether prediction directly follows the similarity computation in memory.
  private $sim_pred;

  // Row vectors of the direct matrix, filled by computePredictionMemory().
  protected $mouseVectors;

  // NOTE(review): $this->knn is assigned in initialize() but is not declared in
  // this class; presumably Recommender declares it -- confirm.

  /**
   * Reads 'lowerbound', 'sim_pred' and 'knn' from $this->options and
   * normalizes them, applying the defaults when an option is absent.
   */
  protected function initialize() {
    // No lowerbound given: save everything.
    $this->lowerbound = isset($this->options['lowerbound'])
      ? floatval($this->options['lowerbound'])
      : -INF;

    // Any truthy value enables it; everything else (or absence) disables it.
    $this->sim_pred = !empty($this->options['sim_pred']);

    // 0 means: take all the neighbors, not only the k nearest. Negative or
    // non-numeric values are treated the same way.
    $this->knn = isset($this->options['knn'])
      ? max(0, intval($this->options['knn']))
      : 0;
  }

  /**
   * Computes the similarity matrix from the already loaded direct matrix,
   * saves it, purges outdated similarity records and, unless a prediction run
   * is going to reuse the data ('sim_pred'), releases the in-memory data.
   */
  protected function computeSimilarityMemory() {
    // Data is already loaded by $this->loadDirectMatrix().
    watchdog('recommender', "Computing similarity scores in memory. Could be CPU resource intensive. Please be patient");
    $this->similarityMatrix = Matrix::correlation($this->directMatrix);

    // Persist and clean up.
    $this->saveSimilarityMatrix($this->lowerbound);
    $this->purgeOutdatedRecords('similarity');
    if (!$this->sim_pred) {
      $this->cleanupMemory();
    }
  }

  /**
   * Computes predictions in memory, (re)loading the data first when needed.
   *
   * TODO: think about whether to elevate this prediction method to the super
   * class.
   */
  public function computePrediction() {
    watchdog('recommender', "Only support prediction in-memory computation.");
    if (!$this->sim_pred) {
      // The data has to be reloaded; with 'sim_pred' it is still in memory
      // after the similarity computation.
      $this->prepareData('memory', $this->missing);
      // The similarity matrix is needed too.
      $this->loadSimilarityMatrix();
    }
    $this->computePredictionMemory();
  }

  /**
   * Caution: this is a coding-in-progress function for [#483112].
   *
   * $this->similarityMatrix should be loaded before calling this function.
   * At the moment it only allocates an empty prediction matrix.
   */
  public function _computePredictionMemory() {
    $m = $this->getMouseNum();
    // $n was previously used without ever being defined.
    $n = $this->getCheeseNum();
    $this->predictionMatrix = Matrix::create('SparseMatrix', $m, $n);
    $mouseVectors = $this->directMatrix->row_vectors();
    foreach ($this->mouseMap as $mouse_id => $mouse_index) {
      foreach ($this->cheeseMap as $cheese_id => $cheese_index) {
        // TODO [#483112]: prediction per mouse-cheese pair not implemented.
      }
    }
  }

  /**
   * Computes a prediction for every mouse-cheese pair and saves the results.
   *
   * Works on $this->directMatrix and $this->similarityMatrix, which must be
   * loaded in memory; nothing is read from the database here. Rows are
   * inserted through batchInsert(), outdated prediction records are purged and
   * the in-memory data is released afterwards.
   *
   * TODO: written cursorily, might need refactoring. [#483112]
   * 1. move code to Matrix.php?
   * 2. elevate code up to the super class?
   */
  protected function computePredictionMemory() {
    $this->mouseVectors = $this->directMatrix->row_vectors();
    $m = $this->getMouseNum();
    $n = $this->getCheeseNum();
    $nan = ($this->missing == 'none');
    $use_knn = ($this->knn > 0);

    // Normalized data: $aux_matrix[mouse][cheese] = rating minus the mouse's
    // mean. Missing (NaN) ratings get no entry at all.
    $aux_matrix = array();
    foreach ($this->mouseVectors as $mouse_index => $mouse_vec) {
      $mean = $mouse_vec->mean(TRUE);
      for ($cheese_index = 0; $cheese_index < $n; $cheese_index++) {
        $rating = $mouse_vec->get($cheese_index);
        if (!is_nan($rating)) {
          $aux_matrix[$mouse_index][$cheese_index] = $rating - $mean;
        }
      }
    }

    // The raw similarity rows are only needed to rank neighbors.
    $values = $use_knn ? $this->similarityMatrix->raw_values() : array();

    // Not really needed, because data is saved directly to the database.
    $this->predictionMatrix = Matrix::create('SparseMatrix', $m, $n);

    // Calculate the prediction for each mouse-cheese pair and collect the rows.
    // NOTE(review): rows are assembled by string interpolation; this assumes
    // the ids and numbers are trusted, numeric values -- confirm.
    $data = array();
    foreach ($this->mouseMap as $mouse_id => $mouse_index) {
      // NULL means "no restriction"; otherwise a lookup keyed by mouse index.
      $neighbors = NULL;
      if ($use_knn) {
        $sim_scores = isset($values[$mouse_index]) ? $values[$mouse_index] : array();
        $neighbors = $this->correlationKnnNeighbors($sim_scores, $mouse_index);
        if (empty($neighbors)) {
          // No usable neighbor at all: nothing can be predicted for this mouse.
          continue;
        }
      }

      foreach ($this->cheeseMap as $cheese_id => $cheese_index) {
        if ($this->duplicate == 'remove' && $this->recordExists($mouse_id, $cheese_id, $nan)) {
          continue;
        }

        $numerator = 0;
        $denominator = 0;
        for ($j = 0; $j < $m; $j++) {
          // Skip my own rating.
          if ($j == $mouse_index) {
            continue;
          }
          // Skip mice that are not among the k nearest neighbors.
          if ($neighbors !== NULL && !isset($neighbors[$j])) {
            continue;
          }
          // Skip mice without a rating for this cheese. isset() also covers
          // mice that have no ratings at all and hence no row in $aux_matrix.
          if (!isset($aux_matrix[$j][$cheese_index])) {
            continue;
          }
          $sim = $this->similarityMatrix->get($j, $mouse_index);
          if (is_nan($sim)) {
            continue;
          }
          $numerator += $aux_matrix[$j][$cheese_index] * $sim;
          $denominator += abs($sim);
        }

        if ($denominator != 0) {
          $prediction = $this->mouseVectors[$mouse_index]->mean(TRUE, $nan) + $numerator / $denominator;
          // Note: the same lowerbound setting is used for prediction generation.
          if ($prediction > $this->lowerbound) {
            $data[] = "({$this->appId}, {$mouse_id}, {$cheese_id}, {$prediction}, {$this->created})";
          }
        }
      }
    }

    $this->batchInsert($this->savePredictionSql(), $data);
    $this->purgeOutdatedRecords('prediction');
    $this->cleanupMemory();
  }

  /**
   * Picks the $this->knn most similar mice out of one row of similarities.
   *
   * @param $sim_scores
   *   Array of similarity scores keyed by mouse index.
   * @param $mouse_index
   *   Index of the mouse the row belongs to; it is never its own neighbor.
   *
   * @return
   *   Array whose keys are the indexes of the selected neighbors, suitable for
   *   isset() lookups. Empty when there is no usable neighbor.
   */
  private function correlationKnnNeighbors($sim_scores, $mouse_index) {
    if (!is_array($sim_scores)) {
      return array();
    }
    // The mouse itself and NaN similarities are skipped during prediction
    // anyway, so they must not use up any of the k slots. NaN would also make
    // the sort order undefined.
    unset($sim_scores[$mouse_index]);
    foreach ($sim_scores as $index => $score) {
      if (is_float($score) && is_nan($score)) {
        unset($sim_scores[$index]);
      }
    }
    arsort($sim_scores);
    // preserve_keys must be TRUE: otherwise array_slice() renumbers integer
    // keys and the neighbor indexes degrade to 0..k-1.
    $nearest = array_slice($sim_scores, 0, $this->knn, TRUE);
    return array_flip(array_keys($nearest));
  }

  /**
   * Returns the head of the INSERT statement used to save predictions.
   *
   * This is to aid override for derived classes.
   */
  protected function savePredictionSql() {
    return "INSERT INTO {recommender_prediction}(app_id, mouse_id, cheese_id, prediction, created) VALUES";
  }

  /**
   * Looks up the direct-matrix value for a mouse id / cheese id pair.
   */
  private function getFromDirectMatrix($mouse_id, $cheese_id) {
    return $this->directMatrix->get($this->mouseMap[$mouse_id], $this->cheeseMap[$cheese_id]);
  }

  /**
   * Tells whether the direct matrix holds a record for the given pair.
   *
   * FIXME: it is buggy to use 0 to test whether record exists or not!
   *
   * @param $mouse_id
   *   Mouse id (key of $this->mouseMap).
   * @param $cheese_id
   *   Cheese id (key of $this->cheeseMap).
   * @param $nan
   *   TRUE when a missing record is represented by NaN, FALSE when it is
   *   represented by 0.
   *
   * @return
   *   TRUE if the stored value is not the "missing" marker, FALSE otherwise.
   */
  private function recordExists($mouse_id, $cheese_id, $nan) {
    $value = $this->getFromDirectMatrix($mouse_id, $cheese_id);
    if ($nan) {
      return !is_nan($value);
    }
    return $value != 0;
  }

}