You are here

class CorrelationRecommender in Recommender API 6.2

The recommender implementation for the classical correlation-coefficient based algorithm

Hierarchy

Expanded class hierarchy of CorrelationRecommender

File

./Recommender.php, line 462

View source
/**
 * Recommender implementation of the classical correlation-coefficient based
 * algorithm.
 *
 * Similarity between mice is computed with Matrix::correlation() on the
 * in-memory direct matrix; predictions are a similarity-weighted average of
 * the neighbors' mean-centered ratings added to the target mouse's own mean.
 */
class CorrelationRecommender extends Recommender {

  /**
   * Threshold handed to saveSimilarityMatrix() and also used as the exclusive
   * lower limit for predictions that get saved. -INF keeps everything.
   */
  private $lowerbound;

  /**
   * TRUE when the in-memory data should be kept after the similarity
   * computation so that computePrediction() does not have to reload it.
   */
  private $sim_pred;

  /**
   * Number of nearest neighbors to use for prediction; 0 means use all.
   *
   * Declared explicitly (instead of being created on first assignment) and
   * kept public, which is the visibility an undeclared property would have.
   */
  public $knn;

  /**
   * Row vectors of the direct matrix, keyed by mouse index.
   */
  protected $mouseVectors;

  /**
   * Reads the 'lowerbound', 'sim_pred' and 'knn' entries from $this->options,
   * applying defaults for the ones that are missing.
   */
  protected function initialize() {
    // No lowerbound given: save everything.
    $this->lowerbound = isset($this->options['lowerbound'])
      ? floatval($this->options['lowerbound'])
      : -INF;

    // Any truthy value enables the option; it is only ever tested as a flag.
    $this->sim_pred = !empty($this->options['sim_pred']);

    // No knn given: take all the neighbors, not only the k nearest.
    $this->knn = isset($this->options['knn']) ? $this->options['knn'] : 0;
  }

  /**
   * Computes the similarity matrix in memory and saves it.
   *
   * Data is expected to be already loaded into $this->directMatrix.
   */
  protected function computeSimilarityMemory() {
    watchdog('recommender', "Computing similarity scores in memory. Could be CPU resource intensive. Please be patient");
    $this->similarityMatrix = Matrix::correlation($this->directMatrix);

    // Save, then clean up.
    $this->saveSimilarityMatrix($this->lowerbound);
    $this->purgeOutdatedRecords('similarity');
    if ($this->sim_pred == FALSE) {
      // Prediction will not follow immediately, so release the matrices.
      $this->cleanupMemory();
    }
  }

  /**
   * Computes predictions; only the in-memory computation is supported.
   *
   * TODO: think about whether to elevate this prediction method to the super
   * class.
   */
  public function computePrediction() {
    watchdog('recommender', "Only support prediction in-memory computation.");
    if ($this->sim_pred == FALSE) {
      // The data was released after the similarity computation, so both the
      // direct matrix and the similarity matrix have to be loaded again.
      $this->prepareData('memory', $this->missing);
      $this->loadSimilarityMatrix();
    }
    $this->computePredictionMemory();
  }

  /**
   * Caution: this is a coding-in-progress function for [#483112].
   *
   * $this->similarityMatrix should be loaded before calling this function.
   * The loops are still empty placeholders.
   */
  public function _computePredictionMemory() {
    $m = $this->getMouseNum();
    // $n was previously used without ever being defined.
    $n = $this->getCheeseNum();
    $this->predictionMatrix = Matrix::create('SparseMatrix', $m, $n);
    $mouseVectors = $this->directMatrix->row_vectors();
    foreach ($this->mouseMap as $mouse_id => $mouse_index) {
      foreach ($this->cheeseMap as $cheese_id => $cheese_index) {
      }
    }
  }

  /**
   * Computes predictions from the in-memory matrices and batch-inserts them.
   *
   * For every mouse/cheese pair the prediction is
   *   mean(mouse) + sum(sim * (rating - mean(neighbor))) / sum(|sim|)
   * over the neighbors that rated the cheese. Pairs with a zero denominator
   * or a prediction not greater than $this->lowerbound are not saved.
   *
   * TODO: written cursively. might need refactoring. [#483112]
   * 1. move code to Matrix.php?
   * 2. elevate code up to the super class?
   */
  protected function computePredictionMemory() {
    // The computation is based on $this->directMatrix loaded in memory, not
    // on the database.
    $this->mouseVectors = $this->directMatrix->row_vectors();
    $m = $this->getMouseNum();
    $n = $this->getCheeseNum();
    $nan = ($this->missing == 'none');
    $data = array();

    // Difference matrix: rating minus the mouse's mean, only for cells that
    // hold a rating. Every row is created up front so that a mouse without
    // any rating still has an (empty) row to look into later.
    $aux_matrix = array();
    foreach ($this->mouseVectors as $mouse_index => $mouse_vec) {
      $aux_matrix[$mouse_index] = array();
      $mean = $mouse_vec->mean(TRUE);
      for ($cheese_index = 0; $cheese_index < $n; $cheese_index++) {
        $rating = $mouse_vec->get($cheese_index);
        if (!is_nan($rating)) {
          $aux_matrix[$mouse_index][$cheese_index] = $rating - $mean;
        }
      }
    }
    $values = $this->similarityMatrix->raw_values();

    // Not needed 'cause data will be saved directly to db.
    $this->predictionMatrix = Matrix::create('SparseMatrix', $m, $n);

    // Calculate the prediction for each mouse-cheese pair.
    $neighbor = NULL;
    foreach ($this->mouseMap as $mouse_id => $mouse_index) {
      // (note: to improve performance w/ knn, move the for($j) loop here.)
      if ($this->knn > 0) {
        // Work on a copy of this mouse's similarity row.
        $sim_scores = isset($values[$mouse_index]) ? $values[$mouse_index] : array();
        if (empty($sim_scores)) {
          // No neighbors at all: nothing to predict for this mouse.
          continue;
        }
        arsort($sim_scores);
        // preserve_keys must be TRUE: the keys are the neighbors' mouse
        // indices, and array_slice() renumbers integer keys by default.
        $sim_scores = array_slice($sim_scores, 0, $this->knn, TRUE);
        $neighbor = array_keys($sim_scores);
      }
      foreach ($this->cheeseMap as $cheese_id => $cheese_index) {
        if ($this->duplicate == 'remove' && $this->recordExists($mouse_id, $cheese_id, $nan)) {
          continue;
        }
        $numerator = 0;
        $denomenator = 0;
        for ($j = 0; $j < $m; $j++) {
          // If not a k-nearest neighbor, skip.
          if (isset($neighbor) && !in_array($j, $neighbor, TRUE)) {
            continue;
          }
          // If the neighbor has no rating for this cheese, skip.
          if (!isset($aux_matrix[$j][$cheese_index])) {
            continue;
          }
          // Skip my own rating.
          if ($j == $mouse_index) {
            continue;
          }
          $norm_weight = $aux_matrix[$j][$cheese_index];
          $sim = $this->similarityMatrix->get($j, $mouse_index);
          if (is_nan($sim)) {
            continue;
          }
          $numerator += $norm_weight * $sim;
          $denomenator += abs($sim);
        }
        if ($denomenator != 0) {
          $prediction = $this->mouseVectors[$mouse_index]->mean(TRUE, $nan) + $numerator / $denomenator;

          // Note: the same lowerbound setting is used for prediction
          // generation.
          if ($prediction > $this->lowerbound) {
            $data[] = "({$this->appId}, {$mouse_id}, {$cheese_id}, {$prediction}, {$this->created})";
          }
        }
      }
    }
    $this->batchInsert($this->savePredictionSql(), $data);
    $this->purgeOutdatedRecords('prediction');
    $this->cleanupMemory();
  }

  /**
   * Returns the INSERT prefix used to save predictions.
   *
   * This is to aid override for derived classes.
   */
  protected function savePredictionSql() {
    return "INSERT INTO {recommender_prediction}(app_id, mouse_id, cheese_id, prediction, created) VALUES";
  }

  /**
   * Returns the direct-matrix cell for the given mouse and cheese ids,
   * translating the ids to matrix indices through the id maps.
   */
  private function getFromDirectMatrix($mouse_id, $cheese_id) {
    return $this->directMatrix->get($this->mouseMap[$mouse_id], $this->cheeseMap[$cheese_id]);
  }

  /**
   * Tells whether the direct matrix holds a record for the given pair.
   *
   * With $nan TRUE a NaN cell counts as "no record"; otherwise a cell equal
   * to 0 counts as "no record".
   *
   * FIXME: it is buggy to use 0 to test whether record exists or not!
   */
  private function recordExists($mouse_id, $cheese_id, $nan) {
    $value = $this->getFromDirectMatrix($mouse_id, $cheese_id);
    return $nan ? !is_nan($value) : !($value == 0);
  }

}

Members

Namesort descending Modifiers Type Description Overrides
CorrelationRecommender::$lowerbound private property
CorrelationRecommender::$mouseVectors protected property
CorrelationRecommender::$sim_pred private property
CorrelationRecommender::computePrediction public function Overrides Recommender::computePrediction
CorrelationRecommender::computePredictionMemory protected function Overrides Recommender::computePredictionMemory 1
CorrelationRecommender::computeSimilarityMemory protected function Overrides Recommender::computeSimilarityMemory
CorrelationRecommender::getFromDirectMatrix private function
CorrelationRecommender::initialize protected function Overrides Recommender::initialize 1
CorrelationRecommender::recordExists private function
CorrelationRecommender::savePredictionSql protected function 1
CorrelationRecommender::_computePredictionMemory function
Recommender::$appId protected property
Recommender::$appName protected property
Recommender::$cheeseMap protected property
Recommender::$cheeseNum protected property
Recommender::$created protected property
Recommender::$directMatrix protected property
Recommender::$duplicate protected property
Recommender::$fieldCheese protected property
Recommender::$fieldMouse protected property
Recommender::$fieldWeight protected property
Recommender::$missing protected property
Recommender::$mouseMap protected property
Recommender::$mouseNum protected property
Recommender::$options protected property
Recommender::$performance protected property
Recommender::$predictionMatrix protected property
Recommender::$similarityMatrix protected property
Recommender::$tableName protected property
Recommender::batchInsert protected function
Recommender::cleanupMemory protected function
Recommender::computePredictionDatabase protected function 1
Recommender::computePredictionJava protected function
Recommender::computeSimilarity public function 1
Recommender::computeSimilarityDatabase protected function 1
Recommender::computeSimilarityJava protected function
Recommender::convertAppId static function
Recommender::getAppId public function
Recommender::getCheeseNum protected function
Recommender::getEntityNum protected function
Recommender::getMouseNum protected function
Recommender::loadDirectMatrix protected function Load matrix from the database into a matrix class in memory
Recommender::loadSimilarityMatrix protected function
Recommender::prepareData protected function After calling this function, the data is ready to process: 1) if it is in the database, then $this->tableName and $this->field* store the correct info; 2) if it is in memory, then $this->directMatrix is the matrix.
Recommender::processTable protected function
Recommender::purgeApp static function
Recommender::purgeOutdatedRecords protected function
Recommender::retrievePrediction public function
Recommender::retrieveSimilarity public function Return the similarity between $mouse1 and $mouse2.
Recommender::saveSimilarityMatrix protected function
Recommender::topPrediction public function
Recommender::topSimilarity public function
Recommender::__construct function 1