You are here

class Recommender in Recommender API 6.2

The superclass for all other Recommender algorithms.

Hierarchy

Expanded class hierarchy of Recommender

1 string reference to 'Recommender'
recommender_views_data in ./recommender.views.inc

File

./Recommender.php, line 21

View source
/**
 * Base class for all recommender algorithms.
 *
 * Holds the description of the input data (a table name or a SQL query plus
 * the mouse / cheese / weight column names), normalizes the run-time options,
 * and offers helpers to load the data into memory, store similarity scores and
 * query stored similarity / prediction results. Derived classes override
 * initialize() and the compute*Memory() / compute*Database() / compute*Java()
 * hooks.
 */
class Recommender {

  // Application name and the numeric id it maps to in {recommender_app_map}.
  protected $appName;
  protected $appId;

  // Input data: a table name, or a SQL query starting with "SELECT ", and the
  // names of its mouse, cheese and weight columns.
  protected $tableName;
  protected $fieldMouse;
  protected $fieldCheese;
  protected $fieldWeight;

  // Raw options array as passed to the constructor.
  protected $options;

  // Could be 'memory', 'database', 'java', or the default 'auto'.
  protected $performance;

  // Determines how to handle missing data. Could be 'none' (default) or 'zero'.
  protected $missing;

  // UNIX timestamp taken when the Recommender was constructed.
  protected $created;

  // How to handle duplicate prediction data: 'keep' or 'remove' (default).
  protected $duplicate;

  // In-memory computation related fields.
  // Mouse-cheese matrix; only initialized after running loadDirectMatrix().
  protected $directMatrix;

  // Real mouse_id => matrix index. Rebuilt by loadDirectMatrix() and by
  // loadSimilarityMatrix().
  protected $mouseMap;

  // Real cheese_id => column index in directMatrix.
  protected $cheeseMap;

  // Mouse-mouse matrix.
  protected $similarityMatrix;
  protected $predictionMatrix;

  // Lazily computed counts of distinct mice / cheeses (NULL until requested).
  protected $mouseNum;
  protected $cheeseNum;

  /**
   * Constructor. No need to override; just override initialize().
   *
   * @param $appName application name, mapped to an app id by convertAppId()
   * @param $tableName table name, or a SQL query starting with "SELECT "
   * @param $fieldMouse name of the mouse id column
   * @param $fieldCheese name of the cheese id column
   * @param $fieldWeight name of the weight column
   * @param $options optional array; recognized keys are 'performance',
   *   'missing' and 'duplicate'. Unknown or invalid values fall back to the
   *   defaults 'auto', 'none' and 'remove'.
   */
  public function __construct($appName, $tableName, $fieldMouse, $fieldCheese, $fieldWeight, $options = array()) {
    // The watchdog call that used to log the initialization was removed [#672166].
    $this->appName = $appName;
    $this->appId = self::convertAppId($appName);
    $this->tableName = $tableName;
    $this->fieldMouse = $fieldMouse;
    $this->fieldCheese = $fieldCheese;
    $this->fieldWeight = $fieldWeight;
    $this->options = $options;
    $this->created = time();

    // Init to NULL for late initialization.
    $this->mouseNum = NULL;
    $this->cheeseNum = NULL;

    // Values are matched strictly: with a loose comparison a non-string such
    // as TRUE equals any keyword and would be stored unnormalized.
    $this->performance = self::pickOption($options, 'performance', array('database', 'memory', 'java'), 'auto');
    $this->missing = self::pickOption($options, 'missing', array('none', 'zero'), 'none');
    $this->duplicate = self::pickOption($options, 'duplicate', array('keep', 'remove'), 'remove');

    // Give derived classes a chance to do something.
    $this->initialize();
  }

  /**
   * Returns $options[$key] if it is exactly one of $allowed, else $default.
   */
  private static function pickOption($options, $key, $allowed, $default) {
    if (isset($options[$key]) && in_array($options[$key], $allowed, TRUE)) {
      return $options[$key];
    }
    return $default;
  }

  /**
   * Hook called at the end of the constructor. Does nothing here; derived
   * classes could do something.
   */
  protected function initialize() {
  }

  /**
   * Tells whether $this->tableName holds a SQL query rather than a table name.
   * A value starting with "SELECT " (case-insensitive) is considered a query.
   */
  private function tableIsQuery() {
    return stripos($this->tableName, 'SELECT ') === 0;
  }

  /**
   * After calling this function, data would be ready to process. Could be:
   * 1) if it's in database, then $this->tableName, $this->field* would store the correct info.
   * 2) if it's in memory, then $this->directMatrix will be the matrix
   * Any other $performance value does nothing.
   * @param $performance could be 'memory' or 'database'
   * @param $missing could be 'none' or 'zero'.
   */
  protected function prepareData($performance, $missing = 'none') {
    if ($performance == 'database') {
      // If $tableName is a SQL query, stage it into a real table.
      // $missing == 'zero' is deliberately ignored here: expanding the data
      // is not considered useful for database computation. Code could be found
      // in recommender.module v.1.10 _recommender_expand_sparse_data().
      $this->processTable();
    }
    elseif ($performance == 'memory') {
      // A sparse matrix is used unless missing values are to be read as zero.
      $this->loadDirectMatrix($missing != 'zero');
    }
  }

  /**
   * If the table is a SQL query, copy its rows into
   * {recommender_helper_staging} and point the table / field names at the
   * staging table instead. [#394794]
   */
  protected function processTable() {
    if (!$this->tableIsQuery()) {
      return;
    }
    db_query("TRUNCATE {recommender_helper_staging}");
    db_query("INSERT INTO {recommender_helper_staging} SELECT {$this->fieldMouse}, {$this->fieldCheese}, {$this->fieldWeight} FROM ({$this->tableName}) sql_table");
    $this->tableName = "recommender_helper_staging";
    $this->fieldMouse = "mouse_id";
    $this->fieldCheese = "cheese_id";
    $this->fieldWeight = "weight";
  }

  /**
   * Load matrix from the database into a matrix class in memory.
   *
   * Fills $this->directMatrix and rebuilds $this->mouseMap / $this->cheeseMap,
   * assigning matrix indexes in the order ids are first seen in the result.
   * @param $sparse TRUE to create a 'SparseMatrix', FALSE for a 'RealMatrix'
   */
  protected function loadDirectMatrix($sparse = FALSE) {
    watchdog('recommender', "Please be patient while loading data into memory. This step may fail if you don't have enough memory");
    if ($this->tableIsQuery()) {
      // If $tableName is a SQL query, just run it and load the rows into
      // memory without copying them to the staging table.
      $sql = $this->tableName;
    }
    else {
      // Note: (fieldMouse, fieldCheese) should be unique key, thus we shouldn't use SUM (which is only for fault tolerance).
      $sql = "SELECT {$this->fieldMouse}, {$this->fieldCheese}, SUM({$this->fieldWeight}) {$this->fieldWeight}\n              FROM {{$this->tableName}} GROUP BY {$this->fieldMouse}, {$this->fieldCheese}";
    }
    $result = db_query($sql);
    $type = $sparse ? 'SparseMatrix' : 'RealMatrix';

    // Create the matrix, might fail if not enough memory.
    $this->directMatrix = Matrix::create($type, $this->getMouseNum(), $this->getCheeseNum());
    $this->mouseMap = array();
    $this->cheeseMap = array();

    // Build the matrix.
    while ($row = db_fetch_array($result)) {
      $mouse = $row[$this->fieldMouse];
      $cheese = $row[$this->fieldCheese];
      if (!array_key_exists($mouse, $this->mouseMap)) {
        $this->mouseMap[$mouse] = count($this->mouseMap);
      }
      if (!array_key_exists($cheese, $this->cheeseMap)) {
        $this->cheeseMap[$cheese] = count($this->cheeseMap);
      }
      $this->directMatrix->set($this->mouseMap[$mouse], $this->cheeseMap[$cheese], $row[$this->fieldWeight]);
    }
  }

  /**
   * Helper function for all memory based algorithms: writes
   * $this->similarityMatrix to {recommender_similarity}.
   *
   * Pairs with an undefined or NAN score, or a score below $lowerbound, are
   * skipped.
   * @param $lowerbound minimum score to be saved
   */
  protected function saveSimilarityMatrix($lowerbound = 0) {
    watchdog('recommender', "Saving similarity result to database. Please wait.");

    // Matrix index => real mouse id.
    $idOf = array_flip($this->mouseMap);
    $scores = $this->similarityMatrix->raw_values();
    $data = array();
    foreach ($idOf as $i => $mouse1) {
      foreach ($idOf as $j => $mouse2) {
        // We might skip if it's undefined.
        if (!isset($scores[$i][$j])) {
          continue;
        }
        $score = $scores[$i][$j];
        if (!is_nan($score) && $score >= $lowerbound) {
          $data[] = "({$this->appId}, {$mouse1}, {$mouse2}, {$score}, {$this->created})";
        }
      }
    }
    $this->batchInsert("INSERT INTO {recommender_similarity}(app_id, mouse1_id, mouse2_id, similarity, created) VALUES", $data);
  }

  /**
   * Loads the stored similarity scores of this application into
   * $this->similarityMatrix (a symmetric 'SparseMatrix').
   *
   * TODO: lots of duplicate code from loadDirectMatrix, consider refactoring.
   */
  protected function loadSimilarityMatrix() {
    watchdog('recommender', "Please be patient while loading similarity data into memory. This step may fail if you don't have enough memory");
    $sql = "SELECT mouse1_id, mouse2_id, similarity FROM {recommender_similarity} WHERE app_id={$this->appId}";
    $result = db_query($sql);
    $m = $this->getMouseNum();

    // Create the matrix, might fail if not enough memory.
    $this->similarityMatrix = Matrix::create('SparseMatrix', $m, $m);

    // NOTE(review): this discards the mouseMap built by loadDirectMatrix(),
    // and computePrediction() calls both in sequence, so directMatrix rows may
    // no longer line up with mouseMap afterwards. Confirm against the derived
    // classes before changing.
    $this->mouseMap = array();

    // Build the matrix.
    while ($row = db_fetch_array($result)) {
      $mouse1 = $row["mouse1_id"];
      $mouse2 = $row["mouse2_id"];
      $score = $row["similarity"];
      if (!array_key_exists($mouse1, $this->mouseMap)) {
        $this->mouseMap[$mouse1] = count($this->mouseMap);
      }
      if (!array_key_exists($mouse2, $this->mouseMap)) {
        $this->mouseMap[$mouse2] = count($this->mouseMap);
      }

      // Store both directions so the matrix is symmetric.
      $this->similarityMatrix->set($this->mouseMap[$mouse1], $this->mouseMap[$mouse2], $score);
      $this->similarityMatrix->set($this->mouseMap[$mouse2], $this->mouseMap[$mouse1], $score);
    }
  }

  /**
   * Returns the number of distinct mice in the input data.
   * @param $may_cache FALSE to force a fresh count from the database
   */
  protected function getMouseNum($may_cache = TRUE) {
    if (!$may_cache || $this->mouseNum == NULL) {
      $this->mouseNum = $this->getEntityNum($this->fieldMouse);
    }
    return $this->mouseNum;
  }

  /**
   * Returns the number of distinct cheeses in the input data.
   * @param $may_cache FALSE to force a fresh count from the database
   */
  protected function getCheeseNum($may_cache = TRUE) {
    if (!$may_cache || $this->cheeseNum == NULL) {
      $this->cheeseNum = $this->getEntityNum($this->fieldCheese);
    }
    return $this->cheeseNum;
  }

  /**
   * Counts the distinct values of $field in the input table or query.
   *
   * TODO: should respect the difference between memory/database.
   * if it's memory, just read data from the memory.
   * also, should take care of async problem between database/memory.
   */
  protected function getEntityNum($field) {
    $source = $this->tableIsQuery() ? "({$this->tableName}) sql_table" : "{{$this->tableName}}";
    return db_result(db_query("SELECT COUNT(DISTINCT {$field}) FROM " . $source));
  }

  /**
   * Releases the in-memory matrices. Huge memory waste for large dataset;
   * better unset them after it's done.
   */
  protected function cleanupMemory() {
    unset($this->directMatrix, $this->similarityMatrix, $this->predictionMatrix);
  }

  /**
   * Logs $msg to watchdog as an error and throws it as an Exception.
   * Shared body of the compute*() hooks that a derived class did not override.
   */
  private function failUnsupported($msg) {
    watchdog("recommender", $msg, array(), WATCHDOG_ERROR);
    throw new Exception($msg);
  }

  /**
   * Computes similarity scores using the strategy in $this->performance.
   *
   * Derived classes might override this function as well. By default
   * ('memory', 'auto' or anything unrecognized) it's computed in memory; if
   * that exceeds the memory limit, the caller should use the 'performance'
   * option. Note: outdated records are not purged here.
   */
  public function computeSimilarity() {
    watchdog("recommender", "Computing similarity. Might take a long time. Please be patient.");
    switch ($this->performance) {
      case 'database':
        $this->prepareData('database', $this->missing);
        $this->computeSimilarityDatabase();
        break;

      case 'java':
        // No data preparation is done for the java path.
        $this->computeSimilarityJava();
        break;

      case 'memory':
      case 'auto':
      default:
        $this->prepareData('memory', $this->missing);
        $this->computeSimilarityMemory();
    }
  }

  // To be overridden. Compute in memory.
  protected function computeSimilarityMemory() {
    $this->failUnsupported("ERROR: computing in memory is not support. Exit.");
  }

  // To be overridden. Compute in database.
  protected function computeSimilarityDatabase() {
    $this->failUnsupported("ERROR: computing in database is not support. Exit.");
  }

  // To be overridden. Compute using java.
  protected function computeSimilarityJava() {
    $this->failUnsupported("ERROR: computing using java is not support. Exit.");
  }

  /**
   * Computes predictions using the strategy in $this->performance.
   *
   * The in-memory path ('memory', 'auto' or anything unrecognized) also loads
   * the stored similarity matrix first. Note: outdated records are not purged
   * here.
   */
  public function computePrediction() {
    watchdog("recommender", "Computing prediction. Might take a long time. Please be patient.");
    switch ($this->performance) {
      case 'database':
        $this->prepareData('database', $this->missing);
        $this->computePredictionDatabase();
        break;

      case 'java':
        // No data preparation is done for the java path.
        $this->computePredictionJava();
        break;

      case 'memory':
      case 'auto':
      default:
        $this->prepareData('memory', $this->missing);

        // Need to load similarity matrix too.
        $this->loadSimilarityMatrix();
        $this->computePredictionMemory();
    }
  }

  // To be overridden. Compute in memory.
  protected function computePredictionMemory() {
    $this->failUnsupported("ERROR: computing in memory is not support. Exit.");
  }

  // To be overridden. Compute in database.
  protected function computePredictionDatabase() {
    $this->failUnsupported("ERROR: computing in database is not support. Exit.");
  }

  // To be overridden. Compute using java.
  protected function computePredictionJava() {
    $this->failUnsupported("ERROR: computing using java is not support. Exit.");
  }

  //////////// utility functions //////////////

  /**
   * Returns the app id stored for $appName in {recommender_app_map},
   * inserting a new row first when no id is found.
   */
  public static function convertAppId($appName) {
    $lookup = "SELECT app_id FROM {recommender_app_map} WHERE app_name='%s'";
    $id = db_result(db_query($lookup, $appName));
    if (empty($id)) {
      // Standard VALUES keyword; the VALUE synonym is MySQL-specific.
      db_query("INSERT INTO {recommender_app_map}(app_name) VALUES('%s')", $appName);
      $id = db_result(db_query($lookup, $appName));
    }
    return $id;
  }

  // Getter function.
  public function getAppId() {
    return $this->appId;
  }

  /**
   * Deletes every row belonging to application $appName from the similarity,
   * prediction, slopeone_dev and app_map tables.
   */
  public static function purgeApp($appName) {
    $app_id = self::convertAppId($appName);

    // The app_map row goes last, after the rows that reference its id.
    $tables = array('recommender_similarity', 'recommender_prediction', 'recommender_slopeone_dev', 'recommender_app_map');
    foreach ($tables as $table) {
      db_query("DELETE FROM {" . $table . "} WHERE app_id=%d", $app_id);
    }
  }

  /**
   * Deletes rows of {recommender_$table} for this application whose 'created'
   * stamp differs from this instance's.
   * @param $table table suffix, e.g. 'similarity'
   */
  protected function purgeOutdatedRecords($table) {
    update_sql("DELETE FROM {recommender_{$table}} WHERE app_id={$this->appId} AND created<>{$this->created}");
  }

  /**
   * Runs $insert_sql once per chunk of INSERT_LIMIT rows from $data.
   * @param $insert_sql should look like 'INSERT ... VALUES '
   * @param $data array of "(v1, v2, ...)" strings; taken by reference because
   *   passing by value might use more memory [#509424]
   */
  protected function batchInsert($insert_sql, &$data) {
    foreach (array_chunk($data, INSERT_LIMIT, TRUE) as $rows) {
      update_sql($insert_sql . implode(',', $rows));
    }
  }

  /**
   * Return the similarity between $mouse1 and $mouse2.
   * @param $mouse1
   * @param $mouse2
   * @return float similarity score for $mouse1 and $mouse2; return NAN if error
   */
  public function retrieveSimilarity($mouse1, $mouse2) {
    $result = @db_query("SELECT similarity FROM {recommender_similarity}\n                WHERE app_id=%d AND mouse1_id=%d AND mouse2_id=%d", $this->appId, $mouse1, $mouse2);
    $similarity = db_result($result);

    // Return FALSE or NULL could be confused with 0. Therefore, return NAN for error cases.
    if ($similarity === FALSE) {
      return NAN;
    }
    return $similarity;
  }

  /**
   * Return the stored prediction of $mouse for $cheese.
   * @param $mouse
   * @param $cheese
   * @return float prediction score; return NAN if error
   */
  public function retrievePrediction($mouse, $cheese) {
    $result = @db_query("SELECT prediction FROM {recommender_prediction}\n                WHERE app_id=%d AND mouse_id=%d AND cheese_id=%d", $this->appId, $mouse, $cheese);
    $prediction = db_result($result);

    // Return FALSE or NULL could be confused with 0. Therefore, return NAN for error cases.
    if ($prediction === FALSE) {
      return NAN;
    }
    return $prediction;
  }

  /**
   * Collects up to $topN rows from $result, keeping only rows accepted by
   * $testFunc (every row when $testFunc is NULL).
   */
  private function collectTop($result, $topN, $testFunc) {
    $list = array();
    while (count($list) < $topN && ($item = db_fetch_array($result))) {
      if ($testFunc === NULL || call_user_func($testFunc, $item)) {
        $list[] = $item;
      }
    }
    return $list;
  }

  /**
   * Returns the mice most similar to $mouse, best first.
   *
   * Only the first TOP_N_LIMIT stored rows are examined.
   * @param $mouse
   * @param $topN maximum number of items to return
   * @param $testFunc optional callback receiving an item; items for which it
   *   returns a false value are skipped
   * @return array of array('id' => ..., 'score' => ...)
   */
  public function topSimilarity($mouse, $topN, $testFunc = NULL) {
    // TODO: should use pager_query(). this is a temporary solution
    $result = db_query_range("SELECT mouse2_id id, similarity score FROM {recommender_similarity}\n                              WHERE app_id=%d AND mouse1_id=%d AND mouse2_id<>mouse1_id\n                              ORDER BY similarity DESC, created DESC, mouse2_id ASC", $this->appId, $mouse, 0, TOP_N_LIMIT);
    return $this->collectTop($result, $topN, $testFunc);
  }

  /**
   * Returns the cheeses with the highest predicted score for $mouse, best
   * first.
   *
   * Only the first TOP_N_LIMIT stored rows are examined.
   * @param $mouse
   * @param $topN maximum number of items to return
   * @param $testFunc optional callback receiving an item; items for which it
   *   returns a false value are skipped
   * @return array of array('id' => ..., 'score' => ...)
   */
  public function topPrediction($mouse, $topN, $testFunc = NULL) {
    // Ties are broken on cheese_id: mouse_id is fixed by the WHERE clause and
    // would leave the order of equal rows undefined.
    $result = db_query_range("SELECT cheese_id id, prediction score FROM {recommender_prediction}\n                              WHERE app_id=%d AND mouse_id=%d\n                              ORDER BY prediction DESC, created DESC, cheese_id ASC", $this->appId, $mouse, 0, TOP_N_LIMIT);
    return $this->collectTop($result, $topN, $testFunc);
  }

}

Members

Namesort descending Modifiers Type Description Overrides
Recommender::$appId protected property
Recommender::$appName protected property
Recommender::$cheeseMap protected property
Recommender::$cheeseNum protected property
Recommender::$created protected property
Recommender::$directMatrix protected property
Recommender::$duplicate protected property
Recommender::$fieldCheese protected property
Recommender::$fieldMouse protected property
Recommender::$fieldWeight protected property
Recommender::$missing protected property
Recommender::$mouseMap protected property
Recommender::$mouseNum protected property
Recommender::$options protected property
Recommender::$performance protected property
Recommender::$predictionMatrix protected property
Recommender::$similarityMatrix protected property
Recommender::$tableName protected property
Recommender::batchInsert protected function
Recommender::cleanupMemory protected function
Recommender::computePrediction public function 2
Recommender::computePredictionDatabase protected function 1
Recommender::computePredictionJava protected function
Recommender::computePredictionMemory protected function 1
Recommender::computeSimilarity public function 1
Recommender::computeSimilarityDatabase protected function 1
Recommender::computeSimilarityJava protected function
Recommender::computeSimilarityMemory protected function 1
Recommender::convertAppId static function
Recommender::getAppId public function
Recommender::getCheeseNum protected function
Recommender::getEntityNum protected function
Recommender::getMouseNum protected function
Recommender::initialize protected function 2
Recommender::loadDirectMatrix protected function Load matrix from the database into a matrix class in memory
Recommender::loadSimilarityMatrix protected function
Recommender::prepareData protected function After calling this function, data would be ready to process. Could be: 1) if it's in database, then $this->tableName, $this->field* would store the correct info. 2) if it's in memory, then $this->directMatrix will be the matrix
Recommender::processTable protected function
Recommender::purgeApp static function
Recommender::purgeOutdatedRecords protected function
Recommender::retrievePrediction public function
Recommender::retrieveSimilarity public function Return the similarity between $mouse1 and $mouse2.
Recommender::saveSimilarityMatrix protected function
Recommender::topPrediction public function
Recommender::topSimilarity public function
Recommender::__construct function 1