You are here

function _recommender_similarity_classical_in_memory in Recommender API 5

Same name and namespace in other branches
  1. 6 recommender.module _recommender_similarity_classical_in_memory()

Computes the similarity matrix in memory. This is fast but requires a lot of memory. Limitation: only $missing='zero' is supported; other cases will be covered in a future release.

1 call to _recommender_similarity_classical_in_memory()
recommender_similarity_classical in ./recommender.module
Classical collaborative filtering algorithm based on the correlation coefficient. It can be used for the classical user-user or item-item algorithm; see the README file for more details.

File

./recommender.module, line 49
Providing generic recommender system algorithms.

Code

/**
 * Computes mouse-to-mouse similarity scores entirely in memory.
 *
 * Reads the summed weight of every (mouse, cheese) pair from the given table,
 * builds a dense matrix in which missing pairs are set to 0, passes it to
 * _recommender_fast_correlation_coefficient(), and inserts every score whose
 * absolute value exceeds the sensitivity threshold into
 * {recommender_similarity}. Afterwards, rows of the same application whose
 * 'created' timestamp differs from this run's timestamp are deleted.
 *
 * The table and field names are interpolated directly into the SQL text, so
 * they must come from trusted code, never from user input.
 *
 * @param $app_id
 *   Application id stored with the similarity rows and used to purge old ones.
 * @param $table_name
 *   Name of the table to read from (wrapped in {} for prefixing here).
 * @param $field_mouse
 *   Column whose distinct values are compared with each other.
 * @param $field_cheese
 *   Column whose distinct values form the second matrix dimension.
 * @param $field_weight
 *   Column holding the weight; summed per (mouse, cheese) pair.
 * @param $options
 *   Array of options. 'sensitivity' (default 0.0001): scores whose absolute
 *   value does not exceed it are not saved.
 */
function _recommender_similarity_classical_in_memory($app_id, $table_name, $field_mouse, $field_cheese, $field_weight, $options) {
  $created = time();

  // Load the summed weights and give every distinct mouse/cheese id a
  // zero-based matrix index in order of first appearance.
  $list_mouse = array();
  $list_cheese = array();
  $prep_matrix = array();
  $result = db_query("SELECT {$field_mouse} AS field_mouse, {$field_cheese} AS field_cheese, SUM({$field_weight}) AS field_weight\n                      FROM {{$table_name}} GROUP BY field_mouse, field_cheese");
  while ($tuple = db_fetch_array($result)) {
    $id_mouse = $tuple['field_mouse'];
    $id_cheese = $tuple['field_cheese'];
    $weight = $tuple['field_weight'];
    if (!array_key_exists($id_mouse, $list_mouse)) {
      $list_mouse[$id_mouse] = count($list_mouse);
    }
    if (!array_key_exists($id_cheese, $list_cheese)) {
      $list_cheese[$id_cheese] = count($list_cheese);
    }
    $prep_matrix[$list_mouse[$id_mouse]][$list_cheese[$id_cheese]] = $weight;
  }

  $m = count($list_mouse);
  $n = count($list_cheese);

  // With no source rows there is nothing to correlate; skip straight to the
  // purge instead of handing an empty matrix to the correlation helper.
  if ($m > 0) {
    // Expand the sparse data into a dense m x n matrix; missing values are 0.
    $matrix = array();
    for ($v1 = 0; $v1 < $m; $v1++) {
      for ($v2 = 0; $v2 < $n; $v2++) {
        $matrix[$v1][$v2] = isset($prep_matrix[$v1][$v2]) ? $prep_matrix[$v1][$v2] : 0;
      }
    }

    // time consuming calculation
    $cor_matrix =& _recommender_fast_correlation_coefficient($matrix);

    // save to database
    // TODO: batch insert to improve performance.
    $sensitivity = isset($options['sensitivity']) ? $options['sensitivity'] : 0.0001;
    // Translate matrix indexes back into the original mouse ids.
    $map = array_flip($list_mouse);
    for ($v1 = 0; $v1 < $m; $v1++) {
      for ($v2 = 0; $v2 < $m; $v2++) {
        $score = $cor_matrix[$v1][$v2];
        if (abs($score) > $sensitivity) {
          // Standard VALUES keyword: the VALUE spelling is MySQL-specific.
          db_query("INSERT INTO {recommender_similarity}(app_id, mouse1_id, mouse2_id, similarity, created)\n                  VALUES(%d, %d, %d, %f, %d)", $app_id, $map[$v1], $map[$v2], $score, $created);
        }
      }
    }
  }

  // Remove rows of this application that do not carry this run's timestamp.
  db_query("DELETE FROM {recommender_similarity} WHERE app_id=%d AND created<>%d", $app_id, $created);
}