You are here

function recommender_prediction_classical in Recommender API 5

Same name and namespace in other branches
  1. 6 recommender.module \recommender_prediction_classical()
  2. 6.2 recommender.module \recommender_prediction_classical()

Classical weighted-average algorithm that calculates predictions from the similarity matrix. Limitation: only database operations are supported for now; no in-memory operation will be available until a future release. Limitation: only average weighting is implemented; regression-based weighting may be included in a future release.

Parameters

$app_name the application name that uses this function.:

$table_name the input table name:

$field_mouse the input table field for mouse:

$field_cheese the input table field for cheese:

$field_weight the input table field weight:

$options an array of options. 'missing': how to handle missing data -- 'none' does nothing; 'zero' fills in missing data with zero; 'adjusted' skips mice that don't share cheese in common. 'duplicate': how to handle predictions that already exist in the mouse-cheese evaluation data -- 'preserve' or 'eliminate'. 'sensitivity': if the absolute similarity is not greater than this value, it is treated as zero.

Return value

null. Nothing is returned; the {recommender_prediction} table is filled with the prediction data.

File

./recommender.module, line 306
Providing generic recommender system algorithms.

Code

/**
 * Classical weighted-average prediction computed from the similarity matrix.
 *
 * For every (mouse, cheese) pair reachable through {recommender_similarity},
 * the prediction is the mouse's own average weight plus the similarity-weighted
 * average of the other mice's deviations from their own averages. All work is
 * done with SQL statements; results are written to {recommender_prediction}
 * and predictions for this application from earlier runs are removed at the end.
 *
 * The table and field names are interpolated directly into the SQL text, so
 * they must come from trusted code, never from user input.
 *
 * @param $app_name
 *   The application name that uses this function.
 * @param $table_name
 *   The input table name, without the surrounding {} braces.
 * @param $field_mouse
 *   The input table field for mouse.
 * @param $field_cheese
 *   The input table field for cheese.
 * @param $field_weight
 *   The input table field for weight.
 * @param $options
 *   An array of options:
 *   - 'missing': 'none' (default), 'zero' or 'adjusted'. For 'zero' and
 *     'adjusted' the input is first expanded by
 *     _recommender_expand_sparse_data() and read from
 *     {recommender_helper_matrix}; 'adjusted' additionally restricts the
 *     calculation to mouse pairs present in {recommender_helper_pair_stat}.
 *   - 'duplicate': 'preserve' (default) or 'eliminate'. 'eliminate' deletes
 *     predictions for (mouse, cheese) pairs that already exist in the input.
 *   - 'sensitivity': similarities whose absolute value is not greater than
 *     this number are ignored (default 0).
 *
 * @return
 *   Nothing. {recommender_prediction} is filled with prediction data.
 */
function recommender_prediction_classical($app_name, $table_name = 'recommender_link', $field_mouse = 'mouse_id', $field_cheese = 'cheese_id', $field_weight = 'weight', $options = array()) {

  // Resolve parameters and defaults.
  $app_id = recommender_get_app_id($app_name);
  $missing = isset($options['missing']) ? $options['missing'] : 'none';
  // 'duplicate' could also be 'eliminate'.
  $duplicate = isset($options['duplicate']) ? $options['duplicate'] : 'preserve';
  $sensitivity = isset($options['sensitivity']) ? $options['sensitivity'] : 0;
  // Timestamp used as the batch marker for rows written by this run.
  $created = time();
  $uses_helper_matrix = ($missing == 'zero' || $missing == 'adjusted');

  // Append missing data with 0s, then use table {recommender_helper_matrix}
  // instead of the caller's table.
  if ($uses_helper_matrix) {
    _recommender_expand_sparse_data($table_name, $field_mouse, $field_cheese, $field_weight, $missing);
    // Store the bare table name: every query below wraps it as
    // {{$table_name}}, so a pre-braced name would produce doubled braces and
    // the table prefix would be applied twice.
    $table_name = 'recommender_helper_matrix';
    $field_mouse = 'mouse_id';
    $field_cheese = 'cheese_id';
    $field_weight = 'weight';
  }

  // Calculate the mean value for each mouse; it is the starting point of
  // each prediction.
  db_query("DELETE FROM {recommender_helper_single_stat}");
  db_query("INSERT INTO {recommender_helper_single_stat}(id, avg)\n            SELECT {$field_mouse}, AVG({$field_weight}) FROM {{$table_name}} GROUP BY {$field_mouse}");

  // The prediction is computed in two statements on purpose: an earlier
  // single-statement version that also joined {recommender_helper_single_stat}
  // on s.mouse1_id (to add the target mouse's average inline) was found to be
  // very slow, so that average is added by the UPDATE further down.

  // If missing data is treated as 'adjusted', only count opinions from mice
  // that share cheese with the target mouse.
  $sql_adjusted = $missing != 'adjusted' ? '' : "INNER JOIN {recommender_helper_pair_stat} p ON s.mouse1_id=p.id1 AND s.mouse2_id=p.id2";

  // Calculate the similarity-weighted deviation. Could be time-consuming.
  db_query("INSERT INTO {recommender_prediction}(app_id, mouse_id, cheese_id, prediction, created)\n            SELECT %d, s.mouse1_id, t.{$field_cheese},\n            SUM(s.similarity*(t.{$field_weight}-m.avg))/SUM(ABS(s.similarity)) prediction, %d\n            FROM {recommender_similarity} s INNER JOIN {recommender_helper_single_stat} m\n            ON s.mouse2_id=m.id INNER JOIN {{$table_name}} t ON s.mouse2_id=t.{$field_mouse}\n            {$sql_adjusted}\n            WHERE s.app_id=%d AND ABS(s.similarity)>%f GROUP BY s.mouse1_id, t.{$field_cheese}", $app_id, $created, $app_id, $sensitivity);

  // Adjust the prediction based on each mouse's average evaluation.
  db_query("UPDATE {recommender_prediction} p, {recommender_helper_single_stat} m\n            SET prediction=prediction+m.avg WHERE p.mouse_id=m.id\n            AND p.app_id=%d AND created=%d", $app_id, $created);

  // Remove predictions for pairs that already have an evaluation.
  if ($duplicate == 'eliminate') {
    // Always defined, so the query below never interpolates an unset variable.
    // In the expanded matrix, filled-in entries have weight 0 and must not
    // count as existing evaluations.
    $missing_sql = $uses_helper_matrix ? "WHERE {$field_weight}<>0" : '';
    db_query("DELETE FROM {recommender_prediction} WHERE app_id=%d AND created=%d AND\n              (mouse_id, cheese_id) IN (SELECT {$field_mouse}, {$field_cheese} FROM {{$table_name}}\n              {$missing_sql})", $app_id, $created);
  }

  // Clean up the helper tables.
  db_query("DELETE FROM {recommender_helper_single_stat}");
  if ($uses_helper_matrix) {
    db_query("DELETE FROM {recommender_helper_matrix}");
    if ($missing == 'adjusted') {
      db_query("DELETE FROM {recommender_helper_pair_stat}");
    }
  }

  // Remove old predictions from the last calculation.
  db_query("DELETE FROM {recommender_prediction} WHERE app_id=%d AND created<>%d", $app_id, $created);
}