function recommender_prediction_classical in Recommender API 6
Same name and namespace in other branches
- 5 recommender.module \recommender_prediction_classical()
- 6.2 recommender.module \recommender_prediction_classical()
Classical weighted-average algorithm that calculates predictions from the similarity matrix. Limitation: only database operations are supported for now; no in-memory operation will be available until a future release. Limitation: only average weighting is implemented; regression-based weighting may be included in a future release.
Parameters
$app_name the application name that uses this function.:
$table_name the input table name:
$field_mouse the input table field for mouse:
$field_cheese the input table field for cheese:
$field_weight the input table field weight:
$options an array of options: 'missing': how to handle missing data -- 'none' do nothing; 'zero' fill in missing data with zero; 'adjusted' skip mice that don't share cheese in common. 'duplicate': how to handle predictions that already exist in the mouse-cheese evaluations -- 'preserve' or 'eliminate'. 'sensitivity': a similarity whose absolute value is not greater than this threshold is treated as zero.
Return value
null {recommender_prediction} will be filled with prediction data
File
- ./
recommender.module, line 302 - Providing generic recommender system algorithms.
Code
/**
 * Classical weighted-average prediction computed from the similarity matrix.
 *
 * Works entirely through database queries: per-mouse averages are staged in
 * {recommender_helper_single_stat}, predictions are inserted into
 * {recommender_prediction}, and the helper tables are emptied afterwards.
 * Predictions for this application from earlier runs are deleted at the end.
 *
 * The table and field names are interpolated directly into the SQL text, so
 * they must come from trusted code, never from user input.
 *
 * @param $app_name
 *   The application name that uses this function.
 * @param $table_name
 *   The input table name, without the surrounding {} prefix markers.
 * @param $field_mouse
 *   The input table field for mouse.
 * @param $field_cheese
 *   The input table field for cheese.
 * @param $field_weight
 *   The input table field for weight.
 * @param $options
 *   An array of options:
 *   - 'missing': 'none' (default) uses the input table as is; 'zero' and
 *     'adjusted' first run _recommender_expand_sparse_data() and then read
 *     from {recommender_helper_matrix}. 'adjusted' additionally restricts the
 *     calculation to mouse pairs found in {recommender_helper_pair_stat}.
 *   - 'duplicate': 'preserve' (default) or 'eliminate'; the latter deletes
 *     new predictions whose (mouse, cheese) pair already exists in the input.
 *   - 'sensitivity': only similarities whose absolute value is greater than
 *     this number are used (default 0).
 *
 * @return
 *   Nothing. {recommender_prediction} is filled with prediction data.
 */
function recommender_prediction_classical($app_name, $table_name = 'recommender_link', $field_mouse = 'mouse_id', $field_cheese = 'cheese_id', $field_weight = 'weight', $options = array()) {
  // Resolve parameters and option defaults.
  $app_id = recommender_get_app_id($app_name);
  $missing = isset($options['missing']) ? $options['missing'] : 'none';
  $duplicate = isset($options['duplicate']) ? $options['duplicate'] : 'preserve';
  $sensitivity = isset($options['sensitivity']) ? $options['sensitivity'] : 0;
  // Timestamp tagging the rows written by this run; used below to tell them
  // apart from predictions left by earlier runs.
  $created = time();
  $expanded = ($missing == 'zero' || $missing == 'adjusted');

  // Expand the sparse input, then read from the helper matrix instead.
  if ($expanded) {
    _recommender_expand_sparse_data($table_name, $field_mouse, $field_cheese, $field_weight, $missing);
    // Keep the bare name here: every query below wraps it as {{$table_name}}.
    // Storing it with braces would yield "{{recommender_helper_matrix}}" and
    // therefore a doubled table prefix on sites that use one.
    $table_name = 'recommender_helper_matrix';
    $field_mouse = 'mouse_id';
    $field_cheese = 'cheese_id';
    $field_weight = 'weight';
  }

  // Mean weight per mouse; it is the baseline of every prediction.
  db_query("DELETE FROM {recommender_helper_single_stat}");
  db_query("INSERT INTO {recommender_helper_single_stat}(id, avg)\n SELECT {$field_mouse}, AVG({$field_weight}) FROM {{$table_name}} GROUP BY {$field_mouse}");

  // In 'adjusted' mode only mouse pairs present in the pair-stat helper table
  // contribute (per the original author: mice that share cheese).
  $sql_adjusted = '';
  if ($missing == 'adjusted') {
    $sql_adjusted = "INNER JOIN {recommender_helper_pair_stat} p ON s.mouse1_id=p.id1 AND s.mouse2_id=p.id2";
  }

  // Step 1: similarity-weighted average of each neighbour's deviation from
  // its own mean. The target mouse's mean is added in step 2 rather than by
  // joining {recommender_helper_single_stat} a second time in this query; the
  // original author found the single-query form with that extra join to be
  // much slower. Could still be time-consuming.
  db_query(
    "INSERT INTO {recommender_prediction}(app_id, mouse_id, cheese_id, prediction, created)\n"
    . " SELECT %d, s.mouse1_id, t.{$field_cheese},\n"
    . " SUM(s.similarity*(t.{$field_weight}-m.avg))/SUM(ABS(s.similarity)) prediction, %d\n"
    . " FROM {recommender_similarity} s INNER JOIN {recommender_helper_single_stat} m\n"
    . " ON s.mouse2_id=m.id INNER JOIN {{$table_name}} t ON s.mouse2_id=t.{$field_mouse}\n"
    . " {$sql_adjusted}\n"
    . " WHERE s.app_id=%d AND ABS(s.similarity)>%f GROUP BY s.mouse1_id, t.{$field_cheese}",
    $app_id, $created, $app_id, $sensitivity
  );

  // Step 2: shift each new prediction by the target mouse's own average.
  db_query(
    "UPDATE {recommender_prediction} p, {recommender_helper_single_stat} m\n"
    . " SET prediction=prediction+m.avg WHERE p.mouse_id=m.id\n"
    . " AND p.app_id=%d AND created=%d",
    $app_id, $created
  );

  // Optionally drop predictions for pairs that already have an evaluation.
  if ($duplicate == 'eliminate') {
    // Always defined, so the query below never reads an unset variable when
    // 'missing' is 'none'. With an expanded matrix only non-zero weights are
    // counted as existing evaluations (presumably zero marks filled-in data).
    $missing_sql = $expanded ? "WHERE {$field_weight}<>0" : '';
    db_query(
      "DELETE FROM {recommender_prediction} WHERE app_id=%d AND created=%d AND\n"
      . " (mouse_id, cheese_id) IN (SELECT {$field_mouse}, {$field_cheese} FROM {{$table_name}}\n"
      . " {$missing_sql})",
      $app_id, $created
    );
  }

  // Clean up the helper tables used by this run.
  db_query("DELETE FROM {recommender_helper_single_stat}");
  if ($expanded) {
    db_query("DELETE FROM {recommender_helper_matrix}");
    if ($missing == 'adjusted') {
      db_query("DELETE FROM {recommender_helper_pair_stat}");
    }
  }

  // Remove old predictions from the last calculation.
  db_query("DELETE FROM {recommender_prediction} WHERE app_id=%d AND created<>%d", $app_id, $created);
}