function _recommender_similarity_classical_in_database in Recommender API 5
Same name and namespace in other branches
- 6 recommender.module \_recommender_similarity_classical_in_database()
Classical algorithm computed in databases. Full functioning. It has no memory limits because all data are saved in database. But the performance is worse than in-memory computation.
1 call to _recommender_similarity_classical_in_database()
- recommender_similarity_classical in ./
recommender.module - classical collaborative filtering algorithm based on correlation coefficient. could be used in the classical user-user or item-item algorithm see the README file for more details
File
- ./
recommender.module, line 161 - Providing generic recommender system algorithms.
Code
function _recommender_similarity_classical_in_database($app_id, $table_name, $field_mouse, $field_cheese, $field_weight, $options) {
// get param values
$missing = isset($options['missing']) ? $options['missing'] : 'none';
$created = time();
// operations would be quite different for different missing data treament
switch ($missing) {
// in this case, we treat the missing data as missing.
case 'none':
default:
// compute pair-wise avg/stddev for each mouse, which is different in each pair.
db_query("DELETE FROM {recommender_helper_pair_stat}");
db_query("INSERT INTO {recommender_helper_pair_stat}(id1, id2, avg1, avg2, stddev1, stddev2)\n SELECT n1.{$field_mouse}, n2.{$field_mouse}, AVG(n1.{$field_weight}), AVG(n2.{$field_weight}),\n STDDEV(n1.{$field_weight}), STDDEV(n2.{$field_weight})\n FROM {{$table_name}} n1 INNER JOIN {{$table_name}} n2 ON n1.{$field_cheese}=n2.{$field_cheese}\n GROUP BY n1.{$field_mouse}, n2.{$field_mouse}");
// compute similarity. would be time-consuming
db_query("INSERT INTO {recommender_similarity}(app_id, mouse1_id, mouse2_id, similarity, created)\n SELECT %d, n1.{$field_mouse}, n2.{$field_mouse},\n (AVG((n1.{$field_weight}-a.avg1)*(n2.{$field_weight}-a.avg2)))/(a.stddev1*a.stddev2) corr, %d\n FROM {{$table_name}} n1 INNER JOIN {{$table_name}} n2 ON n1.{$field_cheese}=n2.{$field_cheese} AND n1.{$field_mouse}<=n2.{$field_mouse}\n INNER JOIN {recommender_helper_pair_stat} a ON n1.{$field_mouse}=a.id1 AND n2.{$field_mouse}=a.id2\n GROUP BY n1.{$field_mouse}, n2.{$field_mouse} HAVING corr<>0", $app_id, $created);
// insert duplicate ones (to save time, we didn't do half of the calculation in operation above)
db_query("INSERT INTO {recommender_similarity}(app_id, mouse1_id, mouse2_id, similarity, created)\n SELECT app_id, mouse2_id, mouse1_id, similarity, created FROM {recommender_similarity}\n WHERE mouse1_id<mouse2_id AND app_id=%d AND created=%d", $app_id, $created);
// clean up
db_query("DELETE FROM {recommender_helper_pair_stat}");
break;
// in this case, we treat the missing data as default value 0;
case 'zero':
// this case is similar to 'zero', but adjust the calculation to be more efficient for very sparse data.
// refer to README file
case 'adjusted':
// expand the matrix to have zeros for missing data.
_recommender_expand_sparse_data($table_name, $field_mouse, $field_cheese, $field_weight, $missing);
// calcualate mean and stddev for each vector
db_query("DELETE FROM {recommender_helper_single_stat}");
db_query("INSERT INTO {recommender_helper_single_stat}(id, avg, stddev)\n SELECT mouse_id, AVG(weight), STDDEV(weight) FROM {recommender_helper_matrix}\n GROUP BY mouse_id");
// addjust the full matrix by the mean of each vector.
db_query("UPDATE {recommender_helper_matrix} m, {recommender_helper_single_stat} s\n SET m.weight=m.weight-s.avg WHERE m.mouse_id=s.id");
// we skip those mice that don't share cheese with other mice in 'adjusted' mode.
$sql_adjusted = $missing != 'adjusted' ? '' : "INNER JOIN {recommender_helper_pair_stat} p ON n1.mouse_id=p.id1 AND n2.mouse_id=p.id2";
// compute similarity. could be time-consuming.
db_query("INSERT INTO {recommender_similarity}(app_id, mouse1_id, mouse2_id, similarity, created)\n SELECT %d, n1.mouse_id, n2.mouse_id,\n (AVG(n1.weight*n2.weight))/(s1.stddev*s2.stddev) corr, %d\n FROM {recommender_helper_matrix} n1 INNER JOIN {recommender_helper_matrix} n2\n ON n1.cheese_id=n2.cheese_id AND n1.mouse_id<=n2.mouse_id {$sql_adjusted}\n INNER JOIN {recommender_helper_single_stat} s1 ON n1.mouse_id=s1.id\n INNER JOIN {recommender_helper_single_stat} s2 ON n2.mouse_id=s2.id\n WHERE s1.stddev<>0 AND s2.stddev<>0\n GROUP BY n1.mouse_id, n2.mouse_id HAVING corr<>0", $app_id, $created);
// insert duplicate ones (to save time, we didn't do half of the calculation in operation above)
db_query("INSERT INTO {recommender_similarity}(app_id, mouse1_id, mouse2_id, similarity, created)\n SELECT app_id, mouse2_id, mouse1_id, similarity, created FROM {recommender_similarity}\n WHERE mouse1_id<mouse2_id AND app_id=%d AND created=%d", $app_id, $created);
// clear up temporary data
if ($missing == 'adjusted') {
db_query("DELETE FROM {recommender_helper_pair_stat}");
}
db_query("DELETE FROM {recommender_helper_single_stat}");
db_query("DELETE FROM {recommender_helper_matrix}");
break;
}
// remove old data.
db_query("DELETE FROM {recommender_similarity} WHERE app_id=%d AND created<>%d", $app_id, $created);
}