int main(int argc, char *argv[]) { int node_index, node_count; int size; int block_size; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &node_index); MPI_Comm_size(MPI_COMM_WORLD, &node_count); if (node_index == 0) { sprintf(buf, "result%d.txt", node_count); freopen(buf, "w", stdout); } // Timer started MPI_Barrier(MPI_COMM_WORLD); if (node_index == 0) timer_start(); if (argc != 3 && argc != 4) { if (node_index == 0) printf("Usage: %s matrix_size block_size [input_file]\n", argv[0]); MPI_Finalize(); return -1; } else { size = atoi(argv[1]); block_size = atoi(argv[2]); if ((size < 2) || (block_size < 1) || (block_size > size)) { if (node_index == 0) { printf("Wrong format matrix_size > 1, matrix_size >= block_size > 0\n"); printf("Usage: %s matrix_size block_size [input_file]\n", argv[0]); } MPI_Finalize(); return -2; } } int memlen = size + node_count * block_size - 1; memlen /= node_count * block_size; memlen *= block_size * size; double *data = new double [memlen]; memset(data, 0, memlen * sizeof(double)); if (argc == 3) { generate_matrix(MATRIX_INDEX, size, block_size, node_index, node_count, data); } else { if (read_matrix_file(argv[3], size, block_size, node_index, node_count, data)) { if (node_index == 0) printf("Cannot open(read from) file: %s\n", argv[3]); delete[] data; MPI_Finalize(); return -3; } } print_matrix(size, block_size, node_index, node_count, data); // Initialization done! MPI_Barrier(MPI_COMM_WORLD); if (node_index == 0) print_full_time("on init"); if (spd_inverse(size, block_size, node_index, node_count, data)) { if (node_index == 0) printf("Method cannot be applied!\n"); delete[] data; MPI_Finalize(); return -4; } // Algorithm done! MPI_Barrier(MPI_COMM_WORLD); if (node_index == 0) print_full_time("on algorithm"); // Printing result on console (and in file) downtr_to_symm(size, block_size, node_index, node_count, data); print_matrix(size, block_size, node_index, node_count, data); double *workspace = new double[memlen]; memset(workspace, 0, memlen * sizeof(double)); // For this two matrices inverse is known if (MATRIX_INDEX == 1 || MATRIX_INDEX == 2) { if (MATRIX_INDEX == 1) generate_matrix(2, size, block_size, node_index, node_count, workspace); else generate_matrix(1, size, block_size, node_index, node_count, workspace); sub_array(memlen, data, workspace); double err_norm = inf_norm_matrix(size, block_size, node_index, node_count, workspace); err_norm = 0; // if (node_index == 0) // printf("Error = %11.5le\n", err_norm); } // Restore input matrix to calculate residual if (argc == 3) { generate_matrix(MATRIX_INDEX, size, block_size, node_index, node_count, workspace); } else { if (read_matrix_file(argv[3], size, block_size, node_index, node_count, workspace)) { if (node_index == 0) printf("Cannot open(read from) file: %s\n", argv[3]); delete[] data; delete[] workspace; MPI_Finalize(); return -5; } } #ifdef PRINT_RESULT_TO_FILE if (print_matrix_file(output, size, block_size, node_index, node_count, data)) { if (node_index == 0) printf("Cannot open(print to) file: %s\n", output); delete[] data; delete[] workspace; MPI_Finalize(); return -6; } #endif double *residual = new double[memlen]; tmatrix_mul(size, block_size, node_index, node_count, workspace, data, residual); generate_matrix(-2, size, block_size, node_index, node_count, residual); generate_matrix(0, size, block_size, node_index, node_count, workspace); sub_array(memlen, workspace, residual); downtr_to_symm(size, block_size, node_index, node_count, residual); double res_norm = inf_norm_matrix(size, block_size, node_index, node_count, residual); if (node_index == 0) printf("Residual = %11.5le\n", res_norm); delete[] data; delete[] workspace; delete[] residual; MPI_Finalize(); return 0; }
int main(int argc, char* argv[]) { TEST_PARAS myparas = parse_test_paras(argc, argv, testfile, embeddingfile, trainfile); printf("Predicting...\n"); if(!myparas.allow_self_transition) printf("Do not allow self-transtion.\n"); if (!myparas.underflow_correction) printf("Underflow correction disabled\n"); int new_test_song_exp = (myparas.train_test_hash_file[0] != '\0'); if(myparas.tagfile[0] == '\0' && new_test_song_exp) { printf("Have to support with a tag file if you want to test on unseen songs.\n"); exit(1); } int d; int m; int l; int i; int j; int s; int fr; int to; double* bias_terms = 0; double** X = read_matrix_file(embeddingfile, &l, &d, &bias_terms); double** realX; PDATA pd = read_playlists_data(testfile); //int k = pd.num_songs; int k; double llhood = 0.0; double uniform_llhood = 0.0; double realn = 0.0; double not_realn= 0.0; int* train_test_hash; int k_train; int k_test; TDATA td; if(!new_test_song_exp) { k = pd.num_songs; if(myparas.tagfile[0] != '\0') { td = read_tag_data(myparas.tagfile); m = td.num_tags; myparas.num_points = l / (k + m); realX = zerosarray(k * myparas.num_points, d); calculate_realX(X, realX, td, k, m, d, myparas.num_points); free_tag_data(td); if(myparas.tag_ebd_filename[0] != '\0') write_embedding_to_file(X + k * myparas.num_points, m * myparas.num_points, d, myparas.tag_ebd_filename, 0); } else { myparas.num_points = l / k; realX = zerosarray(k * myparas.num_points, d); Array2Dcopy(X, realX, l, d); } Array2Dfree(X, l, d); } else { printf("Prediction on unseen songs.\n"); td = read_tag_data(myparas.tagfile); m = td.num_tags; k = td.num_songs; train_test_hash = read_hash(myparas.train_test_hash_file, &k_train); k_test = k - k_train; printf("Number of new songs %d.\n", k_test); myparas.num_points = l / (k_train + m); realX = zerosarray(k * myparas.num_points, d); calculate_realX_with_hash(X, realX, td, k, m, d, myparas.num_points, k_train, train_test_hash); free_tag_data(td); Array2Dfree(X, l, d); } if(myparas.song_ebd_filename[0] != '\0') write_embedding_to_file(realX, k * myparas.num_points, d, myparas.song_ebd_filename, 0); if(myparas.bias_ebd_filename[0] != '\0') { FILE* fp = fopen(myparas.bias_ebd_filename, "w"); for( i = 0; i < k ;i++) { fprintf(fp, "%f", bias_terms[i]); if ( i != k - 1) fputc('\n', fp); } fclose(fp); } double** square_dist; if(myparas.square_dist_filename[0] != '\0') square_dist = zerosarray(k, k); int n = 0; for(i = 0; i < pd.num_playlists; i ++) if(pd.playlists_length[i] > 0) n += pd.playlists_length[i] - 1; printf("Altogether %d transitions.\n", n);fflush(stdout); PHASH* tcount; PHASH* tcount_train; double** tcount_full; double** tcount_full_train; if(myparas.use_hash_TTable) tcount = create_empty_hash(2 * n); else tcount_full = zerosarray(k, k); HELEM temp_elem; TPAIR temp_pair; int idx; double temp_val; for(i = 0; i < pd.num_playlists; i ++) { if(pd.playlists_length[i] > myparas.range) { for(j = 0; j < pd.playlists_length[i] - 1; j++) { temp_pair.fr = pd.playlists[i][j]; temp_pair.to = pd.playlists[i][j + myparas.range]; //printf("(%d, %d)\n", temp_pair.fr, temp_pair.to); if(temp_pair.fr >= 0 && temp_pair.to >= 0) { if(myparas.use_hash_TTable) { idx = exist_in_hash(tcount, temp_pair); if(idx < 0) { temp_elem.key = temp_pair; temp_elem.val = 1.0; add_entry(tcount, temp_elem); } else update_with(tcount, idx, 1.0); } else tcount_full[temp_pair.fr][temp_pair.to] += 1.0; } } } } TRANSITIONTABLE ttable; TRANSITIONTABLE BFStable; //Need to use the training file if(myparas.output_distr) { PDATA pd_train = read_playlists_data(trainfile); if(myparas.use_hash_TTable) tcount_train = create_empty_hash(2 * n); else tcount_full_train = zerosarray(k, k); for(i = 0; i < pd_train.num_playlists; i ++) { if(pd_train.playlists_length[i] > 1) { for(j = 0; j < pd_train.playlists_length[i] - 1; j++) { temp_pair.fr = pd_train.playlists[i][j]; temp_pair.to = pd_train.playlists[i][j + 1]; if(myparas.use_hash_TTable) { idx = exist_in_hash(tcount_train, temp_pair); if(idx < 0) { temp_elem.key = temp_pair; temp_elem.val = 1.0; add_entry(tcount_train, temp_elem); } else update_with(tcount_train, idx, 1.0); } else tcount_full_train[temp_pair.fr][temp_pair.to] += 1.0; } } } } FILE* song_distr_file; FILE* trans_distr_file; double* song_sep_ll; if(myparas.output_distr) { printf("Output likelihood distribution file turned on.\n"); if(myparas.output_distr) { song_distr_file = fopen(songdistrfile, "w"); trans_distr_file = fopen(transdistrfile, "w"); song_sep_ll = (double*)calloc(k, sizeof(double)); } } int* test_ids_for_new_songs; if(new_test_song_exp) test_ids_for_new_songs = get_test_ids(k, k_train, train_test_hash); for(fr = 0; fr < k; fr++) { int collection_size; int* collection_idx; if(myparas.fast_collection) { collection_size = (BFStable.parray)[fr].length; if (collection_size == 0) continue; collection_idx = (int*)malloc(collection_size * sizeof(int)); LINKEDELEM* tempp = (BFStable.parray)[fr].head; for(i = 0; i < collection_size; i++) { collection_idx[i] = tempp -> idx; tempp = tempp -> pnext; } } else if(new_test_song_exp) { collection_size = k_test; collection_idx = (int*)malloc(collection_size * sizeof(int)); int_list_copy(test_ids_for_new_songs, collection_idx, k_test); } else collection_size = k; double** delta = zerosarray(collection_size, d); double* p = (double*)calloc(collection_size, sizeof(double)); double** tempkd = zerosarray(collection_size, d); double* tempk = (double*)calloc(collection_size, sizeof(double)); double** mid_delta = 0; double* mid_p = 0; double** mid_tempkd = 0; // I get a seg fault when these get freed. Don't understand. if (myparas.num_points == 3) { mid_delta = zerosarray(collection_size, d); mid_p = (double*)calloc(collection_size, sizeof(double)); mid_tempkd = zerosarray(collection_size, d); } for(j = 0; j < collection_size; j++) { for(i = 0; i < d; i++) { if(myparas.fast_collection || new_test_song_exp) delta[j][i] = realX[fr][i] - realX[(myparas.num_points - 1) * k + collection_idx[j]][i]; else delta[j][i] = realX[fr][i] - realX[(myparas.num_points - 1) * k + j][i]; } if(myparas.num_points == 3) { if(myparas.fast_collection || new_test_song_exp) mid_delta[j][i] = realX[k + fr][i] - realX[k + collection_idx[j]][i]; else mid_delta[j][i] = realX[k + fr][i] - realX[k + j][i]; } } mat_mult(delta, delta, tempkd, collection_size, d); scale_mat(tempkd, collection_size, d, -1.0); sum_along_direct(tempkd, p, collection_size, d, 1); if(myparas.square_dist_filename[0] != '\0') for(i = 0; i < k; i++) square_dist[fr][i] = -p[i]; if (bias_terms != 0) add_vec(p, bias_terms, collection_size, 1.0); if (myparas.num_points == 3) { // Just use the mid_deltas (midpoint differences): square them, // then sum and add to the p vector directly, then the midpoint // probability is incorporated mat_mult(mid_delta, mid_delta, mid_tempkd, collection_size, d); scale_mat(mid_tempkd, collection_size, d, -1.0); sum_along_direct(mid_tempkd, mid_p, collection_size, d, 1); add_vec(p, mid_p, collection_size, 1.0); } if (myparas.underflow_correction == 1) { double max_val = p[0]; for(i = 0; i < collection_size; i++) max_val = p[i] > max_val? p[i] : max_val; vec_scalar_sum(p, -max_val, collection_size); } Veccopy(p, tempk, collection_size); exp_on_vec(tempk, collection_size); //exp_on_vec(p, collection_size); // underflow checking: // for (i = 0; i < collection_size; i++) // if (p[i] < 0.000001) // p[i] = 0.000001; double temp_sum; if(myparas.allow_self_transition) temp_sum = sum_vec(tempk, collection_size); else { temp_sum = 0.0; for(i = 0; i < collection_size; i++) if(!myparas.fast_collection || new_test_song_exp) temp_sum += (i != fr)? tempk[i] : 0.0; else temp_sum += (collection_idx[i] != fr)? tempk[i] : 0.0; } vec_scalar_sum(p, -log(temp_sum), collection_size); //scale_vec(p, collection_size, 1.0 / temp_sum); //printf("done...\n"); for(to = 0; to < k; to++) { if(myparas.allow_self_transition || (!myparas.allow_self_transition && fr != to)) { temp_pair.fr = fr; temp_pair.to = to; //printf("(%d, %d)\n", fr, to); if(myparas.use_hash_TTable) idx = exist_in_hash(tcount, temp_pair); else idx = tcount_full[fr][to] > 0.0? 1 : -1; //printf("%d\n", idx);fflush(stdout); int idx_train; //printf("done...\n");fflush(stdout); if(myparas.output_distr) { if(myparas.use_hash_TTable) idx_train = exist_in_hash(tcount_train, temp_pair); else idx_train = tcount_full_train[fr][to] > 0.0? 1 : -1; } if(idx >= 0) { if(myparas.fast_collection || new_test_song_exp) { s = -1; for(i = 0; i < collection_size; i++) { if(collection_idx[i] == to) { s = i; break; } } } else s = to; //printf("%d\n", idx);fflush(stdout); if(myparas.use_hash_TTable) temp_val = retrieve_value_with_idx(tcount, idx); else temp_val = tcount_full[fr][to]; if(s < 0) not_realn += temp_val; else { //printf("s = %d\n", s); llhood += temp_val * p[s]; if(new_test_song_exp) uniform_llhood += temp_val * log(1.0 / (double) k_test); realn += temp_val; if(myparas.output_distr) { //double temp_val_train = idx_train >= 0? retrieve_value_with_idx(tcount_train, idx_train): 0.0; double temp_val_train; if(idx_train < 0) temp_val_train = 0.0; else temp_val_train = myparas.use_hash_TTable ? retrieve_value_with_idx(tcount_train, idx_train) : tcount_full_train[fr][to]; song_sep_ll[fr] += temp_val * p[s]; song_sep_ll[to] += temp_val * p[s]; fprintf(trans_distr_file, "%d %d %f\n", (int)temp_val_train, (int)temp_val, temp_val * p[s]); } } } } } Array2Dfree(delta, collection_size, d); free(p); Array2Dfree(tempkd, collection_size, d); free(tempk); if (myparas.num_points == 3) { Array2Dfree(mid_delta, collection_size, d); free(mid_p); Array2Dfree(mid_tempkd, collection_size, d); } if(myparas.fast_collection || new_test_song_exp) free(collection_idx); } if(myparas.output_distr) { printf("Writing song distr.\n"); for(i = 0; i < k; i++) fprintf(song_distr_file, "%d %f\n", (int)(pd.id_counts[i]), song_sep_ll[i]); fclose(song_distr_file); fclose(trans_distr_file); free(song_sep_ll); } llhood /= realn; printf("Avg log-likelihood on test: %f\n", llhood); if(myparas.fast_collection) printf("Ratio of transitions that do not appear in the training set: %f\n", not_realn / (realn + not_realn)); if(new_test_song_exp) { uniform_llhood /= realn; printf("Avg log-likelihood for uniform baseline: %f\n", uniform_llhood); } if(myparas.use_hash_TTable) free_hash(tcount); else Array2Dfree(tcount_full, k, k); free_playlists_data(pd); if(myparas.output_distr) { if(myparas.use_hash_TTable) free_hash(tcount_train); else Array2Dfree(tcount_full_train, k, k); } Array2Dfree(realX, k * myparas.num_points, d); if(new_test_song_exp) { free(train_test_hash); free(test_ids_for_new_songs); } if(myparas.square_dist_filename[0] != '\0') { write_embedding_to_file(square_dist, k, k, myparas.square_dist_filename, 0); Array2Dfree(square_dist, k, k); } }