int main (int argc, char *argv[]) { if (argc < 4) { printf("SemWE_Test_SynSel synonym_question word_embed synonym_result candidate_num\n"); printf("Primary Designed for the TOEFL Synonym Selection Task\n"); exit(1); } strcpy(word_pair, argv[1]); strcpy(word_embed, argv[2]); strcpy(sim_result, argv[3]); candidate_num = atoi(argv[4]); char tmp_word[WORD_LEN]; float tmp_value = 0.0; printf(">> Synonym Selection on Word Embedding Model\n"); FILE *fTEST = fopen(word_pair, "r"); if (fTEST == NULL) { printf(">> Error, can not open file %s\n", word_pair); exit(1); } FILE *fRES = fopen(sim_result, "w"); if (fRES == NULL) { printf(">> Error, can not open file %s\n", sim_result); exit(1); } printf("--- Load Word Embedding from: %s\n", word_embed); FILE *fEMB = fopen(word_embed, "r"); fscanf(fEMB, "%d%d", &word_num, &vect_dim); printf("--- word num: %d\n--- vec dimen: %d\n", word_num, vect_dim); wordEmbed = (real*)malloc(sizeof(real)*word_num*vect_dim); for (int i = 0; i < word_num; i++) { fscanf(fEMB, "%s", tmp_word); for (int j = 0; j < vect_dim; j++) { fscanf(fEMB, "%f", &tmp_value); wordEmbed[i*vect_dim+j] = tmp_value; //tmp_vector.push_back(tmp_value); } wordMapper.insert(make_pair(tmp_word, i)); wordSet.insert(make_pair(tmp_word, i)); } /*if (wordMapper.size() != word_num) { }*/ fclose(fEMB); printf("--- Load finish\n"); //printf(">> Synonym Selection\n"); printf("--- Synonym question : %s\n", word_pair); printf("--- Candidate num: %d\n", candidate_num); printf("--- Word Embed: %s\n", word_embed); printf("--- Selection result: %s\n", sim_result); clock_t start = clock(); WordSim tmp_sim; char word_A[1024]; char word_B[1024]; int all_num = 0; int use_num = 0; int cand_id = 0; while (fscanf(fTEST, "%s", word_A) != EOF) { //printf("--- %s %s: ", word_A, word_B); all_num++; fprintf(fRES, "%s\t", word_A); float max_sim = -10000.0; string sel_synword = ""; // candidate for (cand_id = 0; cand_id < candidate_num; cand_id++) { fscanf(fTEST, "%s", word_B); if (cand_id == 0){ sel_synword = word_B; } float sim_AB = -100.0; if (wordSet.find(word_A) == wordSet.end() || wordSet.find(word_B) == wordSet.end()) { ; } else{ int index_A = wordSet[word_A]; int index_B = wordSet[word_B]; sim_AB = SemWE_CalcCosine(&wordEmbed[index_A*vect_dim], &wordEmbed[index_B*vect_dim], vect_dim); if (sim_AB > max_sim) { max_sim = sim_AB; sel_synword = word_B; } } fprintf(fRES, "%s(%.6f) ", word_B, sim_AB); } fprintf(fRES, "=>best: %s\n", sel_synword.c_str()); //fprintf(fRES, "%s\t%s", sel_synword.c_str()); } fclose(fRES); fclose(fTEST); ////////////////////////////////////////////////////////////////////////// double timeCost = (clock()-start)/CLOCKS_PER_SEC; //printf("--- calculate nums: %d (/%d)\n", use_num, all_num); printf("--- elapsed time: %f\n", timeCost); //printf(">> Finish.\n"); free(wordEmbed); wordEmbed = NULL; return 0; }
int main (int argc, char *argv[]) { if (argc < 4) { printf("SemWE_WordSim.exe word_pair word_embed sim_result distance_ID\n"); printf("distance_ID: cosine; euclidean\n"); exit(1); } strcpy(word_pair, argv[1]); strcpy(word_embed, argv[2]); strcpy(sim_result, argv[3]); distance_ID = argv[4]; char tmp_word[WORD_LEN]; float tmp_value = 0.0; printf(">> Word Similarity Calculation on Word Embedding Model\n"); printf("--- distance flag: %s\n", distance_ID.c_str()); FILE *fTEST = fopen(word_pair, "r"); if (fTEST == NULL) { printf(">> Error, can not open file %s\n", word_pair); exit(1); } FILE *fRES = fopen(sim_result, "w"); if (fRES == NULL) { printf(">> Error, can not open file %s\n", sim_result); exit(1); } printf(">> Load Word Embedding from: %s\n", word_embed); FILE *fEMB = fopen(word_embed, "r"); fscanf(fEMB, "%d%d", &word_num, &vect_dim); printf("--- word num: %d\n--- vec dimen: %d\n", word_num, vect_dim); wordEmbed = (real*)malloc(sizeof(real)*word_num*vect_dim); for (int i = 0; i < word_num; i++) { fscanf(fEMB, "%s", tmp_word); for (int j = 0; j < vect_dim; j++) { fscanf(fEMB, "%f", &tmp_value); wordEmbed[i*vect_dim+j] = tmp_value; //tmp_vector.push_back(tmp_value); } wordMapper.insert(make_pair(tmp_word, i)); wordSet.insert(make_pair(tmp_word, i)); } /*if (wordMapper.size() != word_num) { }*/ fclose(fEMB); printf("--- Load finish\n"); printf(">> Calculate Word Similarity: %s\n", distance_ID.c_str()); printf("--- word pair : %s\n", word_pair); printf("--- word embed: %s\n", word_embed); printf("--- calc result: %s\n", sim_result); clock_t start = clock(); WordSim tmp_sim; char word_A[1024]; char word_B[1024]; int all_num = 0; int use_num = 0; while (fscanf(fTEST, "%s%s", word_A, word_B) != EOF) { //printf("--- %s %s: ", word_A, word_B); all_num++; float sim_AB = 0.0; if (wordSet.find(word_A) == wordSet.end() || wordSet.find(word_B) == wordSet.end()) { ; } else{ use_num++; int index_A = wordSet[word_A]; int index_B = wordSet[word_B]; if (distance_ID == "cosine") { sim_AB = SemWE_CalcCosine(&wordEmbed[index_A*vect_dim], &wordEmbed[index_B*vect_dim], vect_dim); } if (distance_ID == "euclidean") { sim_AB = SemWE_CalcEuclidean(&wordEmbed[index_A*vect_dim], &wordEmbed[index_B*vect_dim], vect_dim); } fprintf(fRES, "%s %s %.6f\n", word_A, word_B, sim_AB); } } fclose(fRES); fclose(fTEST); ////////////////////////////////////////////////////////////////////////// double timeCost = (clock()-start)/CLOCKS_PER_SEC; printf("--- calculate nums: %d (/%d)\n", use_num, all_num); printf("--- elapsed time: %f\n", timeCost); //printf(">> Finish.\n"); free(wordEmbed); wordEmbed = NULL; return 0; }