/*
 * Read newline-separated integers from a file into an array, bucket-sort
 * them, and print the sorted values. glibc malloc tracing (mtrace) is
 * enabled so any leaks are recorded in memoryTraceResult.txt.
 */
int main(int argc, char *argv[]) {
    setenv("MALLOC_TRACE", "memoryTraceResult.txt", 1); /* file that receives the mtrace report */
    mtrace();
    if (argc < 2) {
        /* usage errors go to stderr and exit non-zero (original wrongly
         * printed to stdout and returned EXIT_SUCCESS) */
        fprintf(stderr, "Usage : %s <filename>\n", argv[0]);
        exit(EXIT_FAILURE);
    }
    char *filename = argv[1];
    FILE *fp = fopen_file(filename, "r");
    int data_length = cal_data_length(fp);          /* one number per line */
    int *arr = malloc_space(data_length * sizeof(int));
    assign_arr(fp, arr);
    fclose(fp); /* data fully loaded; release the stream before sorting */
    bucket_sort(arr, data_length);
    for (int i = 0; i < data_length; i++) {
        printf("%d, ", arr[i]);
    }
    printf("\n");
    free(arr);
    muntrace();
    return 0;
}
/*
 * init_lda - load the tab-separated training file into the Lda model
 * (IdMap variant). Two passes over the file: pass 1 assigns dense ids
 * to distinct docs/words and counts tokens; pass 2 fills the token
 * table. Returns 0 on success, -1 if the input file cannot be opened.
 */
int init_lda(Lda *lda) {
    FILE *fp = NULL;
    if (NULL == (fp = fopen(lda->p.in_file, "r"))) {
        fprintf(stderr, "can not open file \"%s\"\n", lda->p.in_file);
        return -1;
    }
    char buffer[LDA_LINE_LEN];
    char **str_array = NULL;
    int count = 0, token_size = 0;
    /* doc-string -> dense id and word-string -> dense id maps */
    IdMap *uidmap = idmap_create();
    IdMap *vidmap = idmap_create();
    /* Pass 1: register unseen docs/words and count tokens (lines). */
    while (NULL != fgets(buffer, LDA_LINE_LEN, fp)) {
        str_array = split(trim(buffer, 3), '\t', &count);
        if (count < 2) {
            goto free_str; /* malformed line: skip it, but still release split() output */
        }
        if (-1 == idmap_get_value(uidmap, str_array[0])) {
            /* new doc: id = current map size, so ids come out 0..d-1 */
            idmap_add(uidmap, dupstr(str_array[0]), idmap_size(uidmap));
        }
        if (-1 == idmap_get_value(vidmap, str_array[1])) {
            idmap_add(vidmap, dupstr(str_array[1]), idmap_size(vidmap));
        }
        token_size += 1;
    free_str:
        /* NOTE(review): only element 0 and the pointer array are freed.
         * That is correct only if split() packs all fields into a single
         * buffer anchored at str_array[0]; if each element is allocated
         * separately, elements 1..count-1 leak on every line. Also
         * assumes split() never returns NULL and count >= 1 here —
         * verify split()'s contract. */
        free(str_array[0]);
        free(str_array);
    }
    lda->d = idmap_size(uidmap); /* number of distinct docs */
    lda->t = token_size;         /* total token (line) count */
    lda->v = idmap_size(vidmap); /* number of distinct words */
    malloc_space(lda); /* allocate model arrays sized by d/t/v; presumably zero-fills — verify */
    /* Pass 2: re-read the file and populate the token table. */
    rewind(fp);
    int uid = -1, vid = -1, tid = -1;
    int token_index = 0;
    while (NULL != fgets(buffer, LDA_LINE_LEN, fp)) {
        str_array = split(trim(buffer, 3), '\t', &count);
        if (count < 2) {
            goto str_free;
        }
        uid = idmap_get_value(uidmap, str_array[0]);
        /* keep the original doc string for id -> name lookups.
         * NOTE(review): strncpy with KEY_SIZE-1 relies on the destination
         * already being zero-initialized for NUL termination — presumably
         * done by malloc_space; confirm. */
        strncpy(lda->id_doc_map[uid], str_array[0], KEY_SIZE - 1);
        lda->tokens[token_index][0] = uid;
        vid = idmap_get_value(vidmap, str_array[1]);
        strncpy(lda->id_v_map[vid], str_array[1], KEY_SIZE - 1);
        lda->tokens[token_index][1] = vid;
        /* random initial topic; drawn unconditionally so the rand() stream
         * is consumed even when an explicit topic column is present.
         * NOTE(review): (1.0+rand())/(1.0+RAND_MAX) can equal 1.0 when
         * rand()==RAND_MAX, so tid can reach k — possible off-by-one if
         * topics are indexed 0..k-1; verify. */
        tid = (int) ((1.0 + rand()) / (1.0 + RAND_MAX) * (lda->p.k));
        if (count == 3) {
            tid = atoi(str_array[2]); /* explicit topic assignment from the file */
        }
        lda->tokens[token_index][2] = tid;
        token_index += 1;
    str_free:
        /* same free pattern (and same caveat) as pass 1 */
        free(str_array[0]);
        free(str_array);
    }
    fclose(fp);
    idmap_free(uidmap);
    uidmap = NULL;
    idmap_free(vidmap);
    vidmap = NULL;
    return 0;
}
/*
 * init_lda - load the tab-separated training file into the Lda model
 * (Hash variant). Pass 1 collects the distinct doc and word sets and
 * counts tokens; pass 2 re-reads the file and fills the token table,
 * threading each doc's tokens into a linked list via doc_entry.
 * Returns 0 on success, -1 if the input file cannot be opened.
 */
int init_lda(Lda *lda) {
    FILE *fp = fopen(lda->p.in_file, "r");
    if (!fp) {
        fprintf(stderr, "can not open file \"%s\"\n", lda->p.in_file);
        return -1;
    }

    char line[LDA_LINE_LEN] = {'\0'};
    Hash *doc_set  = hash_create(1 << 20, STRING);
    Hash *word_set = hash_create(1 << 20, STRING);

    /* Pass 1: build the unique doc/word sets and count tokens. */
    int n_tokens = 0;
    while (fgets(line, LDA_LINE_LEN, fp) != NULL) {
        char *cursor = trim(line, 3);
        hash_add(doc_set, strsep(&cursor, "\t"));
        hash_add(word_set, strsep(&cursor, "\t"));
        n_tokens += 1;
    }
    lda->d = hash_cnt(doc_set);
    lda->v = hash_cnt(word_set);
    lda->t = n_tokens;

    /* Dimensions are known now: allocate the model arrays. */
    malloc_space(lda);

    /* Pass 2: re-read the file and populate the token table. */
    rewind(fp);
    int ti = 0;
    while (fgets(line, LDA_LINE_LEN, fp) != NULL) {
        char *cursor = trim(line, 3);

        /* doc column: resolve id, remember the original string once */
        char *field = strsep(&cursor, "\t");
        int doc_id = hash_find(doc_set, field);
        if ('\0' == lda->id_doc_map[doc_id][0]) {
            strncpy(lda->id_doc_map[doc_id], field, KEY_SIZE - 1);
        }
        lda->tokens[ti][0] = doc_id;

        /* word column */
        field = strsep(&cursor, "\t");
        int word_id = hash_find(word_set, field);
        if ('\0' == lda->id_v_map[word_id][0]) {
            strncpy(lda->id_v_map[word_id], field, KEY_SIZE - 1);
        }
        lda->tokens[ti][1] = word_id;

        /* topic column: a 1-based random initial topic is drawn
         * unconditionally, so the rand() stream is consumed even when
         * the file supplies an explicit topic id. */
        int topic_id = (int) ((0.1 + rand()) / (0.1 + RAND_MAX) * (lda->p.k)) + 1;
        field = strsep(&cursor, "\t");
        if (field) {
            topic_id = atoi(field);
        }
        lda->tokens[ti][2] = topic_id;

        /* prepend this token onto the doc's token chain */
        lda->tokens[ti][3] = lda->doc_entry[doc_id];
        lda->doc_entry[doc_id] = ti;

        ti += 1;
    }

    fclose(fp);
    hash_free(doc_set);
    doc_set = NULL;
    hash_free(word_set);
    word_set = NULL;
    return 0;
}