static tdb_error find_duplicate_fieldnames(const char **ofield_names, uint64_t num_ofields) { Pvoid_t check = NULL; tdb_field i; Word_t tmp; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wsign-compare" for (i = 0; i < num_ofields; i++){ Word_t *ptr; JSLI(ptr, check, (const uint8_t*)ofield_names[i]); if (*ptr){ JSLFA(tmp, check); return TDB_ERR_DUPLICATE_FIELDS; } *ptr = 1; } JSLFA(tmp, check); #pragma GCC diagnostic pop return 0; out_of_memory: return TDB_ERR_NOMEM; }
int main(int argc, void **argv) { char buffer[MAXLINE]; Word_t Bytes; FILE *fp = NULL; int result =0; int count = 0; int i,j; int start_len; if(argc < 2) { printf("provide file to search\n"); return; } char *file = argv[1]; // populate map and new sorted list create_index(file); fp = fopen(file, "r"); if (fp == NULL) { perror("no sorted list exist"); goto process_done; } // loop through the list from the longest word while(fgets(buffer, sizeof(buffer), fp) !=NULL) { remove_eol(buffer); start_len = strlen(buffer); while(1) { if (start_len < MIN_WORD_LEN*MIN_WORD_LEN) break; if (string_prime_div(buffer, start_len - 1)) { printf("%d.%s\n", count, buffer); count++; break; } else { start_len--; } } } printf("total %d of words found that composed of other word in the list\n", count); JSLFA(Bytes, PJArray); fclose(fp); process_done: return 0; }
/*
 * Close the package database held in the global _db.
 *
 * If the database is not open an E_ERROR is recorded and nothing else
 * happens. Otherwise the lock is deleted (unless the database is
 * read-only), the JudySL array of paths and the directory strings are
 * freed, and the whole _db structure is zeroed.
 */
void db_close()
{
  Word_t freed_bytes;

  if (_db.is_open)
  {
    if (!_db.readonly)
      sys_lock_del(_db.fd_lock);

    JSLFA(freed_bytes, _db.paths);

    g_free(_db.topdir);
    g_free(_db.pkgdir);
    g_free(_db.scrdir);

    /* zeroing the struct also clears is_open */
    memset(&_db, 0, sizeof(_db));
  }
  else
  {
    e_set(E_ERROR, "Package database is NOT open.");
  }
}
/*
 * Smoke test for JudySL: count the occurrences of each space-separated
 * token of a fixed string, print every (token, count) pair in the order
 * JSLF/JSLN return them, then free the array and print the number of
 * bytes it used.
 *
 * Exits the process with status 1 if an insertion reports PJERR.
 * Always returns NULL.
 */
char *test_judy()
{
    Pvoid_t PJArray = (PWord_t)NULL;
    PWord_t PValue;
    Word_t Bytes;
    char s[256];
    /*
     * JSLF/JSLN copy the key they find back into this buffer, so it must
     * hold the longest stored key plus its NUL. Every key is a token of s,
     * hence sizeof s is always enough (the old 10-byte buffer overflowed
     * on the 10-character token "1234567890").
     */
    char key[sizeof s];

    strcpy(s, "one two three one 1234567890");

    char *token = strtok(s, " ");
    while (token) {
        JSLI(PValue, PJArray, (uint8_t *)token);
        if (PValue == PJERR) {
            printf("malloc failed\n");
            exit(1);
        }
        *PValue += 1;
        token = strtok(NULL, " ");
    }

    /* an empty start key makes JSLF return the first entry */
    key[0] = '\0';
    JSLF(PValue, PJArray, (uint8_t *)key);
    while (PValue != NULL) {
        /* Word_t is printed as unsigned long, matching the %lu below */
        printf("%s %lu\n", key, (unsigned long)*PValue);
        JSLN(PValue, PJArray, (uint8_t *)key);
    }

    JSLFA(Bytes, PJArray);
    printf("%lu bytes used\n", (unsigned long)Bytes);

    return NULL;
}
/*
 * Free the JudySL array that *map points to.
 *
 * map: address of the array handle; it is dereferenced unconditionally.
 * The byte count reported by JSLFA is discarded.
 */
void map_free(void ** map)
{
    Word_t released;

    JSLFA(released, *map);
}
int main(int argc, char *argv[]) { if (argc != 8) { fprintf(stderr, "Usage: ./merge_N [out_dir_name 1] [item_#_file 2] [length_#_file 3] [count_dist_file 4] [number_of_temp_prefix 5] [from_temp_n] [to_temp_n]\n"); // /root/cx_src/src/merge_N /tmp/data /tmp/result/ino.txt /tmp/result/lno.txt /tmp/result/cdo.txt 3 0 1 return -1; } if ((itf = fopen(argv[2], "a")) == NULL) { fprintf(stderr, "Failed to open file \"%s\" for writing item numbers\n", argv[2]); return -1; } // fixing each temp file fprintf(stdout, "Start fixing temp files\n"); for (i = atoi(argv[6]); i <= atoi(argv[7]); ++i) { //for (i = 0; i < TEMP_N; ++i) { // fix the temp file i int temp_prefix_num = 0;// num of concurrent threads while (temp_prefix_num < atoi(argv[5])){ sprintf(buffer, "%s/%s%d-%d.txt", argv[1], TEMP_PREFIX, i, temp_prefix_num); fprintf(stdout, "\rWorking on temp file: \"%s\" \n", buffer); //fflush(stdout); if ((tsf[i] = fopen(buffer, "r")) == NULL) { fprintf(stderr, "Failed to open file \"%s\" for reading temp strings\n", buffer); //continue; //break; return -1; } while (fscanf(tsf[i], "%"PRId64"\t", &itemn) != EOF) { fgets(Index, BUFFER_SIZE, tsf[i]); for (Len = strlen(Index) - 1; Index[Len] == '\n' || Index[Len] == '\r'; Len--) Index[Len] = 0; ++Len; JSLI(PValNgramS, PJSLNgram, (uint8_t *)Index); if (PValNgramS == PJERR) { fprintf(stderr, "Malloc failed for \"PJSLNgram\"\n"); //return -1; } (*PValNgramS) += itemn; JLI(PValTotC, PJLTotCount, Len); if (PValTotC == PJERR) { fprintf(stderr, "Malloc failed for \"PJLTotCount\"\n"); //return -1; } *PValTotC += itemn; if (*PValNgramS == itemn) { JLI(PValNgramC, PJLNgramCount, Len); if (PValNgramC == PJERR) { fprintf(stderr, "Malloc failed for \"PJLNgramCount\"\n"); //return -1; } ++*PValNgramC; } } sprintf(buffer, "rm %s/%s%d-%d.txt", argv[1], TEMP_PREFIX, i, temp_prefix_num++); if (system(buffer) == -1) { fprintf(stderr, "Failed to execute command: \"%s\"\n", buffer); //return -1; } fclose(tsf[i]); } // write the final temp file 
sprintf(buffer, "%s/%s%d.txt", argv[1], TEMP_PREFIX, i); if ((tsf[i] = fopen(buffer, "w")) == NULL) { fprintf(stderr, "Failed to open file \"%s\" for writing temp strings\n", buffer); return -1; } Index[0] = '\0'; JSLF(PValNgramS, PJSLNgram, (uint8_t *)Index); while (PValNgramS != NULL) { fprintf(tsf[i], "%lu\t%s\n", *PValNgramS, Index); Count = *PValNgramS; JLI(PValCountC, PJLCountCount, Count); if (PValCountC == PJERR) { fprintf(stderr, "Malloc failed for \"PJLCountCount\"\n"); return -1; } ++*PValCountC; JSLN(PValNgramS, PJSLNgram, (uint8_t *)Index); } JSLFA(Bytes, PJSLNgram); fflush(tsf[i]); fclose(tsf[i]); fprintf(itf, "Temp file \"%s/%s%d\" uses %lu Bytes of memory\n", argv[1], TEMP_PREFIX, i, Bytes); fflush(itf); } fclose(itf); if ((lef = fopen(argv[3], "a")) == NULL) { fprintf(stderr, "Failed to open file \"%s\" for writing length number\n", argv[3]); return -1; } Total = NgramN = 0; JLF(PValTotC, PJLTotCount, Total); JLF(PValNgramC, PJLNgramCount, NgramN); while (PValTotC != NULL) { fprintf(lef, "%lu\t%lu\t%lu\n", Total, *PValNgramC, *PValTotC); JLN(PValTotC, PJLTotCount, Total); JLN(PValNgramC, PJLNgramCount, NgramN); } JLFA(Bytes, PJLTotCount); JLFA(Bytes, PJLNgramCount); fflush(lef); fclose(lef); if ((cdf = fopen(argv[4], "a")) == NULL) { fprintf(stderr, "Failed to open file \"%s\" for writing count distribuction\n", argv[4]); return -1; } Count = 0; JLF(PValCountC, PJLCountCount, Count); while (PValCountC != NULL) { fprintf(cdf, "%lu\t%lu\n", Count, *PValCountC); JLN(PValCountC, PJLCountCount, Count); } JLFA(Bytes, PJLCountCount); fflush(cdf); fclose(cdf); return 0; }
int main(int argc, char** argv) { if (argc < 2) { printf("usage: BackOffTrigramModelPipe arpafile\n"); return 1; } FILE* arpafile = fopen(argv[1], "r"); Pvoid_t UP = (Pvoid_t) NULL; /* map from unigram to probability */ Pvoid_t UB = (Pvoid_t) NULL; /* map from unigram to backoff */ Pvoid_t BP = (Pvoid_t) NULL; /* map from bigram to probability */ Pvoid_t BB = (Pvoid_t) NULL; /* map from bigram to backoff */ Pvoid_t TP = (Pvoid_t) NULL; /* map from trigram to probability */ read_arpa_file(arpafile, &UP, &UB, &BP, &BB, &TP); zbyte inputbuf[MAXTRIGRAMSIZE + 4]; zbyte* p; PWord_t ptr; size_t i; do { fgets((char*)inputbuf, MAXTRIGRAMSIZE + 4, stdin); i = strlen((char*)inputbuf); if (inputbuf[i-1] == '\n') { inputbuf[--i] = '\0'; } else { inputbuf[i] = '\0'; } p = inputbuf; if (*p == 'i') { if (*(p+1) == 'u') { // in unigrams p+=3; // command and space JSLG(ptr, UP, p); if (ptr == NULL) { printf("0\n"); fflush(stdout); } else { printf("1\n"); fflush(stdout); } } else if (*(p+1) == 'b') { // in bigrams p+=3; // command and space JSLG(ptr, BP, p); if (ptr == NULL) { printf("0\n"); fflush(stdout); } else { printf("1\n"); fflush(stdout); } } else if (*(p+1) == 't') { // in trigrams p+=3; // command and space JSLG(ptr, TP, p); if (ptr == NULL) { printf("0\n"); fflush(stdout); } else { printf("1\n"); fflush(stdout); } } } else if (*p == 'u'){ if (*(p+1) == 'p') { // unigram probability p+=3; // command and space float uniprob = unigram_prob_1(cs_as_z((char*)p), &UP); printf("%f\n", uniprob); fflush(stdout); } else if (*(p+1) == 'b'){ // unigram backoff p+=3; // command and space JSLG(ptr, UB, p); if (ptr == NULL) { printf("None\n"); fflush(stdout); } else { printf("%f\n", *(float*)ptr); fflush(stdout); } } else if (*(p+1) == 's') { // all vocabulary starting with prefix p+=3; // command and space size_t prefixlength = i - 3; zbyte prefix[MAXUNIGRAMSIZE]; memcpy (prefix, p, prefixlength); JSLF(ptr, UP, p); while ((ptr != NULL) && (memcmp(p, prefix, prefixlength) == 0)) { 
printf("%s ", p); fflush(stdout); JSLN(ptr, UP, p); } printf("\n"); fflush(stdout); } } else if (*p == 'b'){ if (*(p+1) == 'b'){ // bigram backoff p+=3; // command and space JSLG(ptr, BB, p); if (ptr == NULL) { printf("None\n"); fflush(stdout); } else { printf("%f\n", *(float*)ptr); fflush(stdout); } } } else if (*p == 't') { if (*(p+1) == 'p') { // trigram probability p+=3; // command and space float triprob = trigram_split_unkify_prob_3(cs_as_z((char*)p), &UP, &UB, &BP, &BB, &TP); printf("%f\n", triprob); fflush(stdout); } } else if (*p == 'U') { // is this a unk model? if (*(p+1) == 'p') { // unigram probability of unk *p = UNKBYTESTR[0]; *(p+1) = '\0'; JSLG(ptr, UP, p); if (ptr == NULL) { printf("None\n"); fflush(stdout); } else { printf("%f\n", *(float*)ptr); fflush(stdout); } } else if (*(p+1) == 'b') { // unigram backoff of unk *p = UNKBYTESTR[0]; *(p+1) = '\0'; JSLG(ptr, UB, p); if (ptr == NULL) { printf("None\n"); fflush(stdout); } else { printf("%f\n", *(float*)ptr); fflush(stdout); } } } } while ( (i > 0) && (feof(stdin) == 0) && (ferror(stdin) == 0) ); Word_t temp; JSLFA(temp, UP); JSLFA(temp, UB); JSLFA(temp, BP); JSLFA(temp, BB); JSLFA(temp, TP); fflush(stdout); return 0; }
/*
 * Release what a cjson node owns, dispatching on node->type.
 *
 * Container nodes (array, object, root) call cjson_free on every stored
 * child and then free the Judy array that held the child pointers. Number
 * and string nodes free their buffer; pair nodes free the key and call
 * cjson_free on the value; boolean and null nodes need nothing.
 * A NULL node is accepted and ignored.
 *
 * NOTE(review): the end of this function (anything after the switch,
 * including its closing brace) is not visible in this excerpt, so whether
 * node itself is freed cannot be confirmed from here.
 */
void cjson_free(struct cjson *node) {
    if (node == NULL) {
        return;
    }

    switch (node->type) {
    case CJSON_ARRAY: {
        int status = 0;
        Word_t index = 0;
        struct cjson **value = NULL;
        /* walk the JudyL array from index 0 upwards, freeing each child */
        JLF(value, node->value.array.data, index);
        while (value != NULL) {
            cjson_free(*value);
            JLN(value, node->value.array.data, index);
        }
        /* then release the array itself */
        JLFA(status, node->value.array.data);
    } break;
    case CJSON_BOOLEAN:
        break;
    case CJSON_NULL:
        break;
    case CJSON_NUMBER:
        free(node->value.number);
        break;
    case CJSON_OBJECT: {
        int status = 0;
        /* scratch buffer that JSLF/JSLN write each key into; sized from the
           key_length recorded on the object plus one byte for the NUL */
        uint8_t *key = ecx_malloc(node->value.object.key_length + 1);
        /* NOTE(review): ec_with presumably runs free(key) when the block
           ends -- confirm against the macro's definition */
        ec_with(key, free) {
            /* an empty key makes JSLF start from the first entry */
            key[0] = '\0';
            struct cjson **value = NULL;
            JSLF(value, node->value.object.data, key);
            while (value != NULL) {
                cjson_free(*value);
                JSLN(value, node->value.object.data, key);
            }
            JSLFA(status, node->value.object.data);
        }
    } break;
    case CJSON_PAIR:
        free(node->value.pair.key);
        cjson_free(node->value.pair.value);
        break;
    case CJSON_ROOT: {
        int status = 0;
        Word_t index = 0;
        struct cjson **value = NULL;
        /* same traversal as CJSON_ARRAY, over the root's children */
        JLF(value, node->value.root.data, index);
        while (value != NULL) {
            cjson_free(*value);
            JLN(value, node->value.root.data, index);
        }
        JLFA(status, node->value.root.data);
    } break;
    case CJSON_STRING:
        free(node->value.string.bytes);
        break;
    }
int main(int argc, char *argv[]) { if (argc != 7) { fprintf(stderr, "Usage: ./prepare_N [file_prefix] [from_file_num] [number_of_files] [out_dir_name] [item_#_file] [temp_prefix_n]\n"); // /root/cx_src/src/prepare_N /tmp/test/googlebooks-eng-all-5gram-20090715- 0 200 /tmp/data /tmp/result/ino.txt 0 return -1; } // check and set maximun number of newly-created descriptors getrlimit(RLIMIT_NOFILE, &rlim); if (rlim.rlim_cur < FLIMIT) { fprintf(stderr, "Maximum number of newly-created descriptors \"%"PRId64"\" is not enough\n", rlim.rlim_cur); rlim.rlim_cur = rlim.rlim_max = FLIMIT; if (setrlimit(RLIMIT_NOFILE, &rlim)) { fprintf(stderr, "Failed to set the maximum number of newly-created descriptors\n"); return -1; } } if (access(argv[4], F_OK)) { fprintf(stdout, "Directory \"%s\" does not exist, create it\n", argv[4]); if (mkdir(argv[4], S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH)) { fprintf(stderr, "Failed to create directory \"%s\"\n", argv[4]); return -1; } } else fprintf(stdout, "Directory \"%s\" already existed\n", argv[4]); int temp_prefix_num = atoi(argv[6]);// num of concurrent threads for (i = 0; i < TEMP_N; ++i) { sprintf(buffer, "%s/%s%d-%d.txt", argv[4], TEMP_PREFIX, i, temp_prefix_num); if ((tsf[i] = fopen(buffer, "w")) == NULL) { fprintf(stderr, "Failed to open file \"%s\" for writing temp strings\n", buffer); for (j = 0; j < i; ++j) fclose(tsf[j]); return -1; } } if ((itf = fopen(argv[5], "a")) == NULL) { fprintf(stderr, "Failed to open file \"%s\" for writing item numbers\n", argv[5]); return -1; } fi = atoi(argv[2]); fn = fi + atoi(argv[3]); for (tot = itemn = Index[0] = 0; fi < fn; ++fi) { //for (fi = tot = itemn = Index[0] = 0; fi < fn; ++fi) { // use "unzip" command to prepare csv input file sprintf(buffer, "unzip %s%d.csv.zip -d /tmp/temp_csv_files/ 1>/dev/null", argv[1], fi); if (system(buffer) == -1) { fprintf(stderr, "Failed to execute command: \"%s\"\n", buffer); return -1; } sprintf(buffer, "/tmp/temp_csv_files/%s%d.csv", 
"googlebooks-eng-all-5gram-20090715-", fi); if ((inf = fopen(buffer, "r")) == NULL) { fprintf(stderr, "Failed to read file \"%s%d\"\n", "googlebooks-eng-all-5gram-20090715-", fi); return -1; } while (fgets(buffer, BUFFER_SIZE, inf) != (char *)NULL) { // data line format: "ngram TAB year TAB match_count TAB volume_count NEWLINE" // take the first TAB as the end of the ngram, and count the number of TAB for (Len = 0; buffer[Len] && buffer[Len] != '\t'; Len++) Index[Len] = buffer[Len]; Index[Len] = 0; for (i = Len, j = 0; buffer[i]; j += (buffer[i++] == '\t')); if (j != 4 || Len <= 1) continue; sscanf(buffer + Len + 1, "%d\t%d", &i, &j); JSLI(PValNgramS, PJSLNgram, (uint8_t *)Index); if (PValNgramS == PJERR) { fprintf(stderr, "Malloc failed for \"PJSLNgram\"\n"); return -1; } *PValNgramS += j; itemn += (*PValNgramS == j); tot += j; if (itemn % 200000 == 0) { fprintf(stdout, "\r%"PRId64" in %"PRId64" of %d", itemn, tot, fi); fflush(stdout); } } fclose(inf); // remove the unziped csv file sprintf(buffer, "rm /tmp/temp_csv_files/%s%d.csv", "googlebooks-eng-all-5gram-20090715-", fi); if (system(buffer) == -1) { fprintf(stderr, "Failed to execute command: \"%s\"\n", buffer); return -1; } fprintf(itf, "%"PRId64" in %"PRId64" of %d\n", itemn, tot, fi); fflush(itf); Index[0] = '\0'; JSLF(PValNgramS, PJSLNgram, (uint8_t *)Index); while (PValNgramS != NULL) { fprintf(tsf[BKDRHash((uint8_t *)Index)], "%lu\t%s\n", *PValNgramS, Index); JSLN(PValNgramS, PJSLNgram, (uint8_t *)Index); } JSLFA(Bytes, PJSLNgram); } for (i = 0; i < TEMP_N; ++i) { fflush(tsf[i]); fclose(tsf[i]); } fprintf(stdout, "\r%"PRId64" in %"PRId64" of %d\n", itemn, tot, fi); return 0; }
/*
 * Free the JudySL array stored in table->t.
 *
 * Returns whatever JSLFA stores in its result argument, held in an int.
 * table is dereferenced unconditionally.
 */
int jtableS_free(jtableS *table)
{
    int freed;

    JSLFA(freed, table->t);

    return freed;
}
int main(int argc, char *argv[]) { if (argc >= 7 && argc <= 13){ int max_nb_lines = 120; int xdrop_treshold = 0; char* reverse_comp_att = NULL; int i; for (i=7; i<argc; i+=2){ if (strcmp(argv[i],"-r") == 0) reverse_comp_att = argv[i+1]; else if (strcmp(argv[i],"-x") == 0) xdrop_treshold = atoi(argv[i+1]); else if (strcmp(argv[i],"-l") == 0) max_nb_lines = atoi(argv[i+1]); } // Initialisation du regex const char *str_regex = "^[acgtumrwsykvhdbnACGTUMRWSYKVHDBN]+"; int err; regex_t preg; err = regcomp (&preg, str_regex, REG_NOSUB | REG_EXTENDED); // Initialisation outil pour connaitre la mémoire utilisée char buffer2 [255] = "smem -ntk"; // Initialisation outil pour connaitre le temps utilisé temps_exec temps; temps.debut = times(&temps.sdebut); // On parse le fichier XGMML Pvoid_t net = parse(argv[1], argv[3], argv[4], reverse_comp_att); printf("Parsing XGMML file done \n\n"); // Affichage mémoire utilisée /*printf("Memory used by the system after parsing graph \n\n"); system(buffer2); printf("\n\n"); fflush(stdout);*/ // On parse le fichier FASTA list* list_queries = parseFastaFile(argv[2], max_nb_lines); printf("Parsing FASTA file done \n\n"); fflush(stdout); // On indexe le graphe Pvoid_t index = indexGraph(&net, atoi(argv[5]), atoi(argv[6]), reverse_comp_att); printf("Indexing graph done \n\n"); // Affichage mémoire utilisée /*printf("Memory used by the system after indexing graph \n\n"); system(buffer2); printf("\n\n");*/ // Affichage temps d'execution printf("Time after indexing graph : \n"); temps.fin = times(&temps.sfin); temp(&temps); fflush(stdout); int nb_queries = 0; int found = 0; int not_found = 0; cell* elem_list_queries = list_queries->first; while (elem_list_queries){ // Si pas d'erreur if (err == 0) { int match; // On exécute l'expression régulière match = regexec (&preg, (char*)elem_list_queries->val, 0, NULL, 0); // Si il y a correspondance if (match == 0){ // On lance les extensions int lAlign = NULL; if (reverse_comp_att == NULL) lAlign = 
extend((char*)elem_list_queries->val, &index, atoi(argv[5]), atoi(argv[6]), xdrop_treshold, nb_queries); else lAlign = extendR((char*)elem_list_queries->val, &index, atoi(argv[5]), atoi(argv[6]), xdrop_treshold, nb_queries); // Analyse et affichage des résultats (lAlign == 0) ? not_found++ : found++; } else{ not_found++; printf("Invalid sequence : %s \n\n", (char*)elem_list_queries->val); exit(0); } } else printf("An error occured with the regex \n\n"); elem_list_queries = elem_list_queries->prox; nb_queries++; } printf("Number of queries found : %d \n", found); printf("Number of queries not found : %d \n\n", not_found); // Affichage mémoire utilisée /*printf("Memory used by the system after extending queries \n\n"); system(buffer2); printf("\n\n");*/ // Affichage temps d'execution printf("Time after aligning graph : \n"); temps.fin = times(&temps.sfin); temp(&temps); fflush(stdout); Word_t* PValue; Word_t Bytes; uint8_t keyIndex[MAXLINE]; uint8_t keyNetwork[MAXLINE]; keyIndex[0] = keyNetwork[0] = '\0'; JSLF(PValue, index, keyIndex); while (PValue != NULL) { list_free_NodeUint16((list_NodeUint16*)*PValue); JSLN(PValue, index, keyIndex); } JSLFA(Bytes, index); JSLF(PValue, net, keyNetwork); if (reverse_comp_att == NULL){ while (PValue != NULL) { freeNode((Node*)*PValue); JSLN(PValue, net, keyNetwork); } } else { while (PValue != NULL) { freeNodeR((NodeR*)*PValue); JSLN(PValue, net, keyNetwork); } } JSLFA(Bytes, net); list_of_generic_free(list_queries); // On libère l'espace alloué à notre expression régulière regfree (&preg); return 0; } else if (argc == 2){ if (strcmp(argv[1], "-h") == 0){ printf("BlastGraph <name_file_graph.xgmml> <name_query_file.fasta> <node_attribute> <edge_attribute> <seed_size> <overlap_size> [-options]\n\n"); printf("<node_attribute> : the name of the attribute in the XGMML file (in the <node> marker) that contains the sequence of each node\n"); printf("<edge_attribute> : the name of the attribute in the XGMML file (in the <node> marker) 
that contains the two letters indicating if the sequences of the source node and target node have to be read in forward (F) or reverse-complement (R)\n"); printf("<seed_size> is the seed size used to anchor query sequences on the graph\n"); printf("<overlap_size> is the size of the overlaps between the sequences source and target nodes\n\n"); printf("[-options] : \n"); printf("-r reverse_complement_attribute : take into account the reverse-complement identified by reverse_complement_attribute in the graph. By default, compute the reverse-complement of each sequence in each node identified by node_attribute.\n"); printf("-x value : use this value as treshold for the X-DROP heuristic. 0 by default.\n"); printf("-l value : use this value as the maximum number of characters per line in the query file. 120 by default.\n"); return 0; } else{ fprintf (stderr, "Bad number of parameters \n"); exit (EXIT_FAILURE); } } else { fprintf (stderr, "Bad number of parameters \n"); exit (EXIT_FAILURE); } }