static STANDARDIZER * CreateStd(char *lextab, char *gaztab, char *rultab) { STANDARDIZER *std; LEXICON *lex; LEXICON *gaz; RULES *rules; int err; int SPIcode; DBG("Enter: CreateStd"); SPIcode = SPI_connect(); if (SPIcode != SPI_OK_CONNECT) { elog(ERROR, "CreateStd: couldn't open a connection to SPI"); } std = std_init(); if (!std) elog(ERROR, "CreateStd: could not allocate memory (std)"); lex = lex_init(std->err_p); if (!lex) { std_free(std); SPI_finish(); elog(ERROR, "CreateStd: could not allocate memory (lex)"); } err = load_lex(lex, lextab); if (err == -1) { lex_free(lex); std_free(std); SPI_finish(); elog(ERROR, "CreateStd: failed to load '%s' for lexicon", lextab); } gaz = lex_init(std->err_p); if (!gaz) { lex_free(lex); std_free(std); SPI_finish(); elog(ERROR, "CreateStd: could not allocate memory (gaz)"); } err = load_lex(gaz, gaztab); if (err == -1) { lex_free(gaz); lex_free(lex); std_free(std); SPI_finish(); elog(ERROR, "CreateStd: failed to load '%s' for gazeteer", gaztab); } rules = rules_init(std->err_p); if (!rules) { lex_free(gaz); lex_free(lex); std_free(std); SPI_finish(); elog(ERROR, "CreateStd: could not allocate memory (rules)"); } err = load_rules(rules, rultab); if (err == -1) { rules_free(rules); lex_free(gaz); lex_free(lex); std_free(std); SPI_finish(); elog(ERROR, "CreateStd: failed to load '%s' for rules", rultab); } std_use_lex(std, lex); std_use_gaz(std, gaz); std_use_rules(std, rules); std_ready_standardizer(std); SPI_finish(); return std; }
int main(int argc, char *argv[]) { STANDARDIZER *std; LEXICON *lex; LEXICON *gaz; RULES *rules; char buf[1024]; int seq; char input_str[ 4096 ] ; char word[512]; char stdword[512]; int token; int nr; int rule[RULESIZE]; int err; int cnt; int option = 0; FILE *in; if (argc == 3 && !strcmp(argv[1], "-o")) { option = strtol(argv[2], NULL, 10); argc -= 2; argv += 2; } else if (argc != 1) Usage(); std = std_init(); assert(std); lex = lex_init(std->err_p); assert(lex); in = fopen(LEXIN, "rb"); assert(in); cnt = 0; while (!feof(in) && fgets(buf, 1024, in)) { cnt++; /* parse into fields */ if (parse_csv(buf, &seq, word, stdword, &token)) { /* add the record to the lexicon */ err = lex_add_entry(lex, seq, word, stdword, token); if (err != 1) printf("lex: Failed: %d: %s", cnt, buf); } else { printf("lex: Skipping: %d: %s", cnt, buf); } } fclose(in); if (option & 1) { printf("------------ address lexicon --------------\n"); print_lexicon(lex->hash_table); printf("\n"); } gaz = lex_init(std->err_p); assert(gaz); in = fopen(GAZIN, "rb"); assert(in); cnt = 0; while (!feof(in) && fgets(buf, 1024, in)) { cnt++; /* parse into fields */ if (parse_csv(buf, &seq, word, stdword, &token)) { /* add the record to the lexicon */ err = lex_add_entry(gaz, seq, word, stdword, token); if (err != 1) printf("gaz: Failed: %d: %s", cnt, buf); } else { printf("gaz: Skipping: %d: %s", cnt, buf); } } fclose(in); if (option & 2) { printf("------------ gazeteer lexicon --------------\n"); print_lexicon(gaz->hash_table); printf("\n"); } rules = rules_init(std->err_p); assert(rules); rules -> r_p -> collect_statistics = TRUE ; /* ************ RULES **************** */ in = fopen(RULESIN, "rb"); assert(in); cnt = 0; while (!feof(in) && fgets(buf, 1024, in)) { cnt++; /* parse into fields */ nr = parse_rule(buf, rule); /* add the record to the rules */ err = rules_add_rule(rules, nr, rule); if (err != 0) printf("rules: Failed: %d (%d): %s", cnt, err, buf); } err = rules_ready(rules); if (err != 0) printf("rules: Failed: err=%d\n", err); fclose(in); std_use_lex(std, lex); std_use_gaz(std, gaz); std_use_rules(std, rules); std_ready_standardizer(std); printf( "Standardization test. Type \"exit\" to quit:\n" ) ; fflush( stdout ) ; while ( TRUE ) { err = standardize_command_line( std, input_str, option ) ; if ( err == FAIL ) { break ; } } printf( "OK\n" ) ; fflush( stdout ) ; std_free(std); /* these were freed when we bound them with std_use_*() rules_free(rules); lex_free(gaz); lex_free(lex); */ return 0; }