/*
 * load_rules
 *
 * Read every row of the rules table `tab` through SPI (ordered by `id`),
 * parse each `rule` value with parse_rule() and add it to `rules` with
 * rules_add_rule(), then finalize the rule set with rules_ready().
 *
 * This function does not call SPI_connect()/SPI_finish() itself; the caller
 * is expected to have an SPI connection open.
 *
 * Parameters:
 *   rules - rule set to populate.
 *   tab   - name of the rules table. It is spliced directly into the query
 *           text, so it must be non-empty and accepted by tableNameOk().
 *
 * Returns 0 on success. On failure a NOTICE is logged and -1 is returned,
 * except that a non-zero result from fetch_rules_columns() is returned as is.
 * The cursor and the query string are released on every path once they
 * have been created.
 */
static int load_rules(RULES *rules, char *tab)
{
    static const char sql_head[] = "select rule from ";
    static const char sql_tail[] = " order by id ";

    int result = -1;
    int ret;
    SPIPlanPtr SPIplan;
    Portal SPIportal = NULL;
#ifdef DEBUG
    struct timeval t1, t2;
    double elapsed;
#endif
    char *sql;
    size_t sql_size;
    int rule_arr[MAX_RULE_LENGTH];
    int ntuples;
    int total_tuples = 0;
    rules_columns_t rules_columns = {.rule = -1};
    char *rule;

    DBG("start load_rules\n");
    SET_TIME(t1);

    if (!tab || !strlen(tab)) {
        elog(NOTICE, "load_rules: rules table is not usable");
        return -1;
    }
    if (!tableNameOk(tab)) {
        elog(NOTICE, "load_rules: rules table name may only be alphanum and '.\"_' characters (%s)", tab);
        return -1;
    }

    /* Both sizeof terms count a terminating NUL, so the buffer is large enough. */
    sql_size = strlen(tab) + sizeof sql_head + sizeof sql_tail;
    sql = SPI_palloc(sql_size);
    snprintf(sql, sql_size, "%s%s%s", sql_head, tab, sql_tail);

    /* Prepare the query that reads the rule records and open a cursor on it. */
    SPIplan = SPI_prepare(sql, 0, NULL);
    if (SPIplan == NULL) {
        elog(NOTICE, "load_rules: couldn't create query plan for the rule data via SPI (%s)", sql);
        goto cleanup;
    }

    SPIportal = SPI_cursor_open(NULL, SPIplan, NULL, NULL, true);
    if (SPIportal == NULL) {
        elog(NOTICE, "load_rules: SPI_cursor_open('%s') returns NULL", sql);
        goto cleanup;
    }

    /* Fetch in batches of TUPLIMIT rows until a fetch returns no rows. */
    for (;;) {
        SPITupleTable *tuptable;
        TupleDesc tupdesc;
        int t;

        SPI_cursor_fetch(SPIportal, TRUE, TUPLIMIT);
        tuptable = SPI_tuptable;
        if (tuptable == NULL) {
            elog(NOTICE, "load_rules: SPI_tuptable is NULL");
            goto cleanup;
        }

        /* Resolve the column number once, from the first batch. */
        if (rules_columns.rule == -1) {
            ret = fetch_rules_columns(tuptable, &rules_columns);
            if (ret) {
                SPI_freetuptable(tuptable);
                result = ret;
                goto cleanup;
            }
        }

        ntuples = SPI_processed;
        if (ntuples <= 0) {
            /* End of data: release the (empty) batch as well. */
            SPI_freetuptable(tuptable);
            break;
        }

        tupdesc = tuptable->tupdesc;
        for (t = 0; t < ntuples; t++) {
            int nr;
            /*
             * NOTE(review): GET_TEXT_FROM_TUPLE is defined elsewhere and
             * appears to rely on locals named `tuple` and `tupdesc`; keep
             * these names.
             */
            HeapTuple tuple = tuptable->vals[t];

            GET_TEXT_FROM_TUPLE(rule, rules_columns.rule);
            if (rule == NULL) {
                /* An SQL NULL rule cannot be parsed; fail instead of crashing. */
                elog(NOTICE, "load_rules: rule %d is NULL", total_tuples + t + 1);
                SPI_freetuptable(tuptable);
                goto cleanup;
            }

            nr = parse_rule(rule, rule_arr);
            if (nr == -1) {
                elog(NOTICE, "load_rules: rule exceeds 128 terms");
                SPI_freetuptable(tuptable);
                goto cleanup;
            }

            ret = rules_add_rule(rules, nr, rule_arr);
            if (ret != 0) {
                elog(NOTICE, "load_rules: failed to add rule %d (%d): %s",
                     total_tuples + t + 1, ret, rule);
                SPI_freetuptable(tuptable);
                goto cleanup;
            }
        }

        SPI_freetuptable(tuptable);
        total_tuples += ntuples;
    }

    ret = rules_ready(rules);
    if (ret != 0) {
        elog(NOTICE, "load_rules: failed to ready the rules: err: %d", ret);
        goto cleanup;
    }

    SET_TIME(t2);
    ELAPSED_T(t1, t2);
    DBG("Time to read %i rule records: %.1f ms.", total_tuples, elapsed);

    result = 0;

cleanup:
    if (SPIportal != NULL)
        SPI_cursor_close(SPIportal);
    SPI_pfree(sql);
    return result;
}
int main(int argc, char *argv[]) { STANDARDIZER *std; LEXICON *lex; LEXICON *gaz; RULES *rules; char buf[1024]; int seq; char input_str[ 4096 ] ; char word[512]; char stdword[512]; int token; int nr; int rule[RULESIZE]; int err; int cnt; int option = 0; FILE *in; if (argc == 3 && !strcmp(argv[1], "-o")) { option = strtol(argv[2], NULL, 10); argc -= 2; argv += 2; } else if (argc != 1) Usage(); std = std_init(); assert(std); lex = lex_init(std->err_p); assert(lex); in = fopen(LEXIN, "rb"); assert(in); cnt = 0; while (!feof(in) && fgets(buf, 1024, in)) { cnt++; /* parse into fields */ if (parse_csv(buf, &seq, word, stdword, &token)) { /* add the record to the lexicon */ err = lex_add_entry(lex, seq, word, stdword, token); if (err != 1) printf("lex: Failed: %d: %s", cnt, buf); } else { printf("lex: Skipping: %d: %s", cnt, buf); } } fclose(in); if (option & 1) { printf("------------ address lexicon --------------\n"); print_lexicon(lex->hash_table); printf("\n"); } gaz = lex_init(std->err_p); assert(gaz); in = fopen(GAZIN, "rb"); assert(in); cnt = 0; while (!feof(in) && fgets(buf, 1024, in)) { cnt++; /* parse into fields */ if (parse_csv(buf, &seq, word, stdword, &token)) { /* add the record to the lexicon */ err = lex_add_entry(gaz, seq, word, stdword, token); if (err != 1) printf("gaz: Failed: %d: %s", cnt, buf); } else { printf("gaz: Skipping: %d: %s", cnt, buf); } } fclose(in); if (option & 2) { printf("------------ gazeteer lexicon --------------\n"); print_lexicon(gaz->hash_table); printf("\n"); } rules = rules_init(std->err_p); assert(rules); rules -> r_p -> collect_statistics = TRUE ; /* ************ RULES **************** */ in = fopen(RULESIN, "rb"); assert(in); cnt = 0; while (!feof(in) && fgets(buf, 1024, in)) { cnt++; /* parse into fields */ nr = parse_rule(buf, rule); /* add the record to the rules */ err = rules_add_rule(rules, nr, rule); if (err != 0) printf("rules: Failed: %d (%d): %s", cnt, err, buf); } err = rules_ready(rules); if (err != 0) 
printf("rules: Failed: err=%d\n", err); fclose(in); std_use_lex(std, lex); std_use_gaz(std, gaz); std_use_rules(std, rules); std_ready_standardizer(std); printf( "Standardization test. Type \"exit\" to quit:\n" ) ; fflush( stdout ) ; while ( TRUE ) { err = standardize_command_line( std, input_str, option ) ; if ( err == FAIL ) { break ; } } printf( "OK\n" ) ; fflush( stdout ) ; std_free(std); /* these were freed when we bound them with std_use_*() rules_free(rules); lex_free(gaz); lex_free(lex); */ return 0; }