Beispiel #1
0
static int load_rules(RULES *rules, char *tab)
{
    int ret;
    SPIPlanPtr SPIplan;
    Portal SPIportal;
    bool moredata = TRUE;
#ifdef DEBUG
    struct timeval t1, t2;
    double elapsed;
#endif
    char *sql;

    int rule_arr[MAX_RULE_LENGTH];

    int ntuples;
    int total_tuples = 0;

    rules_columns_t rules_columns = {rule: -1};

    char *rule;

    DBG("start load_rules\n");
    SET_TIME(t1);

    if (!tab || !strlen(tab)) {
        elog(NOTICE, "load_rules: rules table is not usable");
        return -1;
    }
    if (!tableNameOk(tab)) {
        elog(NOTICE, "load_rules: rules table name may only be alphanum and '.\"_' characters (%s)", tab);
        return -1;
    }
    sql = SPI_palloc(strlen(tab)+35);
    strcpy(sql, "select rule from ");
    strcat(sql, tab);
    strcat(sql, " order by id ");

    /* get the sql for the lexicon records and prepare the query */
    SPIplan = SPI_prepare(sql, 0, NULL);
    if (SPIplan == NULL) {
        elog(NOTICE, "load_rules: couldn't create query plan for the rule data via SPI (%s)", sql);
        return -1;
    }

    if ((SPIportal = SPI_cursor_open(NULL, SPIplan, NULL, NULL, true)) == NULL) {
        elog(NOTICE, "load_rules: SPI_cursor_open('%s') returns NULL", sql);
        return -1;
    }

    while (moredata == TRUE) {
        //DBG("calling SPI_cursor_fetch");
        SPI_cursor_fetch(SPIportal, TRUE, TUPLIMIT);

        if (SPI_tuptable == NULL) {
            elog(NOTICE, "load_rules: SPI_tuptable is NULL");
            return -1;
        }

        if (rules_columns.rule == -1) {
            ret = fetch_rules_columns(SPI_tuptable, &rules_columns);
            if (ret)
                return ret;
        }

        ntuples = SPI_processed;
        //DBG("Reading edges: %i - %i", total_tuples, total_tuples+ntuples);

        if (ntuples > 0) {
            int t;
            SPITupleTable *tuptable = SPI_tuptable;
            TupleDesc tupdesc = SPI_tuptable->tupdesc;

            for (t = 0; t < ntuples; t++) {
                int nr;
                //if (t%100 == 0) { DBG("    t: %i", t); }
                HeapTuple tuple = tuptable->vals[t];
                GET_TEXT_FROM_TUPLE(rule,rules_columns.rule);
                nr = parse_rule(rule, rule_arr);
                if (nr == -1) {
                    elog(NOTICE, "load_roles: rule exceeds 128 terms");
                    return -1;
                }
                ret = rules_add_rule(rules, nr, rule_arr);
                if (ret != 0) {
                    elog(NOTICE,"load_roles: failed to add rule %d (%d): %s",
                         total_tuples+t+1, ret, rule);
                    return -1;
                }
            }
            //DBG("calling SPI_freetuptable");
            SPI_freetuptable(tuptable);
            //DBG("back from SPI_freetuptable");
        }
        else
            moredata = FALSE;

        total_tuples += ntuples;
    }

    ret = rules_ready(rules);
    if (ret != 0) {
        elog(NOTICE, "load_roles: failed to ready the rules: err: %d", ret);
        return -1;
    }


    SET_TIME(t2);
    ELAPSED_T(t1, t2);
    DBG("Time to read %i rule records: %.1f ms.", total_tuples, elapsed);

    return 0;
}
Beispiel #2
0
int main(int argc, char *argv[])
{
    STANDARDIZER *std;
    LEXICON *lex;
    LEXICON *gaz;
    RULES *rules;

    char buf[1024];

    int seq;
    char input_str[ 4096 ] ;
    char word[512];
    char stdword[512];
    int token;
    int nr;
    int rule[RULESIZE];
    int err;
    int cnt;
    int option = 0;

    FILE *in;

    if (argc == 3 && !strcmp(argv[1], "-o")) {
        option = strtol(argv[2], NULL, 10);
        argc -= 2;
        argv += 2;
    }
    else if (argc != 1) 
        Usage();

    std = std_init();
    assert(std);

    lex = lex_init(std->err_p);
    assert(lex);

    in = fopen(LEXIN, "rb");
    assert(in);

    cnt = 0;
    while (!feof(in) && fgets(buf, 1024, in)) {
        cnt++;
        /* parse into fields */
        if (parse_csv(buf, &seq, word, stdword, &token)) {
            /* add the record to the lexicon */
            err = lex_add_entry(lex, seq, word, stdword, token);
            if (err != 1)
                printf("lex: Failed: %d: %s", cnt, buf);
        }
        else {
            printf("lex: Skipping: %d: %s", cnt, buf);
        }
    }
    fclose(in);

    if (option & 1) {
        printf("------------ address lexicon --------------\n");
        print_lexicon(lex->hash_table);
        printf("\n");
    }

    gaz = lex_init(std->err_p);
    assert(gaz);

    in = fopen(GAZIN, "rb");
    assert(in);

    cnt = 0;
    while (!feof(in) && fgets(buf, 1024, in)) {
        cnt++;
        /* parse into fields */
        if (parse_csv(buf, &seq, word, stdword, &token)) {
            /* add the record to the lexicon */
            err = lex_add_entry(gaz, seq, word, stdword, token);
            if (err != 1)
                printf("gaz: Failed: %d: %s", cnt, buf);
        }
        else {
            printf("gaz: Skipping: %d: %s", cnt, buf);
        }
    }
    fclose(in);

    if (option & 2) {
        printf("------------ gazeteer lexicon --------------\n");
        print_lexicon(gaz->hash_table);
        printf("\n");
    }

    rules = rules_init(std->err_p);
    assert(rules);
    rules -> r_p -> collect_statistics = TRUE ;

    /* ************ RULES **************** */

    in = fopen(RULESIN, "rb");
    assert(in);

    cnt = 0;
    while (!feof(in) && fgets(buf, 1024, in)) {
        cnt++;
        /* parse into fields */
        nr = parse_rule(buf, rule);

        /* add the record to the rules */
        err = rules_add_rule(rules, nr, rule);
        if (err != 0)
            printf("rules: Failed: %d (%d): %s", cnt, err, buf);
    }
    err = rules_ready(rules);
    if (err != 0)
        printf("rules: Failed: err=%d\n", err);
    fclose(in);

    std_use_lex(std, lex);
    std_use_gaz(std, gaz);
    std_use_rules(std, rules);
    std_ready_standardizer(std);

    printf( "Standardization test. Type \"exit\" to quit:\n" ) ;
    fflush( stdout ) ;
    while ( TRUE ) {
        err = standardize_command_line( std, input_str, option ) ;
        if ( err == FAIL ) {
            break ;
        }
    }
    printf( "OK\n" ) ;
    fflush( stdout ) ;

    std_free(std);
/* these were freed when we bound them with std_use_*()
    rules_free(rules);
    lex_free(gaz);
    lex_free(lex);
*/

    return 0;
}