Exemple #1
0
/*
 * Load every row of the lexicon (or gazetteer) table named by `tab` into
 * `lex`.
 *
 * Builds "select seq, word, stdword, token from <tab> order by id", opens an
 * SPI cursor on it, fetches TUPLIMIT rows at a time and hands each row to
 * lex_add_entry().
 *
 * lex: lexicon object that receives the entries.
 * tab: table name; must be non-empty and accepted by tableNameOk().
 *
 * Returns 0 on success and -1 on failure (a NOTICE describing the problem is
 * logged first). A non-zero result from fetch_lex_columns() is returned
 * unchanged.
 *
 * NOTE(review): presumably the caller has already connected to SPI; confirm
 * at the call sites.
 */
static int load_lex(LEXICON *lex, char *tab)
{
    int ret;
    SPIPlanPtr SPIplan;
    Portal SPIportal;
    bool moredata = TRUE;
#ifdef DEBUG
    /* t1, t2 and elapsed are referenced by SET_TIME / ELAPSED_T / DBG below */
    struct timeval t1, t2;
    double elapsed;
#endif
    char *sql;

    int ntuples;
    int total_tuples = 0;

    /* -1 marks "column numbers not looked up yet" (tested in the fetch loop) */
    lex_columns_t lex_columns = {.seq = -1, .word = -1, .stdword = -1, .token = -1};

    int seq;
    char *word;
    char *stdword;
    int token;

    DBG("start load_lex\n");
    SET_TIME(t1);

    if (!tab || !strlen(tab)) {
        elog(NOTICE, "load_lex: lexicon table is not usable");
        return -1;
    }
    if (!tableNameOk(tab)) {
        elog(NOTICE, "load_lex: lex and gaz table names may only be alphanum and '.\"_' characters (%s)", tab);
        return -1;
    }

    /*
     * The fixed parts of the statement are 38 + 13 characters, so
     * strlen(tab) + 65 leaves room for the table name and the terminator.
     * The name was validated by tableNameOk() above before being spliced in.
     */
    sql = SPI_palloc(strlen(tab)+65);
    strcpy(sql, "select seq, word, stdword, token from ");
    strcat(sql, tab);
    strcat(sql, " order by id ");

    /* prepare the query for the lexicon records (once) */
    SPIplan = SPI_prepare(sql, 0, NULL);
    if (SPIplan == NULL) {
        elog(NOTICE, "load_lex: couldn't create query plan for the lex data via SPI (%s)", sql);
        return -1;
    }

    if ((SPIportal = SPI_cursor_open(NULL, SPIplan, NULL, NULL, true)) == NULL) {
        elog(NOTICE, "load_lex: SPI_cursor_open('%s') returns NULL", sql);
        return -1;
    }

    /* pull the result set in batches until a fetch yields no rows */
    while (moredata == TRUE) {
        SPI_cursor_fetch(SPIportal, TRUE, TUPLIMIT);

        if (SPI_tuptable == NULL) {
            elog(NOTICE, "load_lex: SPI_tuptable is NULL");
            return -1;
        }

        /* resolve the column numbers once, from the first batch */
        if (lex_columns.seq == -1) {
            ret = fetch_lex_columns(SPI_tuptable, &lex_columns);
            if (ret)
                return ret;
        }

        ntuples = SPI_processed;
        total_tuples += ntuples;

        if (ntuples > 0) {
            /*
             * NOTE(review): the GET_*_FROM_TUPLE macros are defined
             * elsewhere; they appear to rely on the locals binval, isnull,
             * tuple and tupdesc, so those names are kept as they were.
             */
            int t;
            Datum binval;
            bool isnull;
            SPITupleTable *tuptable = SPI_tuptable;
            TupleDesc tupdesc = SPI_tuptable->tupdesc;

            for (t = 0; t < ntuples; t++) {
                HeapTuple tuple = tuptable->vals[t];
                GET_INT_FROM_TUPLE(seq,lex_columns.seq,"load_lex: seq contains a null value");
                GET_TEXT_FROM_TUPLE(word,lex_columns.word);
                GET_TEXT_FROM_TUPLE(stdword,lex_columns.stdword);
                GET_INT_FROM_TUPLE(token,lex_columns.token,"load_lex: token contains a null value");
                lex_add_entry(lex, seq, word, stdword, token);
            }
            /* release this batch before fetching the next one */
            SPI_freetuptable(tuptable);
        }
        else
            moredata = FALSE;

    }

    SET_TIME(t2);
    ELAPSED_T(t1, t2);
    DBG("Time to read %i lexicon records: %.1f ms.", total_tuples, elapsed);

    return 0;
}

/*
 * Validate the result layout of a rules query and record the position of its
 * 'rule' column in rules_cols.
 *
 * Returns 0 when both checks below leave err at zero, and -1 (after logging a
 * NOTICE) as soon as one of them sets it.
 *
 * NOTE(review): FETCH_COL and CHECK_TYP are macros defined elsewhere. They
 * appear to update the local `err` implicitly, and may or may not use the
 * `tuptable` parameter; confirm against their definitions.
 */
static int fetch_rules_columns(SPITupleTable *tuptable, rules_columns_t *rules_cols)
{
    /* must keep this exact name: it is not passed to the macros explicitly */
    int err = 0;
    /* presumably looks up the column named "rule" and stores it in rules_cols->rule */
    FETCH_COL(rules_cols,rule,"rule");
    if (err) {
        elog(NOTICE, "rules queries must return column 'rule'");
        return -1;
    }
    /* presumably verifies that the 'rule' column has type TEXTOID (text) */
    CHECK_TYP(rules_cols,rule,TEXTOID);
    if (err) {
        elog(NOTICE, "rules column type must be: 'rule' text");
        return -1;
    }
    return 0;
}
Exemple #2
0
/*
 * Read the CSV file at `path` line by line and add every record that
 * parse_csv() accepts to `lex`.
 *
 * label: prefix for the diagnostic lines ("lex" or "gaz").
 *
 * Lines that fail to parse, or that lex_add_entry() does not report as added
 * (return value other than 1), are reported on stdout and skipped.
 *
 * Returns 0 once the file has been read, -1 if it cannot be opened.
 */
static int load_lexicon_csv(LEXICON *lex, const char *path, const char *label)
{
    char buf[1024];
    char word[512];
    char stdword[512];
    int seq;
    int token;
    int cnt = 0;
    FILE *in = fopen(path, "rb");

    if (in == NULL) {
        fprintf(stderr, "%s: cannot open '%s'\n", label, path);
        return -1;
    }

    /* fgets() returning NULL already covers end-of-file and read errors */
    while (fgets(buf, sizeof buf, in)) {
        cnt++;
        /* parse into fields */
        if (parse_csv(buf, &seq, word, stdword, &token)) {
            /* add the record to the lexicon */
            if (lex_add_entry(lex, seq, word, stdword, token) != 1)
                printf("%s: Failed: %d: %s", label, cnt, buf);
        }
        else {
            printf("%s: Skipping: %d: %s", label, cnt, buf);
        }
    }
    fclose(in);

    return 0;
}

/*
 * Interactive test driver for the standardizer.
 *
 * Usage: <prog> [-o <option>]
 *   option is a bit mask: bit 0 (1) prints the address lexicon after loading,
 *   bit 1 (2) prints the gazetteer lexicon. The value is also passed on to
 *   standardize_command_line().
 *
 * Loads the lexicon (LEXIN), gazetteer (GAZIN) and rules (RULESIN) files,
 * binds them to a standardizer and then processes input until
 * standardize_command_line() returns FAIL.
 *
 * Returns 0 on a normal run and 1 when initialisation fails. Set-up failures
 * are checked explicitly rather than with assert(), so they are still caught
 * when the program is built with NDEBUG.
 */
int main(int argc, char *argv[])
{
    STANDARDIZER *std;
    LEXICON *lex;
    LEXICON *gaz;
    RULES *rules;

    char buf[1024];
    char input_str[ 4096 ] ;
    int nr;
    int rule[RULESIZE];
    int err;
    int cnt;
    int option = 0;

    FILE *in;

    if (argc == 3 && !strcmp(argv[1], "-o")) {
        option = (int) strtol(argv[2], NULL, 10);
        argc -= 2;
        argv += 2;
    }
    else if (argc != 1)
        Usage();    /* NOTE(review): presumably does not return; confirm */

    /*
     * On any set-up failure below we simply return: the process is about to
     * exit, so the objects created so far are left for the OS to reclaim.
     */
    std = std_init();
    if (std == NULL) {
        fprintf(stderr, "std_init() failed\n");
        return 1;
    }

    /* ************ LEXICON **************** */

    lex = lex_init(std->err_p);
    if (lex == NULL) {
        fprintf(stderr, "lex: lex_init() failed\n");
        return 1;
    }
    if (load_lexicon_csv(lex, LEXIN, "lex") != 0)
        return 1;

    if (option & 1) {
        printf("------------ address lexicon --------------\n");
        print_lexicon(lex->hash_table);
        printf("\n");
    }

    /* ************ GAZETTEER **************** */

    gaz = lex_init(std->err_p);
    if (gaz == NULL) {
        fprintf(stderr, "gaz: lex_init() failed\n");
        return 1;
    }
    if (load_lexicon_csv(gaz, GAZIN, "gaz") != 0)
        return 1;

    if (option & 2) {
        printf("------------ gazeteer lexicon --------------\n");
        print_lexicon(gaz->hash_table);
        printf("\n");
    }

    /* ************ RULES **************** */

    rules = rules_init(std->err_p);
    if (rules == NULL) {
        fprintf(stderr, "rules: rules_init() failed\n");
        return 1;
    }
    rules -> r_p -> collect_statistics = TRUE ;

    in = fopen(RULESIN, "rb");
    if (in == NULL) {
        fprintf(stderr, "rules: cannot open '%s'\n", RULESIN);
        return 1;
    }

    cnt = 0;
    while (fgets(buf, sizeof buf, in)) {
        cnt++;
        /* parse into fields */
        nr = parse_rule(buf, rule);

        /* add the record to the rules */
        err = rules_add_rule(rules, nr, rule);
        if (err != 0)
            printf("rules: Failed: %d (%d): %s", cnt, err, buf);
    }
    err = rules_ready(rules);
    if (err != 0)
        printf("rules: Failed: err=%d\n", err);
    fclose(in);

    std_use_lex(std, lex);
    std_use_gaz(std, gaz);
    std_use_rules(std, rules);
    std_ready_standardizer(std);

    printf( "Standardization test. Type \"exit\" to quit:\n" ) ;
    fflush( stdout ) ;
    while ( TRUE ) {
        err = standardize_command_line( std, input_str, option ) ;
        if ( err == FAIL ) {
            break ;
        }
    }
    printf( "OK\n" ) ;
    fflush( stdout ) ;

    /*
     * lex, gaz and rules are not freed separately here: the original code
     * notes they are freed via the std_use_*() binding.
     */
    std_free(std);

    return 0;
}