Пример #1
0
static STANDARDIZER *
CreateStd(char *lextab, char *gaztab, char *rultab)
{
    STANDARDIZER        *std;
    LEXICON             *lex;
    LEXICON             *gaz;
    RULES               *rules;
    int                  err;
    int                  SPIcode;

    DBG("Enter: CreateStd");
    SPIcode = SPI_connect();
    if (SPIcode != SPI_OK_CONNECT) {
        elog(ERROR, "CreateStd: couldn't open a connection to SPI");
    }

    std = std_init();
    if (!std)
        elog(ERROR, "CreateStd: could not allocate memory (std)");

    lex = lex_init(std->err_p);
    if (!lex) {
        std_free(std);
        SPI_finish();
        elog(ERROR, "CreateStd: could not allocate memory (lex)");
    }

    err = load_lex(lex, lextab);
    if (err == -1) {
        lex_free(lex);
        std_free(std);
        SPI_finish();
        elog(ERROR, "CreateStd: failed to load '%s' for lexicon", lextab);
    }

    gaz = lex_init(std->err_p);
    if (!gaz) {
        lex_free(lex);
        std_free(std);
        SPI_finish();
        elog(ERROR, "CreateStd: could not allocate memory (gaz)");
    }

    err = load_lex(gaz, gaztab);
    if (err == -1) {
        lex_free(gaz);
        lex_free(lex);
        std_free(std);
        SPI_finish();
        elog(ERROR, "CreateStd: failed to load '%s' for gazeteer", gaztab);
    }

    rules = rules_init(std->err_p);
    if (!rules) {
        lex_free(gaz);
        lex_free(lex);
        std_free(std);
        SPI_finish();
        elog(ERROR, "CreateStd: could not allocate memory (rules)");
    }

    err = load_rules(rules, rultab);
    if (err == -1) {
        rules_free(rules);
        lex_free(gaz);
        lex_free(lex);
        std_free(std);
        SPI_finish();
        elog(ERROR, "CreateStd: failed to load '%s' for rules", rultab);
    }

    std_use_lex(std, lex);
    std_use_gaz(std, gaz);
    std_use_rules(std, rules);
    std_ready_standardizer(std);

    SPI_finish();

    return std;
}
Пример #2
0
int main(int argc, char *argv[])
{
    STANDARDIZER *std;
    LEXICON *lex;
    LEXICON *gaz;
    RULES *rules;

    char buf[1024];

    int seq;
    char input_str[ 4096 ] ;
    char word[512];
    char stdword[512];
    int token;
    int nr;
    int rule[RULESIZE];
    int err;
    int cnt;
    int option = 0;

    FILE *in;

    if (argc == 3 && !strcmp(argv[1], "-o")) {
        option = strtol(argv[2], NULL, 10);
        argc -= 2;
        argv += 2;
    }
    else if (argc != 1) 
        Usage();

    std = std_init();
    assert(std);

    lex = lex_init(std->err_p);
    assert(lex);

    in = fopen(LEXIN, "rb");
    assert(in);

    cnt = 0;
    while (!feof(in) && fgets(buf, 1024, in)) {
        cnt++;
        /* parse into fields */
        if (parse_csv(buf, &seq, word, stdword, &token)) {
            /* add the record to the lexicon */
            err = lex_add_entry(lex, seq, word, stdword, token);
            if (err != 1)
                printf("lex: Failed: %d: %s", cnt, buf);
        }
        else {
            printf("lex: Skipping: %d: %s", cnt, buf);
        }
    }
    fclose(in);

    if (option & 1) {
        printf("------------ address lexicon --------------\n");
        print_lexicon(lex->hash_table);
        printf("\n");
    }

    gaz = lex_init(std->err_p);
    assert(gaz);

    in = fopen(GAZIN, "rb");
    assert(in);

    cnt = 0;
    while (!feof(in) && fgets(buf, 1024, in)) {
        cnt++;
        /* parse into fields */
        if (parse_csv(buf, &seq, word, stdword, &token)) {
            /* add the record to the lexicon */
            err = lex_add_entry(gaz, seq, word, stdword, token);
            if (err != 1)
                printf("gaz: Failed: %d: %s", cnt, buf);
        }
        else {
            printf("gaz: Skipping: %d: %s", cnt, buf);
        }
    }
    fclose(in);

    if (option & 2) {
        printf("------------ gazeteer lexicon --------------\n");
        print_lexicon(gaz->hash_table);
        printf("\n");
    }

    rules = rules_init(std->err_p);
    assert(rules);
    rules -> r_p -> collect_statistics = TRUE ;

    /* ************ RULES **************** */

    in = fopen(RULESIN, "rb");
    assert(in);

    cnt = 0;
    while (!feof(in) && fgets(buf, 1024, in)) {
        cnt++;
        /* parse into fields */
        nr = parse_rule(buf, rule);

        /* add the record to the rules */
        err = rules_add_rule(rules, nr, rule);
        if (err != 0)
            printf("rules: Failed: %d (%d): %s", cnt, err, buf);
    }
    err = rules_ready(rules);
    if (err != 0)
        printf("rules: Failed: err=%d\n", err);
    fclose(in);

    std_use_lex(std, lex);
    std_use_gaz(std, gaz);
    std_use_rules(std, rules);
    std_ready_standardizer(std);

    printf( "Standardization test. Type \"exit\" to quit:\n" ) ;
    fflush( stdout ) ;
    while ( TRUE ) {
        err = standardize_command_line( std, input_str, option ) ;
        if ( err == FAIL ) {
            break ;
        }
    }
    printf( "OK\n" ) ;
    fflush( stdout ) ;

    std_free(std);
/* these were freed when we bound them with std_use_*()
    rules_free(rules);
    lex_free(gaz);
    lex_free(lex);
*/

    return 0;
}