Exemple #1
0
/**
 * Segment a UCS-4 string into phrases with a fewest-words (shortest path)
 * dynamic programme.
 *
 * steps[k] describes the best way found so far to cover the first k
 * characters: the number of words used (m_nword), the last word
 * (m_phrase / m_phrase_len / m_handle) and the negative offset back to
 * the step where that word starts (m_backward_nstep).
 *
 * @param phrase_table  table searched for every candidate substring.
 * @param phrase_index  used only to prepare/clear/destroy the token buffer.
 * @param current_ucs4  GArray whose data is the ucs4_t text to segment.
 * @param strings       output array of SegmentStep, filled by backtrace().
 * @return whatever backtrace() returns.
 *
 * Note: do not free phrase, as it is used by strings (array of segment).
 * NOTE(review): steps is handed to backtrace() and never freed here --
 * presumably backtrace() takes ownership; confirm.
 */
bool segment(FacadePhraseTable3 * phrase_table,
             FacadePhraseIndex * phrase_index,
             GArray * current_ucs4,
             GArray * strings /* Array of SegmentStep. */){
    ucs4_t * phrase = (ucs4_t *)current_ucs4->data;
    guint phrase_len = current_ucs4->len;

    /* Prepare for shortest path segment dynamic programming:
     * one default-constructed step per character boundary. */
    GArray * steps = g_array_new(TRUE, TRUE, sizeof(SegmentStep));
    SegmentStep step;
    for ( glong i = 0; i < phrase_len + 1; ++i ){
        g_array_append_val(steps, step);
    }

    /* Covering zero characters takes zero words. */
    SegmentStep * first_step = &g_array_index(steps, SegmentStep, 0);
    first_step->m_nword = 0;

    PhraseTokens tokens;
    memset(tokens, 0, sizeof(PhraseTokens));
    phrase_index->prepare_tokens(tokens);

    for ( glong i = 0; i < phrase_len + 1; ++i ) {
        SegmentStep * step_begin = &g_array_index(steps, SegmentStep, i);
        /* Every candidate word starting at i costs exactly one word more
         * than the best path reaching i.  The cost must not accumulate
         * across candidates, otherwise a long word is charged once for
         * each of its shorter prefixes that is also a phrase. */
        const size_t nword = step_begin->m_nword + 1;
        ucs4_t * cur_phrase = phrase + i;

        for ( glong k = i + 1; k < phrase_len + 1; ++k ) {
            size_t len = k - i;

            phrase_token_t token = null_token;
            phrase_index->clear_tokens(tokens);
            int result = phrase_table->search(len, cur_phrase, tokens);
            get_first_token(tokens, token);

            if ( !(result & SEARCH_OK) ){
                token = null_token;
                /* An unknown single character is still accepted (with
                 * null_token) so that every position stays reachable;
                 * longer unknown substrings are not words. */
                if ( 1 != len )
                    continue;
            }

            SegmentStep * step_end = &g_array_index(steps, SegmentStep, k);
            if ( nword < step_end->m_nword ) {
                step_end->m_handle = token;
                step_end->m_phrase = cur_phrase;
                step_end->m_phrase_len = len;
                step_end->m_nword = nword;
                step_end->m_backward_nstep = i - k;
            }
            /* Without SEARCH_CONTINUED there is no point in trying a
             * longer substring from this start position. */
            if ( !(result & SEARCH_CONTINUED) )
                break;
        }
    }
    phrase_index->destroy_tokens(tokens);

    return backtrace(steps, phrase_len, strings);
}
Exemple #2
0
static int masm_main_loop(char * obj_file,char * src_file)
{
    FILE * obj_fp, *src_fp;
    char buf[BUFSIZ] = {0};
    int     length;
    char *  p, * q;
    src_fp = obj_fp = NULL;
    uint32_t counter = 0;
    uint32_t lines   = 0;
    hash_table * label_hash = hash_create(512);

    char op_name[128];
    char label[128];
    char rd[6];
    char rs[6];
    char rt[6];
    char imm[20];
    int32_t rd_num,rs_num,rt_num,imm_num;

    if((src_fp = fopen(src_file,"r")) == NULL)
    {
        printf("Can not open %s:%s\n",src_file,strerror(errno));
    }
    if((obj_fp = fopen(obj_file,"w+")) == NULL)
    {
        printf("Can not open %s:%s\n",obj_file,strerror(errno));
    }
    int total_lines = get_file_lines(src_fp);
    uint32_t * instruction = calloc(1,total_lines * sizeof(uint32_t));
    var_t    * var = calloc(1,total_lines * sizeof(var_t));
    int     var_count = 0;
    fseek(src_fp,0L,SEEK_SET);
    while(1)
    {
        fgets(buf,BUFSIZ,src_fp);
        if(feof(src_fp))
        {
            break;
        }
        lines++;
        length = strlen(buf);
        p     = buf;
        
        //skip whitespace
        while(length > 0 &&isspace(p[0]))
        {
            length--;
            p++;
        }
        //printf("length=%d\t%s",length,buf+i);
        if(p[0] == ';' || p[0] == '\0')
        {
            continue;
        }
        q = get_first_token(p);
        strncpy(op_name, p , q-p);
        op_name[q-p] = '\0';
        if(line_has_label(p))
        {
            /* it is label */
            label_t l;
            l.name = op_name;
            l.real_line = lines;
            l.line = counter;
            hash_add_item(&label_hash,str2hash(op_name),&l);
            p = skip_label_wthie(q);
            /* 获得字符串 */
            q = get_opcode_token(p);
            strncpy(op_name, p , q-p);
            op_name[q-p] = '\0';
            //printf("%s",op_name);
        }

        /* p now a opcode start q-p is opecode */
        int op_index = verify_opcode(op_name,lines);
        q = skip_wthie(q);
        p = q;
        /* now at rd */

        switch(op_index)
        {
#if 1
        	case ADD:
        	case SUB:
        	case MUL:
        	case DIV:
        	case MOD:
        	case AND:
        	case OR:
        	case NOT:
        	case XOR:
        	case LWORD:
        	case SWORD:
                /* 获得字符串 */
            q = get_reg_token(p);
            strncpy(rd, p , q-p);
            rd[q-p] = '\0';
            q = skip_reg_wthie(q);
            p = q;

            q = get_reg_token(p);
            strncpy(rs, p , q-p);
            rs[q-p] = '\0';
            q = skip_reg_wthie(q);
            p = q;

            q = get_reg_token(p);
            strncpy(rt, p , q-p);
            rt[q-p] = '\0';
            rd_num = get_reg_index(rd,lines);
            rs_num = get_reg_index(rs,lines);
            rt_num = get_reg_index(rt,lines);
            instruction[counter] = (op_index << 26) | (rd_num << 21) | (rs_num << 16)| (rt_num << 11);
            break;
            ///C语言中的左移就是逻辑左移,而右移,
            ///对于无符号数来说就是逻辑右移,
            ///对有符号来说就是算术右移
            ///想要实现符号左移,比较麻烦一点,首先保存最高位,然后左移之后补上最高位。
        	case SLL:
        	case SLR:
            case SAL:
        	case SAR:
        	case ADDI:
        	case ANDI:  ///这里的立即数是0扩展的。
        	case ORI:
        	case XORI:
        	case LUI: ///哦,载入高16位数啊,靠,那么低位怎么载入呢?用ori
            q = get_reg_token(p);
            strncpy(rd, p , q-p);
            rd[q-p] = '\0';
            q = skip_reg_wthie(q);
            p = q;

            q = get_reg_token(p);
            strncpy(rs, p , q-p);
            rs[q-p] = '\0';
            q = skip_reg_wthie(q);
            p = q;

            q = get_reg_token(p);
            strncpy(imm, p , q-p);
            imm[q-p] = '\0';
            rd_num = get_reg_index(rd,lines);
            rs_num = get_reg_index(rs,lines);
            imm_num = atoi(imm);
            if(imm_num > 32767 || imm_num < -32768)
            {
                printf("________\n");
                printf("[ERROR 6] line: %d imm num is too lager or too smaller\n",lines);
            }
            instruction[counter] = (op_index << 26) | (rd_num << 21) | (rs_num << 16)| (imm_num & 0x0000ffff);

            break;
        	case LESS:
        	case GREAT:
            case LESSE:
        	case GREATE:
        	case LESSU:
        	case GREATU:
            case LESSEU:
        	case GREATEU:
        	case EQUAL:
        	case UEQUAL:
            q = get_reg_token(p);
            strncpy(rd, p , q-p);
            rd[q-p] = '\0';
            q = skip_reg_wthie(q);
            p = q;

            q = get_reg_token(p);
            strncpy(rs, p , q-p);
            rs[q-p] = '\0';
            q = skip_reg_wthie(q);
            p = q;

            q = get_reg_token(p);
            strncpy(label, p , q-p);
            label[q-p] = '\0';
            rd_num = get_reg_index(rd,lines);
            rs_num = get_reg_index(rs,lines);
            var[var_count].name = malloc(strlen(label) + 1);
            strcpy(var[var_count].name, label);
            var[var_count].line = counter;
            var_count++;
            instruction[counter] = (op_index << 26) | (rd_num << 21) | (rs_num << 16) | 0x0;
        	break;
        	case JMP:
            q = get_reg_token(p);
            strncpy(label, p , q-p);
            label[q-p] = '\0';
            var[var_count].name = malloc(strlen(label) + 1);
            strcpy(var[var_count].name, label);
            var[var_count].line = counter;
            var_count++;
            instruction[counter] = (op_index << 26);
            break;
            /* 存储指令 */
        	case MOV:
            q = get_reg_token(p);
            strncpy(rd, p , q-p);
            rd[q-p] = '\0';
            q = skip_reg_wthie(q);
            p = q;

            q = get_reg_token(p);
            strncpy(rs, p , q-p);
            rs[q-p] = '\0';
            rd_num = get_reg_index(rd,lines);
            rs_num = get_reg_index(rs,lines);
            instruction[counter] = (op_index << 26) | (rd_num << 21) | (rs_num << 16) | 0x0;
                break;
            default:
                break;
            #endif
        }
        counter++;
    }

    /* 第二趟汇编 */
    struct blist * head;
    for(int i = 0; i < var_count; i++)
    {
        if((head = hash_lookup_item(label_hash,str2hash(var[i].name),&var[i])) != NULL)
        {
            label_t * node = head->item;
            int imm_2 = node->line - var[i].line;
            if((instruction[var[i].line] >> 26) == JMP)
            {
                if(imm_2 > 33554431 || imm_2 < -33554432)
                {
                    printf("[ERROR 7] line: %d imm num is too lager or too smaller\n",lines);
                }
                instruction[var[i].line] |= imm_2 & 0x03ffffff;
            }
            else
            {
                if(imm_2 > 32767 || imm_2 < -32768)
                {
                    printf("[ERROR 6] line: %d imm num is too lager or too smaller\n",lines);
                }
                instruction[var[i].line] |= imm_2 & 0x0000ffff;
            }
        }
Exemple #3
0
/**
 * Check one expression with a precedence-table driven shift/reduce loop.
 *
 * A fresh END_OF_EXPR item is pushed as the stack bottom, then the loop
 * looks up precedence_table[top][input] and acts on the result:
 *   H - push the input token and read the next one,
 *   S - insert a SHIFT marker (below a topmost RVALUE if there is one),
 *       push the input token and read the next one,
 *   R - reduce via get_rule() and re-locate the topmost token,
 *   E - accept "END_OF_EXPR vs CLOSING_BRACKET" as the end of a
 *       non-empty expression, otherwise report a syntax error.
 * An IDENTIFIER directly followed by OPENING_BRACKET is handled up front
 * as a function call and reduced to the function's return type.
 *
 * @param res         parser resources (source, buffers, instruction buffer);
 *                    must not be NULL.
 * @param last_token  in: if *last_token is non-NULL the first input token is
 *                    taken from *last_index instead of the scanner.
 *                    out: set to the token that ended the expression; its
 *                    original_type receives the expression type.
 * @param last_index  index of the pre-read token, read only when
 *                    *last_token is non-NULL.
 * @return RETURN_OK on success, otherwise LEXICAL_ERROR, SYNTAX_ERROR,
 *         INTERNAL_ERROR or a code propagated from a helper.
 *
 * NOTE(review): the catch_*_error macros are defined elsewhere; they
 * presumably bail out of the function when the checked call fails --
 * confirm their exact control flow before restructuring this code.
 */
int check_expression(Resources *res, TToken **last_token, index_t *last_index) {
    args_assert(res != NULL, INTERNAL_ERROR);

    TToken *input_token = NULL;
    TToken *top_token = NULL;
    TToken *tmp = NULL;
    index_t top_index = ZERO_INDEX;
    index_t input_index = ZERO_INDEX;
    TStack stack;
    int iRet = RETURN_OK;
    int return_type;

    init_stack(&stack);

    new_item(&res->struct_buff, top_index, top_token);
    top_token->token_type = END_OF_EXPR;
    push(&res->struct_buff, &stack, top_index); // $ on top of the stack

    // Either reuse the token the caller already read, or fetch a new one.
    if ((*last_token) != NULL)
        input_index = *last_index;
    else
        input_index = get_token(res->source, &res->string_buff, &res->struct_buff);

    catch_internal_error(
        dereference_structure(&res->struct_buff, input_index, (void **)&input_token),
        INTERNAL_ERROR,
        "Failed to dereference structure buffer."
    );

    if (input_token->token_type == ERRORT) {
        iRet = LEXICAL_ERROR;
        goto EXIT;
    }

    // Token pointers are re-resolved from their indices after every call
    // that may have added items to struct_buff.
    catch_internal_error(
        dereference_structure(&res->struct_buff, top_index, (void **)&top_token),
        INTERNAL_ERROR,
        "Failed to dereference structure buffer."
    );

    do {
#if DEBUG
        print_stack(&res->struct_buff, &stack);
#endif
        debug_print("%s %d\n", "TOP", top_token->token_type);
        debug_print("%s %d\n", "INPUT", input_token->token_type);

        if (top_token->token_type == IDENTIFIER
            && input_token->token_type == OPENING_BRACKET) {
            debug_print("%s\n", "FUNCTION CALL IN EXPR");

            // Remember the identifier by index: top_token itself is not
            // re-resolved until the end of this branch.
            index_t last_id = top_token->token_index;
            catch_undefined_error(is_func_declared(res, last_id),
                                 SEMANTIC_ERROR, "Function declaration check failed.", 1
            );

            // Checked like every other dereference in this function.
            catch_internal_error(
                dereference_structure(&res->struct_buff, input_index, (void **)last_token),
                INTERNAL_ERROR,
                "Failed to dereference structure buffer."
            );

            if ((iRet = generate_function_call(res, last_id)) != 0) goto EXIT;
            // Use the saved index rather than reading through top_token,
            // which has not been re-resolved since the call above.
            return_type = get_return_type(res, last_id);
            catch_internal_error(return_type, SYNTAX_ERROR, "Failed to get function return type.");

            // Reduction of function call
            if ((iRet = reduce(&res->struct_buff, &stack, return_type)) != RETURN_OK)
                goto EXIT;

            top_index = stack.top;
            catch_syntax_error(
                get_first_token(&res->struct_buff, &stack, &top_index),
                INTERNAL_ERROR,
                "Failed to get first token", 1
            );
            input_index = get_token(res->source, &res->string_buff, &res->struct_buff);
            catch_internal_error(
                dereference_structure(&res->struct_buff, input_index, (void **)&input_token),
                INTERNAL_ERROR,
                "Failed to dereference structure buffer."
            );

            if (input_token->token_type == ERRORT) {
                iRet = LEXICAL_ERROR;
                goto EXIT;
            }

            catch_internal_error(
                dereference_structure(&res->struct_buff, top_index, (void **)&top_token),
                INTERNAL_ERROR,
                "Failed to dereference structure buffer."
            );
            // The call was the whole expression.
            if (type_filter(top_token->token_type) == END_OF_EXPR &&
                type_filter(input_token->token_type) == END_OF_EXPR)
                break;

        }

        switch (precedence_table[type_filter(top_token->token_type)]
                                [type_filter(input_token->token_type)]) {
            case H:
                debug_print("%s\n", "CASE H");
                // Push the input token and advance the input.
                top_index = input_index;
                push(&res->struct_buff, &stack, top_index);
                input_index = get_token(res->source, &res->string_buff, &res->struct_buff);
                catch_internal_error(
                    dereference_structure(&res->struct_buff, input_index, (void **)&input_token),
                    INTERNAL_ERROR,
                    "Failed to dereference structure buffer."
                );

                if (input_token->token_type == ERRORT) {
                    iRet = LEXICAL_ERROR;
                    goto EXIT;
                }

                catch_internal_error(
                    dereference_structure(&res->struct_buff, top_index, (void **)&top_token),
                    INTERNAL_ERROR,
                    "Failed to dereference structure buffer."
                );

                break;

            case S:
                debug_print("%s\n", "CASE S");
                // Create the SHIFT marker item.
                new_item(&res->struct_buff, top_index, top_token);
                catch_internal_error(
                    dereference_structure(&res->struct_buff, stack.top, (void **)&tmp),
                    INTERNAL_ERROR,
                    "Failed to dereference structure buffer."
                );

                top_token->token_type = SHIFT;

                if (tmp->token_type == RVALUE) {
                    // The marker goes underneath an RVALUE sitting on top.
                    index_t rvalue_index = stack.top;
                    pop(&res->struct_buff, &stack);
                    push(&res->struct_buff, &stack, top_index);
                    push(&res->struct_buff, &stack, rvalue_index);

                } else
                    push(&res->struct_buff, &stack, top_index);

                catch_internal_error(
                    dereference_structure(&res->struct_buff, input_index, (void **)&input_token),
                    INTERNAL_ERROR,
                    "Failed to dereference structure buffer."
                );

                // Push the input token and advance the input.
                top_index = input_index;
                push(&res->struct_buff, &stack, top_index);
                input_index = get_token(res->source, &res->string_buff, &res->struct_buff);
                catch_internal_error(
                    dereference_structure(&res->struct_buff, input_index, (void **)&input_token),
                    INTERNAL_ERROR,
                    "Failed to dereference structure buffer."
                );

                if (input_token->token_type == ERRORT) {
                    iRet = LEXICAL_ERROR;
                    goto EXIT;
                }

                catch_internal_error(
                    dereference_structure(&res->struct_buff, top_index, (void **)&top_token),
                    INTERNAL_ERROR,
                    "Failed to dereference structure buffer."
                );
                break;

            case R:
                debug_print("%s\n", "CASE R");
                if ((iRet = get_rule(res, &stack)) != RETURN_OK)
                    goto EXIT;

                // After a reduction the stack top changed; find the token
                // that now drives the precedence lookup.
                top_index = stack.top;

                catch_syntax_error(
                    get_first_token(&res->struct_buff, &stack, &top_index),
                    INTERNAL_ERROR,
                    "Failed to get first token", 1
                );
                catch_internal_error(
                    dereference_structure(&res->struct_buff, top_index, (void **)&top_token),
                    INTERNAL_ERROR,
                    "Failed to dereference structure buffer."
                );
                break;

            case E:
                debug_print("%s\n", "CASE E");
                // A closing bracket against an otherwise finished stack
                // ends the expression instead of being an error.
                if (type_filter(top_token->token_type) == END_OF_EXPR &&
                    type_filter(input_token->token_type) == CLOSING_BRACKET) {
                    catch_internal_error(
                        dereference_structure(&res->struct_buff, input_index, (void **)last_token),
                        INTERNAL_ERROR,
                        "Failed to dereference structure buffer."
                    );

                    catch_internal_error(
                        dereference_structure(&res->struct_buff, stack.top, (void **)&top_token),
                        INTERNAL_ERROR,
                        "Failed to dereference structure buffer."
                    );
                    if (top_token->original_type == 0) {      // Empty expression, there was nothing reduced on top
                        debug_print("%s: %d\n", "EMPTY EXPRESSION RETURN", SYNTAX_ERROR);
                        iRet = SYNTAX_ERROR;
                        goto EXIT;
                    }

                    goto FINISH;

                }

                iRet = SYNTAX_ERROR;
                goto EXIT;

            default:
                debug_print("%s", "DEFAULT\n");
                iRet = INTERNAL_ERROR;
                goto EXIT;
        }

    } while (type_filter(top_token->token_type) != END_OF_EXPR || type_filter(input_token->token_type) != END_OF_EXPR);

    // Hand the terminating token back to the caller ...
    catch_internal_error(
        dereference_structure(&res->struct_buff, input_index, (void **)last_token),
        INTERNAL_ERROR,
        "Failed to dereference structure buffer."
    );

    // ... and read the expression type from the item left on the stack top.
    catch_internal_error(
        dereference_structure(&res->struct_buff, stack.top, (void **)&top_token),
        INTERNAL_ERROR,
        "Failed to dereference structure buffer."
    );

FINISH:
    debug_print("%s: %d\n", "TYPE OF EXPRESSION", top_token->original_type);
    // send type of expression back to syntax_analysis
    (*last_token)->original_type = top_token->original_type;

    // set type of stack top on runtime stack
    catch_internal_error(new_instruction_int_int(&res->instruction_buffer, 0lu, top_token->original_type, 0, SET_TYPE),
                         INTERNAL_ERROR, "Failed to generate new instruction");


EXIT:
    debug_print("%s: %d\n", "RETURN", iRet);
    return iRet;
}
Exemple #4
0
/**
 * Train uni-gram and bi-gram frequencies from a corpus read on stdin.
 *
 * Input is one phrase per line.  Each line is looked up in the phrase
 * table; a line that is empty or not found yields null_token and acts as
 * a break between runs of known phrases.
 *
 * Options:
 *   --help                     print usage and exit with status 0.
 *   --skip-pi-gram-training    do not train the (sentence_start, first
 *                              token) pair at the start of each run.
 *   --bigram-file <file>       bigram database to update
 *                              (default "bigram.db").
 * Any other argument prints usage and exits with EINVAL.
 *
 * @return 0 on success; exits with ENOENT when the phrase index cannot be
 *         loaded or saved.
 */
int main(int argc, char * argv[]){
    int i = 1;
    bool train_pi_gram = true;
    const char * bigram_filename = "bigram.db";

    setlocale(LC_ALL, "");
    while ( i < argc ){
        if ( strcmp("--help", argv[i]) == 0 ){
            print_help();
            exit(0);
        }else if ( strcmp("--skip-pi-gram-training", argv[i]) == 0 ){
            train_pi_gram = false;
        }else if ( strcmp("--bigram-file", argv[i]) == 0 ){
            /* the option needs a value */
            if ( ++i >= argc ) {
                print_help();
                exit(EINVAL);
            }
            bigram_filename = argv[i];
        }else{
            print_help();
            exit(EINVAL);
        }
        ++i;
    }

    PhraseLargeTable2 phrase_table;
    /* init phrase table */
    /* NOTE(review): the result of chunk->load() is not checked -- confirm
     * whether a missing phrase_index.bin should abort here. */
    MemoryChunk * chunk = new MemoryChunk;
    chunk->load("phrase_index.bin");
    phrase_table.load(chunk);

    FacadePhraseIndex phrase_index;
    if (!load_phrase_index(&phrase_index))
        exit(ENOENT);

    Bigram bigram;
    bigram.attach(bigram_filename, ATTACH_CREATE|ATTACH_READWRITE);

    PhraseTokens tokens;
    memset(tokens, 0, sizeof(PhraseTokens));
    phrase_index.prepare_tokens(tokens);

    char * linebuf = NULL;
    size_t size = 0;
    ssize_t nread = 0;
    phrase_token_t last_token, cur_token = last_token = 0;

    /* getline() returns -1 on EOF *and* on error; testing the return value
     * (not feof) ends the loop in both cases and keeps a final line that
     * has no trailing newline. */
    while ( (nread = getline(&linebuf, &size, stdin)) != -1 ){
        /* strip the newline only if there is one */
        if ( nread > 0 && '\n' == linebuf[nread - 1] )
            linebuf[nread - 1] = '\0';

        glong phrase_len = 0;
        ucs4_t * phrase = g_utf8_to_ucs4(linebuf, -1, NULL, &phrase_len, NULL);

        phrase_token_t token = null_token;
        if ( NULL != phrase && 0 != phrase_len ) {
            phrase_index.clear_tokens(tokens);
            int result = phrase_table.search(phrase_len, phrase, tokens);
            get_first_token(tokens, token);
            if ( !(result & SEARCH_OK) )
                token = null_token;
        }
        /* the converted buffer is allocated even for an empty line */
        g_free(phrase);
        phrase = NULL;

        last_token = cur_token;
        cur_token = token;

        /* skip null_token in second word. */
        if ( null_token == cur_token )
            continue;

        /* training uni-gram */
        phrase_index.add_unigram_frequency(cur_token, 1);

        /* skip pi-gram training. */
        if ( null_token == last_token ){
            if ( !train_pi_gram )
                continue;
            last_token = sentence_start;
        }

        /* train bi-gram */
        SingleGram * single_gram = NULL;
        bigram.load(last_token, single_gram);

        if ( NULL == single_gram ){
            single_gram = new SingleGram;
        }

        guint32 freq = 0, total_freq = 0;
        /* increase freq; the update must run even when assert() is
         * compiled out with NDEBUG, so it is kept outside the assert. */
        bool updated;
        if (single_gram->get_freq(cur_token, freq))
            updated = single_gram->set_freq(cur_token, freq + 1);
        else
            updated = single_gram->insert_freq(cur_token, 1);
        assert(updated);
        (void) updated;

        /* increase total freq */
        single_gram->get_total_freq(total_freq);
        single_gram->set_total_freq(total_freq + 1);

        bigram.store(last_token, single_gram);
        delete single_gram;
    }

    phrase_index.destroy_tokens(tokens);
    free(linebuf);

    if (!save_phrase_index(&phrase_index))
        exit(ENOENT);

    return 0;
}
int main(int argc, char * argv[]){
    const char * evals_text = "evals.text";

    pinyin_option_t options = USE_TONE;
    FacadeChewingTable largetable;

    MemoryChunk * chunk = new MemoryChunk;
    chunk->load("pinyin_index.bin");
    largetable.load(options, chunk, NULL);

    FacadePhraseTable2 phrase_table;
    chunk = new MemoryChunk;
    chunk->load("phrase_index.bin");
    phrase_table.load(chunk, NULL);

    FacadePhraseIndex phrase_index;
    if (!load_phrase_index(&phrase_index))
        exit(ENOENT);

    Bigram system_bigram;
    system_bigram.attach("bigram.db", ATTACH_READONLY);
    Bigram user_bigram;
    user_bigram.attach(NULL, ATTACH_CREATE|ATTACH_READWRITE);

    PinyinLookup2 pinyin_lookup(options, &largetable, &phrase_index,
                               &system_bigram, &user_bigram);

    /* open evals.text. */
    FILE * evals_file = fopen(evals_text, "r");
    if ( NULL == evals_file ) {
        fprintf(stderr, "Can't open file:%s\n", evals_text);
        exit(ENOENT);
    }

    PhraseTokens phrase_tokens;
    memset(phrase_tokens, 0, sizeof(PhraseTokens));
    phrase_index.prepare_tokens(phrase_tokens);

    /* Evaluates the correction rate of test text documents. */
    size_t tested_count = 0; size_t passed_count = 0;
    char* linebuf = NULL; size_t size = 0;
    TokenVector tokens = g_array_new(FALSE, TRUE, sizeof(phrase_token_t));

    phrase_token_t token = null_token;
    while( getline(&linebuf, &size, evals_file) ) {
        if ( feof(evals_file) )
            break;
        if ( '\n' == linebuf[strlen(linebuf)-1] )
            linebuf[strlen(linebuf)-1] = '\0';

        glong phrase_len = 0;
        ucs4_t * phrase = g_utf8_to_ucs4(linebuf, -1, NULL, &phrase_len, NULL);

        token = null_token;
        if ( 0 != phrase_len ) {
            int result = phrase_table.search(phrase_len, phrase, phrase_tokens);
            int num = get_first_token(phrase_tokens, token);

            if ( !(result & SEARCH_OK) )
                token = null_token;

            g_free(phrase);
            phrase = NULL;
        }

        if ( null_token == token ) {
            if ( tokens->len ) { /* one test. */
                if ( do_one_test(&pinyin_lookup, &phrase_index, tokens) ) {
                    tested_count ++; passed_count ++;
                } else {
                    tested_count ++;
                }
                g_array_set_size(tokens, 0);
            }
        } else {
            g_array_append_val(tokens, token);
        }
    }

    if ( tokens->len ) { /* one test. */
        if ( do_one_test(&pinyin_lookup, &phrase_index, tokens) ) {
            tested_count ++; passed_count ++;
        } else {
            tested_count ++;
        }
    }

    parameter_t rate = passed_count / (parameter_t) tested_count;
    printf("correction rate:%f\n", rate);

    g_array_free(tokens, TRUE);
    fclose(evals_file);
    free(linebuf);

    phrase_index.destroy_tokens(phrase_tokens);

    return 0;
}