const char* ARMv7DOpcodeDataProcessingRegisterT1::format() { appendInstructionName(opName(), inITBlock() && (!(op() == 0x8) || (op() == 0xa) || (op() == 0xb))); appendRegisterName(rdn()); appendSeparator(); appendRegisterName(rm()); if (op() == 0x9) // rsb T1 appendString(", #0"); else if (op() == 0xd) { // mul T1 appendSeparator(); appendRegisterName(rdn()); } return m_formatBuffer; }
// Formats the T2 encoding of "add (register)" as "add <rdn>, <rm>" into
// m_formatBuffer and returns that buffer.
const char* ARMv7DOpcodeAddRegisterT2::format()
{
    appendInstructionName("add");

    // First operand: the combined destination / first source register.
    const auto destinationRegister = rdn();
    appendRegisterName(destinationRegister);
    appendSeparator();

    // Second operand: the register being added.
    const auto sourceRegister = rm();
    appendRegisterName(sourceRegister);

    return m_formatBuffer;
}
const char* ARMv7DOpcodeAddSubtractImmediate8::format() { appendInstructionName(opName(), !inITBlock()); appendRegisterName(rdn()); appendSeparator(); appendUnsignedImmediate(immediate8()); return m_formatBuffer; }
/**
 * Read word IDs from a binary file, widening 2-byte IDs to WORD_ID in place
 * when requested.
 *
 * The actual reading is delegated to rdn().
 * NOTE(review): any byte swapping is presumably done inside rdn(), which is
 * defined outside this block - confirm.
 *
 * @param fp [in] file pointer
 * @param buf [out] data buffer; receives @a unitnum WORD_ID entries
 * @param unitnum [in] number of units to read
 * @param need_conv [in] TRUE if the file stores the IDs as unsigned short
 * and they must be converted to WORD_ID
 *
 * @return TRUE when the end of the function is reached.
 */
static boolean
rdn_wordid_func(FILE *fp, void *buf, int unitnum, boolean need_conv)
{
  unsigned short *src;
  WORD_ID *dst;
  int k;

  if (!need_conv) {
    /* stored units already have the size of WORD_ID: read as usual */
    rdn(fp, buf, sizeof(WORD_ID), unitnum);
    return TRUE;
  }

  /* read the units as unsigned short into the head of the buffer */
  rdn(fp, buf, sizeof(unsigned short), unitnum);

  /* Expand each value to WORD_ID inside the same buffer.  Walk from the
     last unit down to the first so that a widened entry never overwrites
     a 2-byte value that has not been converted yet. */
  src = (unsigned short *)buf;
  dst = (WORD_ID *)buf;
  for (k = unitnum; k-- > 0; ) {
    WORD_ID widened = src[k];
    dst[k] = widened;
  }

  return TRUE;
}
static boolean ngram_read_bin_compat(FILE *fp, NGRAM_INFO *ndata, int *retry_ret) { int i,n,len; char *w, *p; NNID *n3_bgn; NNID d, ntmp; #ifdef WORDS_INT unsigned short *buf; #endif NGRAM_TUPLE_INFO *t, *tt, *ttt; /* old binary N-gram assumes these types */ ndata->bigram_index_reversed = TRUE; ndata->n = 3; ndata->dir = DIR_RL; /* read total info and set max_word_num */ ndata->d = (NGRAM_TUPLE_INFO *)mymalloc(sizeof(NGRAM_TUPLE_INFO) * ndata->n); memset(ndata->d, 0, sizeof(NGRAM_TUPLE_INFO) * ndata->n); for(n=0;n<ndata->n;n++) { rdn(fp, &(ndata->d[n].totalnum), sizeof(NNID), 1); } ndata->max_word_num = ndata->d[0].totalnum; if (file_version == 4) { rdn(fp, &(ndata->d[1].context_num), sizeof(NNID), 1); } for(n=0;n<ndata->n;n++) { if (n < 2) { ndata->d[n].is24bit = FALSE; } else { if (ndata->d[n].totalnum >= NNID_MAX_24) { jlog("Warning: ngram_read_bin_compat: num of %d-gram exceeds 24bit, now switch to %dbit index\n", n+1, sizeof(NNID) * 8); ndata->d[n].is24bit = FALSE; } else { ndata->d[n].is24bit = TRUE; } } ndata->d[n].nnid2ctid_upper = NULL; ndata->d[n].nnid2ctid_lower = NULL; } /* always do back-off compaction for 3-gram and up */ /* mark 2-gram and up */ ndata->d[0].ct_compaction = FALSE; for(n=1;n<ndata->n;n++) { ndata->d[n].ct_compaction = TRUE; } /* read wname */ rdn(fp, &len, sizeof(int), 1); w = mymalloc(len); rdn(fp, w, 1, len); /* assign... 
*/ ndata->wname = (char **)mymalloc(sizeof(char *) * ndata->max_word_num); p = w; i = 0; while (p < w + len) { ndata->wname[i++] = p; while(*p != '\0') p++; p++; } if (i != ndata->max_word_num) { jlog("Error: ngram_read_bin_compat: wname error??\n"); return FALSE; } /* malloc 1-gram */ t = &(ndata->d[0]); tt = &(ndata->d[1]); ttt = &(ndata->d[2]); t->bgn_upper = NULL; t->bgn_lower = NULL; t->bgn = NULL; t->num = NULL; t->bgnlistlen = 0; t->nnid2wid = NULL; t->nnid2ctid_upper = NULL; t->nnid2ctid_lower = NULL; t->context_num = t->totalnum; t->prob = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), t->totalnum); ndata->bo_wt_1 = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), t->context_num); t->bo_wt = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), t->context_num); tt->bgnlistlen = t->context_num; tt->bgn = (NNID *)mymalloc_big(sizeof(NNID), tt->bgnlistlen); tt->num = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), tt->bgnlistlen); /* read 1-gram */ jlog("stat: ngram_read_bin_compat: reading 1-gram\n"); rdn(fp, t->prob, sizeof(LOGPROB), t->totalnum); rdn(fp, ndata->bo_wt_1, sizeof(LOGPROB), t->context_num); rdn(fp, t->bo_wt, sizeof(LOGPROB), t->context_num); rdn(fp, tt->bgn, sizeof(NNID), tt->bgnlistlen); #ifdef WORDS_INT rdn_wordid(fp, tt->num, tt->bgnlistlen, need_conv); #else rdn(fp, tt->num, sizeof(WORD_ID), tt->bgnlistlen); #endif #ifdef WORDS_INT { /* check if we are wrongly reading word_id=2byte bingram (if bingram version >= 4, this should not be happen because header correctly tells the word_id byte size. 
This will occur only if matches all the conditions below: - you run Julius with --enable-words-int, - you use old bingram of version <= 3, and - you use bingram file converted without --enable-words-int */ WORD_ID w; for(w=0;w<ndata->max_word_num;w++) { if (ndata->d[1].num[w] > ndata->max_word_num) { if (words_int_retry) { jlog("Error: ngram_read_bin_compat: retry failed, wrong bingram format\n"); return FALSE; } jlog("Warning: ngram_read_bin_compat: incorrect data, may be a 2-byte v3 bingram, retry with conversion\n"); free(ndata->wname[0]); free(ndata->wname); free(t->prob); free(ndata->bo_wt_1); free(t->bo_wt); free(tt->bgn); free(tt->num); myfrewind(fp); words_int_retry = TRUE; *retry_ret = 1; return FALSE; } } } #endif /* malloc the rest */ tt->nnid2wid = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), tt->totalnum); tt->prob = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), tt->totalnum); ndata->p_2 = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), tt->totalnum); if (file_version == 4) { /* context compaction and 24bit */ tt->nnid2ctid_upper = (NNID_UPPER *)mymalloc_big(sizeof(NNID_UPPER), tt->totalnum); tt->nnid2ctid_lower = (NNID_LOWER *)mymalloc_big(sizeof(NNID_LOWER), tt->totalnum); tt->bo_wt = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), tt->context_num); ttt->bgnlistlen = tt->context_num; ttt->bgn_upper = (NNID_UPPER *)mymalloc_big(sizeof(NNID_UPPER), ttt->bgnlistlen); ttt->bgn_lower = (NNID_LOWER *)mymalloc_big(sizeof(NNID_LOWER), ttt->bgnlistlen); ttt->num = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), ttt->bgnlistlen); } else { tt->context_num = tt->totalnum; tt->bo_wt = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), tt->context_num); ttt->bgnlistlen = tt->context_num; ttt->num = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), ttt->bgnlistlen); if (ttt->is24bit) { ttt->bgn_upper = (NNID_UPPER *)mymalloc_big(sizeof(NNID_UPPER), ttt->bgnlistlen); ttt->bgn_lower = (NNID_LOWER *)mymalloc_big(sizeof(NNID_LOWER), ttt->bgnlistlen); n3_bgn = (NNID *)mymalloc_big(sizeof(NNID), ttt->bgnlistlen); } 
else { ttt->bgn = (NNID *)mymalloc_big(sizeof(NNID), ttt->bgnlistlen); } } ttt->nnid2wid = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), ttt->totalnum); ttt->prob = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), ttt->totalnum); ttt->bo_wt = NULL; /* read 2-gram*/ jlog("Stat: ngram_read_bin_compat: reading 2-gram\n"); #ifdef WORDS_INT rdn_wordid(fp, tt->nnid2wid, tt->totalnum, need_conv); #else rdn(fp, tt->nnid2wid, sizeof(WORD_ID), tt->totalnum); #endif rdn(fp, ndata->p_2, sizeof(LOGPROB), tt->totalnum); rdn(fp, tt->prob, sizeof(LOGPROB), tt->totalnum); if (file_version == 4) { rdn(fp, tt->nnid2ctid_upper, sizeof(NNID_UPPER), tt->totalnum); rdn(fp, tt->nnid2ctid_lower, sizeof(NNID_LOWER), tt->totalnum); rdn(fp, tt->bo_wt, sizeof(LOGPROB), tt->context_num); rdn(fp, ttt->bgn_upper, sizeof(NNID_UPPER), ttt->bgnlistlen); rdn(fp, ttt->bgn_lower, sizeof(NNID_LOWER), ttt->bgnlistlen); #ifdef WORDS_INT rdn_wordid(fp, ttt->num, ttt->bgnlistlen, need_conv); #else rdn(fp, ttt->num, sizeof(WORD_ID), ttt->bgnlistlen); #endif } else { rdn(fp, tt->bo_wt, sizeof(LOGPROB), tt->context_num); if (ttt->is24bit) { rdn(fp, n3_bgn, sizeof(NNID), ttt->bgnlistlen); for(d=0;d<ttt->bgnlistlen;d++) { if (n3_bgn[d] == NNID_INVALID) { ttt->bgn_lower[d] = 0; ttt->bgn_upper[d] = NNID_INVALID_UPPER; } else { ntmp = n3_bgn[d] & 0xffff; ttt->bgn_lower[d] = ntmp; ntmp = n3_bgn[d] >> 16; ttt->bgn_upper[d] = ntmp; } } } else {
/**
 * Read the body of a version 5 binary N-gram file into @a ndata.
 *
 * The data is consumed in this order: N, direction and the
 * bigram_index_reversed flag; the total entry count of each N-gram level;
 * the word name buffer (a run of NUL-terminated strings); then, per level,
 * its flags, index tables, probabilities and the optional back-off weight
 * and context-id tables; finally the optional bo_wt_1 and p_2 tables.
 *
 * Values taken from the file are validated before they are used as array
 * bounds: the N-gram order must be at least 1, the word name buffer length
 * must not be negative, the word name buffer must contain exactly
 * max_word_num NUL-terminated strings, and the additional LR 2-gram table
 * is accepted only when a 2-gram level exists.
 *
 * NOTE(review): rdn() and rdn_wordid() are defined outside this block;
 * they are presumably expected to make this function return FALSE on a
 * failed read - confirm.
 *
 * @param fp [in] file pointer
 * @param ndata [out] N-gram data to be filled
 *
 * @return TRUE on success, or FALSE if the data is inconsistent.
 */
static boolean
ngram_read_bin_v5(FILE *fp, NGRAM_INFO *ndata)
{
  int i,n,len;
  char *w, *p, *w_end;
  boolean wname_ok;
  NGRAM_TUPLE_INFO *t;

  /* read some info extended from version 5 */
  rdn(fp, &(ndata->n), sizeof(int), 1);
  rdn(fp, &(ndata->dir), sizeof(int), 1);
  rdn(fp, &(ndata->bigram_index_reversed), sizeof(boolean), 1);

  /* d[0] is accessed unconditionally below, so at least 1-gram must exist */
  if (ndata->n < 1) {
    jlog("Error: ngram_read_bin_v5: invalid N-gram order: %d\n", ndata->n);
    return FALSE;
  }

  jlog("Stat: ngram_read_bin_v5: this is %s %d-gram file\n", (ndata->dir == DIR_LR) ? "forward" : "backward", ndata->n);

  /* read total info and set max_word_num */
  ndata->d = (NGRAM_TUPLE_INFO *)mymalloc(sizeof(NGRAM_TUPLE_INFO) * ndata->n);
  memset(ndata->d, 0, sizeof(NGRAM_TUPLE_INFO) * ndata->n);
  for(n=0;n<ndata->n;n++) {
    rdn(fp, &(ndata->d[n].totalnum), sizeof(NNID), 1);
  }
  ndata->max_word_num = ndata->d[0].totalnum;

  /* read wname */
  rdn(fp, &len, sizeof(int), 1);
  if (len < 0) {
    jlog("Error: ngram_read_bin_v5: invalid word name buffer size: %d\n", len);
    return FALSE;
  }
  w = mymalloc(len);
  rdn(fp, w, 1, len);

  /* assign word name pointers; every string must be NUL-terminated inside
     the buffer and there must be no more strings than wname[] can hold */
  ndata->wname = (char **)mymalloc(sizeof(char *) * ndata->max_word_num);
  w_end = w + len;
  wname_ok = TRUE;
  p = w; i = 0;
  while (p < w_end) {
    if (i >= ndata->max_word_num) {
      /* more names in the buffer than declared: would overflow wname[] */
      wname_ok = FALSE;
      break;
    }
    ndata->wname[i++] = p;
    while (p < w_end && *p != '\0') p++;
    if (p == w_end) {
      /* last name is not terminated: would read past the buffer */
      wname_ok = FALSE;
      break;
    }
    p++;
  }
  if (!wname_ok || i != ndata->max_word_num) {
    jlog("Error: ngram_read_bin_v5: wname error??\n");
    return FALSE;
  }

  /* read N-gram */
  for(n=0;n<ndata->n;n++) {
    jlog("stat: ngram_read_bin_v5: reading %d-gram\n", n+1);

    t = &(ndata->d[n]);

    rdn(fp, &(t->is24bit), sizeof(boolean), 1);
    rdn(fp, &(t->ct_compaction), sizeof(boolean), 1);
    rdn(fp, &(t->bgnlistlen), sizeof(NNID), 1);
    rdn(fp, &(t->context_num), sizeof(NNID), 1);

    if (n > 0) {
      /* index tables exist only for 2-gram and up */
      if (t->is24bit) {
        t->bgn_upper = (NNID_UPPER *)mymalloc_big(sizeof(NNID_UPPER), t->bgnlistlen);
        rdn(fp, t->bgn_upper, sizeof(NNID_UPPER), t->bgnlistlen);
        t->bgn_lower = (NNID_LOWER *)mymalloc_big(sizeof(NNID_LOWER), t->bgnlistlen);
        rdn(fp, t->bgn_lower, sizeof(NNID_LOWER), t->bgnlistlen);
      } else {
        t->bgn = (NNID *)mymalloc_big(sizeof(NNID), t->bgnlistlen);
        rdn(fp, t->bgn, sizeof(NNID), t->bgnlistlen);
      }
      t->num = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), t->bgnlistlen);
#ifdef WORDS_INT
      rdn_wordid(fp, t->num, t->bgnlistlen, need_conv);
#else
      rdn(fp, t->num, sizeof(WORD_ID), t->bgnlistlen);
#endif
      t->nnid2wid = (WORD_ID *)mymalloc_big(sizeof(WORD_ID), t->totalnum);
#ifdef WORDS_INT
      rdn_wordid(fp, t->nnid2wid, t->totalnum, need_conv);
#else
      rdn(fp, t->nnid2wid, sizeof(WORD_ID), t->totalnum);
#endif
    } else {
      t->bgn_upper = NULL;
      t->bgn_lower = NULL;
      t->bgn = NULL;
      t->num = NULL;
      t->bgnlistlen = 0;
      t->nnid2wid = NULL;
    }

    t->prob = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), t->totalnum);
    rdn(fp, t->prob, sizeof(LOGPROB), t->totalnum);

    /* flag: 1 if back-off weights follow */
    rdn(fp, &i, sizeof(int), 1);
    if (i == 1) {
      t->bo_wt = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), t->context_num);
      rdn(fp, t->bo_wt, sizeof(LOGPROB), t->context_num);
    } else {
      t->bo_wt = NULL;
    }

    /* flag: 1 if the nnid2ctid tables follow */
    rdn(fp, &i, sizeof(int), 1);
    if (i == 1) {
      t->nnid2ctid_upper = (NNID_UPPER *)mymalloc_big(sizeof(NNID_UPPER), t->totalnum);
      t->nnid2ctid_lower = (NNID_LOWER *)mymalloc_big(sizeof(NNID_LOWER), t->totalnum);
      rdn(fp, t->nnid2ctid_upper, sizeof(NNID_UPPER), t->totalnum);
      rdn(fp, t->nnid2ctid_lower, sizeof(NNID_LOWER), t->totalnum);
    } else {
      t->nnid2ctid_upper = NULL;
      t->nnid2ctid_lower = NULL;
    }
  }

  /* flag: 1 if bo_wt_1 follows */
  rdn(fp, &i, sizeof(int), 1);
  if (i == 1) {
    ndata->bo_wt_1 = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), ndata->d[0].context_num);
    rdn(fp, ndata->bo_wt_1, sizeof(LOGPROB), ndata->d[0].context_num);
  } else {
    ndata->bo_wt_1 = NULL;
  }

  /* flag: 1 if the additional LR 2-gram probabilities (p_2) follow */
  rdn(fp, &i, sizeof(int), 1);
  if (i == 1) {
    /* p_2 is sized by d[1].totalnum, which exists only for N >= 2 */
    if (ndata->n < 2) {
      jlog("Error: ngram_read_bin_v5: additional LR 2-gram found in a %d-gram file\n", ndata->n);
      return FALSE;
    }
    jlog("Stat: ngram_read_bin_v5: reading additional LR 2-gram\n");
    ndata->p_2 = (LOGPROB *)mymalloc_big(sizeof(LOGPROB), ndata->d[1].totalnum);
    rdn(fp, ndata->p_2, sizeof(LOGPROB), ndata->d[1].totalnum);
  } else {
    ndata->p_2 = NULL;
  }

  return TRUE;
}
/** * Check header to see whether the version matches. * * @param fp [in] file pointer */ static boolean check_header(FILE *fp) { char buf[BINGRAM_HDSIZE], *p; rdn(fp, buf, 1, BINGRAM_HDSIZE); p = buf; #ifdef WORDS_INT need_conv = FALSE; #endif /* version check */ if (strnmatch(p, BINGRAM_IDSTR, strlen(BINGRAM_IDSTR))) { /* bingram file made by mkbingram before 3.4.2 */ file_version = 3; p += strlen(BINGRAM_IDSTR) + 1; } else if (strnmatch(p, BINGRAM_IDSTR_V4, strlen(BINGRAM_IDSTR_V4))) { /* bingram file made by mkbingram later than 3.5 */ file_version = 4; p += strlen(BINGRAM_IDSTR_V4) + 1; } else if (strnmatch(p, BINGRAM_IDSTR_V5, strlen(BINGRAM_IDSTR_V5))) { /* bingram file made by JuliusLib-4 and later */ file_version = 5; p += strlen(BINGRAM_IDSTR_V5) + 1; } else { /* not a bingram file */ jlog("Error: ngram_read_bin: invalid header\n"); return FALSE; } /* word size check (for bingram build by mkbingram 3.3p5 and later */ if (strnmatch(p, BINGRAM_SIZESTR_HEAD, strlen(BINGRAM_SIZESTR_HEAD))) { p += strlen(BINGRAM_SIZESTR_HEAD); if (! strnmatch(p, BINGRAM_SIZESTR_BODY, strlen(BINGRAM_SIZESTR_BODY))) { /* word size does not match (int / short) */ #ifdef WORDS_INT if (strnmatch(p, BINGRAM_SIZESTR_BODY_2BYTE, strlen(BINGRAM_SIZESTR_BODY_2BYTE))) { /* this is 2-byte word ID, will convert while reading */ jlog("Warning: ngram_read_bin: 2-bytes bingram, converting to 4 bytes\n"); need_conv = TRUE; p += strlen(BINGRAM_SIZESTR_BODY_2BYTE) + 1; } else { jlog("Error: ngram_read_bin: unknown word byte size!\n"); return FALSE; } #else if (strnmatch(p, BINGRAM_SIZESTR_BODY_4BYTE, strlen(BINGRAM_SIZESTR_BODY_4BYTE))) { /*** 4bytes to 2bytes not implemented, just terminate here... 
***/ jlog("Error: ngram_read_bin: cannot handle 4-bytes bingram\n"); jlog("Error: ngram_read_bin: please use Julius compiled with --enable-words-int\n"); return FALSE; //p += strlen(BINGRAM_SIZESTR_BODY_4BYTE) + 1; } else { jlog("Error: ngram_read_bin: unknown word byte size!\n"); return FALSE; } #endif } else { p += strlen(BINGRAM_SIZESTR_BODY) + 1; } /* byte order check (v4 (rev.3.5) and later) */ if (file_version >= 4) { if (!strnmatch(p, BINGRAM_BYTEORDER_HEAD, strlen(BINGRAM_BYTEORDER_HEAD))) { jlog("Error: ngram_read_bin: no information for byte order??\n"); return FALSE; } p += strlen(BINGRAM_BYTEORDER_HEAD); if (! strnmatch(p, BINGRAM_NATURAL_BYTEORDER, strlen(BINGRAM_NATURAL_BYTEORDER))) { /* file endian and running endian is different, need swapping */ need_swap = TRUE; } else { need_swap = FALSE; } p += strlen(BINGRAM_NATURAL_BYTEORDER) + 1; } } /* if no BINGRAM_SIZESTR_HEAD found, just pass it */ /* in case of V3 bingram file, the unit size of word_id and its byte order cannot be determined from the header. In that case, we assume byteorder to be a BIG ENDIAN. The word_id unit size (2byte in normal, or 4byte if bingram generated with mkbingram with --enable-words-int) will be automagically detected. */ if (file_version < 4) { /* assume input as big endian */ #ifdef WORDS_BIGENDIAN need_swap = FALSE; #else need_swap = TRUE; #endif } /*jlog("%s",buf);*/ return TRUE; }
static void assemble() { int v[501]; int f = 0; int i; Labv = v; clear: for (i = 0; i <= 500; i++) Labv[i] = 0; Cp = 0; next: rch(); sw: switch (Ch) { default: if (Ch == EOF) return; printf("\nBAD CH %c AT P = %d\n", Ch, P); goto next; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': setlab(rdn()); Cp = 0; goto sw; case '$': case ' ': case '\n': goto next; case 'L': f = 0; break; case 'S': f = 1; break; case 'A': f = 2; break; case 'J': f = 3; break; case 'T': f = 4; break; case 'F': f = 5; break; case 'K': f = 6; break; case 'X': f = 7; break; case 'C': rch(); stc(rdn()); goto sw; case 'D': rch(); if (Ch == 'L') { rch(); stw(0); labref(rdn(), P - 1); } else stw(rdn()); goto sw; case 'G': rch(); A = rdn() + G; if (Ch == 'L') rch(); else printf("\nBAD CODE AT P = %d\n", P); M[A] = 0; labref(rdn(), A); goto sw; case 'Z': for (i = 0; i <= 500; i++) if (Labv[i] > 0) printf("L%d UNSET\n", i); goto clear; } W = f << FSHIFT; rch(); if (Ch == 'I') { W = W + IBIT; rch(); } if (Ch == 'P') { W = W + PBIT; rch(); } if (Ch == 'G') { W = W + GBIT; rch(); } if (Ch == 'L') { rch(); stw(W + DBIT); stw(0); labref(rdn(), P - 1); } else { int a = rdn(); if ((a & ABITS) == a) stw(W + a); else { stw(W + DBIT); stw(a); } } goto sw; }