/** * Returns a set of suggestions for the given input touch points. The commitPoint argument indicates * whether to prematurely commit the suggested words up to the given point for sentence-level * suggestion. * * Note: Currently does not support concurrent calls across threads. Continuous suggestion is * automatically activated for sequential calls that share the same starting input. * TODO: Stop detecting continuous suggestion. Start using traverseSession instead. */ void Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession, int *inputXs, int *inputYs, int *times, int *pointerIds, int *inputCodePoints, int inputSize, const float languageWeight, SuggestionResults *const outSuggestionResults) const { PROF_OPEN; PROF_START(0); const float maxSpatialDistance = TRAVERSAL->getMaxSpatialDistance(); DicTraverseSession *tSession = static_cast<DicTraverseSession *>(traverseSession); tSession->setupForGetSuggestions(pInfo, inputCodePoints, inputSize, inputXs, inputYs, times, pointerIds, maxSpatialDistance, TRAVERSAL->getMaxPointerCount()); // TODO: Add the way to evaluate cache initializeSearch(tSession); PROF_END(0); PROF_START(1); // keep expanding search dicNodes until all have terminated. while (tSession->getDicTraverseCache()->activeSize() > 0) { expandCurrentDicNodes(tSession); tSession->getDicTraverseCache()->advanceActiveDicNodes(); tSession->getDicTraverseCache()->advanceInputIndex(inputSize); } PROF_END(1); PROF_START(2); SuggestionsOutputUtils::outputSuggestions( SCORING, tSession, languageWeight, outSuggestionResults); PROF_END(2); PROF_CLOSE; }
/*
 * Writes every selected sentence of the article to stdout.
 *
 * A sentence occupies the byte range [s->begin, s->end) and is stored as a
 * run of NUL-terminated words, possibly separated by more than one NUL.
 * Each word is printed followed by a single space; a sentence flagged as
 * the beginning of a paragraph is preceded by a newline.
 */
void print_summary(article_t* article)
{
  array_t * a;
  sentence_t * s;
  string_t w;

  PROF_START;

  a = article->sentences;
  for(s=(sentence_t*)ARR_FIRST(a); !ARR_END(a); s=(sentence_t*)ARR_NEXT(a)) {
    if(!s->is_selected)
      continue;

    if(s->is_para_begin)
      fprintf(stdout, "\n");

    w = s->begin;
    while(w < s->end) {
      /* Skip NUL separators. The bound is tested first so that the byte
         at s->end (outside the sentence) is never read. */
      while(w < s->end && 0 == *w)
        ++w;
      if(w >= s->end)
        break;

      fprintf(stdout, "%s ", w);
      w = w + strlen(w);          /* lands on the word's terminating NUL */
    }
  }

  PROF_END("summary output");
}
/*
 * New, 13 Jan 1997.
 *
 * Power ladder: x := x^n, reducing with feemod() after every multiply and
 * square (mod 2^q-k per the original note).
 *
 * The bits of n are scanned from least significant upwards. `base` holds
 * the running square of the original x; whenever the current bit of n is
 * set, the accumulator x is multiplied by it.
 */
static void feepowermodg(curveParams *par, giant x, giant n)
{
    int nbits, bit;
    giant base;

    PROF_START;
    base = borrowGiant(par->maxDigits);
    gtog(x, base);              /* base = x            */
    int_to_giant(1, x);         /* accumulator x = 1   */
    nbits = bitlen(n);
    bit = 0;
    for(;;) {
        if(bitval(n, bit)) {
            mulg(base, x);
            feemod(par, x);
        }
        ++bit;
        if(bit >= nbits) {
            break;              /* no need to square after the top bit */
        }
        gsquare(base);
        feemod(par, base);
    }
    returnGiant(base);
    PROF_END(powerModTime);
}
/**
 * JNI entry point: opens the dictionary stored in the file named by sourceDir.
 *
 * The path is copied into a stack buffer as modified UTF-8, a structure policy is created
 * through DictionaryStructureWithBufferPolicyFactory, and a Dictionary is constructed from it.
 *
 * @return the new Dictionary pointer packed into a jlong, or 0 if the path is empty or
 *         unreadable or the factory returns null.
 */
static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir,
        jlong dictOffset, jlong dictSize, jboolean isUpdatable) {
    PROF_OPEN;
    PROF_START(66);

    const jsize utf8Length = env->GetStringUTFLength(sourceDir);
    if (utf8Length <= 0) {
        AKLOGE("DICT: Can't get sourceDir string");
        return 0;
    }

    // GetStringUTFRegion takes the length in UTF-16 units but writes utf8Length bytes.
    char path[utf8Length + 1];
    const jsize utf16Length = env->GetStringLength(sourceDir);
    env->GetStringUTFRegion(sourceDir, 0, utf16Length, path);
    path[utf8Length] = '\0';

    const bool updatable = (isUpdatable == JNI_TRUE);
    DictionaryStructureWithBufferPolicy *const policy =
            DictionaryStructureWithBufferPolicyFactory::newDictionaryStructureWithBufferPolicy(
                    path, static_cast<int>(dictOffset), static_cast<int>(dictSize), updatable);
    if (!policy) {
        return 0;
    }

    Dictionary *const dictionary = new Dictionary(env, policy);
    PROF_END(66);
    PROF_CLOSE;
    return reinterpret_cast<jlong>(dictionary);
}
/*
 * Numerator algebra.
 *
 *   res = (x1 x2 + a)(x1 + x2) + 2(c x1 x2 + b)
 *
 * Terms whose curve coefficient is zero are skipped rather than computed.
 */
static void numer_plus(giant x1, giant x2, giant res, curveParams *par)
{
    giant prod;     /* x1 x2, later 2(c x1 x2 + b) */
    giant sum;      /* x1 + x2 */

    PROF_START;
    prod = borrowGiant(par->maxDigits);
    sum = borrowGiant(par->maxDigits);

    gtog(x1, prod);
    mulg(x2, prod);
    feemod(par, prod);

    gtog(x2, sum);
    addg(x1, sum);
    feemod(par, sum);

    /* res = (x1 x2 + a)(x1 + x2) */
    gtog(prod, res);
    if(!isZero(par->a)) {
        addg(par->a, res);
    }
    mulg(sum, res);
    feemod(par, res);

    /* prod = c x1 x2; a Weierstrass curve has c == 0 */
    if(par->curveType != FCT_Weierstrass) {
        mulg(par->c, prod);
        feemod(par, prod);
    }
    else {
        int_to_giant(0, prod);
    }
    if(!isZero(par->b)) {
        addg(par->b, prod);
    }
    gshiftleft(1, prod);        /* times 2 */

    addg(prod, res);
    feemod(par, res);

    returnGiant(prod);
    returnGiant(sum);
    PROF_END(numerPlusTime);
}
/*
 * Completely rewritten in CryptKit-18, 13 Jan 1997, for new IEEE-style
 * curveParameters.
 *
 * Returns CURVE_PLUS or CURVE_MINUS depending on which of the curves
 * (+-)y^2 = x^3 + c x^2 + a x + b the value x lies on. The right-hand side
 * is evaluated and raised to the power (p+1)/2; the result equals the
 * right-hand side itself exactly when CURVE_PLUS is returned.
 */
int which_curve(giant x, curveParams *par)
{
    giant pw;       /* rhs ^ ((p+1)/2) */
    giant rhs;      /* x^3 + c x^2 + a x + b */
    giant expo;     /* (p+1)/2 */
    int result;

    PROF_START;
    pw = borrowGiant(par->maxDigits);
    rhs = borrowGiant(par->maxDigits);
    expo = borrowGiant(par->maxDigits);

    /* Horner evaluation: ((x + c) x + a) x + b. */
    gtog(x, rhs);
    addg(par->c, rhs);
    mulg(x, rhs);
    addg(par->a, rhs);          /* rhs = x^2 + c x + a */
    feemod(par, rhs);
    mulg(x, rhs);
    addg(par->b, rhs);
    feemod(par, rhs);

    /* Test whether rhs is a square. */
    gtog(rhs, pw);
    make_base(par, expo);
    iaddg(1, expo);
    gshiftright(1, expo);       /* expo = (p+1)/2 */
    feepowermodg(par, pw, expo);

    result = (gcompg(pw, rhs) == 0) ? CURVE_PLUS : CURVE_MINUS;

    returnGiant(pw);
    returnGiant(rhs);
    returnGiant(expo);
    PROF_END(whichCurveTime);
    return result;
}
/*
 * Binary inverse method.
 *
 * On success returns 1 and replaces x with its inverse. Returns 0 if no
 * inverse exists, in which case x becomes GCD(x,p). In both cases a
 * negative x is brought back up by adding p once.
 *
 * Note that the isone(x) shortcut returns before PROF_END is reached.
 */
int binvaux(giant p, giant x)
{
    giant tmp, u0, u1, v0, v1;
    int result = 1;
    int digits;

    PROF_START;

    if(isone(x)) {
        return(result);
    }

    digits = 4 * abs(p->sign);
    tmp = borrowGiant(digits);
    u0 = borrowGiant(digits);
    u1 = borrowGiant(digits);
    v0 = borrowGiant(digits);
    v1 = borrowGiant(digits);

    int_to_giant(1, v0);
    gtog(x, v1);
    int_to_giant(0, x);
    gtog(p, u1);

    while(!isZero(v1)) {
        /* u0 = u1 divided by v1 via bdivg() */
        gtog(u1, u0);
        bdivg(v1, u0);

        /* (x, v0) <- (v0, x - u0*v0) */
        gtog(x, tmp);
        gtog(v0, x);
        mulg(u0, v0);
        subg(v0, tmp);
        gtog(tmp, v0);

        /* (u1, v1) <- (v1, u1 - u0*v1) */
        gtog(u1, tmp);
        gtog(v1, u1);
        mulg(u0, v1);
        subg(v1, tmp);
        gtog(tmp, v1);
    }

    if(!isone(u1)) {
        /* u1 is the GCD and it is not 1: report it through x. */
        gtog(u1, x);
        result = 0;
    }
    if(x->sign < 0) {
        addg(p, x);
    }

    returnGiant(tmp);
    returnGiant(u0);
    returnGiant(u1);
    returnGiant(v0);
    returnGiant(v1);
    PROF_END(binvauxTime);
    return(result);
}
/*
 * Loads the language description XML file `file_name` into `lang`.
 *
 * The first two tags (the leading line and the dictionary tag) are
 * consumed without inspection. Each following tag must be "stemmer",
 * "parser" or "exclude", which is delegated to the matching sub-parser,
 * or "/dictionary", which ends parsing. Running out of tags also ends
 * parsing with SMRZR_OK. Any other tag is reported on stderr and treated
 * as an error.
 */
status_t parse_lang_xml(const char* file_name, lang_t* lang)
{
  string_t tag;

  PROF_START;

  if(SMRZR_OK != stream_create(file_name, &lang->stream))
    ERROR_RET;

  /* ignore the first line */
  if(NULL == get_xml_tag(&lang->stream))
    ERROR_RET;

  /* next tag is the dictionary */
  if(NULL == get_xml_tag(&lang->stream))
    ERROR_RET;

  /* every further tag is a child - stemmer/parser/exclude */
  while(NULL != (tag = get_xml_tag(&lang->stream))) {
    if(0 == strcmp("/dictionary", tag))
      break;                    /* done with xml doc */

    if(0 == strcmp("stemmer", tag)) {
      if(SMRZR_OK != parse_stemmer_xml(lang))
        return SMRZR_ERROR;
      continue;
    }

    if(0 == strcmp("parser", tag)) {
      if(SMRZR_OK != parse_parser_xml(lang))
        return SMRZR_ERROR;
      continue;
    }

    if(0 == strcmp("exclude", tag)) {
      if(SMRZR_OK != parse_exclude_xml(lang))
        return SMRZR_ERROR;
      continue;
    }

    fprintf(stderr, "Invalid child '%s' of 'dictionary' node\n", tag);
    ERROR_RET;
  }

  PROF_END("lang info xml parsing");
  return(SMRZR_OK);
}
/*
 * Returns non-zero (SIGNATURE_INVALID) iff p0x cannot be the x-coordinate
 * of the sum of two points whose respective x-coordinates are p1x, p2x.
 *
 * Both branches leave a residue in s2 that must be zero for p0x to be
 * admissible:
 *   - p1x == p2x (doubling): numer_double - p0x * denom_double
 *   - otherwise:             s3 x0^2 - s1 x0 + s2, with s1 = 2*numer_plus,
 *                            s2 = numer_times and s3 = denom_times.
 */
int signature_compare(giant p0x, giant p1x, giant p2x, curveParams *par)
{
    int ret;
    giant s1, s2, s3, s4, s5;

    PROF_START;

    s1 = borrowGiant(par->maxDigits);
    s2 = borrowGiant(par->maxDigits);
    s3 = borrowGiant(par->maxDigits);
    s4 = borrowGiant(par->maxDigits);
    s5 = borrowGiant(par->maxDigits);

    if(gcompg(p1x, p2x) != 0) {
        /* General addition. */
        numer_plus(p1x, p2x, s1, par);
        gshiftleft(1, s1);
        feemod(par, s1);

        int_to_giant(1, s3);                        /* z1 = z2 = 1 */
        numer_times(p1x, s3, p2x, s3, s2, par);

        int_to_giant(1, s4);
        int_to_giant(1, s5);
        denom_times(p1x, s4, p2x, s5, s3, par);

        /* Now we require s3 x0^2 - s1 x0 + s2 == 0. */
        mulg(p0x, s3);
        feemod(par, s3);
        subg(s1, s3);
        mulg(p0x, s3);
        feemod(par, s3);
        addg(s3, s2);
        feemod(par, s2);
    }
    else {
        /* Doubling, with z = 1. */
        int_to_giant(1, s1);
        numer_double(p1x, s1, s2, par);
        denom_double(p1x, s1, s3, par);
        mulg(p0x, s3);
        subg(s3, s2);
        feemod(par, s2);
    }

    ret = isZero(s2) ? 0 : SIGNATURE_INVALID;

    returnGiant(s1);
    returnGiant(s2);
    returnGiant(s3);
    returnGiant(s4);
    returnGiant(s5);
    PROF_END(sigCompTime);
    return(ret);
}
/*
 * Numerator algebra.
 *
 *   res := (x1 x2 - a z1 z2)^2 - 4 b (x1 z2 + x2 z1 + c z1 z2) z1 z2
 *
 * The a-term is skipped when a is zero, and the whole subtracted term is
 * skipped when b is zero.
 */
static void numer_times(giant x1, giant z1, giant x2, giant z2, giant res,
    curveParams *par)
{
    giant xx;       /* x1 x2 */
    giant zz;       /* z1 z2 */
    giant acc;      /* a z1 z2, then the subtracted b-term */
    giant cross;    /* x2 z1, then x1 z2 */

    PROF_START;
    xx = borrowGiant(par->maxDigits);
    zz = borrowGiant(par->maxDigits);
    acc = borrowGiant(par->maxDigits);
    cross = borrowGiant(par->maxDigits);

    gtog(x1, xx);
    mulg(x2, xx);
    feemod(par, xx);

    gtog(z1, zz);
    mulg(z2, zz);
    feemod(par, zz);

    /* res = (x1 x2 - a z1 z2)^2 */
    gtog(xx, res);
    if(!isZero(par->a)) {
        gtog(par->a, acc);
        mulg(zz, acc);
        feemod(par, acc);
        subg(acc, res);
    }
    gsquare(res);
    feemod(par, res);

    if(!isZero(par->b)) {
        /* acc = c z1 z2; a Weierstrass curve has c == 0 */
        if(par->curveType != FCT_Weierstrass) {
            gtog(par->c, acc);
            mulg(zz, acc);
            feemod(par, acc);
        }
        else {
            int_to_giant(0, acc);
        }

        gtog(z1, cross);
        mulg(x2, cross);
        feemod(par, cross);
        addg(cross, acc);

        gtog(x1, cross);
        mulg(z2, cross);
        feemod(par, cross);
        addg(cross, acc);

        /* acc = 4 b (x1 z2 + x2 z1 + c z1 z2) z1 z2 */
        mulg(par->b, acc);
        feemod(par, acc);
        mulg(zz, acc);
        gshiftleft(2, acc);
        feemod(par, acc);

        subg(acc, res);
        feemod(par, res);
    }

    returnGiant(xx);
    returnGiant(zz);
    returnGiant(acc);
    returnGiant(cross);
    PROF_END(numerTimesTime);
}
/*
 * Numerator algebra.
 *
 *   res := (x^2 - a z^2)^2 - 4 b (2 x + c z) z^3
 *
 * Shortcuts (REC 17 Jan 1997): the multiply by a is skipped when a is one,
 * the a-term is skipped when a is zero, and the whole b-term is skipped
 * when b is zero.
 */
static void numer_double(giant x, giant z, giant res, curveParams *par)
{
    giant lhs;      /* x^2, then (x^2 - a z^2)^2 */
    giant zsq;      /* z^2 */

    PROF_START;
    lhs = borrowGiant(par->maxDigits);
    zsq = borrowGiant(par->maxDigits);

    gtog(x, lhs);
    gsquare(lhs);
    feemod(par, lhs);

    /* res is used as scratch for z^2 (and a z^2) before its final value. */
    gtog(z, res);
    gsquare(res);
    feemod(par, res);
    gtog(res, zsq);

    if(!isZero(par->a)) {
        if(!isone(par->a)) {
            mulg(par->a, res);
            feemod(par, res);
        }
        subg(res, lhs);
        feemod(par, lhs);
    }
    gsquare(lhs);
    feemod(par, lhs);           /* lhs := (x^2 - a z^2)^2 */

    if(isZero(par->b)) {
        gtog(lhs, res);
    }
    else {
        /* res = c z; a Weierstrass curve has c == 0 */
        if(par->curveType != FCT_Weierstrass) {
            gtog(z, res);
            mulg(par->c, res);
            feemod(par, res);
        }
        else {
            int_to_giant(0, res);
        }
        addg(x, res);
        addg(x, res);           /* res = 2 x + c z */
        mulg(par->b, res);
        feemod(par, res);
        gshiftleft(2, res);     /* times 4 */
        mulg(z, res);
        feemod(par, res);
        mulg(zsq, res);         /* times z^2, giving z^3 overall */
        feemod(par, res);

        negg(res);
        addg(lhs, res);
        feemod(par, res);
    }

    returnGiant(lhs);
    returnGiant(zsq);
    PROF_END(numerDoubleTime);
}
/*
 * Denominator algebra.
 *
 *   res := (x1 z2 - x2 z1)^2
 */
static void denom_times(giant x1, giant z1, giant x2, giant z2, giant res,
    curveParams *par)
{
    giant cross;    /* x2 z1 */

    PROF_START;
    cross = borrowGiant(par->maxDigits);

    /* res = x1 z2 */
    gtog(x1, res);
    mulg(z2, res);
    feemod(par, res);

    /* cross = z1 x2 */
    gtog(z1, cross);
    mulg(x2, cross);
    feemod(par, cross);

    subg(cross, res);
    gsquare(res);
    feemod(par, res);

    returnGiant(cross);
    PROF_END(denomTimesTime);
}
/*
 * Denominator algebra.
 *
 *   res = 4 z (x^3 + c x^2 z + a x z^2 + b z^3)
 *
 * The polynomial is built up Horner-style in res; each coefficient term is
 * skipped when that coefficient is zero (c == 0 for Weierstrass curves).
 */
static void denom_double(giant x, giant z, giant res, curveParams *par)
{
    giant zpow;     /* z, then z^2, then b z^3 */
    giant term;     /* scratch for c z and a z^2 */

    PROF_START;
    zpow = borrowGiant(par->maxDigits);
    term = borrowGiant(par->maxDigits);

    gtog(x, res);
    gtog(z, zpow);

    if(par->curveType != FCT_Weierstrass) {
        gtog(par->c, term);
        mulg(zpow, term);
        feemod(par, term);
        addg(term, res);        /* res = x + c z */
    }
    mulg(x, res);
    feemod(par, res);           /* res = x^2 + c x z */

    gsquare(zpow);
    feemod(par, zpow);          /* zpow = z^2 */

    if(!isZero(par->a)) {
        gtog(zpow, term);
        mulg(par->a, term);
        feemod(par, term);
        addg(term, res);        /* res += a z^2 */
    }
    mulg(x, res);
    feemod(par, res);           /* res = x^3 + c x^2 z + a x z^2 */

    if(!isZero(par->b)) {
        mulg(z, zpow);
        feemod(par, zpow);
        mulg(par->b, zpow);
        feemod(par, zpow);
        addg(zpow, res);        /* res += b z^3 */
    }

    mulg(z, res);
    gshiftleft(2, res);         /* times 4 */
    feemod(par, res);

    returnGiant(zpow);
    returnGiant(term);
    PROF_END(denomDoubleTime);
}
/*
 * General elliptic multiply.
 *
 * {xx, zz} := k * {xx, zz}
 *
 * k == 1 returns immediately (before PROF_END), k == 2 is a single
 * ell_even(). Otherwise the bits of k below the top one are scanned from
 * high to low, maintaining the pair {xx, zz} / {xalt, zalt} with ell_odd()
 * and ell_even() steps against the saved starting point {x0, z0}.
 */
void elliptic(giant xx, giant zz, giant k, curveParams *par)
{
    int len = bitlen(k);
    int pos = len - 2;
    int bit;
    giant xalt;
    giant zalt;
    giant x0;
    giant z0;

    PROF_START;

    if(sequalg(1, k)) {
        return;
    }

    if(sequalg(2, k)) {
        ell_even(xx, zz, xx, zz, par);
    }
    else {
        zalt = borrowGiant(par->maxDigits);
        xalt = borrowGiant(par->maxDigits);
        z0 = borrowGiant(par->maxDigits);
        x0 = borrowGiant(par->maxDigits);

        gtog(xx, x0);
        gtog(zz, z0);
        ell_even(xx, zz, xalt, zalt, par);

        for(;;) {
            bit = bitval(k, pos);
            pos--;
            if(bit) {
                ell_odd(xalt, zalt, xx, zz, x0, z0, par);
                ell_even(xalt, zalt, xalt, zalt, par);
            }
            else {
                ell_odd(xx, zz, xalt, zalt, x0, z0, par);
                ell_even(xx, zz, xx, zz, par);
            }
            if(pos < 0) {       // REC fix 9/23/94
                break;
            }
        }

        returnGiant(xalt);
        returnGiant(zalt);
        returnGiant(x0);
        returnGiant(z0);
    }

    PROF_END(ellipticTime);
}
int CParticle::Render (float fBrightness) { if (m_nDelay > 0) return 0; if (m_nLife < 0) return 0; if ((m_nType < 0) || (m_nType >= PARTICLE_TYPES)) return 0; #if 0 //DBG if (m_nType == LIGHT_PARTICLES) m_nType = m_nType; CBitmap* bmP = ParticleImageInfo (int (m_nType)).bmP; if (!bmP) return 0; #endif #if !ENABLE_RENDER return 1; #else PROF_START bool bFlushed = false; if (particleManager.LastType () != m_nRenderType) { PROF_END(ptParticles) bFlushed = particleManager.FlushBuffer (fBrightness); PROF_CONT particleManager.SetLastType (m_nRenderType); particleManager.m_bBufferEmissive = m_bEmissive; } else bFlushed = false; #if LAZY_RENDER_SETUP tRenderParticle* pb = particleManager.particleBuffer + particleManager.BufPtr (); pb->particle = this; pb->fBrightness = fBrightness; pb->nFrame = m_iFrame; pb->nRotFrame = m_nRotFrame; #else Setup (fBrightness, m_iFrame, m_nRotFrame, particleRenderBuffer + particleManager.BufPtr () * 4, 0); #endif particleManager.IncBufPtr (); if (particleManager.BufPtr () >= PART_BUF_SIZE) particleManager.FlushBuffer (fBrightness); if (particleManager.Animate ()) { if (m_bAnimate && (m_nFrames > 1)) { m_iFrame = (m_iFrame + 1) % (m_nFrames * m_nFrames); UpdateTexCoord (); } if (m_bRotate) { if (m_bRotate < 0) m_nRotFrame = (m_nRotFrame + 1) % PARTICLE_POSITIONS; else { m_bRotate <<= 1; if (m_bRotate == 4) { m_bRotate = 1; m_nRotFrame = (m_nRotFrame + 1) % PARTICLE_POSITIONS; } } } } PROF_END(ptParticles) return bFlushed ? -1 : 1; #endif }
/*
 * Completely rewritten in CryptKit-18, 13 Jan 1997, for new IEEE-style
 * curveParameters.
 *
 * Addition algorithm for x3 = x1 + x2 on the curve, with sign ambiguity s.
 *
 * From theory, we know that if {x1,1} and {x2,1} are on a curve, then
 * their elliptic sum {x1,1} + {x2,1} = {x3,1} must have x3 as one of two
 * values:
 *
 *     x3 = U/2 + s*Sqrt[U^2/4 - V]
 *
 * where sign s = +-1, and U,V are functions of x1,x2. The present function
 * is called a maximum of twice, to settle which of +- is s. When a call is
 * made, it is guaranteed already that x1, x2 both lie on the same curve
 * (+- curve); i.e., which curve (+-) is not connected at all with sign s
 * of the x3 relation.
 *
 * When x1 == x2 the doubling formulas are used instead and s is ignored.
 */
void elliptic_add(giant x1, giant x2, giant x3, curveParams *par, int s)
{
    giant expo;     /* (p+1)/4 */
    giant s1, s2, s3, s4, s5;

    PROF_START;
    expo = borrowGiant(par->maxDigits);
    s1 = borrowGiant(par->maxDigits);
    s2 = borrowGiant(par->maxDigits);
    s3 = borrowGiant(par->maxDigits);
    s4 = borrowGiant(par->maxDigits);
    s5 = borrowGiant(par->maxDigits);

    if(gcompg(x1, x2) == 0) {
        /* Doubling: x3 = numer_double / denom_double, with z = 1. */
        int_to_giant(1, s1);
        numer_double(x1, s1, x3, par);
        denom_double(x1, s1, s2, par);
        binvg_cp(par, s2);
        mulg(s2, x3);
        feemod(par, x3);
    }
    else {
        numer_plus(x1, x2, s1, par);

        int_to_giant(1, s3);                    /* z1 = z2 = 1 */
        numer_times(x1, s3, x2, s3, s2, par);

        int_to_giant(1, s4);
        int_to_giant(1, s5);
        denom_times(x1, s4, x2, s5, s3, par);
        binvg_cp(par, s3);                      /* s3 := 1/denominator */

        mulg(s3, s1);
        feemod(par, s1);                        /* s1 := U/2. */
        mulg(s3, s2);
        feemod(par, s2);                        /* s2 := V. */

        /* Now x3 will be s1 +- Sqrt[s1^2 - s2]. */
        gtog(s1, s4);
        gsquare(s4);
        feemod(par, s4);
        subg(s2, s4);

        make_base(par, expo);
        iaddg(1, expo);
        gshiftright(2, expo);                   /* expo := (p+1)/4. */
        feepowermodg(par, s4, expo);            /* s4 := (s1^2 - s2)^((p+1)/4) (mod p). */

        gtog(s1, x3);
        if(s != SIGN_PLUS) {
            negg(s4);
        }
        addg(s4, x3);
        feemod(par, x3);
    }

    returnGiant(expo);
    returnGiant(s1);
    returnGiant(s2);
    returnGiant(s3);
    returnGiant(s4);
    returnGiant(s5);
    PROF_END(ellAddTime);
}
static jint latinime_BinaryDictionary_open(JNIEnv *env, jobject object, jstring sourceDir, jlong dictOffset, jlong dictSize, jint typedLetterMultiplier, jint fullWordMultiplier, jint maxWordLength, jint maxWords, jint maxAlternatives) { PROF_OPEN; PROF_START(66); const char *sourceDirChars = env->GetStringUTFChars(sourceDir, NULL); if (sourceDirChars == NULL) { LOGE("DICT: Can't get sourceDir string"); return 0; } int fd = 0; void *dictBuf = NULL; int adjust = 0; #ifdef USE_MMAP_FOR_DICTIONARY /* mmap version */ fd = open(sourceDirChars, O_RDONLY); if (fd < 0) { LOGE("DICT: Can't open sourceDir. sourceDirChars=%s errno=%d", sourceDirChars, errno); return 0; } int pagesize = getpagesize(); adjust = dictOffset % pagesize; int adjDictOffset = dictOffset - adjust; int adjDictSize = dictSize + adjust; dictBuf = mmap(NULL, sizeof(char) * adjDictSize, PROT_READ, MAP_PRIVATE, fd, adjDictOffset); if (dictBuf == MAP_FAILED) { LOGE("DICT: Can't mmap dictionary. errno=%d", errno); return 0; } dictBuf = (void *)((char *)dictBuf + adjust); #else // USE_MMAP_FOR_DICTIONARY /* malloc version */ FILE *file = NULL; file = fopen(sourceDirChars, "rb"); if (file == NULL) { LOGE("DICT: Can't fopen sourceDir. sourceDirChars=%s errno=%d", sourceDirChars, errno); return 0; } dictBuf = malloc(sizeof(char) * dictSize); if (!dictBuf) { LOGE("DICT: Can't allocate memory region for dictionary. errno=%d", errno); return 0; } int ret = fseek(file, (long)dictOffset, SEEK_SET); if (ret != 0) { LOGE("DICT: Failure in fseek. ret=%d errno=%d", ret, errno); return 0; } ret = fread(dictBuf, sizeof(char) * dictSize, 1, file); if (ret != 1) { LOGE("DICT: Failure in fread. ret=%d errno=%d", ret, errno); return 0; } ret = fclose(file); if (ret != 0) { LOGE("DICT: Failure in fclose. 
ret=%d errno=%d", ret, errno); return 0; } #endif // USE_MMAP_FOR_DICTIONARY env->ReleaseStringUTFChars(sourceDir, sourceDirChars); if (!dictBuf) { LOGE("DICT: dictBuf is null"); return 0; } Dictionary *dictionary = NULL; if (BinaryFormat::UNKNOWN_FORMAT == BinaryFormat::detectFormat((uint8_t*)dictBuf)) { LOGE("DICT: dictionary format is unknown, bad magic number"); #ifdef USE_MMAP_FOR_DICTIONARY releaseDictBuf(((char*)dictBuf) - adjust, adjDictSize, fd); #else // USE_MMAP_FOR_DICTIONARY releaseDictBuf(dictBuf, 0, 0); #endif // USE_MMAP_FOR_DICTIONARY } else { dictionary = new Dictionary(dictBuf, dictSize, fd, adjust, typedLetterMultiplier, fullWordMultiplier, maxWordLength, maxWords, maxAlternatives); } PROF_END(66); PROF_CLOSE; return (jint)dictionary; }
static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir, jlong dictOffset, jlong dictSize) { PROF_OPEN; PROF_START(66); const jsize sourceDirUtf8Length = env->GetStringUTFLength(sourceDir); if (sourceDirUtf8Length <= 0) { AKLOGE("DICT: Can't get sourceDir string"); return 0; } char sourceDirChars[sourceDirUtf8Length + 1]; env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars); sourceDirChars[sourceDirUtf8Length] = '\0'; int fd = 0; void *dictBuf = 0; int adjust = 0; #ifdef USE_MMAP_FOR_DICTIONARY /* mmap version */ fd = open(sourceDirChars, O_RDONLY); if (fd < 0) { AKLOGE("DICT: Can't open sourceDir. sourceDirChars=%s errno=%d", sourceDirChars, errno); return 0; } int pagesize = getpagesize(); adjust = static_cast<int>(dictOffset) % pagesize; int adjDictOffset = static_cast<int>(dictOffset) - adjust; int adjDictSize = static_cast<int>(dictSize) + adjust; dictBuf = mmap(0, adjDictSize, PROT_READ, MAP_PRIVATE, fd, adjDictOffset); if (dictBuf == MAP_FAILED) { AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno); return 0; } dictBuf = static_cast<char *>(dictBuf) + adjust; #else // USE_MMAP_FOR_DICTIONARY /* malloc version */ FILE *file = 0; file = fopen(sourceDirChars, "rb"); if (file == 0) { AKLOGE("DICT: Can't fopen sourceDir. sourceDirChars=%s errno=%d", sourceDirChars, errno); return 0; } dictBuf = malloc(dictSize); if (!dictBuf) { AKLOGE("DICT: Can't allocate memory region for dictionary. errno=%d", errno); return 0; } int ret = fseek(file, static_cast<long>(dictOffset), SEEK_SET); if (ret != 0) { AKLOGE("DICT: Failure in fseek. ret=%d errno=%d", ret, errno); return 0; } ret = fread(dictBuf, dictSize, 1, file); if (ret != 1) { AKLOGE("DICT: Failure in fread. ret=%d errno=%d", ret, errno); return 0; } ret = fclose(file); if (ret != 0) { AKLOGE("DICT: Failure in fclose. 
ret=%d errno=%d", ret, errno); return 0; } #endif // USE_MMAP_FOR_DICTIONARY if (!dictBuf) { AKLOGE("DICT: dictBuf is null"); return 0; } Dictionary *dictionary = 0; if (BinaryFormat::UNKNOWN_FORMAT == BinaryFormat::detectFormat(static_cast<uint8_t *>(dictBuf), static_cast<int>(dictSize))) { AKLOGE("DICT: dictionary format is unknown, bad magic number"); #ifdef USE_MMAP_FOR_DICTIONARY releaseDictBuf(static_cast<const char *>(dictBuf) - adjust, adjDictSize, fd); #else // USE_MMAP_FOR_DICTIONARY releaseDictBuf(dictBuf, 0, 0); #endif // USE_MMAP_FOR_DICTIONARY } else { dictionary = new Dictionary(dictBuf, static_cast<int>(dictSize), fd, adjust); } PROF_END(66); PROF_CLOSE; return reinterpret_cast<jlong>(dictionary); }
void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int inputLength, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const bool useFullEditDistance, Correction *correction, WordsPriorityQueuePool *queuePool) { PROF_OPEN; PROF_START(0); PROF_END(0); PROF_START(1); getOneWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, bigramMap, bigramFilter, useFullEditDistance, inputLength, correction, queuePool); PROF_END(1); PROF_START(2); // Note: This line is intentionally left blank PROF_END(2); PROF_START(3); // Note: This line is intentionally left blank PROF_END(3); PROF_START(4); bool hasAutoCorrectionCandidate = false; WordsPriorityQueue* masterQueue = queuePool->getMasterQueue(); if (masterQueue->size() > 0) { float nsForMaster = masterQueue->getHighestNormalizedScore( proximityInfo->getPrimaryInputWord(), inputLength, 0, 0, 0); hasAutoCorrectionCandidate = (nsForMaster > START_TWO_WORDS_CORRECTION_THRESHOLD); } PROF_END(4); PROF_START(5); // Multiple word suggestions if (SUGGEST_MULTIPLE_WORDS && inputLength >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) { getSplitMultipleWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, inputLength, correction, queuePool, hasAutoCorrectionCandidate); } PROF_END(5); PROF_START(6); // Note: This line is intentionally left blank PROF_END(6); if (DEBUG_DICT) { queuePool->dumpSubQueue1TopSuggestions(); for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { WordsPriorityQueue* queue = queuePool->getSubQueue(FIRST_WORD_INDEX, i); if (queue->size() > 0) { WordsPriorityQueue::SuggestedWord* sw = queue->top(); const int score = sw->mScore; const unsigned short* word = sw->mWord; const int wordLength = sw->mWordLength; float ns = Correction::RankingAlgorithm::calcNormalizedScore( proximityInfo->getPrimaryInputWord(), i, word, wordLength, score); ns += 0; AKLOGI("--- TOP SUB WORDS 
for %d --- %d %f [%d]", i, score, ns, (ns > TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD)); DUMP_WORD(proximityInfo->getPrimaryInputWord(), i); DUMP_WORD(word, wordLength); } } } }
// bigramMap contains the association <bigram address> -> <bigram frequency> // bigramFilter is a bloom filter for fast rejection: see functions setInFilter and isInFilter // in bigram_dictionary.cpp int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, WordsPriorityQueuePool *queuePool, Correction *correction, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const bool useFullEditDistance, unsigned short *outWords, int *frequencies) { queuePool->clearAll(); Correction* masterCorrection = correction; correction->resetCorrection(); if (BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & FLAGS) { // Incrementally tune the word and try all possibilities int codesBuffer[getCodesBufferSize(codes, codesSize)]; int xCoordinatesBuffer[codesSize]; int yCoordinatesBuffer[codesSize]; getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramMap, bigramFilter, useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection, queuePool, GERMAN_UMLAUT_DIGRAPHS, sizeof(GERMAN_UMLAUT_DIGRAPHS) / sizeof(GERMAN_UMLAUT_DIGRAPHS[0])); } else if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & FLAGS) { int codesBuffer[getCodesBufferSize(codes, codesSize)]; int xCoordinatesBuffer[codesSize]; int yCoordinatesBuffer[codesSize]; getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesSize, bigramMap, bigramFilter, useFullEditDistance, codes, codesSize, 0, codesBuffer, masterCorrection, queuePool, FRENCH_LIGATURES_DIGRAPHS, sizeof(FRENCH_LIGATURES_DIGRAPHS) / sizeof(FRENCH_LIGATURES_DIGRAPHS[0])); } else { // Normal processing getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, bigramMap, bigramFilter, useFullEditDistance, masterCorrection, queuePool); } PROF_START(20); if 
(DEBUG_DICT) { float ns = queuePool->getMasterQueue()->getHighestNormalizedScore( proximityInfo->getPrimaryInputWord(), codesSize, 0, 0, 0); ns += 0; AKLOGI("Max normalized score = %f", ns); } const int suggestedWordsCount = queuePool->getMasterQueue()->outputSuggestions( proximityInfo->getPrimaryInputWord(), codesSize, frequencies, outWords); if (DEBUG_DICT) { float ns = queuePool->getMasterQueue()->getHighestNormalizedScore( proximityInfo->getPrimaryInputWord(), codesSize, 0, 0, 0); ns += 0; AKLOGI("Returning %d words", suggestedWordsCount); /// Print the returned words for (int j = 0; j < suggestedWordsCount; ++j) { short unsigned int* w = outWords + j * MAX_WORD_LENGTH; char s[MAX_WORD_LENGTH]; for (int i = 0; i <= MAX_WORD_LENGTH; i++) s[i] = w[i]; (void)s; AKLOGI("%s %i", s, frequencies[j]); } } PROF_END(20); PROF_CLOSE; return suggestedWordsCount; }
/*
 * Scores every sentence of the article and marks the best ones as selected.
 *
 * 1. Finds the TOP_OCCS_MAX highest word occurrence counts (and the words
 *    they belong to).
 * 2. Scores each sentence: every word found in article->words contributes
 *    its occurrence count multiplied by occ2score[rank], where rank is the
 *    position of that count in the top list (or TOP_OCCS_MAX when it is
 *    not in the list). Paragraph-opening sentences are boosted by 1.6; the
 *    first sentence encountered that does not open a paragraph is doubled.
 * 3. Copies the sentences into a temporary array sorted by score and
 *    selects from its start until roughly `ratio` of the article's words
 *    are covered.
 *
 * Returns SMRZR_OK on success; failures go through ERROR_RET.
 */
status_t grade_article(article_t* article, lang_t* lang, float ratio)
{
  array_t * a, * temp;
  word_t * w;
  sentence_t * s, * s_score;
  size_t top_occs[] = { 0, 0, 0, 0}, occs, i, max_words;
  word_t * top_words[] = { 0, 0, 0, 0};
  string_t ws, ws_stem;
  bool_t is_first = SMRZR_TRUE;

  PROF_START;

  /* find top occs and corresponding words */
  a = article->words;
  for(w = (word_t*)ARR_FIRST(a); !ARR_END(a); w = (word_t*)ARR_NEXT(a)) {
    for(occs = 0; occs < TOP_OCCS_MAX; ++occs) {
      if(top_occs[occs] < w->num_occ) {
        /* shift lower entries down to make room at position occs */
        for(i = TOP_OCCS_MAX-1; i > occs; --i) {
          top_occs[i] = top_occs[i-1];
          top_words[i] = top_words[i-1];
        }
        top_occs[occs] = w->num_occ;
        top_words[occs] = w;
        break;
      }
    }
  }

  /* score all sentences */
  a = article->sentences;
  for(s=(sentence_t*)ARR_FIRST(a); !ARR_END(a); s=(sentence_t*)ARR_NEXT(a)) {
    ws = s->begin;
    while(ws < s->end) {
      /* Skip NUL separators; test the bound first so the byte at s->end
         is never read. */
      while(ws < s->end && 0 == *ws)
        ++ws;
      if(ws >= s->end)
        break;

      if(NULL == (ws_stem = get_word_stem(&article->stack, lang, ws, SMRZR_FALSE)))
        ERROR_RET;

      if(NULL == (w = (word_t*)array_search(article->words, ws_stem, comp_word_by_stem))) {
        /* possibly a word excluded */
        ws = ws + strlen(ws);
        continue;
      }

      /* Rank of this word's count in the top list. The index is checked
         before top_occs[] is read so the array is never read past its end. */
      occs = 0;
      while(occs < TOP_OCCS_MAX && top_occs[occs] != w->num_occ)
        ++occs;

      /* NOTE(review): occs may equal TOP_OCCS_MAX here; occ2score is
         presumably sized TOP_OCCS_MAX + 1 - confirm at its definition. */
      switch(occ2score[occs]) {
        case 3: /* score += occ * 3 */
          s->score += ((w->num_occ << 1) + w->num_occ);
          break;
        case 2: /* score += occ * 2 */
          s->score += (w->num_occ << 1);
          break;
        case 1: /* score += occ */
          s->score += w->num_occ;
          break;
        default:
          ERROR_RET;
      }

      ws = ws + strlen(ws);
    }

    if(SMRZR_TRUE == s->is_para_begin) {
      s->score *= 1.6;
    } else if(SMRZR_TRUE == is_first) {
      s->score = (s->score << 1); /* super-boost 1st line */
      is_first = SMRZR_FALSE;
    }
  }

  /* sort on sentence score */
  if(NULL == (temp = array_new(SMRZR_TRUE, sizeof(sentence_t),
                               ARR_SZ(article->sentences), NULL)))
    ERROR_RET;

  for(s=(sentence_t*)ARR_FIRST(a); !ARR_END(a); s=(sentence_t*)ARR_NEXT(a)) {
    if(NULL == (s_score = array_sorted_alloc(&temp, (elem_t)(size_t)(s->score),
                                             comp_sentence_by_score)))
      ERROR_RET;
    memcpy(s_score, s, sizeof(sentence_t));
    s_score->cookie = (elem_t)s;  /* remember the original to flag it later */
  }

  /* pick sentences with highest scores until we get required ratio of words */
  max_words = article->num_words * ratio;
  a = temp;
  /* max_words is unsigned; the signed cast detects the budget being overshot */
  for(s=(sentence_t*)ARR_FIRST(a); !ARR_END(a) && (ssize_t)max_words > 0;
      s=(sentence_t*)ARR_NEXT(a)) {
    ((sentence_t*)s->cookie)->is_selected = SMRZR_TRUE;
    max_words -= s->num_words;
  }

  array_free(temp);

  PROF_END("article grading");
  return(SMRZR_OK);
}
int main(int argc, char **argv) { if (argc != 4) { fputs("We need 3 arguments\n", stdout); return 1; } MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); MPI_Comm_size(MPI_COMM_WORLD, &mpi_world_size); rebuild_target = atoi(argv[1]); const char *store_dir = argv[2]; const char *data_file = argv[3]; int ntargets = mpi_world_size - 1; if (ntargets > MAX_STORAGE_TARGETS) return 1; if (rebuild_target < 0 || rebuild_target > ntargets) return 1; helper = 1; while (helper == rebuild_target) helper += 1; if (helper == rebuild_target) return 1; PROF_START(total); PROF_START(init); int last_run_fd = -1; RunData last_run; memset(&last_run, 0, sizeof(RunData)); if (mpi_rank == 0) { last_run_fd = open(data_file, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); read(last_run_fd, &last_run, sizeof(RunData)); } /* Create mapping from storage targets to ranks, and vice versa */ Target targetIDs[MAX_STORAGE_TARGETS] = {{0,0}}; Target targetID = {0,0}; if (mpi_rank != 0) { int store_fd = open(store_dir, O_DIRECTORY | O_RDONLY); int target_ID_fd = openat(store_fd, "targetNumID", O_RDONLY); char targetID_s[20] = {0}; read(target_ID_fd, targetID_s, sizeof(targetID_s)); close(target_ID_fd); close(store_fd); targetID.id = atoi(targetID_s); targetID.rank = mpi_rank; } MPI_Gather( &targetID, sizeof(Target), MPI_BYTE, targetIDs, sizeof(Target), MPI_BYTE, 0, MPI_COMM_WORLD); if (mpi_rank == 0) { if (last_run.ntargets != ntargets) { /* ERROR - new number of targets */ assert(0); } for (int i = 0; i < ntargets; i++) targetIDs[i] = targetIDs[i+1]; for (int i = 0; i < ntargets; i++) last_run.targetIDs[i].rank = -1; for (int i = 0; i < ntargets; i++) { Target target = targetIDs[i]; int j = 0; int found = 0; for (; j < ntargets; j++) if (last_run.targetIDs[j].id == target.id) { last_run.targetIDs[j] = target; found = 1; } if (!found) { /* ERROR - new target introduced */ printf(" > %d, %d\n", target.id, target.rank); assert(0); } } rank2st[0] = -1; for (int i = 0; i < ntargets; i++) { 
st2rank[i] = last_run.targetIDs[i].rank; rank2st[st2rank[i]] = i; } } MPI_Bcast(st2rank, sizeof(st2rank), MPI_BYTE, 0, MPI_COMM_WORLD); MPI_Bcast(rank2st, sizeof(rank2st), MPI_BYTE, 0, MPI_COMM_WORLD); PROF_END(init); if (mpi_rank == 0) printf("%d(rank=%d), %d(rank=%d)\n", rebuild_target, st2rank[rebuild_target], helper, st2rank[helper]); PROF_START(main_work); memset(&pr_sender, 0, sizeof(pr_sender)); if (mpi_rank != 0 && rank2st[mpi_rank] != rebuild_target) { PersistentDB *pdb = pdb_init(); pdb_iterate(pdb, do_file); pdb_term(pdb); if (rank2st[mpi_rank] == helper) { int dummy; MPI_Ssend((void*)&dummy, sizeof(dummy), MPI_BYTE, st2rank[rebuild_target], 0, MPI_COMM_WORLD); } pr_add_tmp_to_total(&pr_sample); pr_report_progress(&pr_sender, pr_sample); pr_report_done(&pr_sender); } else if (rank2st[mpi_rank] == rebuild_target) { int helper_rank = st2rank[helper]; MPI_Status stat; int count; FileInfo fi; MPI_Recv(&fi, sizeof(FileInfo), MPI_BYTE, helper_rank, 0, MPI_COMM_WORLD, &stat); MPI_Get_count(&stat, MPI_BYTE, &count); while (count == sizeof(FileInfo)) { char key[200]; MPI_Recv(key, sizeof(key), MPI_BYTE, helper_rank, 0, MPI_COMM_WORLD, &stat); int keylen; MPI_Get_count(&stat, MPI_BYTE, &keylen); key[keylen] = '\0'; do_file(key, keylen, &fi); MPI_Recv(&fi, sizeof(FileInfo), MPI_BYTE, helper_rank, 0, MPI_COMM_WORLD, &stat); MPI_Get_count(&stat, MPI_BYTE, &count); } pr_add_tmp_to_total(&pr_sample); pr_report_progress(&pr_sender, pr_sample); pr_report_done(&pr_sender); } else if (mpi_rank == 0) { printf("st - total files | data read | data written | disk I/O\n"); pr_receive_loop(ntargets-1); } PROF_END(main_work); PROF_END(total); if (mpi_rank == 0) { printf("Overall timings: \n"); printf("init | %9.2f ms\n", 1e3*PROF_VAL(init)); printf("main_work | %9.2f ms\n", 1e3*PROF_VAL(main_work)); printf("total | %9.2f ms\n", 1e3*PROF_VAL(total)); } MPI_Barrier(MPI_COMM_WORLD); char *iter = hs.corrupt; for (size_t i = 0; i < hs.corrupt_count; i++) { printf("Potentially corrupt 
chunk: '%s'\n", iter); iter += strlen(iter); } MPI_Finalize(); }
/*
 * Reads the article in `file_name`, splitting it into sentences and
 * counting stemmed word occurrences.
 *
 * For each word: its core form is looked up in lang->exclude; words that
 * are not excluded are stemmed and counted in article->words (a new entry
 * keeps the stem string, a repeated one frees it again). A sentence is
 * closed, and its word count added to article->num_words, when
 * end_of_line() accepts the current word. A sentence that starts right
 * after a paragraph end is flagged is_para_begin.
 *
 * Returns SMRZR_OK on success; failures go through ERROR_RET.
 */
status_t parse_article(const char* file_name, lang_t* lang, article_t* article)
{
  string_t word, core, stem;
  sentence_t* sent;
  word_t* entry;
  stream_t* stream = &article->stream;
  bool_t is_new, is_para_end = SMRZR_FALSE;

  PROF_START;

  if(SMRZR_OK != stream_create(file_name, stream))
    ERROR_RET;

  while(!STREAM_END(stream)) {
    STREAM_FIND_WORD(stream);
    if(STREAM_END(stream))
      break;

    /* a new sentence starts at the current stream position */
    sent = sentence_new(&article->sentences, stream->curr);
    assert(NULL != sent);

    if(SMRZR_TRUE == is_para_end) {
      sent->is_para_begin = SMRZR_TRUE;
      is_para_end = SMRZR_FALSE;
    }

    while(!STREAM_END(stream)) {
      STREAM_GET_WORD(stream, word, is_para_end);
      sent->num_words++;

      core = get_word_core(&article->stack, lang, word);
      if(NULL == core)
        ERROR_RET;

      if(NULL != array_search(lang->exclude, core, comp_strings)) {
        /* excluded word: drop its core string again */
        array_pop_free(article->stack, core);
      } else {
        stem = get_word_stem(&article->stack, lang, core, SMRZR_TRUE);
        if(NULL == stem)
          ERROR_RET;

        entry = array_search_or_alloc(&article->words, stem, comp_word_by_stem, &is_new);
        if(NULL == entry)
          ERROR_RET;

        if(SMRZR_TRUE != is_new) {
          /* known stem: count it and drop the duplicate string */
          ++(entry->num_occ);
          array_pop_free(article->stack, stem);
        } else {
          entry->num_occ = 1;
          entry->stem = stem;
        }
      }

      if(end_of_line(lang, word)) {
        sent->end = word + strlen(word);
        article->num_words += sent->num_words;
        break;
      }
    }
  }

  PROF_END("article parsing");
  return(SMRZR_OK);
}