static void SplashLayout() { unichar_t *start, *pt, *lastspace; extern const char *source_modtime_str; extern const char *source_version_str; uc_strcpy(msg, "When my father finished his book on Renaissance printing (The Craft of Printing and the Publication of Shakespeare's Works) he told me that I would have to write the chapter on computer typography. This is my attempt to do so."); GDrawSetFont(splashw,splash_font); linecnt = 0; lines[linecnt++] = msg-1; for ( start = msg; *start!='\0'; start = pt ) { lastspace = NULL; for ( pt=start; ; ++pt ) { if ( *pt==' ' || *pt=='\0' ) { if ( GDrawGetTextWidth(splashw,start,pt-start)<splashimage.u.image->width-10 ) lastspace = pt; else break; if ( *pt=='\0' ) break; } } if ( lastspace!=NULL ) pt = lastspace; lines[linecnt++] = pt; if ( *pt ) ++pt; } uc_strcpy(pt, " FontForge used to be named PfaEdit."); pt += u_strlen(pt); lines[linecnt++] = pt; uc_strcpy(pt," git hash: ");; pt += u_strlen(pt); lines[linecnt++] = pt; uc_strcat(pt, FONTFORGE_GIT_VERSION); pt += u_strlen(pt); lines[linecnt++] = pt; uc_strcpy(pt," Version: ");; uc_strcat(pt,FONTFORGE_MODTIME_STR); pt += u_strlen(pt); lines[linecnt++] = pt; uc_strcat(pt," ("); uc_strcat(pt,FONTFORGE_MODTIME_STR); uc_strcat(pt,"-ML"); #ifdef FREETYPE_HAS_DEBUGGER uc_strcat(pt,"-TtfDb"); #endif uc_strcat(pt,")"); pt += u_strlen(pt); lines[linecnt++] = pt; uc_strcpy(pt," Lib Version: "); uc_strcat(pt,FONTFORGE_MODTIME_STR); lines[linecnt++] = pt+u_strlen(pt); lines[linecnt] = NULL; is = u_strchr(msg,'('); ie = u_strchr(msg,')'); }
/*
 * Split a URL of the form
 *     proto://[user[:password]@]host[:port]/url-path
 * into its components.
 *
 * url       the URL to decompose (not modified).
 * host      receives a newly allocated copy of the host name, or NULL when
 *           the URL contains no "://".
 * port      receives the port number, -1 when none is given, or -2 when the
 *           port text is not a decimal number in the range 0..65535.
 * username  receives a newly allocated user name, or NULL.
 * password  receives a newly allocated password, or NULL; when a user name
 *           is present the value is whatever GIO_PasswordCache() returns.
 *
 * Returns a newly allocated copy of the path ("/" when the URL has none).
 * When the URL contains no "://" the whole URL is returned as the path.
 */
char *_GIO_decomposeURL(const unichar_t *url,char **host, int *port, char **username,
        char **password) {
    unichar_t *pt, *pt2, *upt, *ppt;
    char *path;
    char proto[40];
    size_t protolen;

    *username = NULL; *password = NULL; *port = -1;
    pt = uc_strstr(url,"://");
    if ( pt==NULL ) {
        *host = NULL;
        return( cu_copy(url));
    }

    /* Copy the scheme, always leaving room for (and writing) the terminator */
    protolen = (size_t)(pt-url);
    if ( protolen>=sizeof(proto) )
        protolen = sizeof(proto)-1;
    cu_strncpy(proto,url,protolen);
    proto[protolen] = '\0';

    pt += 3;                            /* skip "://" */

    /* pt2 marks the end of the authority part (user, host, port) */
    pt2 = u_strchr(pt,'/');
    if ( pt2==NULL ) {
        pt2 = pt+u_strlen(pt);
        path = copy("/");
    } else {
        path = cu_copy(pt2);
    }

    upt = u_strchr(pt,'@');
    if ( upt!=NULL && upt<pt2 ) {
        ppt = u_strchr(pt,':');
        /* A ':' only separates user from password when it precedes the '@'; */
        /*  one found after the '@' introduces the port instead */
        if ( ppt==NULL || ppt>upt )
            *username = cu_copyn(pt,upt-pt);
        else {
            *username = cu_copyn(pt,ppt-pt);
            *password = cu_copyn(ppt+1,upt-ppt-1);
        }
        pt = upt+1;
    }

    ppt = u_strchr(pt,':');
    if ( ppt!=NULL && ppt<pt2 ) {
        char *temp = cu_copyn(ppt+1,pt2-ppt-1), *end;
        long value = strtol(temp,&end,10);
        /* Reject trailing junk and values that would not survive the */
        /*  narrowing to int / are not valid port numbers */
        if ( *end!='\0' || value<0 || value>65535 )
            *port = -2;
        else
            *port = (int) value;
        free(temp);
        pt2 = ppt;
    }
    *host = cu_copyn(pt,pt2-pt);
    if ( *username )
        *password = GIO_PasswordCache(proto,*host,*username,*password);
    return( path );
}
/*
 * Normalize a path (or the path part of a URL) in place:
 *   - empty components ("//") are removed,
 *   - "./" components are removed,
 *   - "dir/../" collapses to nothing.
 * A "../" that has no preceding component to cancel (it is first, or it
 * follows another "../") is left untouched.  Only complete "../" components
 * are recognised, so names such as "..foo/" and a trailing ".." are kept.
 *
 * For "scheme://host/path" only the text after the first '/' following the
 * host is normalized; a URL with no such '/' is returned unchanged.
 *
 * Returns name (the same buffer that was passed in).
 */
unichar_t *u_GFileNormalize(unichar_t *name) {
    unichar_t *pt, *base, *ppt;

    if ( (pt = uc_strstr(name,"://"))!=NULL ) {
        base = u_strchr(pt+3,'/');
        if ( base==NULL )
            return( name );
        ++base;
    } else if ( *name=='/' )
        base = name+1;
    else
        base = name;

    /* Invariant: pt is either at base or just after a '/' */
    for ( pt=base; *pt!='\0'; ) {
        if ( *pt=='/' )
            u_strcpy(pt,pt+1);
        else if ( uc_strncmp(pt,"./",2)==0 )
            u_strcpy(pt,pt+2);
        else if ( uc_strncmp(pt,"../",3)==0 ) {
            if ( pt==base ) {
                /* Nothing in front of it to cancel */
                pt += 3;
            } else {
                /* Find the start of the previous component without ever */
                /*  stepping in front of base */
                ppt = pt-1;
                while ( ppt>base && ppt[-1]!='/' )
                    --ppt;
                if ( pt-ppt==3 && ppt[0]=='.' && ppt[1]=='.' ) {
                    /* Previous component is itself "..": keep both */
                    pt += 3;
                } else {
                    u_strcpy(ppt,pt+3);
                    pt = ppt;
                }
            }
        } else {
            while ( *pt!='/' && *pt!='\0' ) ++pt;
            if ( *pt == '/' ) ++pt;
        }
    }
    return( name );
}
/**
 * Allocates and returns the token_t that represents 'str'.
 *
 *  - If 'str' equals an entry of the 'keywords' table (the table ends with an
 *    entry whose 'str' is NULL), the token takes that keyword's type and
 *    carries no string.
 *  - If 'str' starts with '<', it must end with '>' (anything else is a fatal
 *    error). The closing '>' is overwritten with '\0' in the caller's buffer
 *    and the token is a TOK_ANGLE holding a copy of the text in between.
 *  - Otherwise the token is a TOK_STR holding a copy of 'str'.
 *
 * The 'next' field of the returned token is always NULL.
 */
token_t* new_token_t(unichar* str) {
    token_t* token = (token_t*)malloc(sizeof(token_t));
    if (token == NULL) {
        fatal_alloc_error("new_token_t");
    }
    token->next = NULL;

    /* Keyword lookup: stop on the first match or on the NULL sentinel */
    const keyword_t* kw = keywords;
    while (kw->str != NULL && u_strcmp(str, kw->str) != 0) {
        kw++;
    }
    if (kw->str != NULL) {
        token->type = kw->val;
        token->str = NULL;
        return token;
    }

    if (*str != '<') {
        /* Plain string token */
        token->type = TOK_STR;
        token->str = u_strdup(str);
        return token;
    }

    /* Angle-bracket token: the '>' must be the very last character */
    unichar* closing = u_strchr(str, '>');
    if (closing == NULL || closing[1] != '\0') {
        fatal_error("Invalid token: '%S'\n",str);
    }
    *closing = '\0';
    token->type = TOK_ANGLE;
    token->str = u_strdup(str + 1);
    return token;
}
/**
 * Loads the break dictionary registered for 'script'.
 *
 * The dictionary file name is looked up in the break-iterator resource tree
 * under "dictionaries"/<short script name>, split at the first '.' into a
 * base name and an extension, and opened with udata_open().
 *
 * @param script  script whose dictionary is wanted.
 * @return a new CompactTrieDictionary, or NULL. When a file name was found in
 *         the resources but the data could not be opened, a dummy dictionary
 *         containing only the single word U+0020 is built instead.
 */
const CompactTrieDictionary *
ICULanguageBreakFactory::loadDictionaryFor(UScriptCode script, int32_t /*breakType*/) {
    UErrorCode status = U_ZERO_ERROR;
    // Open root from brkitr tree.
    char dictnbuff[256];
    char ext[4]={'\0'};

    dictnbuff[0] = 0;   // never hand an uninitialized name to udata_open()

    UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
    b = ures_getByKeyWithFallback(b, "dictionaries", b, &status);
    b = ures_getByKeyWithFallback(b, uscript_getShortName(script), b, &status);
    int32_t dictnlength = 0;
    const UChar *dictfname = ures_getString(b, &dictnlength, &status);
    if (U_SUCCESS(status) && (size_t)dictnlength >= sizeof(dictnbuff)) {
        dictnlength = 0;
        status = U_BUFFER_OVERFLOW_ERROR;
    }
    if (U_SUCCESS(status) && dictfname) {
        UChar* extStart=u_strchr(dictfname, 0x002e);
        int len = 0;
        if(extStart!=NULL){
            len = (int)(extStart-dictfname);
            // Copy only the characters that really follow the '.', at most
            // what fits in ext, and terminate explicitly. (Copying a fixed
            // sizeof(ext) characters read past the end of short extensions
            // and left long ones unterminated.)
            int32_t extLen = dictnlength - len - 1;
            if (extLen > (int32_t)(sizeof(ext) - 1)) {
                extLen = (int32_t)(sizeof(ext) - 1);
            }
            u_UCharsToChars(extStart+1, ext, extLen);
            ext[extLen] = 0;
            u_UCharsToChars(dictfname, dictnbuff, len);
        }
        dictnbuff[len]=0; // nul terminate
    }
    ures_close(b);

    UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext, dictnbuff, &status);
    if (U_SUCCESS(status)) {
        const CompactTrieDictionary *dict = new CompactTrieDictionary(
                                                           file, status);
        if (dict == NULL) {
            // Nobody took ownership of the data file.
            udata_close(file);
            if (U_SUCCESS(status)) {
                status = U_MEMORY_ALLOCATION_ERROR;
            }
        }
        if (U_FAILURE(status)) {
            delete dict;
            dict = NULL;
        }
        return dict;
    } else if (dictfname != NULL){
        //create dummy dict if dictionary filename not valid
        UChar c = 0x0020;
        status = U_ZERO_ERROR;
        MutableTrieDictionary *mtd = new MutableTrieDictionary(c, status, TRUE);
        if (mtd == NULL) {
            return NULL;
        }
        mtd->addWord(&c, 1, status, 1);
        const CompactTrieDictionary *dummy = new CompactTrieDictionary(*mtd, status);
        // NOTE(review): assumes the compact dictionary keeps no reference to
        // the mutable one it was built from (it is only taken by const
        // reference here) -- confirm against the CompactTrieDictionary
        // constructor before relying on this delete.
        delete mtd;
        if (dummy != NULL && U_FAILURE(status)) {
            delete dummy;
            dummy = NULL;
        }
        return dummy;
    }
    return NULL;
}
/*
 * Rewrite a message pattern so that its apostrophes are quoted the way
 * umsg_autoQuoteApostrophe() produces them.
 *
 * spattern      in/out: the pattern. When it contains an apostrophe the old
 *               buffer is freed and replaced by a newly allocated one.
 * spattern_len  in/out: length of the pattern in UChars.
 * ec            in/out: ICU error code.
 *
 * Returns SUCCESS, or FAILURE when ICU reports an error. On FAILURE the
 * original pattern has been freed; *spattern is set to NULL and
 * *spattern_len to 0 so the caller is not left with a dangling pointer.
 * Patterns that are NULL, empty, or contain no apostrophe are left untouched.
 */
int msgformat_fix_quotes(UChar **spattern, uint32_t *spattern_len, UErrorCode *ec)
{
    if(*spattern && *spattern_len && u_strchr(*spattern, (UChar)'\'')) {
        /* Worst case every character doubles: room for 2*len+1 UChars */
        UChar *npattern = safe_emalloc(sizeof(UChar)*2, *spattern_len, sizeof(UChar));
        uint32_t npattern_len;
        npattern_len = umsg_autoQuoteApostrophe(*spattern, *spattern_len, npattern, 2*(*spattern_len)+1, ec);
        efree(*spattern);
        if( U_FAILURE(*ec) )
        {
            /* Release the scratch buffer too and do not leave the caller */
            /* holding a pointer to the block that was just freed. */
            efree(npattern);
            *spattern = NULL;
            *spattern_len = 0;
            return FAILURE;
        }
        /* Shrink to the size actually used (plus terminator) */
        npattern = erealloc(npattern, sizeof(UChar)*(npattern_len+1));
        *spattern = npattern;
        *spattern_len = npattern_len;
    }
    return SUCCESS;
}
/*
 * Find the first occurrence of code point c in the NUL-terminated UTF-16
 * string s.
 *
 * BMP code points are delegated to u_strchr(). A supplementary code point is
 * searched for as its lead/trail surrogate pair. Values above
 * UCHAR_MAX_VALUE are not code points and are never found.
 *
 * Returns a pointer to the first code unit of the match, or NULL.
 */
U_CAPI UChar* U_EXPORT2
u_strchr32(const UChar* s, UChar32 c)
{
    const uint32_t cp = (uint32_t) c;

    if (cp > UCHAR_MAX_VALUE) {
        /* not a Unicode code point, not findable */
        return NULL;
    }
    if (cp <= U_BMP_MAX) {
        /* single code unit */
        return u_strchr(s, (UChar) c);
    }

    {
        /* supplementary code point: look for the surrogate pair */
        const UChar lead = U16_LEAD(c);
        const UChar trail = U16_TRAIL(c);

        for (; *s != 0; ++s) {
            /* s[1] is only read when s[0] is a non-NUL unit, so it is in bounds */
            if (s[0] == lead && s[1] == trail) {
                return (UChar*) s;
            }
        }
    }
    return NULL;
}
/*
 * Parse one line of word-list input into an array of WordListChar records.
 *
 * sf              font whose glyphs are looked up / created.
 * input_const     the line to parse (copied, never modified).
 * getUnicodeFunc  called with (glyph, udata) for every glyph referenced by
 *                 name; a return of -1 makes this function fall back to the
 *                 font's back map.
 * udata           opaque pointer handed to getUnicodeFunc.
 *
 * Syntax handled here:
 *   '#'        starts a comment that runs to the end of the line, unless it
 *              is directly preceded by '/'.
 *   '[' ']'    switch "selected" on/off for the glyphs in between.
 *   '/' '\\'   start a glyph name; if no glyph can be read the character is
 *              treated like any other code point.
 *   other      looked up (or created) by code point.
 *
 * Returns a calloc()ed array of WordListLineSz entries which the caller must
 * free, or NULL if memory could not be allocated. At most WordListLineSz-1
 * entries are filled so that the last one always stays zeroed; input beyond
 * that is ignored.
 */
WordListLine WordlistEscapedInputStringToParsedDataComplex(
    SplineFont* sf,
    const unichar_t* input_const,
    WordlistEscapedInputStringToRealString_getFakeUnicodeOfScFunc getUnicodeFunc,
    void* udata )
{
    unichar_t* input = u_copy( input_const );
    WordListChar* ret = calloc( WordListLineSz, sizeof(WordListChar));
    if( input == NULL || ret == NULL ) {
        free(input);
        free(ret);
        return NULL;
    }

    WordListChar* out = ret;
    /* One slot is held back so the array never loses its zeroed last entry */
    WordListChar* out_limit = ret + WordListLineSz - 1;
    unichar_t* in = input;
    unichar_t* in_end;

    // trim comment and beyond from input
    {
        unichar_t* p = input;
        while( (p = u_strchr( p, '#' )) != NULL ) {
            if( p > input && *(p-1) == '/' ) {
                /* "/#" is not a comment start; keep looking */
                p++;
                continue;
            }
            *p = '\0';
            break;
        }
    }
    in_end = input + u_strlen(input);

    int addingGlyphsToSelected = 0;
    int currentGlyphIndex = -1;
    for ( ; in < in_end && out < out_limit; in++ ) {
        unichar_t ch = *in;
        TRACE("in:%p end:%p got char %d %c\n", in, in_end, ch, ch );
        if( ch == '[' ) {
            addingGlyphsToSelected = 1;
            continue;
        }
        if( ch == ']' ) {
            addingGlyphsToSelected = 0;
            continue;
        }
        int isSelected = addingGlyphsToSelected;
        currentGlyphIndex++;
        if( ch == '/' || ch == '\\' ) {
            // start of a glyph name
            unichar_t glyphname[ PATH_MAX+1 ];
            unichar_t* updated_in = 0;
            SplineChar* sc = u_WordlistEscapedInputStringToRealString_readGlyphName( sf, in, in_end, &updated_in, glyphname );
            if( sc ) {
                in = updated_in;
                int n = getUnicodeFunc( sc, udata );
                if( n == -1 ) {
                    /*
                     * Okay, this probably means we've got an unencoded glyph (generally
                     * used for OpenType substitutions).
                     * Redeem the value from the SplineFont datamap instead of fetching from
                     * the Unicode identifier.
                     */
                    n = sf->map->backmap[sc->orig_pos];
                    /*
                     * Unencoded glyphs have special mappings in the SplineFont that
                     * start from 65536 (values beyond Unicode, 65535 being the reserved
                     * "frontier" value).
                     */
                    if ( (sf->map->enc->is_unicodebmp || sf->map->enc->is_unicodefull) && n < 65536 ) {
                        TRACE("ToRealString: backmapped position does not match Unicode encoding\n");
                        TRACE("orig_pos: %d, backmap: %d, attached unicode enc: %d\n", sc->orig_pos, n, sc->unicodeenc );
                        TRACE("ToRealString: INVALID CHAR POSITION, name: %s\n", sc->name );
                    }
                }
                out->sc = sc;
                out->isSelected = isSelected;
                out->currentGlyphIndex = currentGlyphIndex;
                out->n = n;
                out++;
                /* out = utf8_idpb( out, n, 0 ); */
                /* if( !out ) */
                /*     printf("ToRealString error on out\n"); */
                continue;
            }
        }
        /* If we reach this point, we're looking based on codepoint. */
        SplineChar* sc = SFGetOrMakeChar( sf, (int)ch, 0 );
        out->sc = sc;
        out->isSelected = isSelected;
        out->currentGlyphIndex = currentGlyphIndex;
        out++;
    }
    free(input);
    return(ret);
}
/* Instead of having a separate pass for 'special' patterns, reintegrate the two
 * so we don't get bitten by preflight bugs again.  We can be reasonably efficient
 * without two separate code paths, this code isn't that performance-critical.
 *
 * This code is general enough to deal with patterns that have a prefix or swap the
 * language and remainder components, since we gave developers enough rope to do such
 * things if they futz with the pattern data.  But since we don't give them a way to
 * specify a pattern for arbitrary combinations of components, there's not much use in
 * that.  I don't think our data includes such patterns, the only variable I know of is
 * whether there is a space before the open paren, or not.  Oh, and zh uses different
 * chars than the standard open/close paren (which ja and ko use, btw).
 */
/*
 * Build the display name of 'locale', localized for 'displayLocale', in 'dest'.
 *
 * The result is assembled from two substitutions of a pattern (default
 * "{0} ({1})"):
 *   - the language display name, and
 *   - "everything else": script, country, variant and each keyword=value
 *     pair, joined by the text found between {0} and {1} of the separator
 *     pattern (default "{0}, {1}").
 * Pattern and separator are read from the U_ICUDATA_LANG bundle of
 * displayLocale; the defaults are used when the corresponding length comes
 * back as 0.  Inside the "everything else" part, the pattern's open/close
 * parenthesis characters are replaced by square brackets (fullwidth variants
 * when the pattern contains U+FF08).
 *
 * locale         locale whose name is wanted.
 * displayLocale  locale in which the name is to be displayed.
 * dest           output buffer; may be NULL only when destCapacity is 0.
 * destCapacity   capacity of dest in UChars; must not be negative.
 * pErrorCode     in/out ICU error code; nothing is done if it is NULL or
 *                already indicates a failure.
 *
 * Returns the value of u_terminateUChars(dest, destCapacity, length,
 * pErrorCode), where length is the length computed for the full display name.
 * Returns 0 on entry errors (U_ILLEGAL_ARGUMENT_ERROR for a negative
 * capacity, a NULL dest with a positive capacity, or a separator/pattern that
 * lacks {0} or {1}).  U_BUFFER_OVERFLOW_ERROR reported by the component
 * getters is cleared while the name is being assembled, so that the length
 * keeps being computed after the buffer is full.
 */
U_CAPI int32_t U_EXPORT2
uloc_getDisplayName(const char *locale,
                    const char *displayLocale,
                    UChar *dest, int32_t destCapacity,
                    UErrorCode *pErrorCode)
{
    static const UChar defaultSeparator[9] = { 0x007b, 0x0030, 0x007d, 0x002c, 0x0020, 0x007b, 0x0031, 0x007d, 0x0000 }; /* "{0}, {1}" */
    static const UChar sub0[4] = { 0x007b, 0x0030, 0x007d , 0x0000 } ; /* {0} */
    static const UChar sub1[4] = { 0x007b, 0x0031, 0x007d , 0x0000 } ; /* {1} */
    static const int32_t subLen = 3;
    static const UChar defaultPattern[10] = {
        0x007b, 0x0030, 0x007d, 0x0020, 0x0028, 0x007b, 0x0031, 0x007d, 0x0029, 0x0000
    }; /* {0} ({1}) */
    static const int32_t defaultPatLen = 9;
    static const int32_t defaultSub0Pos = 0;
    static const int32_t defaultSub1Pos = 5;

    int32_t length; /* of formatted result */

    const UChar *separator;
    int32_t sepLen = 0;
    const UChar *pattern;
    int32_t patLen = 0;
    int32_t sub0Pos, sub1Pos;

    UChar formatOpenParen         = 0x0028; // (
    UChar formatReplaceOpenParen  = 0x005B; // [
    UChar formatCloseParen        = 0x0029; // )
    UChar formatReplaceCloseParen = 0x005D; // ]

    UBool haveLang = TRUE; /* assume true, set false if we find we don't have
                              a lang component in the locale */
    UBool haveRest = TRUE; /* assume true, set false if we find we don't have
                              any other component in the locale */
    UBool retry = FALSE; /* set true if we need to retry, see below */

    int32_t langi = 0; /* index of the language substitution (0 or 1), virtually always 0 */

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* Fetch separator and pattern for displayLocale.  A private status is
     * used, so a lookup failure here is not reported to the caller; it only
     * leaves sepLen/patLen at 0, which selects the defaults below. */
    {
        UErrorCode status = U_ZERO_ERROR;
        UResourceBundle* locbundle=ures_open(U_ICUDATA_LANG, displayLocale, &status);
        UResourceBundle* dspbundle=ures_getByKeyWithFallback(locbundle, _kLocaleDisplayPattern,
                                                             NULL, &status);

        separator=ures_getStringByKeyWithFallback(dspbundle, _kSeparator, &sepLen, &status);
        pattern=ures_getStringByKeyWithFallback(dspbundle, _kPattern, &patLen, &status);

        ures_close(dspbundle);
        ures_close(locbundle);
    }

    /* If we couldn't find any data, then use the defaults */
    if(sepLen == 0) {
       separator = defaultSeparator;
    }
    /* #10244: Even though separator is now a pattern, it is awkward to handle it as such
     * here since we are trying to build the display string in place in the dest buffer,
     * and to handle it as a pattern would entail having separate storage for the
     * substrings that need to be combined (the first of which may be the result of
     * previous such combinations). So for now we continue to treat the portion between
     * {0} and {1} as a string to be appended when joining substrings, ignoring anything
     * that is before {0} or after {1} (no existing separator pattern has any such thing).
     * This is similar to how pattern is handled below.
     */
    {
        UChar *p0=u_strstr(separator, sub0);
        UChar *p1=u_strstr(separator, sub1);
        if (p0==NULL || p1==NULL || p1<p0) {
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
        /* From here on 'separator' is only the text between {0} and {1} */
        separator = (const UChar *)p0 + subLen;
        sepLen = p1 - separator;
    }

    if(patLen==0 || (patLen==defaultPatLen && !u_strncmp(pattern, defaultPattern, patLen))) {
        pattern=defaultPattern;
        patLen=defaultPatLen;
        sub0Pos=defaultSub0Pos;
        sub1Pos=defaultSub1Pos;
        // use default formatOpenParen etc. set above
    } else { /* non-default pattern */
        UChar *p0=u_strstr(pattern, sub0);
        UChar *p1=u_strstr(pattern, sub1);
        if (p0==NULL || p1==NULL) {
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
        sub0Pos=p0-pattern;
        sub1Pos=p1-pattern;
        if (sub1Pos < sub0Pos) { /* a very odd pattern */
            /* keep sub0Pos as the earlier position and remember that the
             * language is the second substitution */
            int32_t t=sub0Pos; sub0Pos=sub1Pos; sub1Pos=t;
            langi=1;
        }
        if (u_strchr(pattern, 0xFF08) != NULL) {
            formatOpenParen         = 0xFF08; // fullwidth (
            formatReplaceOpenParen  = 0xFF3B; // fullwidth [
            formatCloseParen        = 0xFF09; // fullwidth )
            formatReplaceCloseParen = 0xFF3D; // fullwidth ]
        }
    }

    /* We loop here because there is one case in which after the first pass we could need to
     * reextract the data.  If there's initial padding before the first element, we put in
     * the padding and then write that element.  If it turns out there's no second element,
     * we didn't need the padding.  If we do need the data (no preflight), and the first element
     * would have fit but for the padding, we need to reextract.  In this case (only) we
     * adjust the parameters so padding is not added, and repeat.
     */
    do {
        UChar* p=dest;
        int32_t patPos=0; /* position in the pattern, used for non-substitution portions */
        int32_t langLen=0; /* length of language substitution */
        int32_t langPos=0; /* position in output of language substitution */
        int32_t restLen=0; /* length of 'everything else' substitution */
        int32_t restPos=0; /* position in output of 'everything else' substitution */
        UEnumeration* kenum = NULL; /* keyword enumeration */

        /* prefix of pattern, extremely likely to be empty */
        if(sub0Pos) {
            if(destCapacity >= sub0Pos) {
                while (patPos < sub0Pos) {
                    *p++ = pattern[patPos++];
                }
            } else {
                patPos=sub0Pos;
            }
            length=sub0Pos;
        } else {
            length=0;
        }

        /* subi only advances once a substitution is complete (subdone); the
         * "everything else" substitution takes several iterations, one per
         * component, counted by resti. */
        for(int32_t subi=0,resti=0;subi<2;) { /* iterate through patterns 0 and 1*/
            UBool subdone = FALSE; /* set true when ready to move to next substitution */

            /* prep p and cap for calls to get display components, pin cap to 0 since
               they complain if cap is negative */
            int32_t cap=destCapacity-length;
            if (cap <= 0) {
                cap=0;
            } else {
                p=dest+length;
            }

            if (subi == langi) { /* {0}*/
                if(haveLang) {
                    langPos=length;
                    langLen=uloc_getDisplayLanguage(locale, displayLocale, p, cap, pErrorCode);
                    length+=langLen;
                    haveLang=langLen>0;
                }
                subdone=TRUE;
            } else { /* {1} */
                if(!haveRest) {
                    subdone=TRUE;
                } else {
                    int32_t len; /* length of component (plus other stuff) we just fetched */
                    switch(resti++) {
                        case 0:
                            restPos=length;
                            len=uloc_getDisplayScriptInContext(locale, displayLocale, p, cap, pErrorCode);
                            break;
                        case 1:
                            len=uloc_getDisplayCountry(locale, displayLocale, p, cap, pErrorCode);
                            break;
                        case 2:
                            len=uloc_getDisplayVariant(locale, displayLocale, p, cap, pErrorCode);
                            break;
                        case 3:
                            kenum = uloc_openKeywords(locale, pErrorCode);
                            /* fall through */
                        default: {
                            /* one keyword per iteration until the enumeration is exhausted */
                            const char* kw=uenum_next(kenum, &len, pErrorCode);
                            if (kw == NULL) {
                                uenum_close(kenum);
                                len=0; /* mark that we didn't add a component */
                                subdone=TRUE;
                            } else {
                                /* incorporating this behavior into the loop made it even more complex,
                                   so just special case it here */
                                len = uloc_getDisplayKeyword(kw, displayLocale, p, cap, pErrorCode);
                                if(len) {
                                    if(len < cap) {
                                        p[len]=0x3d; /* '=', assume we'll need it */
                                    }
                                    len+=1;

                                    /* adjust for call to get keyword */
                                    cap-=len;
                                    if(cap <= 0) {
                                        cap=0;
                                    } else {
                                        p+=len;
                                    }
                                }
                                /* reset for call below */
                                if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
                                    *pErrorCode=U_ZERO_ERROR;
                                }
                                int32_t vlen = uloc_getDisplayKeywordValue(locale, kw, displayLocale,
                                                                           p, cap, pErrorCode);
                                if(len) {
                                    if(vlen==0) {
                                        --len; /* remove unneeded '=' */
                                    }
                                    /* restore cap and p to what they were at start */
                                    cap=destCapacity-length;
                                    if(cap <= 0) {
                                        cap=0;
                                    } else {
                                        p=dest+length;
                                    }
                                }
                                len+=vlen; /* total we added for key + '=' + value */
                            }
                        } break;
                    } /* end switch */

                    if (len>0) {
                        /* we added a component, so add separator and write it if there's room. */
                        if(len+sepLen<=cap) {
                            const UChar * plimit = p + len;
                            /* the component must not contain the pattern's own parentheses */
                            for (; p < plimit; p++) {
                                if (*p == formatOpenParen) {
                                    *p = formatReplaceOpenParen;
                                } else if (*p == formatCloseParen) {
                                    *p = formatReplaceCloseParen;
                                }
                            }
                            for(int32_t i=0;i<sepLen;++i) {
                                *p++=separator[i];
                            }
                        }
                        length+=len+sepLen;
                    } else if(subdone) {
                        /* remove separator if we added it */
                        if (length!=restPos) {
                            length-=sepLen;
                        }
                        restLen=length-restPos;
                        haveRest=restLen>0;
                    }
                }
            }

            /* overflow only means "keep counting"; the final length is what matters */
            if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
                *pErrorCode=U_ZERO_ERROR;
            }

            if(subdone) {
                if(haveLang && haveRest) {
                    /* append internal portion of pattern, the first time,
                       or last portion of pattern the second time */
                    int32_t padLen;
                    patPos+=subLen;
                    padLen=(subi==0 ? sub1Pos : patLen)-patPos;
                    if(length+padLen < destCapacity) {
                        p=dest+length;
                        for(int32_t i=0;i<padLen;++i) {
                            *p++=pattern[patPos++];
                        }
                    } else {
                        patPos+=padLen;
                    }
                    length+=padLen;
                } else if(subi==0) {
                    /* don't have first component, reset for second component */
                    sub0Pos=0;
                    length=0;
                } else if(length>0) {
                    /* true length is the length of just the component we got. */
                    length=haveLang?langLen:restLen;
                    if(dest && sub0Pos!=0) {
                        if (sub0Pos+length<=destCapacity) {
                            /* first component not at start of result,
                               but we have full component in buffer. */
                            u_memmove(dest, dest+(haveLang?langPos:restPos), length);
                        } else {
                            /* would have fit, but didn't because of pattern prefix. */
                            sub0Pos=0; /* stops initial padding (and a second retry,
                                          so we won't end up here again) */
                            retry=TRUE;
                        }
                    }
                }

                ++subi; /* move on to next substitution */
            }
        }
    } while(retry);

    return u_terminateUChars(dest, destCapacity, length, pErrorCode);
}
/*
 * Guess the character set that a font's encoding name refers to.
 *
 * The name is searched for well-known fragments using uc_strstrmatch(); the
 * tests are made in a fixed order and the first one that hits decides.
 * Returns em_none when nothing is recognised.
 */
enum charset _GDraw_ParseMapping(unichar_t *setname) {
    /* Names decided by a single fragment, in the order they are tried */
    static const struct { const char *fragment; enum charset set; } single[] = {
        { "latin1", em_iso8859_1 },
        { "latin2", em_iso8859_2 },
        { "latin3", em_iso8859_3 },
        { "latin4", em_iso8859_4 },
        { "latin5", em_iso8859_9 },
        { "latin6", em_iso8859_10 },
        { "latin7", em_iso8859_13 },
        { "latin8", em_iso8859_14 },
        { "latin0", em_iso8859_15 },
        { "latin9", em_iso8859_15 },
        { "koi8", em_koi8_r },
        /* The remaining ones are grasping at straws */
        { "cyrillic", em_iso8859_5 },
        { "greek", em_iso8859_7 },
        { "arabic", em_iso8859_6 },
        { "hebrew", em_iso8859_8 },
        { "thai", em_iso8859_11 },
        { "tis", em_iso8859_11 }
    };
    /* Same idea, for the fragments tried after the CJK encodings */
    static const struct { const char *fragment; enum charset set; } late[] = {
        { "big5", em_big5 },
        { "mac", em_mac },
        { "win", em_win }
    };
    unichar_t *pt;
    int val;
    int i;

    /* Unicode in its various spellings */
    if ( uc_strstrmatch(setname,"iso")!=NULL && uc_strstrmatch(setname,"10646")!=NULL )
        return( em_unicode );
    if ( uc_strstrmatch(setname,"UnicodePlane")!=NULL ) {
        pt = u_strchr(setname,'-');
        if ( pt==NULL )
            return( em_uplane0+1 );
        return( em_uplane0+u_strtol(pt+1,NULL,10) );
    }
    if ( uc_strstrmatch(setname,"unicode")!=NULL )
        return( em_unicode );

#if 0
    if ( uc_strstrmatch(setname,"ascii")!=NULL ||
            ( uc_strstrmatch(setname,"iso")!=NULL && uc_strstrmatch(setname,"646")!=NULL )) {
        char *lang = getenv( "LANG" );
        if ( lang==NULL || *lang=='\0' || (*lang=='e' && *lang=='n' ))
            return( em_iso8859_1 ); /* ascii can masquarade as iso8859-1 for english speakers (no accents needed) */
    }
#endif

    /* iso8859-N: one digit, or one of 10, 11, 13, 14, 15 */
    if ( uc_strstrmatch(setname,"iso")!=NULL &&
            (pt = uc_strstrmatch(setname,"8859"))!=NULL ) {
        pt += 4;
        if ( *pt=='-' )
            ++pt;
        if ( isdigit(*pt) ) {
            if ( !isdigit(pt[1]) )
                return( em_iso8859_1+*pt-'1' );
            val = (pt[0]-'0')*10 + pt[1]-'0';
            if ( val==10 || val==11 )
                return( em_iso8859_10+val-10 );
            if ( val>=13 && val<=15 )
                return( em_iso8859_13+val-13 );
        }
        /* anything else is not a part we know; keep looking */
    }

    for ( i=0; i<(int) (sizeof(single)/sizeof(single[0])); ++i )
        if ( uc_strstrmatch(setname,single[i].fragment)!=NULL )
            return( single[i].set );

    if ( uc_strstrmatch(setname,"jis")!=NULL ) {
        if ( uc_strstrmatch(setname,"201")!=NULL )
            return( em_jis201 );
        if ( uc_strstrmatch(setname,"208")!=NULL )
            return( em_jis208 );
        if ( uc_strstrmatch(setname,"212")!=NULL )
            return( em_jis212 );
        if ( uc_strstrmatch(setname,"213")!=NULL )      /* I don't support 213 */
            return( em_none );
        return( em_jis208 );
    }

    /* Seem to be several versions of 5601, we want 94x94 */
    if ( uc_strstrmatch(setname,"ksc")!=NULL && uc_strstrmatch(setname,"5601")!=NULL )
        return( em_ksc5601 );
    if ( uc_strstrmatch(setname,"gb")!=NULL && uc_strstrmatch(setname,"2312")!=NULL )
        return( em_gb2312 );

    for ( i=0; i<(int) (sizeof(late)/sizeof(late[0])); ++i )
        if ( uc_strstrmatch(setname,late[i].fragment)!=NULL )
            return( late[i].set );

    if ( IsUserMap(setname))
        return( em_user );

/* !!! Encodings used for postscript japanese fonts, which I don't understand */
#if 0
    if ( uc_strstrmatch(setname,"RJSJ")!=NULL )
        return( em_sjis );
    if ( uc_strstrmatch(setname,"EUC")!=NULL )
        return( em_euc );
#endif

    return( em_none );
}
/*
 * Find the first occurrence of the UTF-16 string 'sub' inside 's'.
 *
 * s, length        text to search; length -1 means NUL-terminated.
 * sub, subLength   substring to look for; subLength -1 means NUL-terminated.
 *
 * A candidate only counts if isMatchAtCPBoundary() accepts it, i.e. the match
 * does not split a surrogate pair of 's'.
 *
 * Returns a pointer to the start of the match or NULL if there is none.
 * Special cases: 's' itself is returned when 'sub' is NULL, subLength is
 * below -1, or 'sub' is empty; NULL is returned when 's' is NULL or length
 * is below -1.
 */
U_CAPI UChar* U_EXPORT2
u_strFindFirst(const UChar* s, int32_t length, const UChar* sub, int32_t subLength)
{
    const UChar *textStart, *hay, *needle, *needleLimit;
    UChar unit, first, want;

    if (sub == NULL || subLength < -1) {
        return (UChar*) s;
    }
    if (s == NULL || length < -1) {
        return NULL;
    }

    textStart = s;

    /* ---- both strings are NUL-terminated -------------------------------- */
    if (length < 0 && subLength < 0) {
        first = *sub++;
        if (first == 0) {
            return (UChar*) s;
        }
        if (*sub == 0 && !U16_IS_SURROGATE(first)) {
            /* a single, non-surrogate BMP code point */
            return u_strchr(s, first);
        }

        for (unit = *s++; unit != 0; unit = *s++) {
            if (unit != first) {
                continue;
            }
            /* first unit matches: compare the remainder */
            hay = s;
            needle = sub;
            for (;;) {
                want = *needle;
                if (want == 0) {
                    if (isMatchAtCPBoundary(textStart, s - 1, hay, NULL)) {
                        return (UChar*) (s - 1);        /* well-formed match */
                    }
                    break;      /* match would split a surrogate pair */
                }
                unit = *hay;
                if (unit == 0) {
                    return NULL;        /* text exhausted; no later match possible */
                }
                if (unit != want) {
                    break;
                }
                ++hay;
                ++needle;
            }
        }
        return NULL;
    }

    /* ---- the substring length is known (or computed) from here on ------- */
    if (subLength < 0) {
        subLength = u_strlen(sub);
    }
    if (subLength == 0) {
        return (UChar*) s;
    }

    /* split off sub[0] so it can be searched for quickly */
    first = *sub++;
    --subLength;
    needleLimit = sub + subLength;

    if (subLength == 0 && !U16_IS_SURROGATE(first)) {
        /* a single, non-surrogate BMP code point */
        return length < 0 ? u_strchr(s, first) : u_memchr(s, first, length);
    }

    if (length < 0) {
        /* s is NUL-terminated */
        for (unit = *s++; unit != 0; unit = *s++) {
            if (unit != first) {
                continue;
            }
            hay = s;
            needle = sub;
            for (;;) {
                if (needle == needleLimit) {
                    if (isMatchAtCPBoundary(textStart, s - 1, hay, NULL)) {
                        return (UChar*) (s - 1);        /* well-formed match */
                    }
                    break;      /* match would split a surrogate pair */
                }
                if (*hay == 0) {
                    return NULL;        /* text exhausted; no later match possible */
                }
                if (*hay != *needle) {
                    break;
                }
                ++hay;
                ++needle;
            }
        }
    } else {
        const UChar *textLimit, *lastStart;

        /* subLength was decremented above */
        if (length <= subLength) {
            return NULL;        /* s is shorter than sub */
        }

        textLimit = s + length;
        /* a match has to begin before lastStart to fit into the text */
        lastStart = textLimit - subLength;

        while (s != lastStart) {
            unit = *s++;
            if (unit != first) {
                continue;
            }
            hay = s;
            needle = sub;
            while (needle != needleLimit && *hay == *needle) {
                ++hay;
                ++needle;
            }
            if (needle == needleLimit &&
                    isMatchAtCPBoundary(textStart, s - 1, hay, textLimit)) {
                return (UChar*) (s - 1);        /* well-formed match */
            }
            /* mismatch, or the match would split a surrogate pair */
        }
    }

    /* not found */
    return NULL;
}
/** * Testing the discontigous contractions */ static void TestDiscontiguos() { const char *rulestr = "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315"; UChar rule[50]; int rulelen = u_unescape(rulestr, rule, 50); const char *src[] = { "ADB", "ADBC", "A\\u0315B", "A\\u0315BC", /* base character blocked */ "XD\\u0300", "XD\\u0300\\u0315", /* non blocking combining character */ "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315", /* blocking combining character */ "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315", /* contraction prefix */ "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315", "X\\u0300\\u031A\\u0315", /* ends not with a contraction character */ "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D", "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D" }; const char *tgt[] = { /* non blocking combining character */ "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC", /* base character blocked */ "X D \\u0300", "X D \\u0300\\u0315", /* non blocking combining character */ "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319", /* blocking combining character */ "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315", /* contraction prefix */ "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319", "X\\u0300 \\u031A \\u0315", /* ends not with a contraction character */ "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D" }; int size = 20; UCollator *coll; UErrorCode status = U_ZERO_ERROR; int count = 0; UCollationElements *iter; UCollationElements *resultiter; coll = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status); iter = ucol_openElements(coll, rule, 1, &status); resultiter = ucol_openElements(coll, rule, 1, &status); if (U_FAILURE(status)) { log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status)); return; } while (count < size) { UChar str[20]; UChar tstr[20]; int strLen = u_unescape(src[count], str, 20); UChar *s; 
ucol_setText(iter, str, strLen, &status); if (U_FAILURE(status)) { log_err("Error opening collation iterator\n"); return; } u_unescape(tgt[count], tstr, 20); s = tstr; log_verbose("count %d\n", count); for (;;) { uint32_t ce; UChar *e = u_strchr(s, 0x20); if (e == 0) { e = u_strchr(s, 0); } ucol_setText(resultiter, s, (int32_t)(e - s), &status); ce = ucol_next(resultiter, &status); if (U_FAILURE(status)) { log_err("Error manipulating collation iterator\n"); return; } while (ce != UCOL_NULLORDER) { if (ce != (uint32_t)ucol_next(iter, &status) || U_FAILURE(status)) { log_err("Discontiguos contraction test mismatch\n"); return; } ce = ucol_next(resultiter, &status); if (U_FAILURE(status)) { log_err("Error getting next collation element\n"); return; } } s = e + 1; if (*e == 0) { break; } } ucol_reset(iter); backAndForth(iter); count ++; } ucol_closeElements(resultiter); ucol_closeElements(iter); ucol_close(coll); }
/** * Computes training by extracting statistics from a tagged corpus file. */ void do_training(U_FILE* input_text,U_FILE* rforms_file,U_FILE* iforms_file){ /* these two hash tables are respectively for simple and compound entries */ struct string_hash_ptr* rforms_table = NULL, *iforms_table = NULL; if(rforms_file != NULL){ rforms_table = new_string_hash_ptr(200000); } if(iforms_file != NULL){ iforms_table = new_string_hash_ptr(200000); } /* we initialize a contextual matrix */ struct corpus_entry** context = new_context_matrix(); initialize_context_matrix(context); unichar line[MAX_TAGGED_CORPUS_LINE]; /* check the format of the corpus */ long previous_file_position = ftell(input_text); if(u_fgets(line,input_text) == EOF){ fatal_error("File is empty"); } fseek(input_text,previous_file_position,SEEK_SET); int format_corpus = check_corpus_entry(line); if(format_corpus == 0){ // the corpus is in the Tagger format, one word per line where line=word/tag while(u_fgets(line,input_text) !=EOF){ if(u_strlen(line) == 0){ initialize_context_matrix(context); } else{ corpus_entry* entry = new_corpus_entry(line); if(u_strchr(line,'_')!=NULL && line[0]!='_'){ corpus_entry** entries = extract_simple_words(entry); free_corpus_entry(entry); for(int i=0;entries[i]!=NULL;i++){ push_corpus_entry(entries[i],context); add_statistics(context,rforms_table,iforms_table); } free(entries); } else { push_corpus_entry(entry,context); add_statistics(context,rforms_table,iforms_table); } } } } else { // the corpus is in the Unitex tagged format, one sentence per line where token={word,lemma.tag} unichar *tmp,*s = (unichar*)malloc(sizeof(unichar)*(MAX_TAGGED_CORPUS_LINE)); int current_len,len; unsigned int i; while(u_fgets(line,input_text) != EOF){ current_len = 0, len = 0; /* extract each token of the sentence */ for (;;) { len = 1+u_strlen(line+current_len)-u_strlen(u_strchr(line+current_len,'}')); tmp = u_strcpy_sized(s,len-1,line+current_len+1); u_strcat(tmp,"\0"); if(u_strcmp(s,"S") == 0) 
break; //particular case: '\},\}.PONCT' if(line[current_len+2] == '}'){ int start = current_len+3; do{ tmp = u_strchr(line+start,'}'); start += 1+u_strlen(line+start)-u_strlen(tmp); } while(*(tmp+1) != ' '); tmp = u_strcpy_sized(s,start-current_len-1,line+current_len+1); u_strcat(tmp,"\0"); len += start-current_len-3; } /* format the {XX.YY} into standard tagger format, XX/YY */ unichar* newline = (unichar*)malloc(sizeof(unichar)*(8096)); if(u_strchr(s,',')[1] == ','){ u_strcpy(newline,","); } else u_strcpy_sized(newline,1+u_strlen(s)-u_strlen(u_strchr(s,',')),s); u_sprintf(newline,"%S/%S\0",newline,s+u_strrchr(s,'.')+1); for(i=0;i<u_strlen(newline);i++){ if(newline[i] == ' ') newline[i] = '_'; } //create corpus entry corpus_entry* entry = new_corpus_entry(newline); if(u_strchr(newline,'_') != NULL && newline[0] != '_'){ corpus_entry** entries = extract_simple_words(entry); free_corpus_entry(entry); for(int j=0;entries[j]!=NULL;j++){ push_corpus_entry(entries[j],context); add_statistics(context,rforms_table,iforms_table); } free(entries); } else { push_corpus_entry(entry,context); add_statistics(context,rforms_table,iforms_table); } free(newline); current_len += len+1; } initialize_context_matrix(context); } free(s); } free_context_matrix(context); /* we fill dictionary files with pairs (tuple,value) and then * we add a special line "CODE\tFEATURES,.value" in order to * specify whether the dictionary contains inflected or raw form tuples*/ unichar* str = u_strdup(""); if(rforms_table != NULL){ write_keys_values(rforms_table,rforms_table->hash->root,str,rforms_file); u_fprintf(rforms_file,"%s,.%d\n","CODE\tFEATURES",0); free_string_hash_ptr(rforms_table,NULL); } if(iforms_table != NULL){ write_keys_values(iforms_table,iforms_table->hash->root,str,iforms_file); u_fprintf(iforms_file,"%s,.%d\n","CODE\tFEATURES",1); free_string_hash_ptr(iforms_table,NULL); } free(str); }
/**
 * Collects locale data from the ICU resource bundle of 'locale' and returns
 * it as an Object[] filled through addObject, one call per available item.
 *
 * Items gathered (each one only when the corresponding resource is found):
 * first day of week / minimal days in first week, AM/PM markers, eras,
 * local pattern chars, month and weekday names, the eight date/time
 * patterns, decimal pattern chars, NaN and infinity strings, currency
 * symbols and the number/integer/currency/percent patterns. When 'needsTZ'
 * is JNI_TRUE an empty "timezones" entry is added first.
 *
 * @return the result array, or NULL when the bundle, its "calendar" or
 *         "gregorian" table cannot be opened, or when the locale string or
 *         the result array cannot be obtained from the JVM.
 */
static jobjectArray getContentImpl(JNIEnv* env, jclass clazz,
        jstring locale, jboolean needsTZ) {

    // UTF-16 set containing only '.', used to find the decimal separator
    static const UChar kDecimalPoint[] = { 0x002E, 0x0000 };

    UErrorCode status = U_ZERO_ERROR;

    const char *loc = env->GetStringUTFChars(locale, NULL);
    if(loc == NULL) {
        return NULL;
    }
    UResourceBundle *root = ures_openU(NULL, loc, &status);
    env->ReleaseStringUTFChars(locale, loc);
    if(U_FAILURE(status)) {
        LOGI("Error getting resources");
        return NULL;
    }

    jclass obj_class = env->FindClass("java/lang/Object");
    jclass integer_class = env->FindClass("java/lang/Integer");
    jmethodID integerInit = env->GetMethodID(integer_class, "<init>", "(I)V");
    jobjectArray result;

    jobject firstDayOfWeek = NULL;
    jobject minimalDaysInFirstWeek = NULL;
    jobjectArray amPmMarkers = NULL;
    jobjectArray eras = NULL;
    jstring localPatternChars = NULL;
    jobjectArray weekdays = NULL;
    jobjectArray shortWeekdays = NULL;
    jobjectArray months = NULL;
    jobjectArray shortMonths = NULL;
    jstring time_SHORT = NULL;
    jstring time_MEDIUM = NULL;
    jstring time_LONG = NULL;
    jstring time_FULL = NULL;
    jstring date_SHORT = NULL;
    jstring date_MEDIUM = NULL;
    jstring date_LONG = NULL;
    jstring date_FULL = NULL;
    jstring decimalPatternChars = NULL;
    jstring naN = NULL;
    jstring infinity = NULL;
    jstring currencySymbol = NULL;
    jstring intCurrencySymbol = NULL;
    jstring numberPattern = NULL;
    jstring integerPattern = NULL;
    jstring currencyPattern = NULL;
    jstring percentPattern = NULL;
    jobjectArray zones = NULL;

    // number of slots the result array needs
    int counter = 0;

    int firstDayVals[2] = {-1, -1};

    const jchar* nan = (const jchar *)NULL;
    const jchar* inf = (const jchar *)NULL;
    int nanL, infL;

    UResourceBundle *gregorian;
    UResourceBundle *gregorianElems = NULL;
    UResourceBundle *rootElems;

    // get the resources needed; root must be released on every exit path
    rootElems = ures_getByKey(root, "calendar", NULL, &status);
    if(U_FAILURE(status)) {
        ures_close(root);
        return NULL;
    }

    gregorian = ures_getByKey(rootElems, "gregorian", NULL, &status);
    if(U_FAILURE(status)) {
        ures_close(rootElems);
        ures_close(root);
        return NULL;
    }

    // adding the first day of week and minimal days in first week values
    getDayInitVector(env, gregorian, firstDayVals);
    if((firstDayVals[0] != -1) && (firstDayVals[1] != -1)) {
        firstDayOfWeek = env->NewObject(integer_class, integerInit, firstDayVals[0]);
        minimalDaysInFirstWeek = env->NewObject(integer_class, integerInit, firstDayVals[1]);
        // adding First_Day and Minimal_Days integer to the result
        counter += 2;
    }

    // adding ampm string array to the result
    amPmMarkers = getAmPmMarkers(env, gregorian);
    if(amPmMarkers != NULL) {
        counter++;
    }

    // adding eras string array to the result
    eras = getEras(env, gregorian);
    if(eras != NULL) {
        counter++;
    }

    // local pattern chars are initially always the same
    localPatternChars = env->NewStringUTF("GyMdkHmsSEDFwWahKzZ");
    // adding local pattern chars string to the result
    counter++;

    // adding month names string array to the result
    months = getMonthNames(env, gregorian);
    if(months != NULL) {
        counter++;
    }

    // adding short month names string array to the result
    shortMonths = getShortMonthNames(env, gregorian);
    if(shortMonths != NULL) {
        counter++;
    }

    // adding day names string array to the result
    weekdays = getWeekdayNames(env, gregorian);
    if(weekdays != NULL) {
        counter++;
    }

    // adding short day names string array to the result
    shortWeekdays = getShortWeekdayNames(env, gregorian);
    if(shortWeekdays != NULL) {
        counter++;
    }

    const UChar *pattern;
    jchar check[2] = {0, 0};
    u_uastrcpy(check, "v");
    jchar replacement[2] = {0, 0};
    u_uastrcpy(replacement, "z");
    jchar *pos;
    jchar *patternCopy;
    int patternLength;
    int patternIndex;
    // destinations of DateTimePatterns[1..7], in resource order
    jstring *calendarPatterns[] = {
        &time_LONG, &time_MEDIUM, &time_SHORT,
        &date_FULL, &date_LONG, &date_MEDIUM, &date_SHORT
    };

    // adding date and time format patterns to the result
    gregorianElems = ures_getByKey(gregorian, "DateTimePatterns", NULL, &status);
    if(U_FAILURE(status)) {
        status = U_ZERO_ERROR;
        goto endOfCalendar;
    }

    pattern = ures_getStringByIndex(gregorianElems, 0, &patternLength, &status);
    // the status has to be checked before pattern and patternLength are used
    if(U_FAILURE(status)) {
        status = U_ZERO_ERROR;
        goto endOfCalendar;
    }
    // there are some patterns in icu that use the pattern character 'v'
    // java doesn't accept this, so it gets replaced by 'z' which has
    // about the same result as 'v', the timezone name.
    // 'v' -> "PT", 'z' -> "PST", v is the generic timezone and z the standard tz
    // "vvvv" -> "Pacific Time", "zzzz" -> "Pacific Standard Time"
    patternCopy = (jchar *) malloc((patternLength + 1) * sizeof(jchar));
    if(patternCopy == NULL) {
        goto endOfCalendar;
    }
    u_strcpy(patternCopy, pattern);
    while((pos = u_strchr(patternCopy, check[0])) != NULL) {
        u_memset(pos, replacement[0], 1);
    }
    time_FULL = env->NewString(patternCopy, patternLength);
    free(patternCopy);
    counter++;

    // the remaining patterns are used unchanged; stop at the first missing one
    for(patternIndex = 1; patternIndex <= 7; patternIndex++) {
        pattern = ures_getStringByIndex(gregorianElems, patternIndex, &patternLength, &status);
        if(U_FAILURE(status)) {
            status = U_ZERO_ERROR;
            goto endOfCalendar;
        }
        *calendarPatterns[patternIndex - 1] = env->NewString(pattern, patternLength);
        counter++;
    }

endOfCalendar:

    if(gregorianElems != NULL) {
        ures_close(gregorianElems);
    }
    ures_close(gregorian);
    ures_close(rootElems);

    rootElems = ures_getByKey(root, "NumberElements", NULL, &status);
    if(U_FAILURE(status)) {
        status = U_ZERO_ERROR;
    }

    if(ures_getSize(rootElems) >= 11) {

        // adding decimal pattern chars to the result
        decimalPatternChars = getDecimalPatternChars(env, rootElems);
        if(decimalPatternChars != NULL) {
            counter++;
        }

        // adding NaN pattern char to the result
        nan = ures_getStringByIndex(rootElems, 10, &nanL, &status);
        if(U_SUCCESS(status)) {
            naN = env->NewString(nan, nanL);
            counter++;
        }
        status = U_ZERO_ERROR;

        // adding infinity pattern char to the result
        inf = ures_getStringByIndex(rootElems, 9, &infL, &status);
        if(U_SUCCESS(status)) {
            infinity = env->NewString(inf, infL);
            counter++;
        }
        status = U_ZERO_ERROR;
    }

    ures_close(rootElems);

    // adding intl currency code to result
    intCurrencySymbol = getIntCurrencyCode(env, clazz, locale);
    if(intCurrencySymbol != NULL) {
        // adding currency symbol to result
        currencySymbol = getCurrencySymbol(env, clazz, locale, intCurrencySymbol);
    } else {
        intCurrencySymbol = env->NewStringUTF("XXX");
    }
    if(currencySymbol == NULL) {
        currencySymbol = env->NewStringUTF("\u00a4");
    }
    counter += 2;

    // adding number format patterns to the result
    int numOfEntries;
    int decSepOffset;

    rootElems = ures_getByKey(root, "NumberPatterns", NULL, &status);
    if(U_FAILURE(status)) {
        status = U_ZERO_ERROR;
        goto zones;
    }

    numOfEntries = ures_getSize(rootElems);
    if(numOfEntries < 3) {
        ures_close(rootElems);
        goto zones;
    }

    // number pattern
    pattern = ures_getStringByIndex(rootElems, 0, &patternLength, &status);
    if(U_FAILURE(status)) {
        status = U_ZERO_ERROR;
        ures_close(rootElems);
        goto zones;
    }
    numberPattern = env->NewString(pattern, patternLength);
    counter++;

    // integer pattern derived from number pattern: everything before the
    // first '.', taken directly from the pattern since NewString is
    // given an explicit length
    decSepOffset = u_strcspn(pattern, kDecimalPoint);
    integerPattern = env->NewString(pattern, decSepOffset);
    counter++;

    // currency pattern
    pattern = ures_getStringByIndex(rootElems, 1, &patternLength, &status);
    if(U_FAILURE(status)) {
        status = U_ZERO_ERROR;
        ures_close(rootElems);
        goto zones;
    }
    currencyPattern = env->NewString(pattern, patternLength);
    counter++;

    // percent pattern
    pattern = ures_getStringByIndex(rootElems, 2, &patternLength, &status);
    if(U_FAILURE(status)) {
        status = U_ZERO_ERROR;
        ures_close(rootElems);
        goto zones;
    }
    percentPattern = env->NewString(pattern, patternLength);
    counter++;

    ures_close(rootElems);

zones:

    ures_close(root);

    if(needsTZ == JNI_TRUE) {
        counter++; //add empty timezone
    }

    // collect all content and put it into an array
    result = env->NewObjectArray(counter, obj_class, NULL);
    if(result == NULL) {
        return NULL;
    }

    int index = 0;

    if(needsTZ == JNI_TRUE) {
        addObject(env, result, "timezones", NULL, index++);
    }
    if(firstDayOfWeek != NULL && index < counter) {
        addObject(env, result, "First_Day", firstDayOfWeek, index++);
    }
    if(minimalDaysInFirstWeek != NULL && index < counter) {
        addObject(env, result, "Minimal_Days", minimalDaysInFirstWeek, index++);
    }
    if(amPmMarkers != NULL && index < counter) {
        addObject(env, result, "ampm", amPmMarkers, index++);
    }
    if(eras != NULL && index < counter) {
        addObject(env, result, "eras", eras, index++);
    }
    if(localPatternChars != NULL && index < counter) {
        addObject(env, result, "LocalPatternChars", localPatternChars, index++);
    }
    if(weekdays != NULL && index < counter) {
        addObject(env, result, "weekdays", weekdays, index++);
    }
    if(shortWeekdays != NULL && index < counter) {
        addObject(env, result, "shortWeekdays", shortWeekdays, index++);
    }
    if(months != NULL && index < counter) {
        addObject(env, result, "months", months, index++);
    }
    if(shortMonths != NULL && index < counter) {
        addObject(env, result, "shortMonths", shortMonths, index++);
    }
    if(time_SHORT != NULL && index < counter) {
        addObject(env, result, "Time_SHORT", time_SHORT, index++);
    }
    if(time_MEDIUM != NULL && index < counter) {
        addObject(env, result, "Time_MEDIUM", time_MEDIUM, index++);
    }
    if(time_LONG != NULL && index < counter) {
        addObject(env, result, "Time_LONG", time_LONG, index++);
    }
    if(time_FULL != NULL && index < counter) {
        addObject(env, result, "Time_FULL", time_FULL, index++);
    }
    if(date_SHORT != NULL && index < counter) {
        addObject(env, result, "Date_SHORT", date_SHORT, index++);
    }
    if(date_MEDIUM != NULL && index < counter) {
        addObject(env, result, "Date_MEDIUM", date_MEDIUM, index++);
    }
    if(date_LONG != NULL && index < counter) {
        addObject(env, result, "Date_LONG", date_LONG, index++);
    }
    if(date_FULL != NULL && index < counter) {
        addObject(env, result, "Date_FULL", date_FULL, index++);
    }
    if(decimalPatternChars != NULL && index < counter) {
        addObject(env, result, "DecimalPatternChars", decimalPatternChars, index++);
    }
    if(naN != NULL && index < counter) {
        addObject(env, result, "NaN", naN, index++);
    }
    if(infinity != NULL && index < counter) {
        addObject(env, result, "Infinity", infinity, index++);
    }
    if(currencySymbol != NULL && index < counter) {
        addObject(env, result, "CurrencySymbol", currencySymbol, index++);
    }
    if(intCurrencySymbol != NULL && index < counter) {
        addObject(env, result, "IntCurrencySymbol", intCurrencySymbol, index++);
    }
    if(numberPattern != NULL && index < counter) {
        addObject(env, result, "Number", numberPattern, index++);
    }
    if(integerPattern != NULL && index < counter) {
        addObject(env, result, "Integer", integerPattern, index++);
    }
    if(currencyPattern != NULL && index < counter) {
        addObject(env, result, "Currency", currencyPattern, index++);
    }
    if(percentPattern != NULL && index < counter) {
        addObject(env, result, "Percent", percentPattern, index++);
    }

    return result;
}
U_NAMESPACE_BEGIN

// -------------------------------------

/**
 * Builds a RuleBasedBreakIterator for a locale from the break rules data.
 *
 * The "boundaries" table of the locale's break-iterator bundle is looked up
 * for 'type'; the string found there ("name.ext") names the rules data
 * file, which is opened with udata_open and handed to a new
 * RuleBasedBreakIterator.
 *
 * @param loc     locale the iterator is requested for
 * @param type    key inside the "boundaries" table
 * @param kind    break type stored on the iterator with setBreakType
 * @param status  in/out error code; nothing is done if it already
 *                indicates a failure
 * @return the new iterator, or NULL on failure (status is then set)
 */
BreakIterator*
BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status)
{
    char fnbuff[256];
    char ext[4]={'\0'};
    char actualLocale[ULOC_FULLNAME_CAPACITY];
    int32_t size;
    const UChar* brkfname = NULL;
    UResourceBundle brkRulesStack;
    UResourceBundle brkNameStack;
    UResourceBundle *brkRules = &brkRulesStack;
    UResourceBundle *brkName  = &brkNameStack;
    RuleBasedBreakIterator *result = NULL;

    if (U_FAILURE(status))
        return NULL;

    // make sure both buffers hold valid (empty) strings even when the
    // lookup below does not fill them
    fnbuff[0] = 0;
    actualLocale[0] = 0;

    ures_initStackObject(brkRules);
    ures_initStackObject(brkName);

    // Get the locale
    UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, loc.getName(), &status);
    /* this is a hack for now. Should be fixed when the data is fetched from
        brk_index.txt */
    if(status==U_USING_DEFAULT_WARNING){
        status=U_ZERO_ERROR;
        ures_openFillIn(b, U_ICUDATA_BRKITR, "", &status);
    }

    // Get the "boundaries" array.
    if (U_SUCCESS(status)) {
        brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status);
        // Get the string object naming the rules file
        brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status);
        // Get the actual string
        brkfname = ures_getString(brkName, &size, &status);
        U_ASSERT((size_t)size<sizeof(fnbuff));
        if ((size_t)size>=sizeof(fnbuff)) {
            size=0;
            if (U_SUCCESS(status)) {
                status = U_BUFFER_OVERFLOW_ERROR;
            }
        }

        // Use the string if we found it
        if (U_SUCCESS(status) && brkfname) {
            const char *actual = ures_getLocale(brkName, &status);
            if (actual != NULL) {
                // strncpy does not terminate on truncation, so do it explicitly
                uprv_strncpy(actualLocale, actual, sizeof(actualLocale)-1);
                actualLocale[sizeof(actualLocale)-1] = 0;
            }

            UChar* extStart=u_strchr(brkfname, 0x002e);
            int len = 0;
            if(extStart!=NULL){
                len = (int)(extStart-brkfname);
                // convert only the characters that exist after the '.',
                // at most what fits in ext with its terminator
                int32_t extLen = size - len - 1;
                if (extLen > (int32_t)sizeof(ext) - 1) {
                    extLen = (int32_t)sizeof(ext) - 1;
                }
                u_UCharsToChars(extStart+1, ext, extLen);
                ext[extLen] = 0;
                u_UCharsToChars(brkfname, fnbuff, len);
            }
            fnbuff[len]=0; // nul terminate
        }
    }

    ures_close(brkRules);
    ures_close(brkName);

    UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status);
    if (U_FAILURE(status)) {
        ures_close(b);
        return NULL;
    }

    // Create a RuleBasedBreakIterator
    result = new RuleBasedBreakIterator(file, status);

    // If there is a result, set the valid locale and actual locale, and the kind
    if (U_SUCCESS(status) && result != NULL) {
        U_LOCALE_BASED(locBased, *(BreakIterator*)result);
        locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), actualLocale);
        result->setBreakType(kind);
    }

    ures_close(b);

    if (U_FAILURE(status) && result != NULL) {  // Sometimes redundant check, but simple
        delete result;
        return NULL;
    }

    if (result == NULL) {
        udata_close(file);
        if (U_SUCCESS(status)) {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
    }

    return result;
}
/* Routes an I/O request to the handler matching the URL in gc->path.       */
/*  - gc->path (and gc->topath when set) are first passed through           */
/*    _GIO_translateURL; a non-NULL result replaces the stored path.        */
/*  - for gf_renamefile both URLs must share the same prefix up to the      */
/*    first '/' after "://", otherwise EXDEV is reported.                   */
/*  - a path containing "://" is matched against the protocols table        */
/*    (AddProtocol is tried when nothing matches); the protocol dispatcher  */
/*    is then called directly or started in its own thread.                 */
/*  - any other path is handed to _GIO_localDispatch.                       */
/* Failures are signalled through gc (receiveerror / _GIO_reporterror).     */
static void GIOdispatch(GIOControl *gc, enum giofuncs gf) {
    unichar_t *temp, *pt, *tpt;
    int i;

    gc->gf = gf;

    if ( _GIO_stdfuncs.useragent == NULL )
	_GIO_stdfuncs.useragent = copy("*****@*****.**");

    temp = _GIO_translateURL(gc->path,gf);
    if ( temp!=NULL ) {
	/* keep the first untranslated path around, free later replacements */
	if ( gc->origpath==NULL )
	    gc->origpath = gc->path;
	else
	    free(gc->path);
	gc->path = temp;
    }
    if ( gc->topath!=NULL ) {
	temp = _GIO_translateURL(gc->topath,gf);
	if ( temp!=NULL ) {
	    free(gc->topath);
	    gc->topath = temp;
	}
	if ( gf==gf_renamefile ) {
	    /* pt/tpt end up just after "protocol://host" of each URL */
	    if (( pt = uc_strstr(gc->path,"://"))== NULL )
		pt = gc->path;
	    else {
		pt=u_strchr(pt+3,'/');
		if ( pt==NULL ) pt = gc->path+u_strlen(gc->path);
	    }
	    if (( tpt = uc_strstr(gc->topath,"://"))== NULL )
		tpt = gc->topath;
	    else {
		tpt=u_strchr(tpt+3,'/');
		if ( tpt==NULL ) tpt = gc->topath+u_strlen(gc->topath);
	    }
	    if ( tpt-gc->topath!=pt-gc->path ||
		    u_strnmatch(gc->path,gc->topath,pt-gc->path)!=0 ) {
		_GIO_reporterror(gc,EXDEV);
return;
	    }
	}
    }

    pt = uc_strstr(gc->path,"://");
    if ( pt!=NULL ) {
	for ( i=0; i<plen; ++i )
	    if ( u_strnmatch(protocols[i].proto,gc->path,pt-gc->path)==0 )
	break;
	if ( i>=plen && !AddProtocol(gc->path,pt-gc->path) ) {
	    gc->protocol_index = -2;
	    gc->return_code = 501;
	    gc->error = err501;
	    uc_strcpy(gc->status,"No support for browsing: ");
	    u_strncpy(gc->status+u_strlen(gc->status), gc->path, pt-gc->path );
	    gc->done = true;
	    (gc->receiveerror)(gc);
return;
	}
	gc->protocol_index = i;
	if ( !protocols[i].dothread )
	    (protocols[i].dispatcher)(gc);
	else {
#ifndef HAVE_PTHREAD_H
	    gc->return_code = 501;
	    gc->error = err501;
	    uc_strcpy(gc->status,"No support for protocol");
	    gc->done = true;
	    (gc->receiveerror)(gc);
return;
#else
	    int err;

	    /* could put stuff here to queue functions if we get too many */
	    /*  threads, or perhaps even a thread pool */
	    uc_strcpy(gc->status,"Queued");
	    gc->threaddata = (struct gio_threaddata *) malloc(sizeof(struct gio_threaddata));
	    if ( gc->threaddata==NULL ) {
		_GIO_reporterror(gc,ENOMEM);
return;
	    }
	    /* initialise the objects in place: a copy of a mutex or of a */
	    /*  condition variable is not a usable synchronization object */
	    pthread_mutex_init(&gc->threaddata->mutex,NULL);
	    pthread_cond_init(&gc->threaddata->cond,NULL);
	    if ( _GIO_stdfuncs.gdraw_sync_thread!=NULL )
		(_GIO_stdfuncs.gdraw_sync_thread)(NULL,NULL,NULL);
	    err = pthread_create(&gc->threaddata->thread,NULL,
		    (ptread_startfunc_t *) (protocols[i].dispatcher), gc);
	    if ( err!=0 ) {
		/* no thread will ever run the request: undo and report */
		pthread_cond_destroy(&gc->threaddata->cond);
		pthread_mutex_destroy(&gc->threaddata->mutex);
		free(gc->threaddata);
		gc->threaddata = NULL;
		_GIO_reporterror(gc,err);
return;
	    }
#endif
	}
    } else {
	gc->protocol_index = -1;
	_GIO_localDispatch(gc);
    }
}
/* * This function behaves in the same way that a main one, except that it does * not invoke the setBufferMode function. */ int main_LocateTfst(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; char text[FILENAME_MAX]=""; char alphabet[FILENAME_MAX]=""; int is_korean=0; int tilde_negation_operator=1; int selected_negation_operator=0; int tagging=0; int single_tags_only=0; int match_word_boundaries=1; MatchPolicy match_policy=LONGEST_MATCHES; OutputPolicy output_policy=IGNORE_OUTPUTS; AmbiguousOutputPolicy ambiguous_output_policy=ALLOW_AMBIGUOUS_OUTPUTS; VariableErrorPolicy variable_error_policy=IGNORE_VARIABLE_ERRORS; int search_limit=NO_MATCH_LIMIT; char foo; vector_ptr* injected=new_vector_ptr(); bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_LocateTfst,lopts_LocateTfst,&index))) { switch(val) { case 't': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty .tfst name\n"); free_vector_ptr(injected); return USAGE_ERROR_CODE; } strcpy(text,options.vars()->optarg); break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet name\n"); free_vector_ptr(injected); return USAGE_ERROR_CODE; } strcpy(alphabet,options.vars()->optarg); break; case 'K': is_korean=1; match_word_boundaries=0; break; case 'l': search_limit=NO_MATCH_LIMIT; break; case 'g': if (options.vars()->optarg[0]=='\0') { error("You must specify an argument for negation operator\n"); free_vector_ptr(injected); return USAGE_ERROR_CODE; } selected_negation_operator=1; if ((strcmp(options.vars()->optarg,"minus")==0) || (strcmp(options.vars()->optarg,"-")==0)) { tilde_negation_operator=0; } else if ((strcmp(options.vars()->optarg,"tilde")!=0) && (strcmp(options.vars()->optarg,"~")!=0)) { error("You must specify a valid argument for negation operator\n"); free_vector_ptr(injected); return 
USAGE_ERROR_CODE; } break; case 'n': if (1!=sscanf(options.vars()->optarg,"%d%c",&search_limit,&foo) || search_limit<=0) { /* foo is used to check that the search limit is not like "45gjh" */ error("Invalid search limit argument: %s\n",options.vars()->optarg); free_vector_ptr(injected); return USAGE_ERROR_CODE; } break; case 'S': match_policy=SHORTEST_MATCHES; break; case 'L': match_policy=LONGEST_MATCHES; break; case 'A': match_policy=ALL_MATCHES; break; case 'I': output_policy=IGNORE_OUTPUTS; break; case 'M': output_policy=MERGE_OUTPUTS; break; case 'R': output_policy=REPLACE_OUTPUTS; break; case 'X': variable_error_policy=EXIT_ON_VARIABLE_ERRORS; break; case 'Y': variable_error_policy=IGNORE_VARIABLE_ERRORS; break; case 'Z': variable_error_policy=BACKTRACK_ON_VARIABLE_ERRORS; break; case 'b': ambiguous_output_policy=ALLOW_AMBIGUOUS_OUTPUTS; break; case 'z': ambiguous_output_policy=IGNORE_AMBIGUOUS_OUTPUTS; break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case 1: tagging=1; break; case 2: single_tags_only=1; break; case 3: match_word_boundaries=0; break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); free_vector_ptr(injected); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); free_vector_ptr(injected); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'v': { unichar* key=u_strdup(options.vars()->optarg); unichar* value=u_strchr(key,'='); if (value==NULL) { error("Invalid variable injection: %s\n",options.vars()->optarg); free_vector_ptr(injected); return USAGE_ERROR_CODE; } (*value)='\0'; value++; value=u_strdup(value); vector_ptr_add(injected,key); vector_ptr_add(injected,value); break; } case ':': index==-1 
? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_LocateTfst[index].name); free_vector_ptr(injected); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); free_vector_ptr(injected); return USAGE_ERROR_CODE; break; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); free_vector_ptr(injected); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory free_vector_ptr(injected); return SUCCESS_RETURN_CODE; } if (selected_negation_operator==0) { get_graph_compatibility_mode_by_file(&vec,&tilde_negation_operator); } char grammar[FILENAME_MAX]; char output[FILENAME_MAX]; strcpy(grammar,argv[options.vars()->optind]); get_path(text,output); strcat(output,"concord.ind"); int OK=locate_tfst(text, grammar, alphabet, output, &vec, match_policy, output_policy, ambiguous_output_policy, variable_error_policy, search_limit, is_korean, tilde_negation_operator, injected, tagging, single_tags_only, match_word_boundaries); free_vector_ptr(injected); return (!OK); }
static void SplashLayout() { unichar_t *start, *pt, *lastspace; extern const char *source_modtime_str; extern const char *source_version_str; uc_strcpy(msg, "When my father finished his book on Renaissance printing (The Craft of Printing and the Publication of Shakespeare's Works) he told me that I would have to write the chapter on computer typography. This is my attempt to do so."); GDrawSetFont(splashw,splash_font); linecnt = 0; lines[linecnt++] = msg-1; for ( start = msg; *start!='\0'; start = pt ) { lastspace = NULL; for ( pt=start; ; ++pt ) { if ( *pt==' ' || *pt=='\0' ) { if ( GDrawGetTextWidth(splashw,start,pt-start,NULL)<splashimage.u.image->width-10 ) lastspace = pt; else break; if ( *pt=='\0' ) break; } } if ( lastspace!=NULL ) pt = lastspace; lines[linecnt++] = pt; if ( *pt ) ++pt; } uc_strcpy(pt, " FontForge used to be named PfaEdit."); pt += u_strlen(pt); lines[linecnt++] = pt; uc_strcpy(pt," Version: ");; uc_strcat(pt,source_modtime_str); uc_strcat(pt," ("); uc_strcat(pt,source_version_str); #ifdef FONTFORGE_CONFIG_TYPE3 uc_strcat(pt,"-ML"); #endif #ifdef FREETYPE_HAS_DEBUGGER uc_strcat(pt,"-TtfDb"); #endif #ifdef _NO_PYTHON uc_strcat(pt,"-NoPython"); #endif #ifdef FONTFORGE_CONFIG_USE_LONGDOUBLE uc_strcat(pt,"-LD"); #elif defined(FONTFORGE_CONFIG_USE_DOUBLE) uc_strcat(pt,"-D"); #endif #ifndef FONTFORGE_CONFIG_DEVICETABLES uc_strcat(pt,"-NoDevTab"); #endif uc_strcat(pt,")"); pt += u_strlen(pt); lines[linecnt++] = pt; uc_strcpy(pt," Library Version: "); uc_strcat(pt,library_version_configuration.library_source_modtime_string); lines[linecnt++] = pt+u_strlen(pt); lines[linecnt] = NULL; is = u_strchr(msg,'('); ie = u_strchr(msg,')'); }