Exemple #1
0
static void SplashLayout() {
    unichar_t *start, *pt, *lastspace;
    extern const char *source_modtime_str;
    extern const char *source_version_str;

    uc_strcpy(msg, "When my father finished his book on Renaissance printing (The Craft of Printing and the Publication of Shakespeare's Works) he told me that I would have to write the chapter on computer typography. This is my attempt to do so.");

    GDrawSetFont(splashw,splash_font);
    linecnt = 0;
    lines[linecnt++] = msg-1;
    for ( start = msg; *start!='\0'; start = pt ) {
	lastspace = NULL;
	for ( pt=start; ; ++pt ) {
	    if ( *pt==' ' || *pt=='\0' ) {
		if ( GDrawGetTextWidth(splashw,start,pt-start)<splashimage.u.image->width-10 )
		    lastspace = pt;
		else
	break;
		if ( *pt=='\0' )
	break;
	    }
	}
	if ( lastspace!=NULL )
	    pt = lastspace;
	lines[linecnt++] = pt;
	if ( *pt ) ++pt;
    }
    uc_strcpy(pt, " FontForge used to be named PfaEdit.");

    pt += u_strlen(pt);
    lines[linecnt++] = pt;
    uc_strcpy(pt,"  git hash: ");;
    pt += u_strlen(pt);
    lines[linecnt++] = pt;
    uc_strcat(pt, FONTFORGE_GIT_VERSION);

    pt += u_strlen(pt);
    lines[linecnt++] = pt;
    uc_strcpy(pt,"  Version: ");;
    uc_strcat(pt,FONTFORGE_MODTIME_STR);

    pt += u_strlen(pt);
    lines[linecnt++] = pt;
    uc_strcat(pt,"           (");
    uc_strcat(pt,FONTFORGE_MODTIME_STR);
    uc_strcat(pt,"-ML");
#ifdef FREETYPE_HAS_DEBUGGER
    uc_strcat(pt,"-TtfDb");
#endif
    uc_strcat(pt,")");
    pt += u_strlen(pt);
    lines[linecnt++] = pt;
    uc_strcpy(pt,"  Lib Version: ");
    uc_strcat(pt,FONTFORGE_MODTIME_STR);
    lines[linecnt++] = pt+u_strlen(pt);
    lines[linecnt] = NULL;
    is = u_strchr(msg,'(');
    ie = u_strchr(msg,')');
}
Exemple #2
0
/* Split a URL of the form
 *	proto://[user[:password]@]server[:port]/url-path
 * into its components.
 *
 *  url       the URL to take apart (not modified)
 *  host      receives a newly allocated copy of the server name, or NULL
 *		when url contains no "://"
 *  port      receives the port number, -1 when none is given and -2 when
 *		the text after ':' is not a plain decimal number
 *  username  receives a newly allocated copy of the user name, or NULL
 *  password  receives the password; when a user name is present the value
 *		is whatever GIO_PasswordCache() returns for it
 *
 * Returns a newly allocated copy of the path part ("/" when the URL has
 *  none), or a copy of the whole url when it contains no "://".
 */
char *_GIO_decomposeURL(const unichar_t *url,char **host, int *port, char **username,
	char **password) {
    unichar_t *pt, *pt2, *upt, *ppt;
    char *path;
    char proto[40];
    size_t protolen;
    /* ftp://[user[:password]@]ftpserver[:port]/url-path */

    *username = NULL; *password = NULL; *port = -1;
    pt = uc_strstr(url,"://");
    if ( pt==NULL ) {
	*host = NULL;
return( cu_copy(url));
    }
    /* Leave room for the terminator and write it ourselves so proto is a */
    /*  valid string whether or not cu_strncpy terminates its output */
    protolen = (size_t)(pt-url);
    if ( protolen>sizeof(proto)-1 )
	protolen = sizeof(proto)-1;
    cu_strncpy(proto,url,protolen);
    proto[protolen] = '\0';
    pt += 3;

    /* pt2 marks the end of the authority part (user, host, port) */
    pt2 = u_strchr(pt,'/');
    if ( pt2==NULL ) {
	pt2 = pt+u_strlen(pt);
	path = copy("/");
    } else {
	path = cu_copy(pt2);
    }

    upt = u_strchr(pt,'@');
    if ( upt!=NULL && upt<pt2 ) {
	ppt = u_strchr(pt,':');
	/* A ':' after the '@' introduces the port, not a password */
	if ( ppt==NULL || ppt>upt )
	    *username = cu_copyn(pt,upt-pt);
	else {
	    *username = cu_copyn(pt,ppt-pt);
	    *password = cu_copyn(ppt+1,upt-ppt-1);
	}
	pt = upt+1;
    }

    ppt = u_strchr(pt,':');
    if ( ppt!=NULL && ppt<pt2 ) {
	char *temp = cu_copyn(ppt+1,pt2-ppt-1), *end;
	*port = strtol(temp,&end,10);
	if ( *end!='\0' )
	    *port = -2;		/* trailing junk after the digits */
	free(temp);
	pt2 = ppt;
    }
    *host = cu_copyn(pt,pt2-pt);
    if ( *username )
	*password = GIO_PasswordCache(proto,*host,*username,*password);
return( path );
}
Exemple #3
0
/* Normalize a path (or the path part of a URL) in place:
 *   - empty components ("//") and "./" components are removed
 *   - "dir/../" is collapsed, unless there is no preceding component to
 *     remove or that component is itself an unresolved ".."
 * For a URL everything up to and including the first '/' after "://" is
 *  left alone, as is the leading '/' of an absolute path.
 * A trailing "." or ".." that is not followed by '/' is left untouched.
 * Returns name.
 */
unichar_t *u_GFileNormalize(unichar_t *name) {
    unichar_t *pt, *base, *ppt;

    if ( (pt = uc_strstr(name,"://"))!=NULL ) {
	base = u_strchr(pt+3,'/');
	if ( base==NULL )
return( name );
	++base;
    } else if ( *name=='/' )
	base = name+1;
    else
	base = name;
    /* pt always sits at the start of a path component */
    /* NOTE(review): the overlapping u_strcpy calls below rely on u_strcpy */
    /*  copying front to back -- confirm against its implementation */
    for ( pt=base; *pt!='\0'; ) {
	if ( *pt=='/' )
	    u_strcpy(pt,pt+1);
	else if ( uc_strncmp(pt,"./",2)==0 )
	    u_strcpy(pt,pt+2);
	else if ( uc_strncmp(pt,"../",3)==0 ) {
	    /* Compare all three characters: with only two, names such as */
	    /*  "..foo" and a trailing ".." would match and the copy below */
	    /*  would start beyond the end of the string */
	    if ( pt==base )
		pt += 3;		/* nothing before it to remove */
	    else {
		/* find the start of the preceding component */
		for ( ppt=pt-1; ppt>base && ppt[-1]!='/'; --ppt );
		if ( pt-ppt==3 && ppt[0]=='.' && ppt[1]=='.' )
		    pt += 3;		/* parent is an unresolved "..", keep both */
		else {
		    u_strcpy(ppt,pt+3);
		    pt = ppt;
		}
	    }
	} else {
	    while ( *pt!='/' && *pt!='\0' ) ++pt;
	    if ( *pt == '/' ) ++pt;
	}
    }
return( name );
}
Exemple #4
0
/**
 * Allocates a token_t for the given string and returns it. The 'next' field
 * of the new token is always NULL.
 *
 * - If 'str' equals one of the entries of 'keywords', the token takes the
 *   type of that keyword and carries no string.
 * - If 'str' starts with '<', it must end with the matching '>': the token
 *   is a TOK_ANGLE one holding a copy of the text between the brackets.
 *   Note that 'str' is modified: the closing '>' is overwritten with '\0'.
 * - Otherwise the token is a TOK_STR one holding a copy of 'str'.
 */
token_t* new_token_t(unichar* str) {
token_t* token=(token_t*)malloc(sizeof(token_t));
if (token==NULL) {
   fatal_alloc_error("new_token_t");
}
token->next=NULL;

/* Keywords take precedence over everything else */
const keyword_t* kw=keywords;
while (kw->str!=NULL) {
   if (u_strcmp(str,kw->str)==0) {
      token->type=kw->val;
      token->str=NULL;
      return token;
   }
   kw++;
}

if (*str!='<') {
   /* Plain string token */
   token->type=TOK_STR;
   token->str=u_strdup(str);
   return token;
}

/* Angle token: the closing '>' must be the very last character */
unichar* closing=u_strchr(str,'>');
if (closing==NULL || closing[1]!='\0') {
   fatal_error("Invalid token: '%S'\n",str);
}
*closing='\0';
token->type=TOK_ANGLE;
/* We only keep what stands between the angle brackets */
token->str=u_strdup(str+1);
return token;
}
/**
 * Load the break dictionary for a script.
 *
 * The dictionary file name is looked up under "dictionaries/<script short name>"
 * in the brkitr resource bundle. The name is split at its first '.' into a
 * base name and an extension, which are handed to udata_open().
 *
 * @param script  script whose dictionary is wanted
 * @return a newly allocated dictionary, or NULL. When a file name was found
 *         in the bundle but the data could not be opened, a dummy dictionary
 *         containing only a single space is returned instead.
 */
const CompactTrieDictionary *
ICULanguageBreakFactory::loadDictionaryFor(UScriptCode script, int32_t /*breakType*/) {
    UErrorCode status = U_ZERO_ERROR;
    // Open root from brkitr tree.
    char dictnbuff[256];
    char ext[4]={'\0'};

    // Keep the name buffer a valid (empty) string on every path.
    dictnbuff[0] = 0;

    UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
    b = ures_getByKeyWithFallback(b, "dictionaries", b, &status);
    b = ures_getByKeyWithFallback(b, uscript_getShortName(script), b, &status);
    int32_t dictnlength = 0;
    const UChar *dictfname = ures_getString(b, &dictnlength, &status);
    if (U_SUCCESS(status) && (size_t)dictnlength >= sizeof(dictnbuff)) {
        dictnlength = 0;
        status = U_BUFFER_OVERFLOW_ERROR;
    }
    if (U_SUCCESS(status) && dictfname) {
        UChar* extStart=u_strchr(dictfname, 0x002e);
        int32_t len = 0;
        if(extStart!=NULL){
            len = (int32_t)(extStart-dictfname);
            // Copy at most sizeof(ext)-1 characters of the extension and
            // terminate explicitly: a fixed-size copy would read past the
            // end of a short extension and leave a long one unterminated.
            int32_t extLen = u_strlen(extStart+1);
            if (extLen > (int32_t)(sizeof(ext)-1)) {
                extLen = (int32_t)(sizeof(ext)-1);
            }
            u_UCharsToChars(extStart+1, ext, extLen);
            ext[extLen]=0;
            u_UCharsToChars(dictfname, dictnbuff, len);
        }
        dictnbuff[len]=0; // nul terminate
    }
    ures_close(b);
    UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext, dictnbuff, &status);
    if (U_SUCCESS(status)) {
        const CompactTrieDictionary *dict = new CompactTrieDictionary(
            file, status);
        if (dict == NULL) {
            // Allocation failed, so nothing took over the data file.
            udata_close(file);
            return NULL;
        }
        // NOTE(review): assumes the dictionary object owns 'file' from here
        // on and releases it in its destructor -- confirm.
        if (U_FAILURE(status)) {
            delete dict;
            dict = NULL;
        }
        return dict;
    } else if (dictfname != NULL){
        //create dummy dict if dictionary filename not valid
        UChar c = 0x0020;
        status = U_ZERO_ERROR;
        MutableTrieDictionary *mtd = new MutableTrieDictionary(c, status, TRUE);
        if (mtd == NULL) {
            return NULL;
        }
        mtd->addWord(&c, 1, status, 1);
        const CompactTrieDictionary *dummy = NULL;
        if (U_SUCCESS(status)) {
            dummy = new CompactTrieDictionary(*mtd, status);
        }
        // The mutable dictionary is only the source for the compact one;
        // release it instead of leaking it.
        delete mtd;
        if (dummy != NULL && U_FAILURE(status)) {
            delete dummy;
            dummy = NULL;
        }
        return dummy;
    }
    return NULL;
}
Exemple #6
0
/* Make a message pattern "quote friendly" by running it through
 * umsg_autoQuoteApostrophe() when it contains an apostrophe.
 *
 * spattern      in/out: the pattern. When it is converted, the old buffer
 *               is released and *spattern points to a newly allocated one.
 * spattern_len  in/out: length of the pattern in UChars.
 * ec            receives the ICU status of the conversion.
 *
 * Returns SUCCESS (also when nothing had to be done) or FAILURE. On FAILURE
 * the original pattern has been released; *spattern is set to NULL and
 * *spattern_len to 0 so the caller is not left with a dangling pointer.
 */
int msgformat_fix_quotes(UChar **spattern, uint32_t *spattern_len, UErrorCode *ec)
{
	if(*spattern && *spattern_len && u_strchr(*spattern, (UChar)'\'')) {
		/* Room for 2*len+1 UChars, the capacity passed to ICU below */
		UChar *npattern = safe_emalloc(sizeof(UChar)*2, *spattern_len, sizeof(UChar));
		uint32_t npattern_len;
		npattern_len = umsg_autoQuoteApostrophe(*spattern, *spattern_len, npattern, 2*(*spattern_len)+1, ec);
		efree(*spattern);
		if( U_FAILURE(*ec) )
		{
			/* Do not leak the scratch buffer or expose the freed pattern */
			efree(npattern);
			*spattern = NULL;
			*spattern_len = 0;
			return FAILURE;
		}
		/* Shrink the buffer to the size actually used (plus terminator) */
		npattern = erealloc(npattern, sizeof(UChar)*(npattern_len+1));
		*spattern = npattern;
		*spattern_len = npattern_len;
	}
	return SUCCESS;
}
Exemple #7
0
/*
 * Find the first occurrence of code point c in the NUL-terminated string s.
 * A BMP code point is delegated to u_strchr(); a supplementary code point is
 * searched for as its lead/trail surrogate pair. Returns a pointer to the
 * match, or NULL if there is none or c is above UCHAR_MAX_VALUE.
 */
U_CAPI UChar* U_EXPORT2
u_strchr32(const UChar* s, UChar32 c) {
    const UChar* unit;
    UChar high, low;

    if ((uint32_t) c <= U_BMP_MAX) {
        /* single code unit */
        return u_strchr(s, (UChar) c);
    }
    if ((uint32_t) c > UCHAR_MAX_VALUE) {
        /* outside the Unicode range, cannot occur in a string */
        return NULL;
    }

    /* supplementary: look for the two surrogates next to each other */
    high = U16_LEAD(c);
    low = U16_TRAIL(c);
    for (unit = s; *unit != 0; ++unit) {
        /* unit[1] is only read after unit[0] matched the (non-zero) lead */
        if (unit[0] == high && unit[1] == low) {
            return (UChar*) unit;
        }
    }
    return NULL;
}
Exemple #8
0
/*
 * Parse one line of word list input into an array of WordListChar.
 *
 * Everything from the first '#' that is not directly preceded by '/' is
 * treated as a comment and dropped. '[' and ']' switch the "selected" flag
 * on and off for the glyphs between them. '/' or '\\' starts a glyph name,
 * which is resolved through
 * u_WordlistEscapedInputStringToRealString_readGlyphName(); any other
 * character (or a glyph name that cannot be resolved) is looked up by code
 * point with SFGetOrMakeChar().
 *
 * sf              font in which the glyphs are looked up
 * input_const     the text to parse; it is copied, never modified
 * getUnicodeFunc  callback giving the value stored in WordListChar.n for a
 *                 named glyph; when it returns -1 the font's backmap is used
 * udata           passed through to getUnicodeFunc
 *
 * Returns a calloc'ed array of WordListLineSz entries which the caller must
 * free, or NULL if memory could not be allocated. At most WordListLineSz-1
 * entries are filled in, so at least one zeroed entry always follows the
 * data (presumably what consumers use as the end marker -- confirm).
 * NOTE(review): 'n' is only assigned for glyphs found by name; entries found
 * by code point keep the zero from calloc.
 */
WordListLine WordlistEscapedInputStringToParsedDataComplex(
    SplineFont* sf,
    const unichar_t* input_const,
    WordlistEscapedInputStringToRealString_getFakeUnicodeOfScFunc getUnicodeFunc,
    void* udata )
{
    unichar_t* input = u_copy( input_const );
    WordListChar* ret = calloc( WordListLineSz, sizeof(WordListChar));
    if( !input || !ret )
    {
	free(input);
	free(ret);
	return(NULL);
    }
    WordListChar* out = ret;
    /* last slot is never written so the result stays zero terminated */
    WordListChar* out_limit = ret + WordListLineSz - 1;
    unichar_t* in     = input;
    unichar_t* in_end = input + u_strlen(input);
    // trim comment and beyond from input
    {
	unichar_t* p = input;
	while( p && p < in_end  )
	{
	    p = u_strchr( p, '#' );
	    // "/#" is an escaped '#', keep looking after it
	    if( p && p > input && *(p-1) == '/' )
	    {
		p++;
		continue;
	    }
	    if( p )
		*p = '\0';
	    break;
	}
    }
    in_end = input + u_strlen(input);

    int addingGlyphsToSelected = 0;
    int currentGlyphIndex = -1;
    // stop early rather than write past the end of ret on very long input
    for ( ; in < in_end && out < out_limit; in++ )
    {
	unichar_t ch = *in;
	TRACE("in:%p end:%p got char %d %c\n", in, in_end, ch, ch );
	if( ch == '[' )
	{
	    addingGlyphsToSelected = 1;
	    continue;
	}
	if( ch == ']' )
	{
	    addingGlyphsToSelected = 0;
	    continue;
	}
	int isSelected = addingGlyphsToSelected;
	currentGlyphIndex++;

	if( ch == '/' || ch == '\\' )
	{
	    // start of a glyph name
	    unichar_t glyphname[ PATH_MAX+1 ];
	    unichar_t* updated_in = 0;
	    SplineChar* sc = u_WordlistEscapedInputStringToRealString_readGlyphName( sf, in, in_end, &updated_in, glyphname );
	    if( sc )
	    {
		in = updated_in;
		int n = getUnicodeFunc( sc, udata );
		if( n == -1 )
		{
		    /*
		     * Okay, this probably means we've got an unencoded glyph (generally
		     * used for OpenType substitutions).
		     * Redeem the value from the SplineFont datamap instead of fetching from
		     * the Unicode identifier.
		     */
		    n = sf->map->backmap[sc->orig_pos];

		    /*
		     * Unencoded glyphs have special mappings in the SplineFont that
		     * start from 65536 (values beyond Unicode, 65535 being the reserved
		     * "frontier" value).
		     */
		    if ( (sf->map->enc->is_unicodebmp || sf->map->enc->is_unicodefull) && n < 65536 ) {
		        TRACE("ToRealString: backmapped position does not match Unicode encoding\n");
		        TRACE("orig_pos: %d, backmap: %d, attached unicode enc: %d\n", sc->orig_pos, n, sc->unicodeenc );
		        TRACE("ToRealString: INVALID CHAR POSITION, name: %s\n", sc->name );
		    }
		}

		out->sc = sc;
		out->isSelected = isSelected;
		out->currentGlyphIndex = currentGlyphIndex;
                out->n = n;
		out++;
		continue;
	    }
	    // no glyph of that name: fall through and treat ch as a plain character
	}

	/* If we reach this point, we're looking based on codepoint. */
	SplineChar* sc = SFGetOrMakeChar( sf, (int)ch, 0 );
	out->sc = sc;
	out->isSelected = isSelected;
	out->currentGlyphIndex = currentGlyphIndex;
	out++;
    }

    free(input);
    return(ret);
}
Exemple #9
0
/* Instead of having a separate pass for 'special' patterns, reintegrate the two
 * so we don't get bitten by preflight bugs again.  We can be reasonably efficient
 * without two separate code paths, this code isn't that performance-critical.
 *
 * This code is general enough to deal with patterns that have a prefix or swap the
 * language and remainder components, since we gave developers enough rope to do such
 * things if they futz with the pattern data.  But since we don't give them a way to
 * specify a pattern for arbitrary combinations of components, there's not much use in
 * that.  I don't think our data includes such patterns, the only variable I know of is
 * whether there is a space before the open paren, or not.  Oh, and zh uses different
 * chars than the standard open/close paren (which ja and ko use, btw).
 *
 * Builds the display name of 'locale' in 'dest' by combining the display language
 * ({0} of the pattern) with the script, country, variant and keyword display
 * strings ({1} of the pattern, joined with the separator).
 *
 * locale         locale to describe; passed to the uloc_getDisplay* functions and
 *                to uloc_openKeywords()
 * displayLocale  locale whose resource bundle supplies the pattern and separator;
 *                also passed on to the uloc_getDisplay* functions
 * dest           output buffer; may be NULL only when destCapacity is 0
 * destCapacity   capacity of dest in UChars, must not be negative
 * pErrorCode     in/out error code; nothing is done if it is NULL or already
 *                indicates a failure. Set to U_ILLEGAL_ARGUMENT_ERROR for bad
 *                arguments or for a separator/pattern lacking {0} or {1}.
 *
 * Returns 0 on those early errors, otherwise the result of
 * u_terminateUChars(dest, destCapacity, length, pErrorCode), where length is the
 * full length of the formatted name even if it did not fit into dest.
 */
U_CAPI int32_t U_EXPORT2
uloc_getDisplayName(const char *locale,
                    const char *displayLocale,
                    UChar *dest, int32_t destCapacity,
                    UErrorCode *pErrorCode)
{
    static const UChar defaultSeparator[9] = { 0x007b, 0x0030, 0x007d, 0x002c, 0x0020, 0x007b, 0x0031, 0x007d, 0x0000 }; /* "{0}, {1}" */
    static const UChar sub0[4] = { 0x007b, 0x0030, 0x007d , 0x0000 } ; /* {0} */
    static const UChar sub1[4] = { 0x007b, 0x0031, 0x007d , 0x0000 } ; /* {1} */
    static const int32_t subLen = 3;
    static const UChar defaultPattern[10] = {
        0x007b, 0x0030, 0x007d, 0x0020, 0x0028, 0x007b, 0x0031, 0x007d, 0x0029, 0x0000
    }; /* {0} ({1}) */
    static const int32_t defaultPatLen = 9;
    static const int32_t defaultSub0Pos = 0;
    static const int32_t defaultSub1Pos = 5;

    int32_t length; /* of formatted result */

    const UChar *separator;
    int32_t sepLen = 0;
    const UChar *pattern;
    int32_t patLen = 0;
    int32_t sub0Pos, sub1Pos;
    
    /* Parentheses occurring inside a component are replaced by brackets so they
     * cannot be confused with the parentheses of the pattern itself. */
    UChar formatOpenParen         = 0x0028; // (
    UChar formatReplaceOpenParen  = 0x005B; // [
    UChar formatCloseParen        = 0x0029; // )
    UChar formatReplaceCloseParen = 0x005D; // ]

    UBool haveLang = TRUE; /* assume true, set false if we find we don't have
                              a lang component in the locale */
    UBool haveRest = TRUE; /* assume true, set false if we find we don't have
                              any other component in the locale */
    UBool retry = FALSE; /* set true if we need to retry, see below */

    int32_t langi = 0; /* index of the language substitution (0 or 1), virtually always 0 */

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* Fetch separator and pattern from the display locale's bundle. A local
     * status is used, so lookup failures are not reported to the caller; they
     * just leave sepLen/patLen at 0 and the defaults are used below. */
    {
        UErrorCode status = U_ZERO_ERROR;
        UResourceBundle* locbundle=ures_open(U_ICUDATA_LANG, displayLocale, &status);
        UResourceBundle* dspbundle=ures_getByKeyWithFallback(locbundle, _kLocaleDisplayPattern,
                                                             NULL, &status);

        separator=ures_getStringByKeyWithFallback(dspbundle, _kSeparator, &sepLen, &status);
        pattern=ures_getStringByKeyWithFallback(dspbundle, _kPattern, &patLen, &status);

        ures_close(dspbundle);
        ures_close(locbundle);
    }

    /* If we couldn't find any data, then use the defaults */
    if(sepLen == 0) {
       separator = defaultSeparator;
    }
    /* #10244: Even though separator is now a pattern, it is awkward to handle it as such
     * here since we are trying to build the display string in place in the dest buffer,
     * and to handle it as a pattern would entail having separate storage for the
     * substrings that need to be combined (the first of which may be the result of
     * previous such combinations). So for now we continue to treat the portion between
     * {0} and {1} as a string to be appended when joining substrings, ignoring anything
     * that is before {0} or after {1} (no existing separator pattern has any such thing).
     * This is similar to how pattern is handled below.
     */
    {
        UChar *p0=u_strstr(separator, sub0);
        UChar *p1=u_strstr(separator, sub1);
        if (p0==NULL || p1==NULL || p1<p0) {
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
        /* from here on separator/sepLen describe only the text between {0} and {1} */
        separator = (const UChar *)p0 + subLen;
        sepLen = p1 - separator;
    }

    if(patLen==0 || (patLen==defaultPatLen && !u_strncmp(pattern, defaultPattern, patLen))) {
        pattern=defaultPattern;
        patLen=defaultPatLen;
        sub0Pos=defaultSub0Pos;
        sub1Pos=defaultSub1Pos;
        // use default formatOpenParen etc. set above
    } else { /* non-default pattern */
        UChar *p0=u_strstr(pattern, sub0);
        UChar *p1=u_strstr(pattern, sub1);
        if (p0==NULL || p1==NULL) {
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
        sub0Pos=p0-pattern;
        sub1Pos=p1-pattern;
        /* sub0Pos is kept as the position of whichever substitution comes first
         * in the pattern; langi records which of the two is the language. */
        if (sub1Pos < sub0Pos) { /* a very odd pattern */
            int32_t t=sub0Pos; sub0Pos=sub1Pos; sub1Pos=t;
            langi=1;
        }
        if (u_strchr(pattern, 0xFF08) != NULL) {
            formatOpenParen         = 0xFF08; // fullwidth (
            formatReplaceOpenParen  = 0xFF3B; // fullwidth [
            formatCloseParen        = 0xFF09; // fullwidth )
            formatReplaceCloseParen = 0xFF3D; // fullwidth ]
        }
    }

    /* We loop here because there is one case in which after the first pass we could need to
     * reextract the data.  If there's initial padding before the first element, we put in
     * the padding and then write that element.  If it turns out there's no second element,
     * we didn't need the padding.  If we do need the data (no preflight), and the first element
     * would have fit but for the padding, we need to reextract.  In this case (only) we
     * adjust the parameters so padding is not added, and repeat.
     */
    do {
        UChar* p=dest;
        int32_t patPos=0; /* position in the pattern, used for non-substitution portions */
        int32_t langLen=0; /* length of language substitution */
        int32_t langPos=0; /* position in output of language substitution */
        int32_t restLen=0; /* length of 'everything else' substitution */
        int32_t restPos=0; /* position in output of 'everything else' substitution */
        UEnumeration* kenum = NULL; /* keyword enumeration */

        /* prefix of pattern, extremely likely to be empty */
        if(sub0Pos) {
            if(destCapacity >= sub0Pos) {
                while (patPos < sub0Pos) {
                    *p++ = pattern[patPos++];
                }
            } else {
                patPos=sub0Pos;
            }
            length=sub0Pos;
        } else {
            length=0;
        }

        /* subi is the substitution being filled in; resti counts the components
         * of the 'rest' substitution fetched so far (0 script, 1 country,
         * 2 variant, 3 and up keywords). */
        for(int32_t subi=0,resti=0;subi<2;) { /* iterate through patterns 0 and 1*/
            UBool subdone = FALSE; /* set true when ready to move to next substitution */

            /* prep p and cap for calls to get display components, pin cap to 0 since
               they complain if cap is negative */
            int32_t cap=destCapacity-length;
            if (cap <= 0) {
                cap=0;
            } else {
                p=dest+length;
            }

            if (subi == langi) { /* {0}*/
                if(haveLang) {
                    langPos=length;
                    langLen=uloc_getDisplayLanguage(locale, displayLocale, p, cap, pErrorCode);
                    length+=langLen;
                    haveLang=langLen>0;
                }
                subdone=TRUE;
            } else { /* {1} */
                if(!haveRest) {
                    subdone=TRUE;
                } else {
                    int32_t len; /* length of component (plus other stuff) we just fetched */
                    switch(resti++) {
                        case 0:
                            restPos=length;
                            len=uloc_getDisplayScriptInContext(locale, displayLocale, p, cap, pErrorCode);
                            break;
                        case 1:
                            len=uloc_getDisplayCountry(locale, displayLocale, p, cap, pErrorCode);
                            break;
                        case 2:
                            len=uloc_getDisplayVariant(locale, displayLocale, p, cap, pErrorCode);
                            break;
                        case 3:
                            kenum = uloc_openKeywords(locale, pErrorCode);
                            /* fall through */
                        default: {
                            /* one keyword per pass; the enumeration is closed
                               once it is exhausted */
                            const char* kw=uenum_next(kenum, &len, pErrorCode);
                            if (kw == NULL) {
                                uenum_close(kenum);
                                len=0; /* mark that we didn't add a component */
                                subdone=TRUE;
                            } else {
                                /* incorporating this behavior into the loop made it even more complex,
                                   so just special case it here */
                                len = uloc_getDisplayKeyword(kw, displayLocale, p, cap, pErrorCode);
                                if(len) {
                                    if(len < cap) {
                                        p[len]=0x3d; /* '=', assume we'll need it */
                                    }
                                    len+=1;

                                    /* adjust for call to get keyword */
                                    cap-=len;
                                    if(cap <= 0) {
                                        cap=0;
                                    } else {
                                        p+=len;
                                    }
                                }
                                /* reset for call below */
                                if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
                                    *pErrorCode=U_ZERO_ERROR;
                                }
                                int32_t vlen = uloc_getDisplayKeywordValue(locale, kw, displayLocale,
                                                                           p, cap, pErrorCode);
                                if(len) {
                                    if(vlen==0) {
                                        --len; /* remove unneeded '=' */
                                    }
                                    /* restore cap and p to what they were at start */
                                    cap=destCapacity-length;
                                    if(cap <= 0) {
                                        cap=0;
                                    } else {
                                        p=dest+length;
                                    }
                                }
                                len+=vlen; /* total we added for key + '=' + value */
                            }
                        } break;
                    } /* end switch */

                    if (len>0) {
                        /* we added a component, so add separator and write it if there's room. */
                        if(len+sepLen<=cap) {
                            const UChar * plimit = p + len;
                            for (; p < plimit; p++) {
                                if (*p == formatOpenParen) {
                                    *p = formatReplaceOpenParen;
                                } else if (*p == formatCloseParen) {
                                    *p = formatReplaceCloseParen;
                                }
                            }
                            for(int32_t i=0;i<sepLen;++i) {
                                *p++=separator[i];
                            }
                        }
                        length+=len+sepLen;
                    } else if(subdone) {
                        /* remove separator if we added it */
                        if (length!=restPos) {
                            length-=sepLen;
                        }
                        restLen=length-restPos;
                        haveRest=restLen>0;
                    }
                }
            }

            /* an overflow only means the component did not fit; length keeps
               counting so the required size can still be reported */
            if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
                *pErrorCode=U_ZERO_ERROR;
            }

            if(subdone) {
                if(haveLang && haveRest) {
                    /* append internal portion of pattern, the first time,
                       or last portion of pattern the second time */
                    int32_t padLen;
                    patPos+=subLen;
                    padLen=(subi==0 ? sub1Pos : patLen)-patPos;
                    if(length+padLen < destCapacity) {
                        p=dest+length;
                        for(int32_t i=0;i<padLen;++i) {
                            *p++=pattern[patPos++];
                        }
                    } else {
                        patPos+=padLen;
                    }
                    length+=padLen;
                } else if(subi==0) {
                    /* don't have first component, reset for second component */
                    sub0Pos=0;
                    length=0;
                } else if(length>0) {
                    /* true length is the length of just the component we got. */
                    length=haveLang?langLen:restLen;
                    if(dest && sub0Pos!=0) {
                        if (sub0Pos+length<=destCapacity) {
                            /* first component not at start of result,
                               but we have full component in buffer. */
                            u_memmove(dest, dest+(haveLang?langPos:restPos), length);
                        } else {
                            /* would have fit, but didn't because of pattern prefix. */
                            sub0Pos=0; /* stops initial padding (and a second retry,
                                          so we won't end up here again) */
                            retry=TRUE;
                        }
                    }
                }

                ++subi; /* move on to next substitution */
            }
        }
    } while(retry);

    return u_terminateUChars(dest, destCapacity, length, pErrorCode);
}
Exemple #10
0
/* Guess the character set described by a font's charset/encoding name.
 *
 * The name is searched for well known fragments with uc_strstrmatch(). The
 * checks run in a fixed order and the first one that applies decides the
 * result, so e.g. a name containing both "iso" and "10646" is unicode before
 * "8859" is even looked at. Returns em_none when nothing matches.
 */
enum charset _GDraw_ParseMapping(unichar_t *setname) {
    struct fragment_map { const char *fragment; enum charset set; };
    /* Names decided by one fragment, checked after the unicode and */
    /*  iso8859-N forms. Several are rough guesses (a bare script name */
    /*  does not really identify an encoding) */
    static const struct fragment_map early_names[] = {
	{ "latin1", em_iso8859_1 },
	{ "latin2", em_iso8859_2 },
	{ "latin3", em_iso8859_3 },
	{ "latin4", em_iso8859_4 },
	{ "latin5", em_iso8859_9 },
	{ "latin6", em_iso8859_10 },
	{ "latin7", em_iso8859_13 },
	{ "latin8", em_iso8859_14 },
	{ "latin0", em_iso8859_15 },
	{ "latin9", em_iso8859_15 },
	{ "koi8", em_koi8_r },
	{ "cyrillic", em_iso8859_5 },
	{ "greek", em_iso8859_7 },
	{ "arabic", em_iso8859_6 },
	{ "hebrew", em_iso8859_8 },
	{ "thai", em_iso8859_11 },
	{ "tis", em_iso8859_11 }
    };
    /* Single fragment names checked after the CJK standards */
    static const struct fragment_map late_names[] = {
	{ "big5", em_big5 },
	{ "mac", em_mac },
	{ "win", em_win }
    };
    unichar_t *pt;
    int val;
    unsigned int i;

    /* Unicode in its various spellings */
    if ( uc_strstrmatch(setname,"iso")!=NULL && uc_strstrmatch(setname,"10646")!=NULL )
	return( em_unicode );
    if ( uc_strstrmatch(setname,"UnicodePlane")!=NULL ) {
	/* the plane number follows the first '-' */
	pt = u_strchr(setname,'-');
	if ( pt==NULL )
	    return( em_uplane0+1 );
	return( em_uplane0+u_strtol(pt+1,NULL,10) );
    }
    if ( uc_strstrmatch(setname,"unicode")!=NULL )
	return( em_unicode );

    /* iso8859-N, with or without the '-'. Part numbers we don't know */
    /*  about fall through to the checks below */
    if ( uc_strstrmatch(setname,"iso")!=NULL &&
	    (pt = uc_strstrmatch(setname,"8859"))!=NULL ) {
	pt += 4;
	if ( *pt=='-' ) ++pt;
	if ( isdigit(*pt) ) {
	    if ( !isdigit(pt[1]) )
		return( em_iso8859_1+*pt-'1' );
	    val = (pt[0]-'0')*10 + pt[1]-'0';
	    if ( val==10 || val==11 )
		return( em_iso8859_10+val-10 );
	    if ( val>=13 && val<=15 )
		return( em_iso8859_13+val-13 );
	}
    }

    for ( i=0; i<sizeof(early_names)/sizeof(early_names[0]); ++i )
	if ( uc_strstrmatch(setname,early_names[i].fragment)!=NULL )
	    return( early_names[i].set );

    /* Japanese standards, told apart by their number */
    if ( uc_strstrmatch(setname,"jis")!=NULL ) {
	if ( uc_strstrmatch(setname,"201")!=NULL )
	    return( em_jis201 );
	if ( uc_strstrmatch(setname,"208")!=NULL )
	    return( em_jis208 );
	if ( uc_strstrmatch(setname,"212")!=NULL )
	    return( em_jis212 );
	if ( uc_strstrmatch(setname,"213")!=NULL )	/* 213 is not supported */
	    return( em_none );
	return( em_jis208 );
    }

    /* Seem to be several versions of 5601, we want 94x94 */
    if ( uc_strstrmatch(setname,"ksc")!=NULL && uc_strstrmatch(setname,"5601")!=NULL )
	return( em_ksc5601 );

    if ( uc_strstrmatch(setname,"gb")!=NULL && uc_strstrmatch(setname,"2312")!=NULL )
	return( em_gb2312 );

    for ( i=0; i<sizeof(late_names)/sizeof(late_names[0]); ++i )
	if ( uc_strstrmatch(setname,late_names[i].fragment)!=NULL )
	    return( late_names[i].set );

    if ( IsUserMap(setname))
	return( em_user );

    /* Not handled: ascii/iso646 names and the encodings used for */
    /*  postscript japanese fonts (RJSJ, EUC) */
    return( em_none );
}
Exemple #11
0
/*
 * Find the first occurrence of the substring sub in the string s.
 *
 * s, length       string to search; a length of -1 means s is NUL-terminated
 * sub, subLength  substring to look for; a subLength of -1 means sub is
 *                 NUL-terminated
 *
 * Returns s itself if sub is NULL, subLength is less than -1, or sub is empty.
 * Returns NULL if s is NULL, length is less than -1, or there is no match.
 * Otherwise returns a pointer to the first match in s.
 * A candidate match is additionally checked with isMatchAtCPBoundary() and
 * skipped if that fails (per the comments below: when it would split a
 * surrogate pair).
 *
 * The search is written out three times, once for each way the two lengths can
 * be given, so that no string length has to be computed up front for
 * NUL-terminated input.
 */
U_CAPI UChar* U_EXPORT2
u_strFindFirst(const UChar* s, int32_t length,
               const UChar* sub, int32_t subLength) {
    const UChar* start, * p, * q, * subLimit;
    UChar c, cs, cq;

    if (sub == NULL || subLength < -1) {
        return (UChar*) s;
    }
    if (s == NULL || length < -1) {
        return NULL;
    }

    /* remembered for the boundary check, s itself is advanced below */
    start = s;

    if (length < 0 && subLength < 0) {
        /* both strings are NUL-terminated */
        if ((cs = *sub++) == 0) {
            return (UChar*) s;
        }
        if (*sub == 0 && !U16_IS_SURROGATE(cs)) {
            /* the substring consists of a single, non-surrogate BMP code point */
            return u_strchr(s, cs);
        }

        /* cs is the first unit of sub and sub now points at its second unit */
        while ((c = *s++) != 0) {
            if (c == cs) {
                /* found first substring UChar, compare rest */
                p = s;
                q = sub;
                for (; ;) {
                    if ((cq = *q) == 0) {
                        if (isMatchAtCPBoundary(start, s - 1, p, NULL)) {
                            return (UChar*) (s - 1); /* well-formed match */
                        } else {
                            break; /* no match because surrogate pair is split */
                        }
                    }
                    if ((c = *p) == 0) {
                        return NULL; /* no match, and none possible after s */
                    }
                    if (c != cq) {
                        break; /* no match */
                    }
                    ++p;
                    ++q;
                }
            }
        }

        /* not found */
        return NULL;
    }

    if (subLength < 0) {
        subLength = u_strlen(sub);
    }
    if (subLength == 0) {
        return (UChar*) s;
    }

    /* get sub[0] to search for it fast */
    cs = *sub++;
    --subLength;
    /* from here on sub/subLength describe the substring without its first unit */
    subLimit = sub + subLength;

    if (subLength == 0 && !U16_IS_SURROGATE(cs)) {
        /* the substring consists of a single, non-surrogate BMP code point */
        return length < 0 ? u_strchr(s, cs) : u_memchr(s, cs, length);
    }

    if (length < 0) {
        /* s is NUL-terminated */
        while ((c = *s++) != 0) {
            if (c == cs) {
                /* found first substring UChar, compare rest */
                p = s;
                q = sub;
                for (; ;) {
                    if (q == subLimit) {
                        if (isMatchAtCPBoundary(start, s - 1, p, NULL)) {
                            return (UChar*) (s - 1); /* well-formed match */
                        } else {
                            break; /* no match because surrogate pair is split */
                        }
                    }
                    if ((c = *p) == 0) {
                        return NULL; /* no match, and none possible after s */
                    }
                    if (c != *q) {
                        break; /* no match */
                    }
                    ++p;
                    ++q;
                }
            }
        }
    } else {
        const UChar* limit, * preLimit;

        /* subLength was decremented above */
        if (length <= subLength) {
            return NULL; /* s is shorter than sub */
        }

        limit = s + length;

        /* the substring must start before preLimit */
        preLimit = limit - subLength;

        /* no end-of-string test is needed while comparing: a match starting
           before preLimit cannot run past limit */
        while (s != preLimit) {
            c = *s++;
            if (c == cs) {
                /* found first substring UChar, compare rest */
                p = s;
                q = sub;
                for (; ;) {
                    if (q == subLimit) {
                        if (isMatchAtCPBoundary(start, s - 1, p, limit)) {
                            return (UChar*) (s - 1); /* well-formed match */
                        } else {
                            break; /* no match because surrogate pair is split */
                        }
                    }
                    if (*p != *q) {
                        break; /* no match */
                    }
                    ++p;
                    ++q;
                }
            }
        }
    }

    /* not found */
    return NULL;
}
Exemple #12
0
/**
* Testing the discontiguous contractions
*/
static void TestDiscontiguos() {
    const char               *rulestr    =
                            "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
          UChar               rule[50];
          int                 rulelen = u_unescape(rulestr, rule, 50);
    const char               *src[] = {
     "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
    /* base character blocked */
     "XD\\u0300", "XD\\u0300\\u0315",
    /* non blocking combining character */
     "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
     /* blocking combining character */
     "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
     /* contraction prefix */
     "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
     "X\\u0300\\u031A\\u0315",
     /* ends not with a contraction character */
     "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
     "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
    };
    const char               *tgt[] = {
     /* non blocking combining character */
     "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
    /* base character blocked */
     "X D \\u0300", "X D \\u0300\\u0315",
    /* non blocking combining character */
     "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
     /* blocking combining character */
     "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
     /* contraction prefix */
     "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
     "X\\u0300 \\u031A \\u0315",
     /* ends not with a contraction character */
     "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
     "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
    };
          int                 size   = 20;
          UCollator          *coll;
          UErrorCode          status    = U_ZERO_ERROR;
          int                 count     = 0;
          UCollationElements *iter;
          UCollationElements *resultiter;

    coll       = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
    iter       = ucol_openElements(coll, rule, 1, &status);
    resultiter = ucol_openElements(coll, rule, 1, &status);

    if (U_FAILURE(status)) {
        log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
        return;
    }

    while (count < size) {
        UChar  str[20];
        UChar  tstr[20];
        int    strLen = u_unescape(src[count], str, 20);
        UChar *s;

        ucol_setText(iter, str, strLen, &status);
        if (U_FAILURE(status)) {
            log_err("Error opening collation iterator\n");
            return;
        }

        u_unescape(tgt[count], tstr, 20);
        s = tstr;

        log_verbose("count %d\n", count);

        for (;;) {
            uint32_t  ce;
            UChar    *e = u_strchr(s, 0x20);
            if (e == 0) {
                e = u_strchr(s, 0);
            }
            ucol_setText(resultiter, s, (int32_t)(e - s), &status);
            ce = ucol_next(resultiter, &status);
            if (U_FAILURE(status)) {
                log_err("Error manipulating collation iterator\n");
                return;
            }
            while (ce != UCOL_NULLORDER) {
                if (ce != (uint32_t)ucol_next(iter, &status) ||
                    U_FAILURE(status)) {
                    log_err("Discontiguos contraction test mismatch\n");
                    return;
                }
                ce = ucol_next(resultiter, &status);
                if (U_FAILURE(status)) {
                    log_err("Error getting next collation element\n");
                    return;
                }
            }
            s = e + 1;
            if (*e == 0) {
                break;
            }
        }
        ucol_reset(iter);
        backAndForth(iter);
        count ++;
    }
    ucol_closeElements(resultiter);
    ucol_closeElements(iter);
    ucol_close(coll);
}
/**
 * Computes training by extracting statistics from a tagged corpus file.
 *
 * The first line of input_text is read and handed to check_corpus_entry() to
 * choose between the two supported corpus layouts; the stream is then
 * repositioned to where it was on entry. Each token is pushed into a context
 * matrix and add_statistics() is called after every push. A hash table is
 * created only for a form file that is non-NULL; at the end every existing
 * table is written to its file, followed by a "CODE\tFEATURES" line carrying
 * the value 0 for rforms_file and 1 for iforms_file.
 *
 * @param input_text  tagged corpus to read
 * @param rforms_file output file for rforms_table, or NULL to skip that table
 * @param iforms_file output file for iforms_table, or NULL to skip that table
 */
void do_training(U_FILE* input_text,U_FILE* rforms_file,U_FILE* iforms_file){
/* these two hash tables are respectively for simple and compound entries */
struct string_hash_ptr* rforms_table = NULL, *iforms_table = NULL;
if(rforms_file != NULL){
	rforms_table = new_string_hash_ptr(200000);
}
if(iforms_file != NULL){
	iforms_table = new_string_hash_ptr(200000);
}


/* we initialize a contextual matrix */
struct corpus_entry** context = new_context_matrix();
initialize_context_matrix(context);


unichar line[MAX_TAGGED_CORPUS_LINE];

/* check the format of the corpus: peek at the first line, then rewind so
 * that the main loops below see it again */
long previous_file_position = ftell(input_text);
if(u_fgets(line,input_text) == EOF){
	fatal_error("File is empty");
}
fseek(input_text,previous_file_position,SEEK_SET);

int format_corpus = check_corpus_entry(line);

if(format_corpus == 0){
	// the corpus is in the Tagger format, one word per line where line=word/tag
	while(u_fgets(line,input_text) !=EOF){
		if(u_strlen(line) == 0){
			/* an empty line resets the context matrix */
			initialize_context_matrix(context);
		}
		else{
			corpus_entry* entry = new_corpus_entry(line);
			/* a line containing '_' (but not starting with it) is expanded
			 * through extract_simple_words() and each resulting entry is
			 * pushed separately */
			if(u_strchr(line,'_')!=NULL && line[0]!='_'){
				corpus_entry** entries = extract_simple_words(entry);
				free_corpus_entry(entry);
				for(int i=0;entries[i]!=NULL;i++){
					push_corpus_entry(entries[i],context);
					add_statistics(context,rforms_table,iforms_table);
				}
				/* only the array is freed here; the entries themselves were
				 * handed to push_corpus_entry() */
				free(entries);
			}
			else {
				push_corpus_entry(entry,context);
				add_statistics(context,rforms_table,iforms_table);
			}
		}
	}
}
else {
	// the corpus is in the Unitex tagged format, one sentence per line where token={word,lemma.tag}
	/* s is a scratch buffer reused for the text of the current token */
	unichar *tmp,*s = (unichar*)malloc(sizeof(unichar)*(MAX_TAGGED_CORPUS_LINE));
	int current_len,len;
	unsigned int i;
	while(u_fgets(line,input_text) != EOF){
		/* current_len is the offset in line at which the current token starts */
		current_len = 0, len = 0;
		/* extract each token of the sentence */
		for (;;) {
			/* len = 1 + number of characters before the next '}'.
			 * NOTE(review): the u_strchr() result is used without a NULL
			 * check; a line with no '}' left would presumably crash here -
			 * confirm */
			len = 1+u_strlen(line+current_len)-u_strlen(u_strchr(line+current_len,'}'));
			tmp = u_strcpy_sized(s,len-1,line+current_len+1);
			/* NOTE(review): appends an empty string, looks like a no-op - confirm */
			u_strcat(tmp,"\0");
			/* a token whose text is exactly "S" ends the sentence */
			if(u_strcmp(s,"S") == 0)
				break;

			//particular case: '\},\}.PONCT'
			if(line[current_len+2] == '}'){
				/* keep advancing past '}' characters until one is followed
				 * by a space, then re-copy the whole token into s */
				int start = current_len+3;
				do{
					tmp = u_strchr(line+start,'}');
					start += 1+u_strlen(line+start)-u_strlen(tmp);
				}
				while(*(tmp+1) != ' ');
				tmp = u_strcpy_sized(s,start-current_len-1,line+current_len+1);
				u_strcat(tmp,"\0");
				len += start-current_len-3;
			}

			/* format the {XX.YY} into standard tagger format, XX/YY */
			unichar* newline = (unichar*)malloc(sizeof(unichar)*(8096));
			/* a token whose first ',' is immediately followed by another ','
			 * is given the form "," */
			if(u_strchr(s,',')[1] == ','){
				u_strcpy(newline,",");
			}
			else
				u_strcpy_sized(newline,1+u_strlen(s)-u_strlen(u_strchr(s,',')),s);
			/* NOTE(review): newline is both the destination and an argument
			 * of this u_sprintf() call, and u_strrchr() is used as an offset
			 * into s - verify both against the helper implementations */
			u_sprintf(newline,"%S/%S\0",newline,s+u_strrchr(s,'.')+1);
			/* spaces inside the entry are replaced by '_' */
			for(i=0;i<u_strlen(newline);i++){
				if(newline[i] == ' ')
					newline[i] = '_';
			}

			//create corpus entry
			corpus_entry* entry = new_corpus_entry(newline);
			if(u_strchr(newline,'_') != NULL && newline[0] != '_'){
				corpus_entry** entries = extract_simple_words(entry);
				free_corpus_entry(entry);
				for(int j=0;entries[j]!=NULL;j++){
					push_corpus_entry(entries[j],context);
					add_statistics(context,rforms_table,iforms_table);
				}
				free(entries);
			}
			else {
				push_corpus_entry(entry,context);
				add_statistics(context,rforms_table,iforms_table);
			}

			free(newline);
			/* move on to the character after this token */
			current_len += len+1;
		}
		/* each line is one sentence: start the next one with a fresh context */
		initialize_context_matrix(context);
	}
	free(s);
}
free_context_matrix(context);
/* we fill dictionary files with pairs (tuple,value) and then
 * we add a special line "CODE\tFEATURES,.value" in order to
 * specify whether the dictionary contains inflected or raw form tuples*/
unichar* str = u_strdup("");
if(rforms_table != NULL){
	write_keys_values(rforms_table,rforms_table->hash->root,str,rforms_file);
	u_fprintf(rforms_file,"%s,.%d\n","CODE\tFEATURES",0);
	free_string_hash_ptr(rforms_table,NULL);
}
if(iforms_table != NULL){
	write_keys_values(iforms_table,iforms_table->hash->root,str,iforms_file);
	u_fprintf(iforms_file,"%s,.%d\n","CODE\tFEATURES",1);
	free_string_hash_ptr(iforms_table,NULL);
}
free(str);
}
/**
 * Collects locale data from the ICU resource bundle of the given locale and
 * returns it as a Java object array filled through addObject().
 *
 * Items are gathered in this order: first day of week / minimal days in first
 * week, AM/PM markers, eras, local pattern characters, month and weekday
 * names, the eight "DateTimePatterns" strings, decimal pattern characters,
 * NaN and infinity strings, currency symbol and international currency code,
 * and the number, integer, currency and percent patterns. Items that cannot
 * be read are left out; `counter` tracks how many slots the result needs.
 *
 * @param env     JNI environment
 * @param clazz   class forwarded to the currency helpers
 * @param locale  name of the locale whose bundle is opened
 * @param needsTZ when JNI_TRUE, a leading "timezones" entry with a NULL value
 *                is added
 * @return the filled array, or NULL when the bundle, its "calendar" /
 *         "gregorian" tables or the result array cannot be obtained
 */
static jobjectArray getContentImpl(JNIEnv* env, jclass clazz,
        jstring locale, jboolean needsTZ) {

    UErrorCode status = U_ZERO_ERROR;

    const char *loc = env->GetStringUTFChars(locale, NULL);
    if(loc == NULL) {
        // the locale name could not be obtained
        return NULL;
    }
    UResourceBundle *root = ures_openU(NULL, loc, &status);

    env->ReleaseStringUTFChars(locale, loc);
    if(U_FAILURE(status)) {
        LOGI("Error getting resources");
        return NULL;
    }



    jclass obj_class = env->FindClass("java/lang/Object");
    jclass integer_class = env->FindClass("java/lang/Integer");
    jmethodID integerInit = env->GetMethodID(integer_class, "<init>", "(I)V");
    jobjectArray result;

    jobject firstDayOfWeek = NULL;
    jobject minimalDaysInFirstWeek = NULL;
    jobjectArray amPmMarkers = NULL;
    jobjectArray eras = NULL;
    jstring localPatternChars = NULL;
    jobjectArray weekdays = NULL;
    jobjectArray shortWeekdays = NULL;
    jobjectArray months = NULL;
    jobjectArray shortMonths = NULL;
    jstring time_SHORT = NULL;
    jstring time_MEDIUM = NULL;
    jstring time_LONG = NULL;
    jstring time_FULL = NULL;
    jstring date_SHORT = NULL;
    jstring date_MEDIUM = NULL;
    jstring date_LONG = NULL;
    jstring date_FULL = NULL;
    jstring decimalPatternChars = NULL;
    jstring naN = NULL;
    jstring infinity = NULL;
    jstring currencySymbol = NULL;
    jstring intCurrencySymbol = NULL;
    jstring numberPattern = NULL;
    jstring integerPattern = NULL;
    jstring currencyPattern = NULL;
    jstring percentPattern = NULL;

    int counter = 0;

    int firstDayVals[2] = {-1, -1};

    const jchar* nan = (const jchar *)NULL;
    const jchar* inf = (const jchar *)NULL;
    int nanL, infL;

    // pattern letters and separators as UTF-16 code units
    const jchar genericZoneLetter = 0x0076;          // 'v'
    const jchar standardZoneLetter = 0x007A;         // 'z'
    const jchar decimalSeparator[] = {0x002E, 0};    // "."

    // destinations for DateTimePatterns entries 1..7, in resource order
    // (entry 0 is time_FULL and is handled separately below)
    jstring *laterPatterns[] = {
        &time_LONG, &time_MEDIUM, &time_SHORT,
        &date_FULL, &date_LONG, &date_MEDIUM, &date_SHORT
    };
    const int laterPatternCount = sizeof(laterPatterns) / sizeof(laterPatterns[0]);

    const UChar *pattern;
    jchar *pos;
    jchar *patternCopy;
    int patternLength;

    UResourceBundle *gregorian;
    UResourceBundle *gregorianElems;
    UResourceBundle *rootElems;


    // get the resources needed
    rootElems = ures_getByKey(root, "calendar", NULL, &status);
    if(U_FAILURE(status)) {
        ures_close(root);
        return NULL;
    }

    gregorian = ures_getByKey(rootElems, "gregorian", NULL, &status);
    if(U_FAILURE(status)) {
        ures_close(rootElems);
        ures_close(root);
        return NULL;
    }



    // adding the first day of week and minimal days in first week values
    getDayInitVector(env, gregorian, firstDayVals);
    if((firstDayVals[0] != -1) && (firstDayVals[1] != -1)) {
        firstDayOfWeek = env->NewObject(integer_class, integerInit, firstDayVals[0]);
        minimalDaysInFirstWeek = env->NewObject(integer_class, integerInit, firstDayVals[1]);
        // adding First_Day and Minimal_Days integer to the result
        counter += 2;
    }


    // adding ampm string array to the result
    amPmMarkers = getAmPmMarkers(env, gregorian);
    if(amPmMarkers != NULL) {
        counter++;
    }


    // adding eras string array to the result
    eras = getEras(env, gregorian);
    if(eras != NULL) {
        counter++;
    }


    // local pattern chars are initially always the same
    localPatternChars = env->NewStringUTF("GyMdkHmsSEDFwWahKzZ");
    // adding local pattern chars string to the result
    counter++;


    // adding month names string array to the result
    months = getMonthNames(env, gregorian);
    if(months != NULL) {
        counter++;
    }


    // adding short month names string array to the result
    shortMonths = getShortMonthNames(env, gregorian);
    if(shortMonths != NULL) {
        counter++;
    }


    // adding day names string array to the result
    weekdays = getWeekdayNames(env, gregorian);
    if(weekdays != NULL) {
        counter++;
    }


    // adding short day names string array to the result
    shortWeekdays = getShortWeekdayNames(env, gregorian);
    if(shortWeekdays != NULL) {
        counter++;
    }

    // adding date and time format patterns to the result
    gregorianElems = ures_getByKey(gregorian, "DateTimePatterns", NULL, &status);
    if(U_FAILURE(status)) {
        status = U_ZERO_ERROR;
        goto endOfCalendar;
    }

    pattern = ures_getStringByIndex(gregorianElems, 0, &patternLength, &status);
    // the status must be checked before pattern / patternLength are used
    if(U_FAILURE(status)) {
        status = U_ZERO_ERROR;
        goto endOfCalendar;
    }
    // there are some patterns in icu that use the pattern character 'v'
    // java doesn't accept this, so it gets replaced by 'z' which has
    // about the same result as 'v', the timezone name.
    // 'v' -> "PT", 'z' -> "PST", v is the generic timezone and z the standard tz
    // "vvvv" -> "Pacific Time", "zzzz" -> "Pacific Standard Time"
    patternCopy = (jchar *) malloc((patternLength + 1) * sizeof(jchar));
    if(patternCopy == NULL) {
        goto endOfCalendar;
    }
    u_strcpy(patternCopy, pattern);
    while((pos = u_strchr(patternCopy, genericZoneLetter)) != NULL) {
        *pos = standardZoneLetter;
    }
    time_FULL = env->NewString(patternCopy, patternLength);
    free(patternCopy);
    counter++;

    // the remaining seven patterns are taken over unchanged; the first one
    // that cannot be read ends the calendar section
    for(int i = 0; i < laterPatternCount; i++) {
        pattern = ures_getStringByIndex(gregorianElems, i + 1, &patternLength, &status);
        if(U_FAILURE(status)) {
            status = U_ZERO_ERROR;
            goto endOfCalendar;
        }
        *laterPatterns[i] = env->NewString(pattern, patternLength);
        counter++;
    }


endOfCalendar:

    if(gregorianElems != NULL) {
        ures_close(gregorianElems);
    }
    ures_close(gregorian);
    ures_close(rootElems);


    rootElems = ures_getByKey(root, "NumberElements", NULL, &status);
    if(U_FAILURE(status)) {
        status = U_ZERO_ERROR;
    }

    if(ures_getSize(rootElems) >= 11) {

        // adding decimal pattern chars to the result
        decimalPatternChars = getDecimalPatternChars(env, rootElems);
        if(decimalPatternChars != NULL) {
            counter++;
        }

        // adding NaN pattern char to the result
        nan = ures_getStringByIndex(rootElems, 10, &nanL, &status);
        if(U_SUCCESS(status)) {
            naN = env->NewString(nan, nanL);
            counter++;
        }
        status = U_ZERO_ERROR;

        // adding infinity pattern char to the result
        inf = ures_getStringByIndex(rootElems, 9, &infL, &status);
        if(U_SUCCESS(status)) {
            infinity = env->NewString(inf, infL);
            counter++;
        }
        status = U_ZERO_ERROR;
    }

    ures_close(rootElems);


    // adding intl currency code to result
    intCurrencySymbol = getIntCurrencyCode(env, clazz, locale);
    if(intCurrencySymbol != NULL) {
        // adding currency symbol to result
        currencySymbol = getCurrencySymbol(env, clazz, locale, intCurrencySymbol);
    } else {
        intCurrencySymbol = env->NewStringUTF("XXX");
    }
    if(currencySymbol == NULL) {
        currencySymbol = env->NewStringUTF("\u00a4");
    }
    counter += 2;


    // adding number format patterns to the result
    int numOfEntries;
    int decSepOffset;

    rootElems = ures_getByKey(root, "NumberPatterns", NULL, &status);
    if(U_FAILURE(status)) {
        status = U_ZERO_ERROR;
        goto zones;
    }

    numOfEntries = ures_getSize(rootElems);
    if(numOfEntries < 3) {
        ures_close(rootElems);
        goto zones;
    }

    // number pattern
    pattern = ures_getStringByIndex(rootElems, 0, &patternLength, &status);
    if(U_FAILURE(status)) {
        status = U_ZERO_ERROR;
        ures_close(rootElems);
        goto zones;
    }
    numberPattern = env->NewString(pattern, patternLength);
    counter++;

    // integer pattern derived from number pattern: everything before the
    // first '.'; NewString takes an explicit length, so no copy is needed
    decSepOffset = u_strcspn(pattern, decimalSeparator);
    integerPattern = env->NewString(pattern, decSepOffset);
    counter++;

    // currency pattern
    pattern = ures_getStringByIndex(rootElems, 1, &patternLength, &status);
    if(U_FAILURE(status)) {
        status = U_ZERO_ERROR;
        ures_close(rootElems);
        goto zones;
    }
    currencyPattern = env->NewString(pattern, patternLength);
    counter++;

    // percent pattern
    pattern = ures_getStringByIndex(rootElems, 2, &patternLength, &status);
    if(U_FAILURE(status)) {
        status = U_ZERO_ERROR;
        ures_close(rootElems);
        goto zones;
    }
    percentPattern = env->NewString(pattern, patternLength);
    counter++;

    ures_close(rootElems);

zones:

    ures_close(root);


    if(needsTZ == JNI_TRUE) {
        counter++; //add empty timezone
    }



    // collect all content and put it into an array
    result = env->NewObjectArray(counter, obj_class, NULL);
    if(result == NULL) {
        return NULL;
    }

    int index = 0;

    if(needsTZ == JNI_TRUE) {
        addObject(env, result, "timezones", NULL, index++);
    }
    if(firstDayOfWeek != NULL && index < counter) {
        addObject(env, result, "First_Day", firstDayOfWeek, index++);
    }
    if(minimalDaysInFirstWeek != NULL && index < counter) {
        addObject(env, result, "Minimal_Days", minimalDaysInFirstWeek, index++);
    }
    if(amPmMarkers != NULL && index < counter) {
        addObject(env, result, "ampm", amPmMarkers, index++);
    }
    if(eras != NULL && index < counter) {
        addObject(env, result, "eras", eras, index++);
    }
    if(localPatternChars != NULL && index < counter) {
        addObject(env, result, "LocalPatternChars", localPatternChars, index++);
    }
    if(weekdays != NULL && index < counter) {
        addObject(env, result, "weekdays", weekdays, index++);
    }
    if(shortWeekdays != NULL && index < counter) {
        addObject(env, result, "shortWeekdays", shortWeekdays, index++);
    }
    if(months != NULL && index < counter) {
        addObject(env, result, "months", months, index++);
    }
    if(shortMonths != NULL && index < counter) {
        addObject(env, result, "shortMonths", shortMonths, index++);
    }
    if(time_SHORT != NULL && index < counter) {
        addObject(env, result, "Time_SHORT", time_SHORT, index++);
    }
    if(time_MEDIUM != NULL && index < counter) {
        addObject(env, result, "Time_MEDIUM", time_MEDIUM, index++);
    }
    if(time_LONG != NULL && index < counter) {
        addObject(env, result, "Time_LONG", time_LONG, index++);
    }
    if(time_FULL != NULL && index < counter) {
        addObject(env, result, "Time_FULL", time_FULL, index++);
    }
    if(date_SHORT != NULL && index < counter) {
        addObject(env, result, "Date_SHORT", date_SHORT, index++);
    }
    if(date_MEDIUM != NULL && index < counter) {
        addObject(env, result, "Date_MEDIUM", date_MEDIUM, index++);
    }
    if(date_LONG != NULL && index < counter) {
        addObject(env, result, "Date_LONG", date_LONG, index++);
    }
    if(date_FULL != NULL && index < counter) {
        addObject(env, result, "Date_FULL", date_FULL, index++);
    }
    if(decimalPatternChars != NULL && index < counter) {
        addObject(env, result, "DecimalPatternChars", decimalPatternChars, index++);
    }
    if(naN != NULL && index < counter) {
        addObject(env, result, "NaN", naN, index++);
    }
    if(infinity != NULL && index < counter) {
        addObject(env, result, "Infinity", infinity, index++);
    }
    if(currencySymbol != NULL && index < counter) {
        addObject(env, result, "CurrencySymbol", currencySymbol, index++);
    }
    if(intCurrencySymbol != NULL && index < counter) {
        addObject(env, result, "IntCurrencySymbol", intCurrencySymbol, index++);
    }
    if(numberPattern != NULL && index < counter) {
        addObject(env, result, "Number", numberPattern, index++);
    }
    if(integerPattern != NULL && index < counter) {
        addObject(env, result, "Integer", integerPattern, index++);
    }
    if(currencyPattern != NULL && index < counter) {
        addObject(env, result, "Currency", currencyPattern, index++);
    }
    if(percentPattern != NULL && index < counter) {
        addObject(env, result, "Percent", percentPattern, index++);
    }

    return result;

}
Exemple #15
0
U_NAMESPACE_BEGIN

// -------------------------------------

/**
 * Creates a RuleBasedBreakIterator for a locale from compiled break data.
 *
 * The locale's break-iterator bundle is opened, the "boundaries" table is
 * looked up, and the entry named by `type` yields a file name of the form
 * "name.ext". Name and extension are handed to udata_open() and the resulting
 * data is wrapped in a RuleBasedBreakIterator.
 *
 * @param loc    locale to open the bundle for
 * @param type   key inside the "boundaries" table
 * @param kind   value passed to setBreakType() on the new iterator
 * @param status in/out error code; nothing is done if it already holds a
 *               failure
 * @return the new iterator, or NULL on failure (status is then a failure
 *         code; U_MEMORY_ALLOCATION_ERROR if only the allocation failed)
 */
BreakIterator*
BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status)
{
    char fnbuff[256];
    char ext[4]={'\0'};
    char actualLocale[ULOC_FULLNAME_CAPACITY];
    int32_t size;
    const UChar* brkfname = NULL;
    UResourceBundle brkRulesStack;
    UResourceBundle brkNameStack;
    UResourceBundle *brkRules = &brkRulesStack;
    UResourceBundle *brkName  = &brkNameStack;
    RuleBasedBreakIterator *result = NULL;

    if (U_FAILURE(status))
        return NULL;

    ures_initStackObject(brkRules);
    ures_initStackObject(brkName);

    // Get the locale
    UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, loc.getName(), &status);
    /* this is a hack for now. Should be fixed when the data is fetched from
        brk_index.txt */
    if(status==U_USING_DEFAULT_WARNING){
        status=U_ZERO_ERROR;
        ures_openFillIn(b, U_ICUDATA_BRKITR, "", &status);
    }

    // Get the "boundaries" array.
    if (U_SUCCESS(status)) {
        brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status);
        // Get the string object naming the rules file
        brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status);
        // Get the actual string
        brkfname = ures_getString(brkName, &size, &status);
        U_ASSERT((size_t)size<sizeof(fnbuff));
        // Runtime guard for builds where U_ASSERT compiles to nothing
        if ((size_t)size>=sizeof(fnbuff)) {
            size=0;
            if (U_SUCCESS(status)) {
                status = U_BUFFER_OVERFLOW_ERROR;
            }
        }

        // Use the string if we found it
        if (U_SUCCESS(status) && brkfname) {
            uprv_strncpy(actualLocale,
                ures_getLocale(brkName, &status),
                sizeof(actualLocale)/sizeof(actualLocale[0]));
            // guarantee termination in case the copy filled the whole buffer
            actualLocale[sizeof(actualLocale)/sizeof(actualLocale[0]) - 1] = 0;

            UChar* extStart=u_strchr(brkfname, 0x002e);
            int len = 0;
            if(extStart!=NULL){
                len = (int)(extStart-brkfname);
                // Copy only the characters that really follow the '.', and at
                // most sizeof(ext)-1 of them: ext was zero-initialised, so it
                // stays nul terminated and nothing past the string is read.
                int32_t extLen = size - len - 1;
                if (extLen > (int32_t)sizeof(ext) - 1) {
                    extLen = (int32_t)sizeof(ext) - 1;
                }
                u_UCharsToChars(extStart+1, ext, extLen);
                u_UCharsToChars(brkfname, fnbuff, len);
            }
            fnbuff[len]=0; // nul terminate
        }
    }

    ures_close(brkRules);
    ures_close(brkName);

    UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status);
    if (U_FAILURE(status)) {
        ures_close(b);
        return NULL;
    }

    // Create a RuleBasedBreakIterator
    result = new RuleBasedBreakIterator(file, status);

    // If there is a result, set the valid locale and actual locale, and the kind
    if (U_SUCCESS(status) && result != NULL) {
        U_LOCALE_BASED(locBased, *(BreakIterator*)result);
        locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), actualLocale);
        result->setBreakType(kind);
    }

    ures_close(b);

    if (U_FAILURE(status) && result != NULL) {  // Sometimes redundant check, but simple
        delete result;
        return NULL;
    }

    if (result == NULL) {
        // no iterator was created to take the data, so close it here
        udata_close(file);
        if (U_SUCCESS(status)) {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
    }

    return result;
}
Exemple #16
0
static void GIOdispatch(GIOControl *gc, enum giofuncs gf) {
    unichar_t *temp, *pt, *tpt;
    int i;

    gc->gf = gf;

    if ( _GIO_stdfuncs.useragent == NULL )
	_GIO_stdfuncs.useragent = copy("*****@*****.**");

    temp = _GIO_translateURL(gc->path,gf);
    if ( temp!=NULL ) {
	if ( gc->origpath==NULL )
	    gc->origpath = gc->path;
	else
	    free(gc->path);
	gc->path = temp;
    }
    if ( gc->topath!=NULL ) {
	temp = _GIO_translateURL(gc->topath,gf);
	if ( temp!=NULL ) {
	    free(gc->topath);
	    gc->topath = temp;
	}
	if ( gf==gf_renamefile ) {
	    if (( pt = uc_strstr(gc->path,"://"))== NULL )
		pt = gc->path;
	    else {
		pt=u_strchr(pt+3,'/');
		if ( pt==NULL ) pt = gc->path+u_strlen(gc->path);
	    }
	    if (( tpt = uc_strstr(gc->topath,"://"))== NULL )
		tpt = gc->topath;
	    else {
		tpt=u_strchr(tpt+3,'/');
		if ( tpt==NULL ) tpt = gc->topath+u_strlen(gc->topath);
	    }
	    if ( tpt-gc->topath!=pt-gc->path ||
		    u_strnmatch(gc->path,gc->topath,pt-gc->path)!=0 ) {
		_GIO_reporterror(gc,EXDEV);
return;
	    }
	}
    }

    pt = uc_strstr(gc->path,"://");
    if ( pt!=NULL ) {
	for ( i=0; i<plen; ++i )
	    if ( u_strnmatch(protocols[i].proto,gc->path,pt-gc->path)==0 )
	break;
	if ( i>=plen && !AddProtocol(gc->path,pt-gc->path) ) {
	    gc->protocol_index = -2;
	    gc->return_code = 501;
	    gc->error = err501;
	    uc_strcpy(gc->status,"No support for browsing: ");
	    u_strncpy(gc->status+u_strlen(gc->status), gc->path, pt-gc->path );
	    gc->done = true;
	    (gc->receiveerror)(gc);
return;
	}
	gc->protocol_index = i;
	if ( !protocols[i].dothread )
	    (protocols[i].dispatcher)(gc);
	else {
#ifndef HAVE_PTHREAD_H
	    gc->return_code = 501;
	    gc->error = err501;
	    uc_strcpy(gc->status,"No support for protocol");
	    gc->done = true;
	    (gc->receiveerror)(gc);
return;
#else
	    static pthread_cond_t initcond = PTHREAD_COND_INITIALIZER;
	    static pthread_mutex_t initmutex = PTHREAD_MUTEX_INITIALIZER;
	    /* could put stuff here to queue functions if we get too many */
	    /*  threads, or perhaps even a thread pool */
	    uc_strcpy(gc->status,"Queued");
	    gc->threaddata = (struct gio_threaddata *) malloc(sizeof(struct gio_threaddata));
	    gc->threaddata->mutex = initmutex;
	    gc->threaddata->cond = initcond;
	    if ( _GIO_stdfuncs.gdraw_sync_thread!=NULL )
		(_GIO_stdfuncs.gdraw_sync_thread)(NULL,NULL,NULL);
	    pthread_create(&gc->threaddata->thread,NULL,
		    (ptread_startfunc_t *) (protocols[i].dispatcher), gc);
#endif
	}
    } else {
	gc->protocol_index = -1;
	_GIO_localDispatch(gc);
    }
}
/*
 * This function behaves in the same way as a main one, except that it does
 * not invoke the setBufferMode function.
 *
 * It parses the LocateTfst command line, expects exactly one remaining
 * argument (the grammar), builds the output name from the directory of the
 * .tfst file plus "concord.ind" and calls locate_tfst().
 *
 * Returns SUCCESS_RETURN_CODE when only the usage text or an argument check
 * was requested, USAGE_ERROR_CODE on any invalid option, and otherwise the
 * logical negation of the value returned by locate_tfst().
 *
 * The 'injected' vector is released on every exit path taken after it has
 * been created.
 */
int main_LocateTfst(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
char text[FILENAME_MAX]="";
char alphabet[FILENAME_MAX]="";
int is_korean=0;
int tilde_negation_operator=1;
int selected_negation_operator=0;
int tagging=0;
int single_tags_only=0;
int match_word_boundaries=1;
MatchPolicy match_policy=LONGEST_MATCHES;
OutputPolicy output_policy=IGNORE_OUTPUTS;
AmbiguousOutputPolicy ambiguous_output_policy=ALLOW_AMBIGUOUS_OUTPUTS;
VariableErrorPolicy variable_error_policy=IGNORE_VARIABLE_ERRORS;
int search_limit=NO_MATCH_LIMIT;
char foo;
/* flat list of injected variables: key, value, key, value, ... */
vector_ptr* injected=new_vector_ptr();
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_LocateTfst,lopts_LocateTfst,&index))) {
   switch(val) {
   case 't': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty .tfst name\n");
                free_vector_ptr(injected);
                return USAGE_ERROR_CODE;
             }
             strcpy(text,options.vars()->optarg);
             break;
   case 'a': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty alphabet name\n");
                free_vector_ptr(injected);
                return USAGE_ERROR_CODE;
             }
             strcpy(alphabet,options.vars()->optarg);
             break;
   case 'K': is_korean=1;
             match_word_boundaries=0;
             break;
   case 'l': search_limit=NO_MATCH_LIMIT; break;
   case 'g': if (options.vars()->optarg[0]=='\0') {
                error("You must specify an argument for negation operator\n");
                free_vector_ptr(injected);
                return USAGE_ERROR_CODE;
             }
             /* an explicit choice disables the lookup done after parsing */
             selected_negation_operator=1;
             if ((strcmp(options.vars()->optarg,"minus")==0) || (strcmp(options.vars()->optarg,"-")==0)) {
                 tilde_negation_operator=0;
             }
             else
             if ((strcmp(options.vars()->optarg,"tilde")!=0) && (strcmp(options.vars()->optarg,"~")!=0)) {
                 error("You must specify a valid argument for negation operator\n");
                 free_vector_ptr(injected);
                 return USAGE_ERROR_CODE;
             }
             break;
   case 'n': if (1!=sscanf(options.vars()->optarg,"%d%c",&search_limit,&foo) || search_limit<=0) {
                /* foo is used to check that the search limit is not like "45gjh" */
                error("Invalid search limit argument: %s\n",options.vars()->optarg);
                free_vector_ptr(injected);
                return USAGE_ERROR_CODE;
             }
             break;
   case 'S': match_policy=SHORTEST_MATCHES; break;
   case 'L': match_policy=LONGEST_MATCHES; break;
   case 'A': match_policy=ALL_MATCHES; break;
   case 'I': output_policy=IGNORE_OUTPUTS; break;
   case 'M': output_policy=MERGE_OUTPUTS; break;
   case 'R': output_policy=REPLACE_OUTPUTS; break;
   case 'X': variable_error_policy=EXIT_ON_VARIABLE_ERRORS; break;
   case 'Y': variable_error_policy=IGNORE_VARIABLE_ERRORS; break;
   case 'Z': variable_error_policy=BACKTRACK_ON_VARIABLE_ERRORS; break;
   case 'b': ambiguous_output_policy=ALLOW_AMBIGUOUS_OUTPUTS; break;
   case 'z': ambiguous_output_policy=IGNORE_AMBIGUOUS_OUTPUTS; break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             /* 'injected' already exists at this point and must not leak */
             free_vector_ptr(injected);
             return SUCCESS_RETURN_CODE;
   case 1: tagging=1; break;
   case 2: single_tags_only=1; break;
   case 3: match_word_boundaries=0; break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                free_vector_ptr(injected);
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                free_vector_ptr(injected);
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'v': {
       /* the argument has the form key=value; it is split at the first '=' */
       unichar* key=u_strdup(options.vars()->optarg);
       unichar* value=u_strchr(key,'=');
       if (value==NULL) {
           error("Invalid variable injection: %s\n",options.vars()->optarg);
           /* key was duplicated above and has not been stored anywhere yet */
           free(key);
           free_vector_ptr(injected);
           return USAGE_ERROR_CODE;
       }
       (*value)='\0';
       value++;
       value=u_strdup(value);
       vector_ptr_add(injected,key);
       vector_ptr_add(injected,value);
       break;
   }
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_LocateTfst[index].name);
             free_vector_ptr(injected);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             free_vector_ptr(injected);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

/* exactly one non-option argument (the grammar) must remain */
if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   free_vector_ptr(injected);
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  free_vector_ptr(injected);
  return SUCCESS_RETURN_CODE;
}

if (selected_negation_operator==0) {
    get_graph_compatibility_mode_by_file(&vec,&tilde_negation_operator);
}

char grammar[FILENAME_MAX];
char output[FILENAME_MAX];
strcpy(grammar,argv[options.vars()->optind]);
get_path(text,output);
strcat(output,"concord.ind");

int OK=locate_tfst(text,
                   grammar,
                   alphabet,
                   output,
                   &vec,
                   match_policy,
                   output_policy,
                   ambiguous_output_policy,
                   variable_error_policy,
                   search_limit,
                   is_korean,
                   tilde_negation_operator,
                   injected,
                   tagging,
                   single_tags_only,
                   match_word_boundaries);

free_vector_ptr(injected);

return (!OK);
}
Exemple #18
0
/* Build the text shown on the splash screen and record where each line ends.
 *
 * The introductory message is copied into msg and word-wrapped so that every
 * line measures less than the splash image width minus 10 (measured on
 * splashw after selecting splash_font). A note about the program's former
 * name, a version line carrying build-option suffixes, and a library version
 * line are then appended directly after the wrapped text.
 *
 * lines[] receives one pointer per line boundary: each entry addresses the
 * character just before the first character of the following line (which is
 * why the first entry is msg-1), the last non-NULL entry addresses the end of
 * the text, and the array is closed with NULL. linecnt ends up as the number
 * of non-NULL entries. is/ie are set from searching msg for '(' and ')'.
 */
static void SplashLayout() {
    unichar_t *line_start, *cursor, *last_fit;
    extern const char *source_modtime_str;
    extern const char *source_version_str;

    uc_strcpy(msg, "When my father finished his book on Renaissance printing (The Craft of Printing and the Publication of Shakespeare's Works) he told me that I would have to write the chapter on computer typography. This is my attempt to do so.");

    GDrawSetFont(splashw,splash_font);

    /* The first boundary sits one character before the message itself. */
    lines[0] = msg-1;
    linecnt = 1;

    line_start = msg;
    while ( *line_start!='\0' ) {
        /* Scan forward over candidate break points (spaces and the final */
        /*  NUL), remembering the furthest one that still fits the width.  */
        last_fit = NULL;
        cursor = line_start;
        for (;;) {
            if ( *cursor==' ' || *cursor=='\0' ) {
                if ( GDrawGetTextWidth(splashw,line_start,cursor-line_start,NULL)>=splashimage.u.image->width-10 )
                    break;              /* too wide: stop at this break point */
                last_fit = cursor;
                if ( *cursor=='\0' )
                    break;              /* whole remainder fits */
            }
            ++cursor;
        }
        /* If nothing fit (a single over-long word) the line is broken at */
        /*  the break point where the scan stopped.                       */
        if ( last_fit!=NULL )
            cursor = last_fit;
        lines[linecnt++] = cursor;
        /* Step over the separating space, but never past the terminator. */
        if ( *cursor!='\0' )
            ++cursor;
        line_start = cursor;
    }

    /* cursor now rests on the message's terminating NUL; the extra lines */
    /*  are written from there, each starting with one separator char.    */
    uc_strcpy(cursor, " FontForge used to be named PfaEdit.");
    cursor += u_strlen(cursor);
    lines[linecnt++] = cursor;

    uc_strcpy(cursor,"  Version: ");
    uc_strcat(cursor,source_modtime_str);
    uc_strcat(cursor," (");
    uc_strcat(cursor,source_version_str);
#ifdef FONTFORGE_CONFIG_TYPE3
    uc_strcat(cursor,"-ML");
#endif
#ifdef FREETYPE_HAS_DEBUGGER
    uc_strcat(cursor,"-TtfDb");
#endif
#ifdef _NO_PYTHON
    uc_strcat(cursor,"-NoPython");
#endif
#ifdef FONTFORGE_CONFIG_USE_LONGDOUBLE
    uc_strcat(cursor,"-LD");
#elif defined(FONTFORGE_CONFIG_USE_DOUBLE)
    uc_strcat(cursor,"-D");
#endif
#ifndef FONTFORGE_CONFIG_DEVICETABLES
    uc_strcat(cursor,"-NoDevTab");
#endif
    uc_strcat(cursor,")");
    cursor += u_strlen(cursor);
    lines[linecnt++] = cursor;

    uc_strcpy(cursor,"  Library Version: ");
    uc_strcat(cursor,library_version_configuration.library_source_modtime_string);
    cursor += u_strlen(cursor);
    lines[linecnt++] = cursor;
    lines[linecnt] = NULL;

    /* Remember where the parenthesised part of the message begins/ends. */
    is = u_strchr(msg,'(');
    ie = u_strchr(msg,')');
}