static engine_return_t engine_bin_whole_line_match(error_t **UNUSED(error), void *data, const UString *subject) { FETCH_DATA(data, p, bin_pattern_t); /* If search is case insensitive, we don't do case folding here, u_strcasecmp suffice (it does full case folding internally) */ if (ustring_empty(p->pattern)) { return ustring_empty(subject) ? ENGINE_WHOLE_LINE_MATCH : ENGINE_NO_MATCH; } else { if (IS_CASE_INSENSITIVE(p->flags)) { return (0 == u_strcasecmp(p->pattern->ptr, subject->ptr, 0) ? ENGINE_WHOLE_LINE_MATCH : ENGINE_NO_MATCH); } else { return (0 == u_strcmp(p->pattern->ptr, subject->ptr) ? ENGINE_WHOLE_LINE_MATCH : ENGINE_NO_MATCH); } } }
/**
 * Clears the stored sequence that is exactly equal to 'keyword'.
 *
 * The list to inspect is looked up in 'keywords' under a lower-cased copy
 * of 'keyword'. The first list element whose sequence is non-NULL and equal
 * to 'keyword' has that sequence freed and set to NULL; the element itself
 * stays in the list. Nothing happens if no such element exists.
 */
void remove_keyword(unichar* keyword,struct string_hash_ptr* keywords) {
unichar* key=u_strdup(keyword);
u_tolower(key);
KeyWord* entry=(KeyWord*)get_value(key,keywords);
free(key);
for (;entry!=NULL;entry=entry->next) {
   if (entry->sequence==NULL) {
      continue;
   }
   if (u_strcmp(keyword,entry->sequence)!=0) {
      continue;
   }
   free(entry->sequence);
   entry->sequence=NULL;
   return;
}
}
/* Test u_vformatMessage() with various test patterns. */ static void TestMessageFormatWithValist( void ) { UChar *str; UChar* result; int32_t resultLengthOut,resultlength,i, patternlength; UErrorCode status = U_ZERO_ERROR; UDate d1=1000000000.0; ctest_setTimeZone(NULL, &status); str=(UChar*)malloc(sizeof(UChar) * 7); u_uastrcpy(str, "MyDisk"); resultlength=1; result=(UChar*)malloc(sizeof(UChar) * 1); log_verbose("Testing u_formatMessage90\n"); InitStrings(); for (i = 0; i < cnt_testCases; i++) { status=U_ZERO_ERROR; patternlength=u_strlen(testCasePatterns[i]); resultLengthOut=CallFormatMessage( "en_US",testCasePatterns[i], patternlength, result, resultlength, &status, 1, 3456.00, d1); if(status== U_BUFFER_OVERFLOW_ERROR) { status=U_ZERO_ERROR; resultlength=resultLengthOut+1; result=(UChar*)realloc(result,sizeof(UChar) * resultlength); CallFormatMessage( "en_US",testCasePatterns[i], patternlength, result, resultlength, &status, 1, 3456.00, d1); } if(U_FAILURE(status)){ log_data_err("ERROR: failure in message format on testcase %d: %s (Are you missing data?)\n", i, myErrorName(status) ); } else if(u_strcmp(result, testResultStrings[i])==0){ log_verbose("PASS: MessagFormat successful on testcase : %d\n", i); } else{ log_err("FAIL: Error in MessageFormat on testcase : %d\n GOT %s EXPECTED %s\n", i, austrdup(result), austrdup(testResultStrings[i]) ); } } free(result); free(str); FreeStrings(); ctest_resetTimeZone(); }
/** * Caches the given token sequence in the given cache. Note that * match is supposed to contain a single match, not a match list. */ static void cache_match_internal(struct match_list* match,const int* tab,int start,int end,LocateCache *c,Abstract_allocator prv_alloc) { int token=-1; struct match_list* m=match; if (start<=end) { token=tab[start]; m=NULL; } /* No node */ if (*c==NULL) { *c=new_LocateCache(token,m,prv_alloc); if (token!=-1) { cache_match_internal(match,tab,start+1,end,&((*c)->middle),prv_alloc); } return; } /* There is a node */ if (token<(*c)->token) { /* If we have to move on the left */ return cache_match_internal(match,tab,start,end,&((*c)->left),prv_alloc); } if (token>(*c)->token) { /* If we have to move on the right */ return cache_match_internal(match,tab,start,end,&((*c)->right),prv_alloc); } /* We have the correct token */ if (token==-1) { /* If we are in a final node that already existed, we just add * the new match at the end of the match list to get the same match order as * if the cache system had not been used, but only if the match is not already present */ struct match_list* *ptr=&((*c)->matches); struct match_list* z; match->next=NULL; while ((*ptr)!=NULL) { z=*ptr; if (compare_matches(&(z->m),&(match->m))==A_EQUALS_B && !u_strcmp(z->output,match->output)) { /* We discard a match that was already in cache */ free_match_list_element(match,prv_alloc); return; } ptr=&((*ptr)->next); } (*ptr)=match; return; } cache_match_internal(match,tab,start+1,end,&((*c)->middle),prv_alloc); }
/*
 * Opens the enumeration of names that 'standard' defines for converter
 * 'name' and checks, three times in a row (with a uenum_reset() after each
 * pass), that uenum_unext() yields exactly the 'size' strings of 'expected',
 * in order and with the right length.
 *
 * Returns 0 when the enumeration cannot be used for the comparison (data
 * missing, wrong element count, unexpected object), 1 otherwise. Individual
 * mismatches are reported through log_err() and do not change the return
 * value. The enumeration is closed on every path.
 */
static UBool doTestUCharNames(const char *name, const char *standard, const char **expected, int32_t size) {
    UErrorCode err = U_ZERO_ERROR;
    UEnumeration *myEnum = ucnv_openStandardNames(name, standard, &err);
    int32_t enumCount = uenum_count(myEnum, &err);
    int32_t idx, repeatTimes = 3;

    if (err == U_FILE_ACCESS_ERROR) {
        log_data_err("Unable to open standard names for %s of standard: %s\n", name, standard);
        uenum_close(myEnum);
        return 0;
    }
    if (size != enumCount) {
        log_err("FAIL: different size arrays. Got %d. Expected %d\n", enumCount, size);
        uenum_close(myEnum);
        return 0;
    }
    if (size < 0 && myEnum) {
        log_err("FAIL: size < 0, but recieved an actual object\n");
        uenum_close(myEnum);
        return 0;
    }

    log_verbose("\n%s %s\n", name, standard);
    while (repeatTimes-- > 0) {
        for (idx = 0; idx < enumCount; idx++) {
            UChar testName[256];
            char gotName[256];
            int32_t len = 0;
            const UChar *enumName = uenum_unext(myEnum, &len, &err);

            u_uastrncpy(testName, expected[idx], UPRV_LENGTHOF(testName));
            if (enumName == NULL) {
                /* Nothing to compare against: report it instead of handing NULL to u_strcmp(). */
                log_err("FAIL: uenum_next(%d) returned NULL. expected \"%s\", error=%s\n",
                    idx, expected[idx], u_errorName(err));
            } else if (u_strcmp(enumName, testName) != 0 || U_FAILURE(err)
                || len != (int32_t)uprv_strlen(expected[idx]))
            {
                /* log_err() takes char strings for %s, so the UChar name is converted first. */
                u_austrncpy(gotName, enumName, (int32_t)sizeof(gotName) - 1);
                gotName[sizeof(gotName) - 1] = 0;
                log_err("FAIL: uenum_next(%d) == \"%s\". expected \"%s\", len=%d, error=%s\n",
                    idx, gotName, expected[idx], len, u_errorName(err));
            }
            log_verbose("%s\n", expected[idx]);
            err = U_ZERO_ERROR;
        }

        log_verbose("\n    reset\n");
        uenum_reset(myEnum, &err);
        if (U_FAILURE(err)) {
            log_err("FAIL: uenum_reset() for %s{%s} failed with %s\n",
                name, standard, u_errorName(err));
            err = U_ZERO_ERROR;
        }
    }

    uenum_close(myEnum);
    return 1;
}
/**
 * Gives the dictionary variable 'name' the value 'dic_entry'.
 *
 * If the variable is already in the list, its previous entry is freed and
 * replaced (by a clone of 'dic_entry' when 'must_clone' is non-zero, by
 * 'dic_entry' itself otherwise). If it is absent, a new variable is
 * appended at the end of the list.
 */
void set_dic_variable(const unichar* name,struct dela_entry* dic_entry,struct dic_variable* *list,int must_clone) {
struct dic_variable** slot;
for (slot=list;*slot!=NULL;slot=&((*slot)->next)) {
   struct dic_variable* var=*slot;
   if (u_strcmp(var->name,name)!=0) {
      continue;
   }
   /* Found it: the old value is released before the new one is stored */
   free_dela_entry(var->dic_entry);
   var->dic_entry=must_clone?clone_dela_entry(dic_entry):dic_entry;
   return;
}
/* Not found: 'slot' now designates the NULL link that ends the list */
*slot=new_dic_variable(name,dic_entry,NULL,must_clone);
}
/**
 * Compares two word-list strings after reducing each of them with
 * Wordlist_selectionStringOnly().
 *
 * The two reduced strings are copied into function-static buffers of
 * PATH_MAX elements. The strings are considered different when only one of
 * them is reported as having a selection; otherwise the result is whether
 * the reduced strings are equal.
 */
bool Wordlist_selectionsEqual( unichar_t* s1, unichar_t* s2 )
{
    static unichar_t stripped1[ PATH_MAX ];
    static unichar_t stripped2[ PATH_MAX ];
    int hasSel1 = 0;
    int hasSel2 = 0;

    u_strcpy( stripped1, Wordlist_selectionStringOnly( s1, &hasSel1 ));
    u_strcpy( stripped2, Wordlist_selectionStringOnly( s2, &hasSel2 ));

    /* Exactly one of the two has a selection: they cannot be equal. */
    if( !hasSel1 != !hasSel2 )
	return false;

    return u_strcmp( stripped1, stripped2 ) == 0;
}
// Unwraps the number to format from 'obj'.
// When 'obj' holds a CurrencyAmount, 'num' is pointed at the amount's number
// and, if the amount's ISO currency differs from the formatter's, that
// currency is set on the formatter. The formatter's previous currency code is
// copied into 'save' before setCurrency() is called, and 'setCurr' records
// whether the currency was changed.
ArgExtractor::ArgExtractor(const NumberFormat& nf, const Formattable& obj, UErrorCode& status)
    : ncnf((NumberFormat*) &nf), num(&obj), setCurr(FALSE) {

    const UObject* o = obj.getObject(); // most commonly o==NULL
    if (o == NULL) {
        return;
    }
    const CurrencyAmount* amt = dynamic_cast<const CurrencyAmount*>(o);
    if (amt == NULL) {
        return;
    }

    const UChar* curr = amt->getISOCurrency();
    // Take the copy first: it has to outlive the setCurrency() call below.
    u_strcpy(save, nf.getCurrency());
    setCurr = (u_strcmp(curr, save) != 0);
    if (setCurr) {
        ncnf->setCurrency(curr, status);
    }
    num = &amt->getNumber();
}
static void TestMessageWithUnusedArgNumber() { UErrorCode errorCode = U_ZERO_ERROR; U_STRING_DECL(pattern, "abc {1} def", 11); UChar x[2] = { 0x78, 0 }; // "x" UChar y[2] = { 0x79, 0 }; // "y" U_STRING_DECL(expected, "abc y def", 9); UChar result[20]; int32_t length; U_STRING_INIT(pattern, "abc {1} def", 11); U_STRING_INIT(expected, "abc y def", 9); length = u_formatMessage("en", pattern, -1, result, LENGTHOF(result), &errorCode, x, y); if (U_FAILURE(errorCode) || length != u_strlen(expected) || u_strcmp(result, expected) != 0) { log_err("u_formatMessage(pattern with only {1}, 2 args) failed: result length %d, UErrorCode %s \n", (int)length, u_errorName(errorCode)); } }
/** * Returns 1 if the given DELAF entry is compatible with the given pattern; * 0 otherwise. */ int is_entry_compatible_with_pattern(const struct dela_entry* entry,const struct pattern* pattern) { switch(pattern->type) { case LEMMA_PATTERN: return (!u_strcmp(entry->lemma,pattern->lemma)); case CODE_PATTERN: return is_compatible_code_pattern(entry,pattern); case LEMMA_AND_CODE_PATTERN: return (!u_strcmp(entry->lemma,pattern->lemma)) && is_compatible_code_pattern(entry,pattern); case FULL_PATTERN: return (!u_strcmp(entry->inflected,pattern->inflected)) && (!u_strcmp(entry->lemma,pattern->lemma)) && is_compatible_code_pattern(entry,pattern); case AMBIGUOUS_PATTERN: return !u_strcmp(entry->lemma,pattern->lemma) || dic_entry_contain_gram_code(entry,pattern->lemma); case INFLECTED_AND_LEMMA_PATTERN: return (!u_strcmp(entry->inflected,pattern->inflected)) && (!u_strcmp(entry->lemma,pattern->lemma)); default: fatal_error("Unexpected case in is_entry_compatible_with_pattern\n"); } return 0; }
////////////////////////////////////////////////////////////////////////////////// // Desinstantiates the unification variable "var". int unif_desinstantiate(MultiFlex_ctx* p_multiFlex_ctx,unichar* var) { int v, w, found; found = 0; for (v=0; v<(p_multiFlex_ctx->UNIF_VARS).no_vars; v++) if (!u_strcmp(var,(p_multiFlex_ctx->UNIF_VARS).vars[v].id)) { found = 1; break; } // if found v points to the variable following the one we want to eliminate if (found) { free((p_multiFlex_ctx->UNIF_VARS).vars[v].id); for (w=v+1; w<(p_multiFlex_ctx->UNIF_VARS).no_vars;w++) (p_multiFlex_ctx->UNIF_VARS).vars[w-1] = (p_multiFlex_ctx->UNIF_VARS).vars[w]; (p_multiFlex_ctx->UNIF_VARS).no_vars--; } return 0; }
/**
 * Look up the id stored for a property name.
 * @param hs the hashset to search
 * @param prop the property to find
 * @return the id of the first bucket in the slot's chain whose key equals
 * prop, or 0 if there is none
 */
int hashset_get( hashset *hs, UChar *prop )
{
    unsigned slot = hash(prop, u_strlen(prop))%(unsigned)hs->num_buckets;
    struct hs_bucket *b;

    for ( b=hs->buckets[slot]; b!=NULL; b=b->next )
    {
        if ( u_strcmp(b->key,prop)==0 )
            return b->id;
    }
    return 0;
}
////////////////////////////////////////////////////////////////////////////////// // Desinstantiates the unification variable "var". int unif_desinstantiate(unif_vars_T* UNIF_VARS,unichar* var) { int v, w, found; found = 0; for (v=0; v<UNIF_VARS->no_vars; v++) if (!u_strcmp(var,UNIF_VARS->vars[v].id)) { found = 1; break; } // if found v points to the variable following the one we want to eliminate if (found) { free(UNIF_VARS->vars[v].id); for (w=v+1; w<UNIF_VARS->no_vars;w++) UNIF_VARS->vars[w-1] = UNIF_VARS->vars[w]; UNIF_VARS->no_vars--; } return 0; }
/** * Test localized currency patterns. */ static void TestCurrency(void) { UNumberFormat *currencyFmt; UChar *str; int32_t lneed, i; UFieldPosition pos; UChar res[100]; UErrorCode status = U_ZERO_ERROR; const char* locale[]={"fr_CA", "de_DE_PREEURO", "fr_FR_PREEURO"}; const char* result[]={"1,50\\u00a0$", "1,50\\u00a0DM", "1,50\\u00a0F"}; log_verbose("\nTesting the number format with different currency patterns\n"); for(i=0; i < 3; i++) { str=NULL; currencyFmt = unum_open(UNUM_CURRENCY, NULL,0,locale[i],NULL, &status); if(U_FAILURE(status)){ log_data_err("Error in the construction of number format with style currency: %s (Are you missing data?)\n", myErrorName(status)); } else { lneed=0; lneed= unum_formatDouble(currencyFmt, 1.50, NULL, lneed, NULL, &status); if(status==U_BUFFER_OVERFLOW_ERROR){ status=U_ZERO_ERROR; str=(UChar*)malloc(sizeof(UChar) * (lneed+1) ); pos.field = 0; unum_formatDouble(currencyFmt, 1.50, str, lneed+1, &pos, &status); } if(U_FAILURE(status)) { log_err("Error in formatting using unum_formatDouble(.....): %s\n", myErrorName(status) ); } else { u_unescape(result[i], res, (int32_t)strlen(result[i])+1); if (u_strcmp(str, res) != 0){ log_err("FAIL: Expected %s Got: %s for locale: %s\n", result[i], aescstrdup(str, -1), locale[i]); } } } unum_close(currencyFmt); free(str); } }
/*
 * Tells whether the converter round-trips the euro sign: U+20AC is converted
 * to the converter's charset and back, and the result must be exactly that
 * one code unit.
 *
 * The comparison uses the length returned by ucnv_toUChars() rather than
 * u_strcmp(): when the conversion fills both slots of the two-element buffer
 * no terminating NUL is written (that case is only a warning, not a
 * failure), so the buffer must not be read as a NUL-terminated string.
 *
 * Returns TRUE on a successful round trip, FALSE otherwise (conversion
 * failures are also logged).
 */
UBool isEuroAware(UConverter* myConv)
{
    static const UChar euroString[2] = { 0x20AC, 0x0000 };
    char target[20];
    UChar euroBack[2];
    int32_t targetSize, euroBackSize;
    UErrorCode err = U_ZERO_ERROR;
    /*const char* myName =   ucnv_getName(myConv, &err);*/

    targetSize = ucnv_fromUChars(myConv,
            target,
            (int32_t)sizeof(target),
            euroString,
            -1,
            &err);
    if (U_FAILURE(err))
    {
        log_err("Failure Occured in ucnv_fromUChars euro roundtrip test\n");
        return FALSE;
    }
    euroBackSize = ucnv_toUChars(myConv,
            euroBack,
            (int32_t)(sizeof(euroBack)/sizeof(euroBack[0])),
            target,
            targetSize,
            &err);
    if (U_FAILURE(err))
    {
        log_err("Failure Occured in ucnv_toUChars euro roundtrip test\n");
        return FALSE;
    }

    if (euroBackSize != 1 || euroBack[0] != euroString[0])
    {
        /*      log_err("%s FAILED Euro rountrip\n", myName);*/
        return FALSE;
    }
    /*      log_verbose("%s PASSED Euro rountrip\n", myName);*/
    return TRUE;
}
// Formats 'obj' and appends the text to 'appendTo', which is also returned.
//
// When 'obj' wraps a currency amount whose ISO code differs from this
// formatter's currency, the number is formatted by a clone of this formatter
// that has been switched to that currency. Otherwise the number is dispatched
// on its type (decimal number, double, long, int64).
//
// status is set to U_MEMORY_ALLOCATION_ERROR if the clone cannot be created
// and to U_INVALID_FORMAT_ERROR if 'obj' is of a type that cannot be
// formatted as a number. Nothing is done if status already holds a failure.
UnicodeString&
NumberFormat::format(const Formattable& obj,
                        UnicodeString& appendTo,
                        FieldPositionIterator* posIter,
                        UErrorCode& status) const
{
    if (U_FAILURE(status)) return appendTo;

    ArgExtractor arg(*this, obj, status);
    const Formattable *n = arg.number();
    const UChar *iso = arg.iso();

    if(arg.wasCurrency() && u_strcmp(iso, getCurrency())) {
      // trying to format a different currency.
      // Right now, we clone.
      LocalPointer<NumberFormat> cloneFmt((NumberFormat*)this->clone());
      if (cloneFmt.isNull()) {
        // clone() could not allocate: report it instead of dereferencing NULL.
        status = U_MEMORY_ALLOCATION_ERROR;
        return appendTo;
      }
      cloneFmt->setCurrency(iso, status);
      // next line should NOT recurse, because n is numeric whereas obj was a wrapper around currency amount.
      return cloneFmt->format(*n, appendTo, posIter, status);
    }

    if (n->isNumeric() && n->getDigitList() != NULL) {
        // Decimal Number
        format(*n->getDigitList(), appendTo, posIter, status);
    } else {
        switch (n->getType()) {
        case Formattable::kDouble:
            format(n->getDouble(), appendTo, posIter, status);
            break;
        case Formattable::kLong:
            format(n->getLong(), appendTo, posIter, status);
            break;
        case Formattable::kInt64:
            format(n->getInt64(), appendTo, posIter, status);
            break;
        default:
            status = U_INVALID_FORMAT_ERROR;
            break;
        }
    }

    return appendTo;
}
/** * This function compares two variables. Input and output variables are * considered as text content. For dictionary variables, it's the inflected * form that is taken into account. * * Note 1: you can compare variables of different kinds * Note 2: you can compare a variable to a constant string. To do that, the string * must start with # */ int compare_variables(const unichar* var1,const unichar* var2,struct locate_parameters* p,int case_matters) { int free_v1; unichar* v1=get_var_content_str(var1,p,&free_v1); if (!v1) { return VAR_CMP_ERROR; } int free_v2; unichar* v2=get_var_content_str(var2,p,&free_v2); if (!v2) { if (free_v1) free(v1); return VAR_CMP_ERROR; } int ret=case_matters?u_strcmp(v1,v2):u_strcmp_ignore_case(v1,v2); if (free_v1) free(v1); if (free_v2) free(v2); if (ret==0) { return VAR_CMP_EQUAL; } return VAR_CMP_DIFF; }
static void MessageLength(void) { UErrorCode status = U_ZERO_ERROR; const char patChars[] = {"123{0}456{0}"}; const char expectedChars[] = {"123abc"}; UChar pattern[sizeof(patChars)]; UChar arg[] = {0x61,0x62,0x63,0}; UChar result[128] = {0}; UChar expected[sizeof(expectedChars)]; u_uastrncpy(pattern, patChars, sizeof(pattern)/sizeof(pattern[0])); u_uastrncpy(expected, expectedChars, sizeof(expected)/sizeof(expected[0])); u_formatMessage("en_US", pattern, 6, result, sizeof(result)/sizeof(result[0]), &status, arg); if (U_FAILURE(status)) { log_err("u_formatMessage method failed. Error: %s \n",u_errorName(status)); } if (u_strcmp(result, expected) != 0) { log_err("u_formatMessage didn't return expected result\n"); } }
/*
 * Sets symbol 'idx' of kind 'type' in 'datfor' to 'expected', reads it back
 * with udat_getSymbols() and checks that the two strings are equal.
 *
 * Both temporary buffers are released on every path, including the error
 * ones. If the read-back preflight reports no overflow (nothing to copy),
 * the retrieved symbol is treated as the empty string.
 */
static void VerifysetSymbols(UDateFormat* datfor, UDateFormatSymbolType type, int32_t idx, const char* expected)
{
    UChar *result=NULL;
    UChar *value=NULL;
    UChar emptyString[1] = {0};
    const UChar *got;
    int32_t resultlength, resultlengthout;
    UErrorCode status = U_ZERO_ERROR;

    value=(UChar*)malloc(sizeof(UChar) * (strlen(expected) + 1));
    u_uastrcpy(value, expected);
    udat_setSymbols(datfor, type, idx, value, u_strlen(value), &status);
    if(U_FAILURE(status))
    {
        log_err("FAIL: Error in udat_setSymbols() %s\n", myErrorName(status) );
        goto cleanup;
    }

    /* Preflight with a NULL buffer to learn the needed length. */
    resultlength=0;
    resultlengthout=udat_getSymbols(datfor, type, idx, NULL, resultlength, &status);
    if(status==U_BUFFER_OVERFLOW_ERROR){
        status=U_ZERO_ERROR;
        resultlength=resultlengthout+1;
        result=(UChar*)malloc(sizeof(UChar) * resultlength);
        udat_getSymbols(datfor, type, idx, result, resultlength, &status);
    }
    if(U_FAILURE(status)){
        log_err("FAIL: error in retrieving the value using getSymbols after setting it previously\n %s\n",
            myErrorName(status) );
        goto cleanup;
    }

    got = (result != NULL) ? result : emptyString;
    if(u_strcmp(got, value)!=0){
        log_err("FAIL:Error in setting and then getting symbols\n Expected %s Got %s\n", austrdup(value), austrdup(got) );
    }
    else
        log_verbose("PASS: setSymbols successful\n");

cleanup:
    free(value);
    free(result);
}
unichar *protect_braced_string(const unichar *s){ unichar *result; unichar *stop_sentence; stop_sentence = (unichar*) malloc(sizeof(unichar) * (1 + 1)); if (stop_sentence == NULL) { perror("malloc\n"); fprintf(stderr, "Impossible to allocate memory\n"); exit(1); } u_sprintf(stop_sentence, "S"); if (u_strcmp(stop_sentence, s) == 0) { return stop_sentence; } else { unichar* text = protect_text_in_braced_string(s); unichar* lem = protect_lem_in_braced_string(s); //u_printf("text / lem = %S --- %S\n",text, lem); int length_t = u_strlen(text); int length_l = u_strlen(lem); result = (unichar*) malloc(sizeof(unichar) * (length_t + length_l + 2 + 1)); if (result == NULL) { perror("malloc\n"); fprintf(stderr, "Impossible to allocate memory\n"); exit(1); } u_sprintf(result, "%S,.%S", text, lem); free(lem); free(text); free(stop_sentence); } return result; }
/**
 * Tells whether the tag number 'tag_number' of 'fst2' matches nothing in
 * the text. Returns 1 if so, 0 otherwise.
 *
 * A negative tag number gives 0 and tag number 0 gives 1; other tags are
 * classified by their type, then by their input.
 */
static int matches_E(Fst2* fst2,int tag_number) {
if (tag_number<0) {
   return 0;
}
if (tag_number==0) {
   return 1;
}
Fst2Tag tag=fst2->tags[tag_number];
switch (tag->type) {
   /* WARNING: no default clause here, on purpose! With every value of the
    * tag_type enum listed explicitly, the compiler warns if a value added
    * to the enum later is not handled here. */

   /* These types are decided by the test on the tag's input below */
   case UNDEFINED_TAG:        // used at initialization of a tag
   case META_TAG:             // <MOT>, <MIN>, etc.
   case PATTERN_TAG:          // <be.V>
   case PATTERN_NUMBER_TAG:   // used when patterns have been numbered
   case TOKEN_LIST_TAG:       // used when the tag matches a list of tokens
      break;

   /* All the following ones match E */
   case BEGIN_VAR_TAG:               // $a(
   case END_VAR_TAG:                 // $a)
   case BEGIN_OUTPUT_VAR_TAG:        // $|a(
   case END_OUTPUT_VAR_TAG:          // $|a)
   case BEGIN_POSITIVE_CONTEXT_TAG:  // $[
   case BEGIN_NEGATIVE_CONTEXT_TAG:  // $![
   case END_CONTEXT_TAG:             // $]
   case LEFT_CONTEXT_TAG:            // $*
   case BEGIN_MORPHO_TAG:            // $<
   case END_MORPHO_TAG:              // $>
   case TEXT_START_TAG:              // {^}
   case TEXT_END_TAG:                // {$}
      return 1;
}
/* Last chance: a <E> transition (possibly carrying an output) */
return (u_strcmp(tag->input,"<E>")==0)?1:0;
}
/** * This function compares two tree nodes as follow: * 1) by the unichar that lead to them * 2) by their hash_number (n� of line in INF file) * 3) by the transition that get out of them */ static inline int compare_nodes(const struct dictionary_node_transition* a,const struct dictionary_node_transition* b) { /* If the nodes have not the same INF codes, they are different */ struct dictionary_node* a_node = a->node; struct dictionary_node* b_node = b->node; if (a_node->single_INF_code_list!=b_node->single_INF_code_list) { if (a_node->single_INF_code_list!=NULL && b_node->single_INF_code_list==NULL) return -1; if (a_node->single_INF_code_list==NULL && b_node->single_INF_code_list!=NULL) return 1; } if (a_node->single_INF_code_list!=NULL && b_node->single_INF_code_list!=NULL && a_node->INF_code!=b_node->INF_code) return (a_node->INF_code - b_node->INF_code); /* Then, we compare all the outgoing transitions, two by two */ a=a_node->trans; b=b_node->trans; while(a!=NULL && b!=NULL) { /* If the 2 current transitions are not tagged by the same * character, then the nodes are different */ if (a->letter - b->letter != 0) return (a->letter - b->letter); int output_diff=u_strcmp(a->output,b->output); if (output_diff!=0) return output_diff; /* If the characters are equal and destination nodes are different... */ if (((int)(a->node - b->node)) != 0) return (int)(a->node - b->node); a=a->next; b=b->next; } if (a==b) { /* If a==b==NULL, the transition lists are equal, the nodes are equivalent */ return 0; } if (a==NULL) { /* If the first list is shorter than the second */ return -1; } /* If the first list is longer then the second */ return 1; }
/*----------------------------------------------------------------------------------------------
	Return true if the characters at the two offsets belong to runs that can be rendered in
	the same segment, i.e. their rendering properties (font face, size, bold, italic,
	direction, super/subscript, offset) are all the same. Return false as well if the
	properties of either character cannot be obtained.
----------------------------------------------------------------------------------------------*/
bool FwGrTxtSrc::sameSegment(toffset ich1, toffset ich2)
{
	LgCharRenderProps props1, props2;
	int ichMinUnused, ichLimUnused;

	GrResult res = (GrResult)m_qts->GetCharProps(GrToVwOffset(ich1), &props1,
		&ichMinUnused, &ichLimUnused);
	if (ResultFailed(res))
		return false;
	res = (GrResult)m_qts->GetCharProps(GrToVwOffset(ich2), &props2,
		&ichMinUnused, &ichLimUnused);
	if (ResultFailed(res))
		return false;

	if (u_strcmp(props1.szFaceName, props2.szFaceName) != 0)
		return false;

	if (props1.ws != props2.ws)
	{
		// Can't compare default fonts for different writing systems.
		StrUni stuFace;
		stuFace.Assign(props1.szFaceName, 8);
		if (stuFace == L"<default")
			return false;
	}

	// eventually improve on this
	return props1.dympHeight == props2.dympHeight
		&& props1.ttvBold == props2.ttvBold
		&& props1.ttvItalic == props2.ttvItalic
		&& props1.fWsRtl == props2.fWsRtl
		&& props1.nDirDepth == props2.nDirDepth
		&& props1.ssv == props2.ssv
		&& props1.dympOffset == props2.dympOffset;
}
/*
 * Resolves the encoding designated by the title of a list field.
 *
 * The title is first matched against the text of the list entries; the
 * first matching entry's userdata is handed to FindOrMakeEncoding(). If
 * that gives nothing, the title itself (converted to UTF-8) is tried as an
 * encoding name. A "Bad Encoding" error is posted when no encoding is found.
 * Returns the encoding, or NULL.
 */
Encoding *ParseEncodingNameFromList(GGadget *listfield) {
    const unichar_t *name = _GGadgetGetTitle(listfield);
    int32 len;
    GTextInfo **ti = GGadgetGetList(listfield,&len);
    Encoding *enc = NULL;
    int i;

    for ( i=0; i<len; ++i ) {
	if ( ti[i]->text==NULL || u_strcmp(name,ti[i]->text)!=0 )
	    continue;
	enc = FindOrMakeEncoding(ti[i]->userdata);
	break;
    }

    if ( enc == NULL ) {
	/* Not a list entry (or one without a usable encoding): try the raw name */
	char *temp = u2utf8_copy(name);
	enc = FindOrMakeEncoding(temp);
	free(temp);
    }
    if ( enc==NULL )
	ff_post_error(_("Bad Encoding"),_("Bad Encoding"));
    return( enc );
}
/**
 * Add a new name to the hashset and allocate it a unique id.
 * Every bucket of the slot's chain, including the last one, is compared
 * with prop before a new bucket is appended, so a key is never stored
 * twice. An id is only consumed when the bucket was actually created.
 * @param hs the hashset in question
 * @param prop the property to add
 * @return 1 if the property was added, 0 if it was already present or if
 * rehashing or bucket creation failed
 */
int hashset_put( hashset *hs, UChar *prop )
{
    unsigned slot;
    struct hs_bucket *b;
    struct hs_bucket *fresh;
    if ( (float)hs->num_keys/(float)hs->num_buckets > MAX_RATIO )
    {
        if ( !hashset_rehash(hs) )
            return 0;
    }
    slot = hash(prop,u_strlen(prop))%hs->num_buckets;
    b = hs->buckets[slot];
    if ( b != NULL )
    {
        /* walk to the tail of the chain, checking every bucket on the way */
        for ( ;; )
        {
            // if key already present, just return
            if ( u_strcmp(prop,b->key)==0 )
                return 0;
            if ( b->next == NULL )
                break;
            b = b->next;
        }
    }
    // key not found
    fresh = hs_bucket_create(prop,hs->id);
    if ( fresh == NULL )
        return 0;
    if ( b == NULL )
        hs->buckets[slot] = fresh;
    else
        b->next = fresh;
    hs->id++;
    hs->num_keys++;
    return 1;
}
/*Testing udat_getSymbols() and udat_setSymbols() and udat_countSymbols()*/ static void TestSymbols() { UDateFormat *def, *fr; UErrorCode status = U_ZERO_ERROR; UChar *value=NULL; UChar *result = NULL; int32_t resultlength; int32_t resultlengthout; UChar *pattern; /*creating a dateformat with french locale */ log_verbose("\ncreating a date format with french locale\n"); fr = udat_open(UDAT_FULL, UDAT_DEFAULT, "fr_FR", NULL, 0, NULL, 0, &status); if(U_FAILURE(status)) { log_data_err("error in creating the dateformat using full time style with french locale -> %s (Are you missing data?)\n", myErrorName(status) ); return; } /*creating a default dateformat */ log_verbose("\ncreating a date format with default locale\n"); /* this is supposed to open default date format, but later on it treats it like it is "en_US" - very bad if you try to run the tests on machine where default locale is NOT "en_US" */ /* def = udat_open(UDAT_DEFAULT,UDAT_DEFAULT ,NULL, NULL, 0, &status); */ def = udat_open(UDAT_DEFAULT,UDAT_DEFAULT ,"en_US", NULL, 0, NULL, 0, &status); if(U_FAILURE(status)) { log_err("error in creating the dateformat using short date and time style\n %s\n", myErrorName(status) ); return; } /*Testing countSymbols, getSymbols and setSymbols*/ log_verbose("\nTesting countSymbols\n"); /*since the month names has the last string empty and week names are 1 based 1.e first string in the weeknames array is empty */ if(udat_countSymbols(def, UDAT_ERAS)!=2 || udat_countSymbols(def, UDAT_MONTHS)!=12 || udat_countSymbols(def, UDAT_SHORT_MONTHS)!=12 || udat_countSymbols(def, UDAT_WEEKDAYS)!=8 || udat_countSymbols(def, UDAT_SHORT_WEEKDAYS)!=8 || udat_countSymbols(def, UDAT_AM_PMS)!=2 || udat_countSymbols(def, UDAT_QUARTERS) != 4 || udat_countSymbols(def, UDAT_SHORT_QUARTERS) != 4 || udat_countSymbols(def, UDAT_LOCALIZED_CHARS)!=1) { log_err("FAIL: error in udat_countSymbols\n"); } else log_verbose("PASS: udat_countSymbols() successful\n"); /*testing getSymbols*/ 
log_verbose("\nTesting getSymbols\n"); pattern=(UChar*)malloc(sizeof(UChar) * 10); u_uastrcpy(pattern, "jeudi"); resultlength=0; resultlengthout=udat_getSymbols(fr, UDAT_WEEKDAYS, 5 , NULL, resultlength, &status); if(status==U_BUFFER_OVERFLOW_ERROR) { status=U_ZERO_ERROR; resultlength=resultlengthout+1; if(result != NULL) { free(result); result = NULL; } result=(UChar*)malloc(sizeof(UChar) * resultlength); udat_getSymbols(fr, UDAT_WEEKDAYS, 5, result, resultlength, &status); } if(U_FAILURE(status)) { log_err("FAIL: Error in udat_getSymbols().... %s\n", myErrorName(status) ); } else log_verbose("PASS: getSymbols succesful\n"); if(u_strcmp(result, pattern)==0) log_verbose("PASS: getSymbols retrieved the right value\n"); else log_data_err("FAIL: getSymbols retrieved the wrong value\n"); /*run series of tests to test getsymbols regressively*/ log_verbose("\nTesting getSymbols() regressively\n"); VerifygetSymbols(fr, UDAT_WEEKDAYS, 1, "dimanche"); VerifygetSymbols(def, UDAT_WEEKDAYS, 1, "Sunday"); VerifygetSymbols(fr, UDAT_SHORT_WEEKDAYS, 7, "sam."); VerifygetSymbols(def, UDAT_SHORT_WEEKDAYS, 7, "Sat"); VerifygetSymbols(def, UDAT_MONTHS, 11, "December"); VerifygetSymbols(def, UDAT_MONTHS, 0, "January"); VerifygetSymbols(fr, UDAT_ERAS, 0, "av. 
J.-C."); VerifygetSymbols(def, UDAT_AM_PMS, 0, "AM"); VerifygetSymbols(def, UDAT_AM_PMS, 1, "PM"); VerifygetSymbols(fr, UDAT_SHORT_MONTHS, 0, "janv."); VerifygetSymbols(def, UDAT_SHORT_MONTHS, 11, "Dec"); VerifygetSymbols(fr, UDAT_QUARTERS, 0, "1er trimestre"); VerifygetSymbols(def, UDAT_QUARTERS, 3, "4th quarter"); VerifygetSymbols(fr, UDAT_SHORT_QUARTERS, 1, "T2"); VerifygetSymbols(def, UDAT_SHORT_QUARTERS, 2, "Q3"); VerifygetSymbols(def,UDAT_LOCALIZED_CHARS, 0, "GyMdkHmsSEDFwWahKzYeugAZvcLQqV"); if(result != NULL) { free(result); result = NULL; } free(pattern); log_verbose("\nTesting setSymbols\n"); /*applying the pattern so that setSymbolss works */ resultlength=0; resultlengthout=udat_toPattern(fr, FALSE, NULL, resultlength, &status); if(status==U_BUFFER_OVERFLOW_ERROR) { status=U_ZERO_ERROR; resultlength=resultlengthout + 1; pattern=(UChar*)malloc(sizeof(UChar) * resultlength); udat_toPattern(fr, FALSE, pattern, resultlength, &status); } if(U_FAILURE(status)) { log_err("FAIL: error in extracting the pattern from UNumberFormat\n %s\n", myErrorName(status) ); } udat_applyPattern(def, FALSE, pattern, u_strlen(pattern)); resultlength=0; resultlengthout=udat_toPattern(def, FALSE, NULL, resultlength,&status); if(status==U_BUFFER_OVERFLOW_ERROR) { status=U_ZERO_ERROR; resultlength=resultlengthout + 1; if(result != NULL) { free(result); result = NULL; } result=(UChar*)malloc(sizeof(UChar) * resultlength); udat_toPattern(fr, FALSE,result, resultlength, &status); } if(U_FAILURE(status)) { log_err("FAIL: error in extracting the pattern from UNumberFormat\n %s\n", myErrorName(status) ); } if(u_strcmp(result, pattern)==0) log_verbose("Pattern applied properly\n"); else log_err("pattern could not be applied properly\n"); free(pattern); /*testing set symbols */ resultlength=0; resultlengthout=udat_getSymbols(fr, UDAT_MONTHS, 11 , NULL, resultlength, &status); if(status==U_BUFFER_OVERFLOW_ERROR){ status=U_ZERO_ERROR; resultlength=resultlengthout+1; if(result != NULL) { 
free(result); result = NULL; } result=(UChar*)malloc(sizeof(UChar) * resultlength); udat_getSymbols(fr, UDAT_MONTHS, 11, result, resultlength, &status); } if(U_FAILURE(status)) log_err("FAIL: error in getSymbols() %s\n", myErrorName(status) ); resultlength=resultlengthout+1; udat_setSymbols(def, UDAT_MONTHS, 11, result, resultlength, &status); if(U_FAILURE(status)) { log_err("FAIL: Error in udat_setSymbols() : %s\n", myErrorName(status) ); } else log_verbose("PASS: SetSymbols successful\n"); resultlength=0; resultlengthout=udat_getSymbols(def, UDAT_MONTHS, 11, NULL, resultlength, &status); if(status==U_BUFFER_OVERFLOW_ERROR){ status=U_ZERO_ERROR; resultlength=resultlengthout+1; value=(UChar*)malloc(sizeof(UChar) * resultlength); udat_getSymbols(def, UDAT_MONTHS, 11, value, resultlength, &status); } if(U_FAILURE(status)) log_err("FAIL: error in retrieving the value using getSymbols i.e roundtrip\n"); if(u_strcmp(result, value)!=0) log_data_err("FAIL: Error in settting and getting symbols\n"); else log_verbose("PASS: setSymbols successful\n"); /*run series of tests to test setSymbols regressively*/ log_verbose("\nTesting setSymbols regressively\n"); VerifysetSymbols(def, UDAT_ERAS, 0, "BeforeChrist"); VerifysetSymbols(def, UDAT_ERA_NAMES, 1, "AnnoDomini"); VerifysetSymbols(def, UDAT_WEEKDAYS, 1, "Sundayweek"); VerifysetSymbols(def, UDAT_SHORT_WEEKDAYS, 7, "Satweek"); VerifysetSymbols(def, UDAT_NARROW_WEEKDAYS, 4, "M"); VerifysetSymbols(def, UDAT_STANDALONE_WEEKDAYS, 1, "Sonntagweek"); VerifysetSymbols(def, UDAT_STANDALONE_SHORT_WEEKDAYS, 7, "Sams"); VerifysetSymbols(def, UDAT_STANDALONE_NARROW_WEEKDAYS, 4, "V"); VerifysetSymbols(fr, UDAT_MONTHS, 11, "december"); VerifysetSymbols(fr, UDAT_SHORT_MONTHS, 0, "Jan"); VerifysetSymbols(fr, UDAT_NARROW_MONTHS, 1, "R"); VerifysetSymbols(fr, UDAT_STANDALONE_MONTHS, 11, "dezember"); VerifysetSymbols(fr, UDAT_STANDALONE_SHORT_MONTHS, 7, "Aug"); VerifysetSymbols(fr, UDAT_STANDALONE_NARROW_MONTHS, 2, "M"); VerifysetSymbols(fr, 
UDAT_QUARTERS, 0, "1. Quart"); VerifysetSymbols(fr, UDAT_SHORT_QUARTERS, 1, "QQ2"); VerifysetSymbols(fr, UDAT_STANDALONE_QUARTERS, 2, "3rd Quar."); VerifysetSymbols(fr, UDAT_STANDALONE_SHORT_QUARTERS, 3, "4QQ"); /*run series of tests to test get and setSymbols regressively*/ log_verbose("\nTesting get and set symbols regressively\n"); VerifygetsetSymbols(fr, def, UDAT_WEEKDAYS, 1); VerifygetsetSymbols(fr, def, UDAT_WEEKDAYS, 7); VerifygetsetSymbols(fr, def, UDAT_SHORT_WEEKDAYS, 1); VerifygetsetSymbols(fr, def, UDAT_SHORT_WEEKDAYS, 7); VerifygetsetSymbols(fr, def, UDAT_MONTHS, 0); VerifygetsetSymbols(fr, def, UDAT_SHORT_MONTHS, 0); VerifygetsetSymbols(fr, def, UDAT_ERAS,1); VerifygetsetSymbols(fr, def, UDAT_LOCALIZED_CHARS, 0); VerifygetsetSymbols(fr, def, UDAT_AM_PMS, 1); /*closing*/ udat_close(fr); udat_close(def); if(result != NULL) { free(result); result = NULL; } free(value); }
/* Testing the DateFormat API */ static void TestDateFormat() { UDateFormat *def, *fr, *it, *de, *def1, *fr_pat; UDateFormat *any; UDateFormat *copy; UErrorCode status = U_ZERO_ERROR; UChar* result = NULL; const UCalendar *cal; const UNumberFormat *numformat1, *numformat2; UChar temp[50]; int32_t numlocales; UDate d1; int i; int32_t resultlength; int32_t resultlengthneeded; int32_t parsepos; UDate d = 837039928046.0; double num = -10456.37; /*const char* str="yyyy.MM.dd G 'at' hh:mm:ss z"; const char t[]="2/3/76 2:50 AM";*/ /*Testing udat_open() to open a dateformat */ ctest_setTimeZone(NULL, &status); log_verbose("\nTesting udat_open() with various parameters\n"); fr = udat_open(UDAT_FULL, UDAT_DEFAULT, "fr_FR", NULL,0, NULL, 0,&status); if(U_FAILURE(status)) { log_data_err("FAIL: error in creating the dateformat using full time style with french locale -> %s (Are you missing data?)\n", myErrorName(status) ); return; } /* this is supposed to open default date format, but later on it treats it like it is "en_US" - very bad if you try to run the tests on machine where default locale is NOT "en_US" */ /* def = udat_open(UDAT_SHORT, UDAT_SHORT, NULL, NULL, 0, &status); */ def = udat_open(UDAT_SHORT, UDAT_SHORT, "en_US", NULL, 0,NULL, 0, &status); if(U_FAILURE(status)) { log_err("FAIL: error in creating the dateformat using short date and time style\n %s\n", myErrorName(status) ); return; } it = udat_open(UDAT_DEFAULT, UDAT_MEDIUM, "it_IT", NULL, 0, NULL, 0,&status); if(U_FAILURE(status)) { log_err("FAIL: error in creating the dateformat using medium date style with italian locale\n %s\n", myErrorName(status) ); return; } de = udat_open(UDAT_LONG, UDAT_LONG, "de_DE", NULL, 0, NULL, 0,&status); if(U_FAILURE(status)) { log_err("FAIL: error in creating the dateformat using long time and date styles with german locale\n %s\n", myErrorName(status)); return; } /*creating a default dateformat */ def1 = udat_open(UDAT_SHORT, UDAT_SHORT, NULL, NULL, 0,NULL, 0, &status); 
if(U_FAILURE(status)) { log_err("FAIL: error in creating the dateformat using short date and time style\n %s\n", myErrorName(status) ); return; } /*Testing udat_getAvailable() and udat_countAvailable()*/ log_verbose("\nTesting getAvailableLocales and countAvailable()\n"); numlocales=udat_countAvailable(); /* use something sensible w/o hardcoding the count */ if(numlocales < 0) log_data_err("FAIL: error in countAvailable\n"); log_verbose("The number of locales for which date/time formatting patterns are available is %d\n", numlocales); for(i=0;i<numlocales;i++) { UErrorCode subStatus = U_ZERO_ERROR; log_verbose("Testing open of %s\n", udat_getAvailable(i)); any = udat_open(UDAT_SHORT, UDAT_SHORT, udat_getAvailable(i), NULL ,0, NULL, 0, &subStatus); if(U_FAILURE(subStatus)) { log_data_err("FAIL: date format %s (getAvailable(%d)) is not instantiable: %s\n", udat_getAvailable(i), i, u_errorName(subStatus)); } udat_close(any); } /*Testing udat_clone()*/ log_verbose("\nTesting the udat_clone() function of date format\n"); copy=udat_clone(def, &status); if(U_FAILURE(status)){ log_err("Error in creating the clone using udat_clone: %s\n", myErrorName(status) ); } /*if(def != copy) log_err("Error in udat_clone");*/ /*how should i check for equality???? */ /*Testing udat_format()*/ log_verbose("\nTesting the udat_format() function of date format\n"); u_uastrcpy(temp, "7/10/96 4:05 PM"); /*format using def */ resultlength=0; resultlengthneeded=udat_format(def, d, NULL, resultlength, NULL, &status); if(status==U_BUFFER_OVERFLOW_ERROR) { status=U_ZERO_ERROR; resultlength=resultlengthneeded+1; if(result != NULL) { free(result); result = NULL; } result=(UChar*)malloc(sizeof(UChar) * resultlength); udat_format(def, d, result, resultlength, NULL, &status); } if(U_FAILURE(status) || !result) { log_err("FAIL: Error in formatting using udat_format(.....) 
%s\n", myErrorName(status) ); return; } else log_verbose("PASS: formatting successful\n"); if(u_strcmp(result, temp)==0) log_verbose("PASS: Date Format for US locale successful using udat_format()\n"); else { char xbuf[2048]; char gbuf[2048]; u_austrcpy(xbuf, temp); u_austrcpy(gbuf, result); log_err("FAIL: Date Format for US locale failed using udat_format() - expected %s got %s\n", xbuf, gbuf); } /*format using fr */ u_unescape("10 juil. 1996 16:05:28 heure avanc\\u00E9e du Pacifique", temp, 50); if(result != NULL) { free(result); result = NULL; } result=myDateFormat(fr, d); if(u_strcmp(result, temp)==0) log_verbose("PASS: Date Format for french locale successful using udat_format()\n"); else log_data_err("FAIL: Date Format for french locale failed using udat_format().\n" ); /*format using it */ u_uastrcpy(temp, "10/lug/1996 16:05:28"); { UChar *fmtted; char g[100]; char x[100]; fmtted = myDateFormat(it,d); u_austrcpy(g, fmtted); u_austrcpy(x, temp); if(u_strcmp(fmtted, temp)==0) { log_verbose("PASS: Date Format for italian locale successful uisng udat_format() - wanted %s, got %s\n", x, g); } else { log_data_err("FAIL: Date Format for italian locale failed using udat_format() - wanted %s, got %s\n", x, g); } } /*Testing parsing using udat_parse()*/ log_verbose("\nTesting parsing using udat_parse()\n"); u_uastrcpy(temp,"2/3/76 2:50 AM"); parsepos=0; status=U_ZERO_ERROR; d1=udat_parse(def, temp, u_strlen(temp), &parsepos, &status); if(U_FAILURE(status)) { log_err("FAIL: Error in parsing using udat_parse(.....) 
%s\n", myErrorName(status) ); } else log_verbose("PASS: parsing succesful\n"); /*format it back and check for equality */ if(u_strcmp(myDateFormat(def, d1),temp)!=0) log_err("FAIL: error in parsing\n"); /*Testing parsing using udat_parse()*/ log_verbose("\nTesting parsing using udat_parse()\n"); u_uastrcpy(temp,"2/Don't parse this part"); status=U_ZERO_ERROR; d1=udat_parse(def, temp, u_strlen(temp), NULL, &status); if(status != U_PARSE_ERROR) { log_err("FAIL: udat_parse(\"bad string\") passed when it should have failed\n"); } else log_verbose("PASS: parsing succesful\n"); /*Testing udat_openPattern() */ status=U_ZERO_ERROR; log_verbose("\nTesting the udat_openPattern with a specified pattern\n"); /*for french locale */ fr_pat=udat_open(UDAT_IGNORE, UDAT_IGNORE,"fr_FR",NULL,0,temp, u_strlen(temp), &status); if(U_FAILURE(status)) { log_err("FAIL: Error in creating a date format using udat_openPattern \n %s\n", myErrorName(status) ); } else log_verbose("PASS: creating dateformat using udat_openPattern() succesful\n"); /*Testing applyPattern and toPattern */ log_verbose("\nTesting applyPattern and toPattern()\n"); udat_applyPattern(def1, FALSE, temp, u_strlen(temp)); log_verbose("Extracting the pattern\n"); resultlength=0; resultlengthneeded=udat_toPattern(def1, FALSE, NULL, resultlength, &status); if(status==U_BUFFER_OVERFLOW_ERROR) { status=U_ZERO_ERROR; resultlength=resultlengthneeded + 1; result=(UChar*)malloc(sizeof(UChar) * resultlength); udat_toPattern(def1, FALSE, result, resultlength, &status); } if(U_FAILURE(status)) { log_err("FAIL: error in extracting the pattern from UNumberFormat\n %s\n", myErrorName(status) ); } if(u_strcmp(result, temp)!=0) log_err("FAIL: Error in extracting the pattern\n"); else log_verbose("PASS: applyPattern and toPattern work fine\n"); if(result != NULL) { free(result); result = NULL; } /*Testing getter and setter functions*/ /*isLenient and setLenient()*/ log_verbose("\nTesting the isLenient and setLenient properties\n"); 
udat_setLenient(fr, udat_isLenient(it)); if(udat_isLenient(fr) != udat_isLenient(it)) log_err("ERROR: setLenient() failed\n"); else log_verbose("PASS: setLenient() successful\n"); /*Test get2DigitYearStart set2DigitYearStart */ log_verbose("\nTesting the get and set 2DigitYearStart properties\n"); d1= udat_get2DigitYearStart(fr_pat,&status); if(U_FAILURE(status)) { log_err("ERROR: udat_get2DigitYearStart failed %s\n", myErrorName(status) ); } status = U_ZERO_ERROR; udat_set2DigitYearStart(def1 ,d1, &status); if(U_FAILURE(status)) { log_err("ERROR: udat_set2DigitYearStart failed %s\n", myErrorName(status) ); } if(udat_get2DigitYearStart(fr_pat, &status) != udat_get2DigitYearStart(def1, &status)) log_err("FAIL: error in set2DigitYearStart\n"); else log_verbose("PASS: set2DigitYearStart successful\n"); /*try setting it to another value */ udat_set2DigitYearStart(de, 2000.0, &status); if(U_FAILURE(status)){ log_verbose("ERROR: udat_set2DigitYearStart failed %s\n", myErrorName(status) ); } if(udat_get2DigitYearStart(de, &status) != 2000) log_err("FAIL: error in set2DigitYearStart\n"); else log_verbose("PASS: set2DigitYearStart successful\n"); /*Test getNumberFormat() and setNumberFormat() */ log_verbose("\nTesting the get and set NumberFormat properties of date format\n"); numformat1=udat_getNumberFormat(fr_pat); udat_setNumberFormat(def1, numformat1); numformat2=udat_getNumberFormat(def1); if(u_strcmp(myNumformat(numformat1, num), myNumformat(numformat2, num)) !=0) log_err("FAIL: error in setNumberFormat or getNumberFormat()\n"); else log_verbose("PASS:setNumberFormat and getNumberFormat succesful\n"); /*try setting the number format to another format */ numformat1=udat_getNumberFormat(def); udat_setNumberFormat(def1, numformat1); numformat2=udat_getNumberFormat(def1); if(u_strcmp(myNumformat(numformat1, num), myNumformat(numformat2, num)) !=0) log_err("FAIL: error in setNumberFormat or getNumberFormat()\n"); else log_verbose("PASS: setNumberFormat and getNumberFormat 
succesful\n"); /*Test getCalendar and setCalendar*/ log_verbose("\nTesting the udat_getCalendar() and udat_setCalendar() properties\n"); cal=udat_getCalendar(fr_pat); udat_setCalendar(def1, cal); if(!ucal_equivalentTo(udat_getCalendar(fr_pat), udat_getCalendar(def1))) log_err("FAIL: Error in setting and getting the calendar\n"); else log_verbose("PASS: getting and setting calendar successful\n"); if(result!=NULL) { free(result); } /*Closing the UDateForamt */ udat_close(def); udat_close(fr); udat_close(it); udat_close(de); udat_close(def1); udat_close(fr_pat); udat_close(copy); ctest_resetTimeZone(); }
static void TestUSpoofCAPI(void) { /* * basic uspoof_open(). */ { USpoofChecker *sc; UErrorCode status = U_ZERO_ERROR; sc = uspoof_open(&status); TEST_ASSERT_SUCCESS(status); if (U_FAILURE(status)) { /* If things are so broken that we can't even open a default spoof checker, */ /* don't even try the rest of the tests. They would all fail. */ return; } uspoof_close(sc); } /* * Test Open from source rules. */ TEST_SETUP const char *dataSrcDir; char *fileName; char *confusables; int confusablesLength; char *confusablesWholeScript; int confusablesWholeScriptLength; FILE *f; UParseError pe; int32_t errType; USpoofChecker *rsc; dataSrcDir = ctest_dataSrcDir(); fileName = malloc(strlen(dataSrcDir) + 100); strcpy(fileName, dataSrcDir); strcat(fileName, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "confusables.txt"); f = fopen(fileName, "r"); TEST_ASSERT_NE(f, NULL); confusables = malloc(3000000); confusablesLength = fread(confusables, 1, 3000000, f); fclose(f); strcpy(fileName, dataSrcDir); strcat(fileName, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "confusablesWholeScript.txt"); f = fopen(fileName, "r"); TEST_ASSERT_NE(f, NULL); confusablesWholeScript = malloc(1000000); confusablesWholeScriptLength = fread(confusablesWholeScript, 1, 1000000, f); fclose(f); rsc = uspoof_openFromSource(confusables, confusablesLength, confusablesWholeScript, confusablesWholeScriptLength, &errType, &pe, &status); TEST_ASSERT_SUCCESS(status); free(confusablesWholeScript); free(confusables); free(fileName); uspoof_close(rsc); /* printf("ParseError Line is %d\n", pe.line); */ TEST_TEARDOWN; /* * openFromSerialized and serialize */ TEST_SETUP int32_t serializedSize = 0; int32_t actualLength = 0; char *buf; USpoofChecker *sc2; int32_t checkResults; serializedSize = uspoof_serialize(sc, NULL, 0, &status); TEST_ASSERT_EQ(status, U_BUFFER_OVERFLOW_ERROR); TEST_ASSERT(serializedSize > 0); /* Serialize the default spoof checker */ status = U_ZERO_ERROR; buf = (char *)malloc(serializedSize + 10); 
TEST_ASSERT(buf != NULL); buf[serializedSize] = 42; uspoof_serialize(sc, buf, serializedSize, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(42, buf[serializedSize]); /* Create a new spoof checker from the freshly serialized data */ sc2 = uspoof_openFromSerialized(buf, serializedSize+10, &actualLength, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_NE(NULL, sc2); TEST_ASSERT_EQ(serializedSize, actualLength); /* Verify that the new spoof checker at least wiggles */ checkResults = uspoof_check(sc2, goodLatin, -1, NULL, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(0, checkResults); checkResults = uspoof_check(sc2, scMixed, -1, NULL, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults); uspoof_close(sc2); free(buf); TEST_TEARDOWN; /* * Set & Get Check Flags */ TEST_SETUP int32_t t; uspoof_setChecks(sc, USPOOF_ALL_CHECKS, &status); TEST_ASSERT_SUCCESS(status); t = uspoof_getChecks(sc, &status); TEST_ASSERT_EQ(t, USPOOF_ALL_CHECKS); uspoof_setChecks(sc, 0, &status); TEST_ASSERT_SUCCESS(status); t = uspoof_getChecks(sc, &status); TEST_ASSERT_EQ(0, t); uspoof_setChecks(sc, USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE, &status); TEST_ASSERT_SUCCESS(status); t = uspoof_getChecks(sc, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE, t); TEST_TEARDOWN; /* * get & setAllowedChars */ TEST_SETUP USet *us; const USet *uset; uset = uspoof_getAllowedChars(sc, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT(uset_isFrozen(uset)); us = uset_open((UChar32)0x41, (UChar32)0x5A); /* [A-Z] */ uspoof_setAllowedChars(sc, us, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_NE(us, uspoof_getAllowedChars(sc, &status)); TEST_ASSERT(uset_equals(us, uspoof_getAllowedChars(sc, &status))); TEST_ASSERT_SUCCESS(status); uset_close(us); TEST_TEARDOWN; /* * clone() */ TEST_SETUP 
USpoofChecker *clone1 = NULL; USpoofChecker *clone2 = NULL; int32_t checkResults = 0; clone1 = uspoof_clone(sc, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_NE(clone1, sc); clone2 = uspoof_clone(clone1, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_NE(clone2, clone1); uspoof_close(clone1); /* Verify that the cloned spoof checker is alive */ checkResults = uspoof_check(clone2, goodLatin, -1, NULL, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(0, checkResults); checkResults = uspoof_check(clone2, scMixed, -1, NULL, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults); uspoof_close(clone2); TEST_TEARDOWN; /* * get & set Checks */ TEST_SETUP int32_t checks; int32_t checks2; int32_t checkResults; checks = uspoof_getChecks(sc, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(USPOOF_ALL_CHECKS, checks); checks &= ~(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE); uspoof_setChecks(sc, checks, &status); TEST_ASSERT_SUCCESS(status); checks2 = uspoof_getChecks(sc, &status); TEST_ASSERT_EQ(checks, checks2); /* The checks that were disabled just above are the same ones that the "scMixed" test fails. 
So with those tests gone checking that Identifier should now succeed */ checkResults = uspoof_check(sc, scMixed, -1, NULL, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(0, checkResults); TEST_TEARDOWN; /* * AllowedLoacles */ TEST_SETUP const char *allowedLocales; int32_t checkResults; /* Default allowed locales list should be empty */ allowedLocales = uspoof_getAllowedLocales(sc, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT(strcmp("", allowedLocales) == 0) /* Allow en and ru, which should enable Latin and Cyrillic only to pass */ uspoof_setAllowedLocales(sc, "en, ru_RU", &status); TEST_ASSERT_SUCCESS(status); allowedLocales = uspoof_getAllowedLocales(sc, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT(strstr(allowedLocales, "en") != NULL); TEST_ASSERT(strstr(allowedLocales, "ru") != NULL); /* Limit checks to USPOOF_CHAR_LIMIT. Some of the test data has whole script confusables also, * which we don't want to see in this test. */ uspoof_setChecks(sc, USPOOF_CHAR_LIMIT, &status); TEST_ASSERT_SUCCESS(status); checkResults = uspoof_check(sc, goodLatin, -1, NULL, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(0, checkResults); checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT, checkResults); checkResults = uspoof_check(sc, goodCyrl, -1, NULL, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(0, checkResults); /* Reset with an empty locale list, which should allow all characters to pass */ uspoof_setAllowedLocales(sc, " ", &status); TEST_ASSERT_SUCCESS(status); checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(0, checkResults); TEST_TEARDOWN; /* * AllowedChars set/get the USet of allowed characters. */ TEST_SETUP const USet *set; USet *tmpSet; int32_t checkResults; /* By default, we should see no restriction; the USet should allow all characters. 
*/ set = uspoof_getAllowedChars(sc, &status); TEST_ASSERT_SUCCESS(status); tmpSet = uset_open(0, 0x10ffff); TEST_ASSERT(uset_equals(tmpSet, set)); /* Setting the allowed chars should enable the check. */ uspoof_setChecks(sc, USPOOF_ALL_CHECKS & ~USPOOF_CHAR_LIMIT, &status); TEST_ASSERT_SUCCESS(status); /* Remove a character that is in our good Latin test identifier from the allowed chars set. */ uset_remove(tmpSet, goodLatin[1]); uspoof_setAllowedChars(sc, tmpSet, &status); TEST_ASSERT_SUCCESS(status); uset_close(tmpSet); /* Latin Identifier should now fail; other non-latin test cases should still be OK */ checkResults = uspoof_check(sc, goodLatin, -1, NULL, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT, checkResults); checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE, checkResults); TEST_TEARDOWN; /* * check UTF-8 */ TEST_SETUP char utf8buf[200]; int32_t checkResults; int32_t position; u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, goodLatin, -1, &status); TEST_ASSERT_SUCCESS(status); position = 666; checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(0, checkResults); TEST_ASSERT_EQ(666, position); u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, goodCyrl, -1, &status); TEST_ASSERT_SUCCESS(status); checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(0, checkResults); u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, scMixed, -1, &status); TEST_ASSERT_SUCCESS(status); position = 666; checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_SINGLE_SCRIPT , checkResults); TEST_ASSERT_EQ(2, position); TEST_TEARDOWN; /* * uspoof_areConfusable() */ TEST_SETUP int32_t checkResults; checkResults = uspoof_areConfusable(sc, scLatin, -1, scMixed, -1, 
&status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults); checkResults = uspoof_areConfusable(sc, goodGreek, -1, scLatin, -1, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(0, checkResults); checkResults = uspoof_areConfusable(sc, lll_Latin_a, -1, lll_Latin_b, -1, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, checkResults); TEST_TEARDOWN; /* * areConfusableUTF8 */ TEST_SETUP int32_t checkResults; char s1[200]; char s2[200]; u_strToUTF8(s1, sizeof(s1), NULL, scLatin, -1, &status); u_strToUTF8(s2, sizeof(s2), NULL, scMixed, -1, &status); TEST_ASSERT_SUCCESS(status); checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults); u_strToUTF8(s1, sizeof(s1), NULL, goodGreek, -1, &status); u_strToUTF8(s2, sizeof(s2), NULL, scLatin, -1, &status); TEST_ASSERT_SUCCESS(status); checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(0, checkResults); u_strToUTF8(s1, sizeof(s1), NULL, lll_Latin_a, -1, &status); u_strToUTF8(s2, sizeof(s2), NULL, lll_Latin_b, -1, &status); TEST_ASSERT_SUCCESS(status); checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, checkResults); TEST_TEARDOWN; /* * getSkeleton */ TEST_SETUP UChar dest[100]; int32_t skelLength; skelLength = uspoof_getSkeleton(sc, USPOOF_ANY_CASE, lll_Latin_a, -1, dest, sizeof(dest)/sizeof(UChar), &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_EQ(0, u_strcmp(lll_Skel, dest)); TEST_ASSERT_EQ(u_strlen(lll_Skel), skelLength); skelLength = uspoof_getSkeletonUTF8(sc, USPOOF_ANY_CASE, goodLatinUTF8, -1, dest, sizeof(dest)/sizeof(UChar), &status); TEST_ASSERT_SUCCESS(status); skelLength = uspoof_getSkeleton(sc, USPOOF_ANY_CASE, lll_Latin_a, -1, NULL, 0, &status); 
TEST_ASSERT_EQ(U_BUFFER_OVERFLOW_ERROR, status); TEST_ASSERT_EQ(3, skelLength); status = U_ZERO_ERROR; TEST_TEARDOWN; }
struct rule_list* parse_rules (unichar* entry,struct utags UTAG,vector_ptr* rules) {
  // Parses a dictionary entry to extract the rules for derivation and
  // composition.
  //
  // The entry is scanned for '+'-introduced tags:
  //   - a tag equal to UTAG.PREFIX or UTAG.SUFFIX is removed from the entry;
  //   - a tag equal to UTAG.RULE starts a rule of the form
  //       <before-condition>#<after-condition>=<then-code>)
  //     which is removed from the entry and appended to the returned list;
  //   - any other text is kept.
  //
  // 'entry' is rewritten in place with the tags and rules stripped off.
  // Returns a rule list that always holds at least one rule: when the entry
  // contains none, a rule created by new_composition_rule() is stored in it.
  //
  // Rule parts longer than MAX_COMPOSITION_RULE_LENGTH-1 characters are
  // truncated instead of overflowing the local buffers.
  struct rule_list* rule_list = new_rule_list(rules);
  struct rule_list* actual_list_pos = rule_list;

  unichar beforcond[MAX_COMPOSITION_RULE_LENGTH];
  unichar aftercond[MAX_COMPOSITION_RULE_LENGTH];
  unichar then_code[MAX_COMPOSITION_RULE_LENGTH];
  int bcpos = 0;
  int acpos = 0;
  int tpos = 0;
  enum { BEGIN, BEFORE_COND, AFTER_COND, THEN };
  int state = BEGIN;
  // Write position of the cleaned entry. The entry is compacted in place:
  // k never exceeds the read position i, so no unread character is ever
  // overwritten and no intermediate buffer (with its own size limit) is needed.
  int k = 0;

  for (int i = 0; entry[i] != '\0'; i++) {
    if ( state != BEGIN ) { // inside a rule
      if (entry[i] == '\\') {
        // unescaping escaped chars in rule
        // NOTE(review): the character following the backslash is still
        // compared against ')', '#' and '=' below, exactly as before;
        // confirm whether escaped delimiters are meant to be literal.
        i++;
        // A backslash that ends the entry has nothing to escape; stop here
        // so that the loop does not step over the terminator.
        if (entry[i] == '\0') break;
      }
      if (entry[i] == ')') { // end of rule
        struct composition_rule* rule = new_composition_rule();
        beforcond[bcpos] = '\0';
        aftercond[acpos] = '\0';
        then_code[tpos] = '\0';
        parse_condition(beforcond, rule->before);
        parse_condition(aftercond, rule->after);
        parse_then_code(then_code, &rule->then);
        bcpos = acpos = tpos = 0;
        if (actual_list_pos->rule != 0) { // not first rule
          struct rule_list* tmp = new_rule_list(rules);
          actual_list_pos->next = tmp;
          actual_list_pos = tmp;
        }
        actual_list_pos->rule = rule;
        state = BEGIN;
      }
      else if (state == BEFORE_COND) { // condition before
        if (entry[i] == '#') state = AFTER_COND;
        else if (bcpos < MAX_COMPOSITION_RULE_LENGTH - 1) beforcond[bcpos++] = entry[i];
      }
      else if (state == AFTER_COND) { // condition after
        if (entry[i] == '=') state = THEN;
        else if (acpos < MAX_COMPOSITION_RULE_LENGTH - 1) aftercond[acpos++] = entry[i];
      }
      else if (state == THEN) { // then-code
        if (tpos < MAX_COMPOSITION_RULE_LENGTH - 1) then_code[tpos++] = entry[i];
      }
    }
    else if (entry[i] == '+') { // not inside a rule: candidate tag
      unichar tmp[MAX_DICT_LINE_LENGTH];
      int len = 0;
      int j;
      // Copy the tag name, i.e. everything up to the next '+', ':', '(' or
      // the end of the entry. An over-long name is truncated in tmp, but j
      // still advances to the real end of the tag.
      for (j = i+1;
           ((entry[j] != '+') && (entry[j] != ':') && (entry[j] != '(') && (entry[j] != '\0'));
           j++) {
        if (len < MAX_DICT_LINE_LENGTH - 1) tmp[len++] = entry[j];
      }
      tmp[len] = '\0';
      if ((!u_strcmp(tmp, UTAG.PREFIX)) || (!u_strcmp(tmp, UTAG.SUFFIX))) {
        i = j-1; // drop the tag, resume on the delimiter that follows it
      }
      else if (!u_strcmp(tmp, UTAG.RULE)) {
        // A rule tag that ends the entry has no body: drop it and stop,
        // otherwise the loop increment would move past the terminator.
        if (entry[j] == '\0') break;
        i = j; // including '('
        state = BEFORE_COND;
      }
      else {
        entry[k++] = entry[i];
      }
    }
    else { // not inside a rule: ordinary character
      entry[k++] = entry[i];
    }
  }
  entry[k] = '\0';

  if (rule_list->rule == 0)
    rule_list->rule = new_composition_rule();

  return rule_list;
}
/** * Adds a match to the global list of matches. The function takes into * account the match policy. For instance, we don't take [2;3] into account * if we are in longest match mode and if we already have [2;5]. * * This function is derived from the 'add_match' one in 'Matches.cpp', but it differs because * in Locate, we know exactly where we are in the text, so that we can filter matches easily. When * exploring a text automaton, it's not so easy to sort matches, because the state from where we * started the match may correspond to a position in text lower than the one of a previous match. * * IMPORTANT: 'e' is to be copied if the corresponding match must be added to the list */ struct tfst_simple_match_list* add_element_to_list(struct locate_tfst_infos* p,struct tfst_simple_match_list* list,struct tfst_simple_match_list* e) { if (list==NULL) { /* We can always add a match to the empty list */ return new_tfst_simple_match_list(e,NULL); } switch (compare_matches(&(list->m),&(e->m))) { case A_BEFORE_B: case A_BEFORE_B_OVERLAP: { /* If the candidate starts after the end of the current match, then we have to go on, * no matter the mode (shortest, longest or all matches) */ list->next=add_element_to_list(p,list->next,e); return list; } case A_INCLUDES_B: { if (p->match_policy==SHORTEST_MATCHES) { /* e must replace the current match in the list */ replace_match(list,e); return list; } else if (p->match_policy==LONGEST_MATCHES) { /* Our match is shorter than a match in the list, we discard it */ return list; } else { list->next=add_element_to_list(p,list->next,e); return list; } } case A_EQUALS_B: { /* In any mode we replace the existing match by the new one, except if we allow * ambiguous outputs */ if (u_strcmp(list->output,e->output)) { if (p->ambiguous_output_policy==ALLOW_AMBIGUOUS_OUTPUTS) { list=new_tfst_simple_match_list(e,list); return list; } else { /* If we don't allow ambiguous outputs, we have to print an error message */ error("Unexpected ambiguous 
outputs:\n<%S>\n<%S>\n",list->output,e->output); } } replace_match(list,e); return list; } case B_INCLUDES_A: { if (p->match_policy==SHORTEST_MATCHES) { /* Our match is longer than a match in the list, we discard it */ return list; } else if (p->match_policy==LONGEST_MATCHES) { /* e must replace the current match in the list */ replace_match(list,e); return list; } else { list->next=add_element_to_list(p,list->next,e); return list; } } case A_AFTER_B: case A_AFTER_B_OVERLAP: { /* If the candidate ends before the start of the current match, then we have to insert it * no matter the mode (shortest, longest or all matches) */ list=new_tfst_simple_match_list(e,list); return list; } } /* Should not arrive here */ return NULL; }