_WCRTLINK int __F_NAME(strnicmp,_wcsnicmp)( const CHAR_TYPE *s, const CHAR_TYPE *t, size_t n ) #endif { UCHAR_TYPE c1; UCHAR_TYPE c2; for( ; n > 0; --n ) { c1 = *s; c2 = *t; if( IS_ASCII( c1 ) && IS_ASCII( c2 ) ) { if( c1 >= STRING( 'A' ) && c1 <= STRING( 'Z' ) ) c1 += STRING( 'a' ) - STRING( 'A' ); if( c2 >= STRING( 'A' ) && c2 <= STRING( 'Z' ) ) { c2 += STRING( 'a' ) - STRING( 'A' ); } } if( c1 != c2 ) return( c1 - c2 ); /* less than or greater than */ if( c1 == NULLCHAR ) break; /* equal */ ++s; ++t; } return( 0 ); /* equal */ }
_WCRTLINK int __F_NAME(strnicmp,_wcsnicmp)( const CHAR_TYPE *s, const CHAR_TYPE *t, size_t n ) #endif { UCHAR_TYPE c1; UCHAR_TYPE c2; for( ;; ) { if( n == 0 ) return( 0 ); /* equal */ c1 = *s; c2 = *t; if( IS_ASCII( c1 ) && IS_ASCII( c2 ) ) { if( c1 >= 'A' && c1 <= 'Z' ) c1 += 'a' - 'A'; if( c2 >= 'A' && c2 <= 'Z' ) c2 += 'a' - 'A'; } if( c1 != c2 ) return( c1 - c2 ); /* less than or greater than */ if( c1 == NULLCHAR ) return( 0 ); /* equal */ ++s; ++t; --n; } }
uint8_t nsSampleWordBreaker::GetClass(char16_t c) { // begin of the hack if (IS_ALPHABETICAL_SCRIPT(c)) { if(IS_ASCII(c)) { if(ASCII_IS_SPACE(c)) { return kWbClassSpace; } else if(ASCII_IS_ALPHA(c) || ASCII_IS_DIGIT(c)) { return kWbClassAlphaLetter; } else { return kWbClassPunct; } } else if(IS_THAI(c)) { return kWbClassThaiLetter; } else if (c == 0x00A0/*NBSP*/) { return kWbClassSpace; } else { return kWbClassAlphaLetter; } } else { if(IS_HAN(c)) { return kWbClassHanLetter; } else if(IS_KATAKANA(c)) { return kWbClassKatakanaLetter; } else if(IS_HIRAGANA(c)) { return kWbClassHiraganaLetter; } else if(IS_HALFWIDTHKATAKANA(c)) { return kWbClassHWKatakanaLetter; } else { return kWbClassAlphaLetter; } } return 0; }
/** Dump R's encoding marks for each string *for testing only*
 *
 * This function should not be exported
 *
 * @param s character vector
 *
 * For every element the ASCII, UTF8, LATIN1 and BYTES marks are written
 * to stdout, one per line, each line prefixed with "!NDEBUG: ".
 * NA elements are reported as "NA".
 *
 * When compiled with NDEBUG defined the function only raises an R error.
 *
 * @return R_NilValue (debug builds)
 *
 * @version 0.1 (Marek Gagolewski)
 */
SEXP stri_test_Rmark(SEXP s)
{
#ifndef NDEBUG
   s = stri_prepare_arg_string(s, "str");
   const int count = LENGTH(s);

   int idx = 0;
   while (idx < count) {
      fprintf(stdout, "!NDEBUG: Element #%d:\n", idx);
      SEXP elem = STRING_ELT(s, idx);
      ++idx;

      if (elem == NA_STRING) {
         fprintf(stdout, "!NDEBUG: \tNA\n");
         continue;
      }

      // Normalise each mark to 0/1 before printing.
      const int markAscii  = (IS_ASCII(elem)  > 0);
      const int markUtf8   = (IS_UTF8(elem)   > 0);
      const int markLatin1 = (IS_LATIN1(elem) > 0);
      const int markBytes  = (IS_BYTES(elem)  > 0);

      fprintf(stdout, "!NDEBUG: \tMARK_ASCII = %d\n", markAscii);
      fprintf(stdout, "!NDEBUG: \tMARK_UTF8  = %d\n", markUtf8);
      fprintf(stdout, "!NDEBUG: \tMARK_LATIN1= %d\n", markLatin1);
      fprintf(stdout, "!NDEBUG: \tMARK_BYTES = %d\n", markBytes);
      fprintf(stdout, "!NDEBUG: \n");
   }
   return R_NilValue;
#else
   Rf_error("This function is enabled only if NDEBUG is undef.");
   return s;  // s here avoids compiler warning
#endif
}
int Message::Read(const char *filename) { FILE *msgfile; int size; char cur; if(!(msgfile=fopen(filename,"r"))) return 0; //get file size fseek(msgfile,0,SEEK_END); size=ftell(msgfile)+1; fseek(msgfile,0,SEEK_SET); AllocateBuffers(size); if(!cipher || !plain) return 0; //read from file msg_len=0; while(fscanf(msgfile,"%c",&cur)!=EOF) { if(!IS_ASCII(cur) || cur==' ') continue; cipher[msg_len++]=cur; } cipher[msg_len]='\0'; fclose(msgfile); SetInfo(true); return msg_len; }
/*
 * Try matching `word` against the trie with its first two characters
 * swapped.
 *
 * Descends from `node` through word[1] and then word[0].  If both child
 * nodes exist, the swapped pair is written to suggestion[0..1] and the
 * rest of the word (word + 2) is handed to inTrie(), whose result is
 * returned.  Returns FALSE when word[1] fails IS_ASCII or either child
 * is missing.
 *
 * Reads word[0] and word[1] unconditionally.
 * NOTE(review): presumably callers guarantee at least two characters and
 * that word[0] is a valid CH_INDEX input -- confirm.
 */
static int inTrieTranspose(TrieNode* node, char* word, char* suggestion, int maxEdits)
{
    TrieNode* afterSecond;
    TrieNode* afterFirst;

    if(!IS_ASCII((int)word[1])) {
        return FALSE;
    }

    afterSecond = node->children[CH_INDEX(word[1])];
    if(afterSecond == NULL) {
        return FALSE;
    }

    afterFirst = afterSecond->children[CH_INDEX(word[0])];
    if(afterFirst == NULL) {
        return FALSE;
    }

    suggestion[0] = word[1];
    suggestion[1] = word[0];
    return inTrie(afterFirst, word + 2, suggestion + 2, maxEdits);
}
/* Forward the command held by an upstream ascii conn to its assigned
 * ascii downstream.
 *
 * Returns false when no downstream connection could be made; otherwise
 * returns the result of the simple or item forwarding helper, chosen by
 * whether the upstream conn's cmd is -1.
 */
bool cproxy_forward_a2a_downstream(downstream *d) {
    assert(d != NULL);

    conn *uc = d->upstream_conn;

    assert(uc != NULL);
    assert(uc->state == conn_pause);
    assert(uc->cmd_start != NULL);
    assert(uc->thread != NULL);
    assert(uc->thread->base != NULL);
    assert(IS_ASCII(uc->protocol));
    assert(IS_PROXY(uc->protocol));

    if (cproxy_connect_downstream(d, uc->thread) <= 0) {
        return false;
    }

    assert(d->downstream_conns != NULL);

    if (uc->cmd != -1) {
        return cproxy_forward_a2a_item_downstream(d, uc->cmd, uc->item, uc);
    }
    return cproxy_forward_a2a_simple_downstream(d, uc->cmd_start, uc);
}
/*
 * Return the _PUNCT bit of IsWhat( c ) when c satisfies IS_ASCII,
 * and 0 for every other character.
 */
_WCRTLINK int __F_NAME(ispunct,iswpunct)( INTCHAR_TYPE c )
{
    if( !IS_ASCII( c ) ) {
        return( 0 );
    }
    return( IsWhat( c ) & _PUNCT );
}
/*
 * Return 1 when c satisfies IS_ASCII and IsWhat( c ) has the _PRINT
 * bit set without the _SPACE bit; return 0 otherwise.
 */
_WCRTLINK int __F_NAME(isgraph,iswgraph)( INTCHAR_TYPE c )
{
    if( !IS_ASCII( c ) ) {
        return( 0 );
    }
    return( ( IsWhat( c ) & ( _PRINT | _SPACE ) ) == _PRINT );
}
/*
 * Return the _LOWER/_UPPER bits of IsWhat( c ) when c satisfies
 * IS_ASCII (non-zero for a letter), and 0 for every other character.
 */
_WCRTLINK int __F_NAME(isalpha,iswalpha)( INTCHAR_TYPE c )
{
    if( !IS_ASCII( c ) ) {
        return( 0 );
    }
    return( IsWhat( c ) & ( _LOWER | _UPPER ) );
}
/*
 * Return the _SPACE bit of IsWhat( c ) when c satisfies IS_ASCII,
 * and 0 for every other character.
 */
_WCRTLINK int __F_NAME(isspace,iswspace)( INTCHAR_TYPE c )
{
    if( !IS_ASCII( c ) ) {
        return( 0 );
    }
    return( IsWhat( c ) & _SPACE );
}
/*
 * Return the _DIGIT bit of IsWhat( c ) when c satisfies IS_ASCII,
 * and 0 for every other character.
 */
_WCRTLINK int __F_NAME(isdigit,iswdigit)( INTCHAR_TYPE c )
{
    if( !IS_ASCII( c ) ) {
        return( 0 );
    }
    return( IsWhat( c ) & _DIGIT );
}
/* Forward the command held by an upstream ascii conn to its assigned
 * ascii downstream.
 *
 * For a broadcast command the broadcast suffix is prepared; otherwise,
 * if a key can be scanned from the command, it is mapped to a server
 * index (a negative index aborts with false).  The downstream is then
 * connected for that server index (-1 when none was selected).
 *
 * Returns true when cproxy_connect_downstream() reports -1, the result
 * of the simple/item forwarding helper when at least one connection is
 * available, and false (optionally logging) when the connect failed.
 */
bool cproxy_forward_a2a_downstream(downstream *d) {
    assert(d != NULL);

    conn *uc = d->upstream_conn;

    assert(uc != NULL);
    assert(uc->state == conn_pause);
    assert(uc->cmd_start != NULL);
    assert(uc->thread != NULL);
    assert(uc->thread->base != NULL);
    assert(IS_ASCII(uc->protocol));
    assert(IS_PROXY(uc->protocol));

    int server_index = -1;

    if (cproxy_is_broadcast_cmd(uc->cmd_curr) == true) {
        cproxy_ascii_broadcast_suffix(d);
    } else {
        char *key     = NULL;
        int   key_len = 0;

        bool have_key = ascii_scan_key(uc->cmd_start, &key, &key_len) &&
                        key != NULL &&
                        key_len > 0;
        if (have_key) {
            server_index = cproxy_server_index(d, key, key_len, NULL);
            if (server_index < 0) {
                return false;
            }
        }
    }

    int nc = cproxy_connect_downstream(d, uc->thread, server_index);

    if (nc == -1) {
        return true;
    }

    if (nc <= 0) {
        if (settings.verbose > 2) {
            moxi_log_write("%d: cproxy_forward_a2a_downstream connect failed\n",
                           uc->sfd);
        }
        return false;
    }

    assert(d->downstream_conns != NULL);

    /* Stamp the start time once, and only when time stats are enabled. */
    if (d->usec_start == 0 &&
        d->ptd->behavior_pool.base.time_stats) {
        d->usec_start = usec_now();
    }

    if (uc->cmd != -1) {
        return cproxy_forward_a2a_item_downstream(d, uc->cmd, uc->item, uc);
    }
    return cproxy_forward_a2a_simple_downstream(d, uc->cmd_start, uc);
}
// Title-case a code point: characters satisfying IS_ASCII go through
// ToUpperCase(), everything else through
// mozilla::unicode::GetTitlecaseForLower().
uint32_t
ToTitleCase(uint32_t aChar)
{
  if (!IS_ASCII(aChar)) {
    return mozilla::unicode::GetTitlecaseForLower(aChar);
  }
  return ToUpperCase(aChar);
}
// We want ToLowerCase(PRUint32) and ToLowerCaseASCII(PRUint32) to be fast // when they're called from within the case-insensitive comparators, so we // define inlined versions. static NS_ALWAYS_INLINE PRUint32 ToLowerCase_inline(PRUint32 aChar) { if (IS_ASCII(aChar)) { return gASCIIToLower[aChar]; } return mozilla::unicode::GetLowercase(aChar); }
// Lower-case only characters satisfying IS_ASCII (via the gASCIIToLower
// table); any other character is returned unchanged.
static NS_ALWAYS_INLINE PRUint32
ToLowerCaseASCII_inline(const PRUint32 aChar)
{
  if (!IS_ASCII(aChar)) {
    return aChar;
  }
  return gASCIIToLower[aChar];
}
// Lower-case only characters satisfying IS_ASCII (via the gASCIIToLower
// table); any other character is returned unchanged.
static MOZ_ALWAYS_INLINE uint32_t
ToLowerCaseASCII_inline(const uint32_t aChar)
{
  if (!IS_ASCII(aChar)) {
    return aChar;
  }
  return gASCIIToLower[aChar];
}
// We want ToLowerCase(uint32_t) and ToLowerCaseASCII(uint32_t) to be fast // when they're called from within the case-insensitive comparators, so we // define inlined versions. static MOZ_ALWAYS_INLINE uint32_t ToLowerCase_inline(uint32_t aChar) { if (IS_ASCII(aChar)) { return gASCIIToLower[aChar]; } return mozilla::unicode::GetLowercase(aChar); }
/* Dispatch a completed nread on an ascii downstream conn according to
 * the protocol of its upstream conn.  Only the ascii-to-ascii case is
 * handled; any other upstream protocol trips an assertion.
 */
void cproxy_process_downstream_ascii_nread(conn *c) {
    downstream *d = c->extra;

    assert(d != NULL);
    assert(d->upstream_conn != NULL);

    if (!IS_ASCII(d->upstream_conn->protocol)) {
        assert(false); // TODO: b2a.
        return;
    }

    cproxy_process_a2a_downstream_nread(c);
}
// Return the index of the first element of xc that is not NA and is
// marked neither ASCII nor UTF-8.  Returns xc.length() when no such
// element exists.
R_xlen_t get_first_reencode_pos(const CharacterVector& xc) {
  const R_xlen_t n = xc.length();

  R_xlen_t pos = 0;
  while (pos < n) {
    SEXP elt = xc[pos];
    const bool needs_reencode =
      elt != NA_STRING && !IS_ASCII(elt) && !IS_UTF8(elt);
    if (needs_reencode) {
      break;
    }
    ++pos;
  }

  return pos;
}
/* Dispatch a completed nread on a binary downstream conn: ascii
 * upstreams go to the a2b handler, all other upstreams to the b2b
 * handler.
 */
void cproxy_process_downstream_binary_nread(conn *c) {
    downstream *d = c->extra;

    cb_assert(d != NULL);
    cb_assert(d->upstream_conn != NULL);

    if (!IS_ASCII(d->upstream_conn->protocol)) {
        cproxy_process_b2b_downstream_nread(c);
        return;
    }

    cproxy_process_a2b_downstream_nread(c);
}
/* This may return a R_alloc-ed result, so the caller has to manage the
   R_alloc stack.

   Translate the CHARSXP x to UTF-8.

   NA_STRING and strings marked UTF-8 or ASCII are returned as-is
   (the CHARSXP's own data, no copy).  Strings marked "bytes" raise an
   error.  Anything else is converted with iconv, from "latin1" when x
   is marked Latin-1 and from "" otherwise.  Input bytes that iconv
   rejects (EILSEQ/EINVAL) are rendered as "<xx>" (two hex digits) and
   conversion resumes after them.  The converted text is copied into a
   fresh R_alloc buffer which is returned. */
const char *translateCharUTF8(SEXP x)
{
    void *obj;
    const char *inbuf, *ans = CHAR(x);
    char *outbuf, *p;
    size_t inb, outb, res;
    R_StringBuffer cbuff = {NULL, 0, MAXELTSIZE};

    if(TYPEOF(x) != CHARSXP)
	error(_("'%s' must be called on a CHARSXP"), "translateCharUTF8");
    /* Fast paths: nothing to translate. */
    if(x == NA_STRING) return ans;
    if(IS_UTF8(x)) return ans;
    if(IS_ASCII(x)) return ans;
    if(IS_BYTES(x))
	error(_("translating strings with \"bytes\" encoding is not allowed"));

    obj = Riconv_open("UTF-8", IS_LATIN1(x) ? "latin1" : "");
    /* NOTE(review): the messages below always name "latin1" as the
       source, even when the source encoding passed above was "". */
    if(obj == (void *)(-1))
#ifdef Win32
	error(_("unsupported conversion from '%s' in codepage %d"),
	      "latin1", localeCP);
#else
	error(_("unsupported conversion from '%s' to '%s'"), "latin1", "UTF-8");
#endif
    R_AllocStringBuffer(0, &cbuff);
top_of_loop:
    /* (Re)start the conversion from the beginning of the input; reached
       again whenever the output buffer had to be enlarged. */
    inbuf = ans; inb = strlen(inbuf);
    outbuf = cbuff.data; outb = cbuff.bufsize - 1; /* keep room for the NUL */
    /* First initialize output */
    Riconv (obj, NULL, NULL, &outbuf, &outb);
next_char:
    /* Then convert input */
    res = Riconv(obj, &inbuf , &inb, &outbuf, &outb);
    if(res == -1 && errno == E2BIG) {
	/* Output buffer full: double it and start over. */
	R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
	goto top_of_loop;
    } else if(res == -1 && (errno == EILSEQ || errno == EINVAL)) {
	/* Invalid or incomplete input byte: emit it as "<xx>".
	   snprintf needs 5 bytes (4 characters + NUL), so grow first
	   when fewer remain. */
	if(outb < 5) {
	    R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
	    goto top_of_loop;
	}
	snprintf(outbuf, 5, "<%02x>", (unsigned char)*inbuf);
	outbuf += 4; outb -= 4;
	inbuf++; inb--;
	goto next_char;
    }
    *outbuf = '\0';
    Riconv_close(obj);
    /* Copy the result out of the scratch buffer into R_alloc memory. */
    res = strlen(cbuff.data) + 1;
    p = R_alloc(res, 1);
    memcpy(p, cbuff.data, res);
    R_FreeStringBuffer(&cbuff);
    return p;
}
/* Dispatch a line received on an ascii downstream conn according to the
 * protocol of its upstream conn.  Only the ascii-to-ascii case is
 * handled; any other upstream protocol trips an assertion.
 */
void cproxy_process_downstream_ascii(conn *c, char *line) {
    downstream *d = c->extra;

    assert(d != NULL);
    assert(d->upstream_conn != NULL);

    if (!IS_ASCII(d->upstream_conn->protocol)) {
        assert(false); /* TODO: b2a. */
        return;
    }

    cproxy_process_a2a_downstream(c, line);
}
// Decode GB 2312 bytes from aSrc into UTF-16 code units in aDest.
//
// @param aSrc        source bytes
// @param aSrcLength  [in] number of source bytes available;
//                    [out] number of source bytes consumed
// @param aDest       destination buffer
// @param aDestLength [in] capacity of aDest in code units;
//                    [out] number of code units written
//
// Returns NS_OK when all input was consumed, NS_OK_UDEC_MOREOUTPUT when
// aDest filled up first, and NS_OK_UDEC_MOREINPUT when the input ends
// in the middle of a two-byte sequence.
//
// A lead byte in [0xA1,0xFE] followed by a trail byte in the same range
// is mapped through mUtil.GBKCharToUnicode(); a lead byte with an
// invalid trail byte, or a single byte that fails IS_ASCII, produces
// UCS2_NO_MAPPING and consumes one byte.
NS_IMETHODIMP nsGB2312ToUnicodeV2::ConvertNoBuff(const char* aSrc,
                                                 PRInt32 * aSrcLength,
                                                 PRUnichar *aDest,
                                                 PRInt32 * aDestLength)
{
  PRInt32 i = 0;
  PRInt32 iSrcLength = (*aSrcLength);
  PRInt32 iDestlen = 0;
  nsresult rv = NS_OK;

  for (i = 0; i < iSrcLength; i++) {
    if (iDestlen >= (*aDestLength)) {
      rv = NS_OK_UDEC_MOREOUTPUT;
      break;
    }

    if (UINT8_IN_RANGE(0xa1, *aSrc, 0xfe)) {
      if (i + 1 >= iSrcLength) {
        rv = NS_OK_UDEC_MOREINPUT;
        break;
      }

      // To make sure, the second byte has to be checked as well
      // The valid 2nd byte range: [0xA1,0xFE]
      if (UINT8_IN_RANGE(0xa1, aSrc[1], 0xfe)) {
        // Valid GB 2312 code point
        *aDest = mUtil.GBKCharToUnicode(aSrc[0], aSrc[1]);
        aSrc += 2;
        i++;  // the trail byte; the loop increment accounts for the lead
      } else {
        // Invalid GB 2312 code point
        *aDest = UCS2_NO_MAPPING;
        aSrc++;
      }
    } else {
      if (IS_ASCII(*aSrc)) {
        // The source is an ASCII
        *aDest = CAST_CHAR_TO_UNICHAR(*aSrc);
      } else {
        *aDest = UCS2_NO_MAPPING;
      }
      aSrc++;
    }

    iDestlen++;
    aDest++;
  }

  // i is exactly the number of bytes consumed, both on normal exit and
  // on every break above.  Reporting it here (rather than at the end of
  // each completed iteration) also covers a break in the very first
  // iteration, which previously left *aSrcLength at its input value.
  *aSrcLength = i;
  *aDestLength = iDestlen;
  return rv;
}
// Upper-case a code point.  Characters satisfying IS_ASCII are handled
// locally: lower-case letters (IS_ASCII_LOWER) are shifted down by 0x20
// and everything else is returned unchanged.  All other characters are
// delegated to mozilla::unicode::GetUppercase().
uint32_t
ToUpperCase(uint32_t aChar)
{
  if (!IS_ASCII(aChar)) {
    return mozilla::unicode::GetUppercase(aChar);
  }
  return IS_ASCII_LOWER(aChar) ? aChar - 0x20 : aChar;
}
/** Convert character vector to ASCII
 *
 * All charcodes > 127 are replaced with subst chars (0x1A).
 * In strings marked as UTF-8, each code point above ASCII_MAXCHARCODE
 * and each malformed byte sequence yields one subst char; in other
 * (8-bit) strings every byte that is not a single-byte UTF-8 unit
 * yields one subst char.  NA stays NA, and strings already marked
 * as ASCII are passed through unchanged.
 *
 * @param str character vector
 * @return character vector
 *
 * @version 0.1 (Marek Gagolewski)
 * @version 0.2 (Marek Gagolewski, 2013-06-16) make StriException-friendly
 */
SEXP stri_enc_toascii(SEXP str)
{
   str = stri_prepare_arg_string(str, "str");
   R_len_t n = LENGTH(str);

   STRI__ERROR_HANDLER_BEGIN
   SEXP ret;
   PROTECT(ret = Rf_allocVector(STRSXP, n));
   for (R_len_t i=0; i<n; ++i) {
      SEXP curs = STRING_ELT(str, i);
      if (curs == NA_STRING) {
         SET_STRING_ELT(ret, i, NA_STRING);
         continue;
      }
      else if (IS_ASCII(curs)) {
         SET_STRING_ELT(ret, i, curs);
      }
      else if (IS_UTF8(curs)) {
         R_len_t curn = LENGTH(curs);
         const char* curs_tab = CHAR(curs);
         // TODO: buffer reuse....
         // One output byte per decoded code point, so curn bytes always
         // suffice (this may be 4 times too much).
         String8 buf(curn+1);
         R_len_t k = 0;
         UChar32 c;
         for (int j=0; j<curn; ) {
            U8_NEXT(curs_tab, j, curn, c);
            // U8_NEXT sets c < 0 for an ill-formed sequence; that must be
            // substituted too, otherwise (char)c would emit a non-ASCII byte.
            if (c < 0 || c > ASCII_MAXCHARCODE)
               buf.data()[k++] = ASCII_SUBSTITUTE;
            else
               buf.data()[k++] = (char)c;
         }
         SET_STRING_ELT(ret, i, Rf_mkCharLenCE(buf.data(), k, CE_UTF8));
         // will be marked as ASCII anyway by mkCharLenCE
      }
      else { // some 8-bit encoding
         R_len_t curn = LENGTH(curs);
         const char* curs_tab = CHAR(curs);
         // TODO: buffer reuse....
         String8 buf(curn+1);
         R_len_t k = 0;
         for (R_len_t j=0; j<curn; ++j) {
            if (U8_IS_SINGLE(curs_tab[j]))
               buf.data()[k++] = curs_tab[j];
            else {
               buf.data()[k++] = (char)ASCII_SUBSTITUTE; // subst char in ascii
            }
         }
         SET_STRING_ELT(ret, i, Rf_mkCharLenCE(buf.data(), k, CE_UTF8));
         // will be marked as ASCII anyway by mkCharLenCE
      }
   }
   UNPROTECT(1);
   return ret;
   STRI__ERROR_HANDLER_END(;/* nothing special to be done on error */)
}
/* Copy every input byte of TASK to its output, emitting PREFIX in front
   of each byte that does not satisfy IS_ASCII.  STEP is not used by the
   body itself.  */
static bool
transform_latin_mule (RECODE_CONST_STEP step, RECODE_TASK task,
                      unsigned prefix)
{
  int byte;

  for (byte = get_byte (task); byte != EOF; byte = get_byte (task))
    {
      if (!IS_ASCII (byte))
        {
          put_byte (prefix, task);
        }
      put_byte (byte, task);
    }

  TASK_RETURN (task);
}
// Encode UTF-16 code units from aSrc as GB 2312 bytes in aDest.
//
// @param aSrc        source code units
// @param aSrcLength  [in] number of code units available;
//                    [out] number of code units consumed (on
//                    NS_ERROR_UENC_NOMAPPING this includes the
//                    unmappable character)
// @param aDest       destination buffer
// @param aDestLength [in] capacity of aDest in bytes;
//                    [out] number of bytes written
//
// Characters satisfying IS_ASCII are written as one byte; all others
// are converted with mUtil.UnicodeToGBKChar() into two bytes.
//
// Returns NS_OK when all input was consumed, NS_OK_UENC_MOREOUTPUT when
// aDest cannot hold the next character, and NS_ERROR_UENC_NOMAPPING
// when a character has no GB mapping.
NS_IMETHODIMP nsUnicodeToGB2312V2::ConvertNoBuff(const PRUnichar * aSrc,
                                                 PRInt32 * aSrcLength,
                                                 char * aDest,
                                                 PRInt32 * aDestLength)
{
  PRInt32 iSrcLength = 0;
  PRInt32 iDestLength = 0;
  nsresult res = NS_OK;

  while (iSrcLength < *aSrcLength) {
    //if unicode's hi byte has something, it is not ASCII, must be a GB
    if (IS_ASCII(*aSrc)) {
      // Guard the single-byte write.  The check at the bottom of the loop
      // keeps this true for every later iteration; it only fires when the
      // caller passes a buffer with no capacity at all.
      if (iDestLength >= *aDestLength) {
        res = NS_OK_UENC_MOREOUTPUT;
        break;
      }
      // this is an ASCII
      *aDest = CAST_UNICHAR_TO_CHAR(*aSrc);
      aDest++;          // increment 1 byte
      iDestLength += 1;
    } else {
      char byte1, byte2;
      if (mUtil.UnicodeToGBKChar(*aSrc, false, &byte1, &byte2)) {
        if (iDestLength + 2 > *aDestLength) {
          res = NS_OK_UENC_MOREOUTPUT;
          break;
        }
        aDest[0] = byte1;
        aDest[1] = byte2;
        aDest += 2;       // increment 2 bytes
        iDestLength += 2; // each GB char count as two in char* string
      } else {
        // cannot convert
        res = NS_ERROR_UENC_NOMAPPING;
        iSrcLength++;   // include length of the unmapped character
        break;
      }
    }
    iSrcLength++;       // each unicode char just count as one in PRUnichar* string
    aSrc++;
    if (iDestLength >= (*aDestLength) && (iSrcLength < *aSrcLength)) {
      res = NS_OK_UENC_MOREOUTPUT;
      break;
    }
  }

  *aDestLength = iDestLength;
  *aSrcLength = iSrcLength;
  return res;
}
/* Inverse of the prefixing transform: bytes satisfying IS_ASCII are
   copied through, and a PREFIX byte followed by a data byte is reduced
   to the data byte alone.

   Conditions reported through RETURN_IF_NOGO:
     - RECODE_UNTRANSLATABLE  a non-ASCII byte that is not PREFIX;
     - RECODE_NOT_CANONICAL   repeated PREFIX bytes, or an ASCII byte
                              following PREFIX;
     - RECODE_INVALID_INPUT   input ends right after PREFIX.  */
static bool
transform_mule_latin (RECODE_CONST_STEP step, RECODE_TASK task,
                      unsigned prefix)
{
  int byte;

  for (;;)
    {
      byte = get_byte (task);
      if (byte == EOF)
        break;

      if (IS_ASCII (byte))
        {
          put_byte (byte, task);
          continue;
        }

      if ((byte & MASK (8)) != prefix)
        {
          RETURN_IF_NOGO (RECODE_UNTRANSLATABLE, step, task);
          continue;
        }

      byte = get_byte (task);
      while ((byte & MASK (8)) == prefix)
        {
          /* This happens in practice, sometimes, that Emacs goes a bit
             berzerk and generates strings of prefix characters.  Remove
             all succeeding prefixes in a row.  This is irreversible.  */
          RETURN_IF_NOGO (RECODE_NOT_CANONICAL, step, task);
          byte = get_byte (task);
        }

      if (byte == EOF)
        {
          RETURN_IF_NOGO (RECODE_INVALID_INPUT, step, task);
          break;
        }

      if (IS_ASCII (byte))
        {
          RETURN_IF_NOGO (RECODE_NOT_CANONICAL, step, task);
        }
      put_byte (byte, task);
    }

  TASK_RETURN (task);
}
/*
 * Convert the R factor Var into a Julia pooled data array.
 *
 * The factor's "levels" attribute becomes a Julia string array and the
 * integer codes become a Julia uint32 array in which NA_INTEGER is
 * stored as 0 ("NA in poolarray is 0").  Both arrays are handed to
 * TransArrayToPoolDataArray() together with the length and VarName.
 *
 * Returns jl_nothing when Var has no levels, has length 0, or is not an
 * INTSXP; otherwise the value returned by TransArrayToPoolDataArray().
 */
static jl_value_t *R_Julia_MD_NA_Factor(SEXP Var, const char *VarName)
{
  SEXP levels = getAttrib(Var, R_LevelsSymbol);
  if (levels == R_NilValue)
    return jl_nothing;

  //create string array for levels in julia
  jl_array_t *ret1 = jl_alloc_array_1d(jl_apply_array_type(jl_ascii_string_type, 1), LENGTH(levels));
  jl_value_t **retData1 = jl_array_data(ret1);

  /* NOTE(review): ret1 is not GC-rooted while the strings below are
     created -- confirm whether it needs rooting before this loop. */
  for (size_t i = 0; i < jl_array_len(ret1); i++)
  {
    /* The ASCII mark is tested on the level string itself; the original
       tested it on Var, the factor's integer vector. */
    SEXP level = STRING_ELT(levels, i);
    if (!IS_ASCII(level))
      retData1[i] = jl_cstr_to_string(translateChar0(level));
    else
      retData1[i] = jl_cstr_to_string(CHAR(level));
  }

  /* Only non-empty integer-coded factors are converted. */
  if (LENGTH(Var) == 0 || TYPEOF(Var) != INTSXP)
    return (jl_value_t *) jl_nothing;

  jl_array_t *ret = jl_alloc_array_1d(jl_apply_array_type(jl_uint32_type, 1), LENGTH(Var));
  JL_GC_PUSH(&ret, &ret1);
  int *retData = (int *)jl_array_data(ret);
  for (size_t i = 0; i < jl_array_len(ret); i++)
  {
    if (INTEGER(Var)[i] == NA_INTEGER)
    {
      //NA in poolarray is 0
      retData[i] = 0;
    }
    else
    {
      retData[i] = INTEGER(Var)[i];
    }
  }
  JL_GC_POP();
  return TransArrayToPoolDataArray(ret, ret1, LENGTH(Var), VarName);
}