int LoadCharSet(const char* fontFileName,const char* charArray,CharSet& charSet) { if (!fontFileName || !charArray) { return 0; } IplImage* largeImage = cvLoadImage(fontFileName,CV_LOAD_IMAGE_GRAYSCALE); if (!largeImage) { return 0; } int charNum = strlen(charArray); int width = largeImage->width/charNum; int height = largeImage->height; charSet.clear(); charSet.resize(charNum); for (int i = 0;i < charNum;i++) { IplImage* image = cvCreateImage(cvSize(width,height),8,1); cvSetImageROI(largeImage,cvRect(i*width,0,width,height)); cvCopy(largeImage,image); CharFont font; font.code = charArray[i]; font.image = image; charSet[i] = font; } cvReleaseImage(&largeImage); return charSet.size(); }
/**
 * Releases the glyph image of every entry in charSet.
 *
 * The entries themselves stay in the container; only their images are
 * handed to cvReleaseImage.
 *
 * @param charSet  container whose images are released
 * @return the number of entries in charSet
 */
int FreeCharSet(CharSet& charSet)
{
    int index = 0;
    while (index < charSet.size())
    {
        cvReleaseImage(&charSet[index].image);
        index++;
    }
    return charSet.size();
}
// Returns a newly allocated CharSet describing the symbols of this action.
// For a character-class action (typ == Node::clas) the result is a clone of
// the class set that tab reports for sym; otherwise it is a fresh set
// containing only sym. The returned object is not released here.
CharSet* Action::Symbols(Tab *tab)
{
    if (typ == Node::clas)
    {
        return tab->CharClassSet(sym)->Clone();
    }

    CharSet *single = new CharSet();
    single->Set(sym);
    return single;
}
void INTL_adjust_text_descriptor(thread_db* tdbb, dsc* desc)
{
/**************************************
 *
 *	I N T L _ a d j u s t _ t e x t _ d e s c r i p t o r
 *
 **************************************
 *
 * Functional description
 *	Receives a text descriptor whose
 *	dsc_length = numberOfCharacters * maxBytesPerChar
 *	and changes dsc_length to the number of bytes actually used
 *	by the string.
 *
 *	Only dtype_text descriptors of multi-byte character sets are
 *	touched; everything else is left as it is.
 *
 **************************************/
	if (desc->dsc_dtype != dtype_text)
		return;

	SET_TDBB(tdbb);

	USHORT ttype = INTL_TTYPE(desc);
	CharSet* charSet = INTL_charset_lookup(tdbb, ttype);

	if (!charSet->isMultiByte())
		return;

	Firebird::HalfStaticArray<UCHAR, BUFFER_SMALL> buffer;

	if (!(charSet->getFlags() & CHARSET_LEGACY_SEMANTICS))
	{
		// Regular semantics: keep at most TEXT_LEN / maxBytesPerChar characters.
		desc->dsc_length = charSet->substring(TEXT_LEN(desc), desc->dsc_address, TEXT_LEN(desc),
			buffer.getBuffer(TEXT_LEN(desc)), 0, TEXT_LEN(desc) / charSet->maxBytesPerChar());
		return;
	}

	// Legacy semantics: take the substring first ...
	desc->dsc_length = charSet->substring(TEXT_LEN(desc), desc->dsc_address, TEXT_LEN(desc),
		buffer.getBuffer(TEXT_LEN(desc) * charSet->maxBytesPerChar()), 0, TEXT_LEN(desc));

	// ... then compute the limit. Note that TEXT_LEN(desc) is evaluated here
	// after dsc_length has been overwritten by the call above.
	const ULONG maxLength = TEXT_LEN(desc) / charSet->maxBytesPerChar();
	ULONG charLength = charSet->length(desc->dsc_length, desc->dsc_address, true);

	// Drop trailing bytes equal to the first space byte while the string is
	// longer than the limit; stop at the first trailing byte that differs.
	while (charLength > maxLength &&
		desc->dsc_address[desc->dsc_length - 1] == *charSet->getSpace())
	{
		--desc->dsc_length;
		--charLength;
	}
}
// Removes from this set every element that is also contained in *s
// (set difference, in place).
//
// The difference is built completely in a temporary set before any of the
// old Range nodes are deleted. Deleting them while still querying s would
// read freed nodes when s aliases this set (s == this).
void CharSet::Subtract(CharSet *s)
{
    CharSet *x = new CharSet();

    // Pass 1: collect every element of this set that s does not contain.
    for (Range *p = head; p != NULL; p = p->next)
    {
        for (int i = p->from; i <= p->to; i++)
        {
            if (!s->Get(i)) x->Set(i);
        }
    }

    // Pass 2: release the old range list now that nobody reads it any more.
    Range *p = head;
    while (p != NULL)
    {
        Range *del = p;
        p = p->next;
        delete del;
    }

    // Take over the temporary's list and detach it from x, so that deleting
    // x does not also release the list that now belongs to this set.
    head = x->head;
    x->head = NULL;
    delete x;
}
// Recursive helper function for WordVariants(). void WordListLangModel::WordVariants(const CharSet &char_set, string_32 prefix_str32, WERD_CHOICE *word_so_far, string_32 str32, vector<WERD_CHOICE *> *word_variants) { int str_len = str32.length(); if (str_len == 0) { if (word_so_far->length() > 0) { word_variants->push_back(new WERD_CHOICE(*word_so_far)); } } else { // Try out all the possible prefixes of the str32. for (int len = 1; len <= str_len; len++) { // Check if prefix is supported in character set. string_32 str_pref32 = str32.substr(0, len); int class_id = char_set.ClassID(reinterpret_cast<const char_32 *>( str_pref32.c_str())); if (class_id <= 0) { continue; } else { string_32 new_prefix_str32 = prefix_str32 + str_pref32; string_32 new_str32 = str32.substr(len); word_so_far->append_unichar_id(class_id, 1, 0.0, 0.0); WordVariants(char_set, new_prefix_str32, word_so_far, new_str32, word_variants); word_so_far->remove_last_unichar_id(); } } } }
/// \brief Get the unicode representation of the current character /// \return the unicode character inline UChar chr() { if (val == 0) { val = charset.value( buf, state, input); } return val; }
void CharSet::Subtract(const CharSet& b) { CharSet tmp; Range *p = head; while (p != NULL) { for (int i = p->from; i <= p->to; i++) { if (!b.Get(i)) { tmp.Set(i); } } // cleanup old storage - as per Clear Range *del = p; p = p->next; delete del; } head = tmp.head; tmp.head = NULL; // avoid double deletion }
/**
 * Removes duplicate characters from a NUL-terminated string in place.
 *
 * Of each distinct character only the LAST occurrence is kept; the kept
 * characters retain their relative order ("banana" becomes "bna").
 *
 * The string is scanned once from the back. Characters seen for the first
 * time are packed towards the end of the buffer, and the packed tail is then
 * moved to the front in a single memmove. The scan never forms a pointer
 * before the start of the buffer.
 *
 * @param str  string to modify; may be NULL or empty
 * @return str (unchanged when it is NULL or empty)
 */
static char *unique( char *str )
{
    if( !str || !(*str) )
        return str;

    // One flag per possible unsigned char value (8-bit char assumed).
    bool seen[256] = { false };

    char *const end = str + strlen(str);   // points at the terminating NUL
    char *out = end;                        // write cursor, moves towards str

    for( char *p = end; p != str; )
    {
        --p;
        const unsigned char c = static_cast<unsigned char>( *p );
        if( !seen[c] )
        {
            seen[c] = true;
            // out never drops below p: at most one write per character read.
            *--out = *p;
        }
    } // for

    // Shift the kept characters, together with the terminating NUL, to the front.
    memmove( str, out, (end - out) + 1 );
    return str;
}
/// \brief Append the current character to buf_ in the encoding of output_
/// \remark If CharSet::is_equal(..) holds for the source and the output
///	character set (IsoLatin code page !), the raw bytes of the character are
///	fetched and copied to the output without decoding/encoding. Otherwise the
///	character is decoded with chr() and printed through output_.
/// \param [in] output_ character set encoding of the output
/// \param [in,out] buf_ buffer the character is appended to
inline void copychar( CharSet& output_, Buffer& buf_)
{
	if (!CharSet::is_equal( charset, output_))
	{
		// different encodings: decode and re-encode
		output_.print( chr(), buf_);
		return;
	}

	// same encoding and same subclass (code page): copy the bytes directly
	charset.fetchbytes( buf, state, input);

#if defined(__GNUC__) && (__GNUC__ >= 5 && __GNUC_MINOR__ >= 0)
	// NOTE(review): the additional ii<8 bound is applied only for these GCC
	// versions; presumably buf holds at most 8 bytes - confirm
	for (unsigned int ii=0; ii<8 && ii<state; ++ii)
	{
		buf_.push_back(buf[ii]);
	}
#else
	for (unsigned int ii=0; ii<state; ++ii)
	{
		buf_.push_back(buf[ii]);
	}
#endif
}
// Builds a converter from this container's character set (cs) to the
// character set identified by toCsId.
// Whichever side is CS_UTF16 is passed to CsConvert as NULL instead of a
// charset structure; when the target is CS_UTF16 no charset lookup is done.
CsConvert CharSetContainer::lookupConverter(thread_db* tdbb, CHARSET_ID toCsId)
{
	// Target is UTF-16: only the source structure is supplied.
	if (toCsId == CS_UTF16)
	{
		return CsConvert(cs->getStruct(), NULL);
	}

	CharSet* target = INTL_charset_lookup(tdbb, toCsId);

	// Source is UTF-16: only the target structure is supplied.
	const bool sourceIsUtf16 = (cs->getId() == CS_UTF16);
	if (sourceIsUtf16)
	{
		return CsConvert(NULL, target->getStruct());
	}

	return CsConvert(cs->getStruct(), target->getStruct());
}
// Extracts the next piece of str, starting at index pos and ending at the
// next character contained in the delimiter set d.
//
// pos is advanced past everything consumed, including the delimiter that
// ended the piece; it ends up at str.length() when the string runs out.
// empty selects what happens when a delimiter is met while the piece is
// still empty: true ends the scan and yields an empty piece, false skips
// the delimiter and continues.
string part(const string &str, int &pos, const CharSet &d, bool empty)
{
    string piece;

    while (pos < str.length())
    {
        if (!d.exists(str[pos]))
        {
            // ordinary character: collect it
            piece += str[pos];
            ++pos;
            continue;
        }

        // delimiter
        if (empty || !piece.empty())
        {
            ++pos;      // consume the delimiter that terminates the piece
            break;
        }

        piece.clear();
        ++pos;
    }

    return piece;
}
/// \brief Advance to the next byte of the generated input
/// \remark When the current buffer is exhausted, the next test character
///	(character( ++ii, CharSet::MaxChar)) is printed into the cleared buffer
///	with the configured encoding, until ii has reached NOF_TESTS
/// \return true if there is a current byte, false if all tests are consumed
bool skip()
{
	pos++;
	if (pos < buf.size())
	{
		return true;
	}

	if (ii >= NOF_TESTS)
	{
		// no more test characters: park the position at the buffer end
		pos = buf.size();
		return false;
	}

	buf.clear();
	unsigned int tt = character( ++ii, (unsigned int)CharSet::MaxChar);
	encoding.print( tt, buf);
	pos = 0;
	return true;
}
static void pad_spaces(thread_db* tdbb, CHARSET_ID charset, BYTE* ptr, ULONG len)
{
/**************************************
 *
 *	p a d _ s p a c e s
 *
 **************************************
 *
 * Functional description
 *	Pad a buffer with the character set defined space character.
 *	len is a byte count.
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(ptr != NULL);

	CharSet* obj = INTL_charset_lookup(tdbb, charset);
	const BYTE* const end = &ptr[len];

	// Single-octet character sets are optimized here
	if (obj->getSpaceLength() == 1)
	{
		for (; ptr < end; ++ptr)
			*ptr = *obj->getSpace();

		return;
	}

	// Multi-octet space: write whole copies of the space sequence.
	const UCHAR* const end_space = obj->getSpace() + obj->getSpaceLength();

	while (ptr < end)
	{
		const UCHAR* space = obj->getSpace();

		while (ptr < end && space < end_space)
			*ptr++ = *space++;

		// This fb_assert is checking that we didn't have a buffer-end
		// in the middle of a space character
		fb_assert(!(ptr == end) || (space == end_space));
	}
}
ULONG INTL_convert_bytes(thread_db* tdbb,
						 CHARSET_ID dest_type,
						 BYTE* dest_ptr,
						 const ULONG dest_len,
						 CHARSET_ID src_type,
						 const BYTE* src_ptr,
						 const ULONG src_len,
						 ErrorFunction err)
{
/**************************************
 *
 *	I N T L _ c o n v e r t _ b y t e s
 *
 **************************************
 *
 * Functional description
 *	Given a string of bytes in one character set, convert it to another
 *	character set.
 *
 *	If (dest_ptr) is NULL, return the count of bytes needed to convert
 *	the string. This does not guarantee the string can be converted,
 *	the purpose of this is to allocate a large enough buffer.
 *
 * RETURNS:
 *	Length of resulting string, in bytes.
 *	calls (err) if conversion error occurs.
 *
 **************************************/
	SET_TDBB(tdbb);

	fb_assert(src_ptr != NULL);
	fb_assert(src_type != dest_type);
	fb_assert(err != NULL);

	dest_type = INTL_charset(tdbb, dest_type);
	src_type = INTL_charset(tdbb, src_type);

	const UCHAR* const start_dest_ptr = dest_ptr;

	const bool destIsRaw = (dest_type == CS_BINARY || dest_type == CS_NONE);
	const bool srcIsRaw = (src_type == CS_BINARY || src_type == CS_NONE);

	if (!destIsRaw && !srcIsRaw)
	{
		// character sets are known to be different
		if (!src_len)
			return 0;

		// Do we know an object from cs1 to cs2?
		CsConvert cs_obj = INTL_convert_lookup(tdbb, dest_type, src_type);
		return cs_obj.convert(src_len, src_ptr, dest_len, dest_ptr, NULL, true);
	}

	// At least one side is NONE/BINARY: the bytes are copied as they are.

	// See if we just need a length estimate
	if (dest_ptr == NULL)
		return (src_len);

	// Copying into a real character set: the source must be well formed in it.
	if (!destIsRaw)
	{
		CharSet* toCharSet = INTL_charset_lookup(tdbb, dest_type);

		if (!toCharSet->wellFormed(src_len, src_ptr))
			err(Arg::Gds(isc_malformed_string));
	}

	const ULONG copyLen = MIN(dest_len, src_len);

	for (ULONG remaining = copyLen; remaining; --remaining)
		*dest_ptr++ = *src_ptr++;

	// See if only space characters are remaining
	const ULONG leftover = src_len - copyLen;

	if (leftover == 0 || allSpaces(INTL_charset_lookup(tdbb, src_type), src_ptr, leftover, 0))
		return dest_ptr - start_dest_ptr;

	err(Arg::Gds(isc_arith_except) << Arg::Gds(isc_string_truncation) <<
		Arg::Gds(isc_trunc_limits) << Arg::Num(dest_len) << Arg::Num(src_len));

	return 0;
}
// Fills `set` by calling CharSet::SetNotRanges with the word-character range
// table (wordStr, numWordPairs entries); setAllocator is passed through to
// that call.
// NOTE(review): presumably SetNotRanges stores the complement of the listed
// ranges, i.e. every character that is not a word character - confirm
// against CharSet.
void StandardChars<char16>::SetNonWordChars(ArenaAllocator* setAllocator, CharSet<Char> &set) { set.SetNotRanges(setAllocator, numWordPairs, wordStr); }
void EXE_send(thread_db* tdbb, jrd_req* request, USHORT msg, ULONG length, const UCHAR* buffer) { /************************************** * * E X E _ s e n d * ************************************** * * Functional description * Send a message from the host program to the engine. * This corresponds to a blr_receive or blr_select statement. * **************************************/ SET_TDBB(tdbb); DEV_BLKCHK(request, type_req); if (--tdbb->tdbb_quantum < 0) JRD_reschedule(tdbb, 0, true); if (!(request->req_flags & req_active)) ERR_post(Arg::Gds(isc_req_sync)); const StmtNode* message = NULL; const StmtNode* node; if (request->req_operation != jrd_req::req_receive) ERR_post(Arg::Gds(isc_req_sync)); node = request->req_message; jrd_tra* transaction = request->req_transaction; const JrdStatement* statement = request->getStatement(); const SelectNode* selectNode; if (StmtNode::is<MessageNode>(node)) message = node; else if ((selectNode = StmtNode::as<SelectNode>(node))) { const NestConst<StmtNode>* ptr = selectNode->statements.begin(); for (const NestConst<StmtNode>* end = selectNode->statements.end(); ptr != end; ++ptr) { const ReceiveNode* receiveNode = (*ptr)->as<ReceiveNode>(); message = receiveNode->message; if (message->as<MessageNode>()->messageNumber == msg) { request->req_next = *ptr; break; } } } else BUGCHECK(167); // msg 167 invalid SEND request const Format* format = StmtNode::as<MessageNode>(message)->format; if (msg != StmtNode::as<MessageNode>(message)->messageNumber) ERR_post(Arg::Gds(isc_req_sync)); if (length != format->fmt_length) ERR_post(Arg::Gds(isc_port_len) << Arg::Num(length) << Arg::Num(format->fmt_length)); memcpy(request->getImpure<UCHAR>(message->impureOffset), buffer, length); for (USHORT i = 0; i < format->fmt_count; ++i) { const DSC* desc = &format->fmt_desc[i]; // ASF: I'll not test for dtype_cstring because usage is only internal if (desc->dsc_dtype == dtype_text || desc->dsc_dtype == dtype_varying) { const UCHAR* p = 
request->getImpure<UCHAR>(message->impureOffset + (ULONG)(IPTR) desc->dsc_address); USHORT len; switch (desc->dsc_dtype) { case dtype_text: len = desc->dsc_length; break; case dtype_varying: len = reinterpret_cast<const vary*>(p)->vary_length; p += sizeof(USHORT); break; } CharSet* charSet = INTL_charset_lookup(tdbb, DSC_GET_CHARSET(desc)); if (!charSet->wellFormed(len, p)) ERR_post(Arg::Gds(isc_malformed_string)); } else if (desc->isBlob()) { if (desc->getCharSet() != CS_NONE && desc->getCharSet() != CS_BINARY) { const Jrd::bid* bid = request->getImpure<Jrd::bid>( message->impureOffset + (ULONG)(IPTR) desc->dsc_address); if (!bid->isEmpty()) { AutoBlb blob(tdbb, blb::open(tdbb, transaction/*tdbb->getTransaction()*/, bid)); blob.getBlb()->BLB_check_well_formed(tdbb, desc); } } } } execute_looper(tdbb, request, transaction, request->req_next, jrd_req::req_proceed); }
// Fills `set` by calling CharSet::SetNotRanges with the newline range table
// (newlineStr, numNewlinePairs entries); setAllocator is passed through to
// that call.
// NOTE(review): presumably SetNotRanges stores the complement of the listed
// ranges, i.e. every character that is not a newline - confirm against
// CharSet.
void StandardChars<char16>::SetNonNewline(ArenaAllocator* setAllocator, CharSet<Char> &set) { set.SetNotRanges(setAllocator, numNewlinePairs, newlineStr); }
// Returns the Collation object for text type tt_id, creating and caching it
// in charset_collations when there is no usable cached instance.
//
// A cached instance that is not marked obsolete is returned directly. An
// obsolete one is destroyed immediately when unused (useCount == 0), or kept
// aside and destroyed once the replacement holds its existence lock.
// ERR_post is called when the collation's metadata cannot be read
// (isc_text_subtype) or its texttype cannot be loaded
// (isc_collation_not_installed).
Collation* CharSetContainer::lookupCollation(thread_db* tdbb, USHORT tt_id)
{
	const USHORT id = TTYPE_TO_COLLATION(tt_id);

	// First look without taking the creation mutex.
	if (id < charset_collations.getCount() && charset_collations[id] != NULL &&
		!charset_collations[id]->obsolete)
	{
		return charset_collations[id];
	}

	Jrd::Attachment* att = tdbb->getAttachment();
	Jrd::Attachment::CheckoutLockGuard guard(att, createCollationMtx, FB_FUNCTION); // do we need it ?

	Collation* to_delete = NULL;

	// Look again now that the guard is held.
	if (id < charset_collations.getCount() && charset_collations[id] != NULL)
	{
		Collation* const cached = charset_collations[id];

		if (!cached->obsolete)
			return cached;

		// if obsolete collation is not used delete it immediately,
		// else wait until all references are released
		if (cached->useCount == 0)
		{
			cached->destroy(tdbb);
			delete cached;
		}
		else
			to_delete = cached;

		charset_collations[id] = NULL;
	}

	SubtypeInfo info;

	if (!MET_get_char_coll_subtype_info(tdbb, tt_id, &info))
	{
		// Unknown text type: get rid of the obsolete instance, then report.
		if (to_delete)
		{
			LCK_lock(tdbb, to_delete->existenceLock, LCK_SR, LCK_WAIT);
			to_delete->destroy(tdbb);
			delete to_delete;
		}

		ERR_post(Arg::Gds(isc_text_subtype) << Arg::Num(tt_id));
		return charset_collations[id];
	}

	CharSet* charset = INTL_charset_lookup(tdbb, TTYPE_TO_CHARSET(tt_id));

	// The specific attributes are read in CS_METADATA; convert them to the
	// collation's own character set when that is a different one.
	if (TTYPE_TO_CHARSET(tt_id) != CS_METADATA)
	{
		Firebird::UCharBuffer specificAttributes;
		ULONG size = info.specificAttributes.getCount() * charset->maxBytesPerChar();

		size = INTL_convert_bytes(tdbb, TTYPE_TO_CHARSET(tt_id),
			specificAttributes.getBuffer(size), size,
			CS_METADATA, info.specificAttributes.begin(),
			info.specificAttributes.getCount(), ERR_post);

		specificAttributes.shrink(size);
		info.specificAttributes = specificAttributes;
	}

	texttype* tt = FB_NEW_POOL(*att->att_pool) texttype;
	memset(tt, 0, sizeof(texttype));

	if (!lookup_texttype(tt, &info))
	{
		delete tt;
		ERR_post(Arg::Gds(isc_collation_not_installed) << Arg::Str(info.collationName) <<
			Arg::Str(info.charsetName));
	}

	if (charset_collations.getCount() <= id)
		charset_collations.grow(id + 1);

	fb_assert((tt->texttype_canonical_width == 0 && tt->texttype_fn_canonical == NULL) ||
		(tt->texttype_canonical_width != 0 && tt->texttype_fn_canonical != NULL));

	if (tt->texttype_canonical_width == 0)
	{
		if (charset->isMultiByte())
		{
			tt->texttype_canonical_width = sizeof(ULONG);	// UTF-32
		}
		else
		{
			tt->texttype_canonical_width = charset->minBytesPerChar();
			// canonical is equal to string, then TEXTTYPE_DIRECT_MATCH can be turned on
			tt->texttype_flags |= TEXTTYPE_DIRECT_MATCH;
		}
	}

	Collation* const created = Collation::createInstance(*att->att_pool, tt_id, tt, charset);
	charset_collations[id] = created;
	created->name = info.collationName;

	// we don't need a lock in the charset
	if (id != 0)
	{
		Lock* lock = created->existenceLock =
			CharSetContainer::createCollationLock(tdbb, tt_id, created);

		fb_assert(created->useCount == 0);
		fb_assert(!created->obsolete);

		LCK_lock(tdbb, lock, LCK_SR, LCK_WAIT);

		// as we just obtained SR lock for new collation instance
		// we could safely delete obsolete instance
		if (to_delete)
		{
			to_delete->destroy(tdbb);
			delete to_delete;
		}
	}

	return charset_collations[id];
}
/**
 * Finds the font glyph that best matches the content of a region of image.
 *
 * For every glyph in charSet a SIZE x SIZE template window is slid over roi
 * (extended by the X/Y margins, plus XExtend when codeId == 3). The best
 * result per glyph is selected with UpdateBestMatchInChar, and the per-glyph
 * winners are compared with UpdateBestMatchBetweenChar.
 *
 * @param image    image to match against; copied into a work image before
 *                 every single match
 * @param roi      region of image expected to contain the character
 * @param charSet  glyph images together with their character codes
 * @param codeId   only the value 3 is treated specially here: it widens the
 *                 horizontal search range by 5
 * @return the overall best match; initialised with code '?'
 */
MatchCharRet MatchChar2(SimpleImage* image,SiRect roi,CharSet& charSet,int codeId)
{
    MatchCharRet bestMatch = {'?'};

    const int SIZE = 20;
    int XMargin = 4;
    int YMargin = 5;
    int XExtend = (codeId == 3) ? 5 : 0;

    SimpleImage* imgWork = siCloneImage(image);

    // Walk over all font images.
    for (int i = 0;i < charSet.size();i++)
    {
        SimpleImage* fontImage = charSet[i].image;
        MatchCharRet localeBest = {0};

        // Try every template position.
        for (int y = -YMargin;y < roi.height+YMargin-SIZE;y++)
        {
            for (int x = -XMargin; x < roi.width+XMargin+XExtend - SIZE;x++)
            {
                // Place the template in image coordinates.
                int x0 = x + roi.x;
                int y0 = y + roi.y;
                int x1 = x0+SIZE;
                int y1 = y0+SIZE;

                // Clip the template rectangle to the region.
                if (x0 < roi.x) { x0 = roi.x; }
                else if (x0 > roi.x + roi.width) { x0 = roi.x+roi.width; }

                if (y0 < roi.y) { y0 = roi.y; }
                else if (y0 > roi.y + roi.height) { y0 = roi.y+roi.height; }

                if (x1 > roi.x + roi.width) { x1 = roi.x+roi.width; }
                if (y1 > roi.y + roi.height) { y1 = roi.y+roi.height; }

                // The part of the font image that remains after clipping.
                int sx = x0 - roi.x - x;
                int sy = y0 - roi.y - y;
                int ex = x1 - roi.x - x;
                int ey = y1 - roi.y - y;

                // Match the glyph on a fresh copy of the image.
                siCopyImage(image,imgWork);
                MatchCharRet result = MatchFontImage(imgWork,x0,y0,fontImage,sx,sy,ex,ey);
                result.code = charSet[i].code;
                result.x = x+roi.x;
                result.y = y+roi.y;

                // Keep the best position for this glyph.
                localeBest = UpdateBestMatchInChar(localeBest,result);

#if DEBUG_STEP == 1
                //if (codeId == 1 && (result.code == '7' || result.code == 'M'))
                {
                    printf("result bb:%4d localbest bb:%4d\n",result.bb,localeBest.bb);
                    siShowImage("font",fontImage);
                    siShowImage("match",imgWork);
                    cvWaitKey();
                }
#endif
            }
        }

        // Update the overall best match.
        bestMatch = UpdateBestMatchBetweenChar(bestMatch,localeBest);

#if DEBUG_STEP == 1
        printf("\nlocalbest bb:%4d best bb:%4d ch:%c\n",localeBest.bb,bestMatch.bb,bestMatch.code);
#endif
    }

    siReleaseImage(&imgWork);
    return bestMatch;
}
// Equality comparison: true when this object's value_ equals the value
// returned by other.get(). `other` is taken by value.
bool operator==(CharSet other) const { return value_ == other.get(); }
// Fills `set` by calling CharSet::SetNotRanges with the whitespace range
// table (whitespaceStr, numWhitespacePairs entries); setAllocator is passed
// through to that call.
// NOTE(review): presumably SetNotRanges stores the complement of the listed
// ranges, i.e. every character that is not whitespace - confirm against
// CharSet.
void StandardChars<char16>::SetNonWhitespace(ArenaAllocator* setAllocator, CharSet<Char> &set) { set.SetNotRanges(setAllocator, numWhitespacePairs, whitespaceStr); }
/**
 * Finds the font glyph that best matches the content of a region of image.
 *
 * A SIZE x SIZE template window is slid over roi (extended by the X/Y
 * margins, plus XExtend when codeId == 3). At every position all glyphs of
 * charSet are matched and each result is folded into the running best with
 * UpdateBestMatchBetweenChar.
 *
 * @param image    image to match against; copied into a work image before
 *                 every single match
 * @param roi      region of image expected to contain the character
 * @param charSet  glyph images together with their character codes
 * @param codeId   only the value 3 is treated specially here: it widens the
 *                 horizontal search range by 5
 * @return the best match found; initialised with code '?'
 */
MatchCharRet MatchChar(SimpleImage* image,SiRect roi,CharSet& charSet,int codeId)
{
    MatchCharRet bestMatch = {'?'};

    const int SIZE = 20;
    int XMargin = 4;
    int YMargin = 12;
    int XExtend = (codeId == 3) ? 5 : 0;

    SimpleImage* imgWork = siCloneImage(image);

    // Try every template position.
    for (int x = -XMargin; x < roi.width+XMargin+XExtend - SIZE;x++)
    {
        for (int y = -YMargin;y < roi.height+YMargin-SIZE;y++)
        {
            // Place the template in image coordinates.
            int x0 = x + roi.x;
            int y0 = y + roi.y;
            int x1 = x0+SIZE;
            int y1 = y0+SIZE;

            // Clip the template rectangle to the region.
            if (x0 < roi.x) { x0 = roi.x; }
            else if (x0 > roi.x + roi.width) { x0 = roi.x+roi.width; }

            if (y0 < roi.y) { y0 = roi.y; }
            else if (y0 > roi.y + roi.height) { y0 = roi.y+roi.height; }

            if (x1 > roi.x + roi.width) { x1 = roi.x+roi.width; }
            if (y1 > roi.y + roi.height) { y1 = roi.y+roi.height; }

            // The part of the font image that remains after clipping.
            int sx = x0 - roi.x - x;
            int sy = y0 - roi.y - y;
            int ex = x1 - roi.x - x;
            int ey = y1 - roi.y - y;

            // Match every font image at the current position.
            for (int i = 0;i < charSet.size();i++)
            {
                SimpleImage* fontImage = charSet[i].image;

                // Match the glyph on a fresh copy of the image.
                siCopyImage(image,imgWork);
                MatchCharRet result = MatchFontImage(imgWork,x0,y0,fontImage,sx,sy,ex,ey);
                result.code = charSet[i].code;
                result.x = x+roi.x;
                result.y = y+roi.y;

                //cvShowImage("font",fontImage);
                //cvShowImage("match",imgWork);
                //cvWaitKey();

                // Update the best match.
                bestMatch = UpdateBestMatchBetweenChar(bestMatch,result);
            }
        }
    }

    siReleaseImage(&imgWork);
    return bestMatch;
}
// Fills `set` by calling CharSet::SetNotRanges with the digit range table
// (digitStr, numDigitPairs entries); setAllocator is passed through to that
// call.
// NOTE(review): presumably SetNotRanges stores the complement of the listed
// ranges, i.e. every character that is not a digit - confirm against
// CharSet.
void StandardChars<char16>::SetNonDigits(ArenaAllocator* setAllocator, CharSet<Char> &set) { set.SetNotRanges(setAllocator, numDigitPairs, digitStr); }