const char *CTranscription::Tengwar2Roman(const char *str) { string temp, cislo; static string res; unsigned int i, j; int hash; const char *p, *pend, *pp; // const char *p2; char next; string prev; bool letterfound; #ifdef DEBUG int tstart, tstop; #endif string testentry; #ifdef SPY printf("CTranscription::Tengwar2Roman\n"); #endif size_t len=strlen(str); #ifdef DEBUG tstart=time(NULL); #endif p=str; /*R2T only p2:=AllocMem(Length(p)); origp2:=p2; if(p2<>nil)then StrCopy(p2,p); */ res=""; prev=NON_ALPHA_NUM; /*R2T if not Mode.casesens then p:=PChar(AnsiLowerCase(p)); */ pend=p+len; while(p<pend) { temp=""; //bile znaky zkopirujeme beze zmeny while(Ord(p[0])<=32) { res+=p[0]; prev=NON_ALPHA_NUM; p++; // p2++; } if(p>=pend) break; //predelani cisel if(TreatNumbersSeparately) { pp=p; cislo=""; while((((pp[0]=='%')||(pp[0]=='T')||(pp[0]=='G')||(pp[0]=='B')|| ((Ord(pp[0])>=152)&&(Ord(pp[0])<=153))|| ((Ord(pp[0])>=168)&&(Ord(pp[0])<=169))|| ((Ord(pp[0])>=200)&&(Ord(pp[0])<=203)) // )&&(Ord(prev)>=240)&&(Ord(prev)<=251))|| )&&(Ord(prev[prev.size()])>=240)&&(Ord(prev[prev.size()])<=251))|| ((Ord(pp[0])>=240)&&(Ord(pp[0])<=251))) { cislo+=pp[0]; prev=pp[0]; pp++; } long l=pp-p; p+=l; //p2+=l; if(cislo!="") { for(long j=cislo.size();j>=1;j--) temp+=GetRomanDigit(cislo[j]); if(!decimal) temp=duodec2dec(temp); res+=temp; #ifdef KYLIX Application.ProcessMessages(); #endif if(stop) throw EAbort(""); continue; } } letterfound=false; hash=TengHash(p[0]);//Ord(p[0]); for(i=0;i<mode.t2r.src[hash]->count();i++) { size_t entrylen=(*mode.t2r.src[hash])[i].size(); testentry=""; for(j=0;j<entrylen;j++) testentry+=p[j]; if(SmartCompare((*mode.t2r.src[hash])[i],testentry)) { size_t l=(*mode.t2r.src[hash])[i].size(); if((*mode.t2r.next[hash])[i]!="") { next=(*mode.t2r.next[hash])[i][0]; if((*mode.t2r.next[hash])[i][0]==NON_ALPHA_NUM) { if((p+l<pend)&&(IsTengAlphaNum((p+l)[0]))) continue; } else if((p+l>=pend)||((p+l)[0]!=next)) continue; } if((*mode.t2r.prev[hash])[i]!="") { if(((*mode.t2r.prev[hash])[i][0]==NON_ALPHA_NUM)&&(IsTengAlphaNum(prev[prev.size()]))) continue; else if((*mode.t2r.prev[hash])[i]!=prev) continue; } temp=(*mode.t2r.dest[hash])[i]; letterfound=true; if((*mode.t2r.id[hash])[i].size()>0) // prev:=Mode.t2r.ID[hash][i][1]//Mode.tengwar[i][Length(Mode.tengwar[i])] prev=(*mode.t2r.id[hash])[i]; else prev=NON_ALPHA_NUM; p+=l; //p2+=l; break; } } if(!letterfound) { if(alert) { for(i=0;!IsWhiteChar(p[i]);i++); throw EPatternNotFound(string(p).substr(0,i).c_str()); } else { temp="?"; p++; // p2++; } } res+=temp; #ifdef KYLIX Application.ProcessMessages(); #endif if(stop) throw EAbort(""); }//while // showmessage(res); /*R2T FreeMem(origp2); */ #ifdef DEBUG tstop=time(NULL); fprintf(stderr,"%d bytes: %d ms\n",len,tstop-tstart); #endif return res.c_str(); }
/* @ 功能: 分析一个记号 @ 参数: @ 返回值:如果分析失败,返回false @ 注: */ bool CLexicalAnalyzer::GetNextToken( void ) { if( m_nCurCharPos >= m_nStringLen ) { m_curToken.m_eTokenType = EToken_Complete; return true; } bool bAddToLex = false; // 当前符号是否加入记号符号中 bool bDone = false; // 是否完成当前记号的解析 char cCurChar = 0; // 当前字符 static char szBuffer[1024]; static char szOpt[8]; int w = 0; int optW = 0; EToken curTokenType; memset( szBuffer, 0, sizeof( szBuffer ) ); memset( szOpt, 0, sizeof( szOpt ) ); while( true ) { cCurChar = m_lpszString[m_nCurCharPos++]; if( cCurChar == '\0' ) break; bAddToLex = true; switch( m_curState ) { //--------开始状态 case ELexState_Start: { // 略过前导空格 if( IsWhiteChar( cCurChar) ) { bAddToLex = false; } // 是否是一个整数 else if( IsNumeric( cCurChar ) ) { m_curState = ELexState_Int; } // 是否是一个浮点数 else if( cCurChar == '.' ) { m_curState = ELexState_Float; } // 是否是标识符 else if( IsCharIdentifier( cCurChar ) ) { m_curState = ELexState_Identifier; } // 分隔符 else if( GetDelimiterType( cCurChar ) != EToken_Invalid ) { m_curState = ELexState_Delimiter; curTokenType = GetDelimiterType( cCurChar ); bDone = true; } // 开始字符串解析 else if( cCurChar == '"' ) { m_curState = ELexState_String; bAddToLex = false; } // 运算符 else if( IsOptChar( cCurChar ) ) { m_curState = ELexState_Operator; szOpt[optW++] = cCurChar; } // 非法字符,解析失败 else { return false; } } break; //--------整数状态 case ELexState_Int: { // 整数保持当前状态 if( IsNumeric( cCurChar ) ) m_curState = ELexState_Int; // 如果是小数点,则转到浮点数 else if( cCurChar == '.' ) m_curState = ELexState_Float; // 如果是空格,完成识别 else if( IsWhiteChar( cCurChar ) ) { bAddToLex = false; bDone = true; } // 如果是运算符,完成识别 else if( IsOptChar( cCurChar ) ) { bAddToLex = false; bDone = true; -- m_nCurCharPos; } // 其它都是非法的 else return false; } break; //--------浮点数状态 case ELexState_Float: { if( IsNumeric( cCurChar ) ) m_curState = ELexState_Float; // 如果是空格,完成识别 else if( IsWhiteChar( cCurChar ) ) { bAddToLex = false; bDone = true; } // 如果是运算符,完成识别 else if( IsOptChar( cCurChar ) ) { bAddToLex = false; bDone = true; -- m_nCurCharPos; } // 其它都是非法的 else return false; } break; //--------标识符状态 case ELexState_Identifier: { // 如果是标识符字符,则保持当前状态 if( IsCharIdentifier( cCurChar ) ) { m_curState = ELexState_Identifier; } // 如果是空格,完成识别 else if( IsWhiteChar( cCurChar ) ) { bAddToLex = false; bDone = true; } // 如果是运算符,完成识别 else if( IsOptChar( cCurChar ) ) { bAddToLex = false; bDone = true; -- m_nCurCharPos; } // 其它都是非法的 else return false; } break; //---------字符串 case ELexState_String: { // 如果是 " 字符串识别完成 if( cCurChar == '"' ) { bAddToLex = false; m_curState = ELexState_StringEnd; } // 如果是转义字符,则转到转义字符状态 else if( cCurChar == '\\' ) { bAddToLex = false; m_curState = ELexState_StringEscape; } // 其它字符全部添加到字符串中 } break; //---------转义字符 case ELexState_StringEscape: { // TODO 转换当前字符 m_curState = ELexState_String; } break; //---------完成字符串识别 case ELexState_StringEnd: { bAddToLex = false; bDone = true; } break; //---------运算符 case ELexState_Operator: { szOpt[optW++] = cCurChar; if( GetOptTokenType( szOpt ) == EToken_Invalid ) { bAddToLex = false; bDone = true; -- m_nCurCharPos; } } break; } // end switch if( bAddToLex ) szBuffer[w++] = cCurChar; if( bDone ) break; } szBuffer[w++] = '\0'; // 确定记号类型 switch( m_curState ) { case ELexState_Int: curTokenType = GetIntType( szBuffer ); break; case ELexState_Float: curTokenType = EToken_Float; break; case ELexState_Identifier: curTokenType = GetKeywordType( szBuffer ); break; case ELexState_Delimiter: curTokenType = GetDelimiterType( szBuffer[0] ); break; case ELexState_Operator: curTokenType = GetOptTokenType( szBuffer ); break; case ELexState_Start: default: curTokenType = EToken_Complete; break; } m_curToken.m_strTokenString = szBuffer; m_curToken.m_eTokenType = curTokenType; return true; }
const char *CTranscription::Roman2Tengwar(const char *str) { string temp, cislo; static string res; unsigned int i; int hash; const char *p, *p2, *pend, *pp; char prev, next; bool letterfound; #ifdef DEBUG int tstart, tstop; #endif string testentry, str2; #ifdef SPY fprintf(stderr,"CTranscription::Roman2Tengwar\n"); #endif p2=str; size_t len=strlen(str); #ifdef DEBUG tstart=time(NULL); #endif res=""; prev=NON_ALPHA_NUM; if(!mode.casesens) { str2=lowercase(str); p=str2.c_str(); } else p=p2; pend=p+len; while(p<pend) { temp=""; //bile znaky zkopirujeme beze zmeny while(Ord(p[0])<=32) { res+=p[0]; prev=NON_ALPHA_NUM; p++; p2++; if(p>=pend) break; } if(p>=pend) break; //predelani cisel if(TreatNumbersSeparately) { pp=p; cislo=""; while((Ord(pp[0])>=Ord('0'))&&(Ord(pp[0])<=Ord('9'))) { cislo+=pp[0]; pp++; } long l=pp-p; p+=l; p2+=l; if(cislo!="") { if(!decimal) cislo=dec2duodec(cislo); for(long j=cislo.size()-1;j>=0;j--) { temp+=GetTengwarDigit(cislo[j]); if((lsd)&&(j==cislo.size()-1))//least sign. digit { if((cislo[j]=='0')||(cislo[j]=='4')||(cislo[j]=='7')||(cislo[j]=='8')||(cislo[j]=='a')||(cislo[j]=='b')) temp+='\x99';//153 else temp+='\x98';//152 } else if(digits) { if(decimal) { if((cislo[j]=='0')||(cislo[j]=='1')||(cislo[j]=='7')||(cislo[j]=='9')) temp+='T'; else if((cislo[j]=='4')||(cislo[j]=='8')) temp+='G'; else//2,3,5,6 temp+='%'; } else//duodecimal { if((cislo[j]=='3')||(cislo[j]=='5')||(cislo[j]=='6')||(cislo[j]=='9')) temp+='\xc8';//200 else//0,1,2,4,7,8,10,11 temp+='\xc9';//201 } }//if digits } res+=temp; #ifdef KYLIX Application.ProcessMessages(); #endif if(stop) throw EAbort(""); continue; }//if cislo }//if tns letterfound=false; hash=Ord(p[0]); for(i=0;i<mode.r2t.src[hash]->count();i++) { size_t entrylen=(*mode.r2t.src[hash])[i].size(); testentry=""; for(size_t j=0;j<entrylen;j++) testentry+=p[j]; if((*mode.r2t.src[hash])[i]==testentry) { size_t l=(*mode.r2t.src[hash])[i].size(); if((*mode.r2t.next[hash])[i]!="") { next=(*mode.r2t.next[hash])[i][0]; if((*mode.r2t.next[hash])[i][0]==NON_ALPHA_NUM) { if((p+l<pend)&&(isalnum((p+l)[0]))) continue; } else if((p+l>=pend)||((p+l)[0]!=next)) continue; } if((*mode.r2t.prev[hash])[i]!="") { if(((*mode.r2t.prev[hash])[i][0]==NON_ALPHA_NUM)&&(isalnum(prev))) continue; else if((*mode.r2t.prev[hash])[i][0]!=prev) continue; } temp=(*mode.r2t.dest[hash])[i]; letterfound=true; if((*mode.r2t.id[hash])[i].size()>0) prev=(*mode.r2t.id[hash])[i][0]; else prev=NON_ALPHA_NUM; p+=l; p2+=l; break; } } if(!letterfound) { if(alert) { for(i=0;!IsWhiteChar(p[i]);i++); throw EPatternNotFound(string(p2).substr(0,i).c_str()); } else { temp='\xae';//'?' p++; p2++; } } res+=temp; #ifdef KYLIX Application.ProcessMessages(); #endif if(stop) throw EAbort(""); } #ifdef DEBUG tstop=time(NULL); fprintf(stderr,"%d bytes: %d ms\n",len,tstop-tstart); #endif return res.c_str(); }