Esempio n. 1
0
/**
 * Transcribes a Tengwar-encoded, NUL-terminated string into Roman text.
 *
 * Characters with Ord() <= 32 are copied through unchanged. When
 * TreatNumbersSeparately is set, runs of digit characters (codes 240..251),
 * together with the modifier marks that directly follow a digit, are converted
 * through GetRomanDigit() (and duodec2dec() when !decimal). Everything else is
 * looked up in the mode.t2r rule tables, bucketed by TengHash() of the current
 * character; the first rule whose source text, "next" constraint and "prev"
 * constraint all hold is applied.
 *
 * @param str  NUL-terminated input; must not be NULL (strlen() is called on it).
 * @return Pointer into a function-local static string. The storage is reused
 *         by the next call, so the function is not reentrant and callers must
 *         copy the result if they need to keep it.
 * @throws EPatternNotFound  if no rule matches and `alert` is set.
 * @throws EAbort            if `stop` is set after a token has been processed.
 */
const char *CTranscription::Tengwar2Roman(const char *str)
{
  string temp, cislo;
  static string res;
  unsigned int i;
  int hash;
  const char *p, *pend, *pp;
  char next;
  string prev;
  bool letterfound;
#ifdef DEBUG
  int tstart, tstop;
#endif
  string testentry;

#ifdef SPY
  printf("CTranscription::Tengwar2Roman\n");
#endif
  size_t len=strlen(str);
#ifdef DEBUG
  tstart=time(NULL);
#endif
  p=str;
  res="";
  prev=NON_ALPHA_NUM;
  pend=p+len;
  while(p<pend)
  {
    temp="";
    // Copy whitespace/control characters through unchanged. The p<pend bound
    // matters: the terminating NUL is also <=32 and would otherwise be copied
    // and scanned past.
    while((p<pend)&&(Ord(p[0])<=32))
    {
      res+=p[0];
      prev=NON_ALPHA_NUM;
      p++;
    }
    if(p>=pend)
      break;
    // Number conversion
    if(TreatNumbersSeparately)
    {
      pp=p;
      cislo="";
      while(pp<pend)
      {
        const char c=pp[0];
        const bool digit=(Ord(c)>=240)&&(Ord(c)<=251);
        if(!digit)
        {
          // A modifier mark belongs to the number only when the character
          // before it (the LAST character of prev) is a digit.
          const bool modifier=(c=='%')||(c=='T')||(c=='G')||(c=='B')||
                              ((Ord(c)>=152)&&(Ord(c)<=153))||
                              ((Ord(c)>=168)&&(Ord(c)<=169))||
                              ((Ord(c)>=200)&&(Ord(c)<=203));
          if(!modifier||prev.empty())
            break;
          const char last=prev[prev.size()-1];
          if((Ord(last)<240)||(Ord(last)>251))
            break;
        }
        cislo+=c;
        prev=c;
        pp++;
      }
      p=pp;
      if(cislo!="")
      {
        // Emit the collected characters in reverse order, visiting every
        // index from size()-1 down to 0.
        // NOTE(review): modifier marks are passed to GetRomanDigit() as well;
        // presumably it yields nothing for them - confirm.
        for(long j=static_cast<long>(cislo.size())-1;j>=0;j--)
          temp+=GetRomanDigit(cislo[j]);
        if(!decimal)
          temp=duodec2dec(temp);
        res+=temp;
#ifdef KYLIX
        Application.ProcessMessages();
#endif
        if(stop)
          throw EAbort("");
        continue;
      }
    }
    letterfound=false;
    hash=TengHash(p[0]);
    const size_t remaining=static_cast<size_t>(pend-p);
    for(i=0;i<mode.t2r.src[hash]->count();i++)
    {
      const size_t l=(*mode.t2r.src[hash])[i].size();
      // A rule longer than the remaining input cannot be compared without
      // reading beyond the terminator, so it cannot apply here.
      if(l>remaining)
        continue;
      testentry.assign(p,l);
      if(SmartCompare((*mode.t2r.src[hash])[i],testentry))
      {
        // "next" constraint: either "not followed by an alphanumeric" or
        // "followed by exactly this character".
        if((*mode.t2r.next[hash])[i]!="")
        {
          next=(*mode.t2r.next[hash])[i][0];
          if(next==NON_ALPHA_NUM)
          {
            if((p+l<pend)&&(IsTengAlphaNum(p[l])))
              continue;
          }
          else if((p+l>=pend)||(p[l]!=next))
            continue;
        }
        // "prev" constraint, checked against the id of the previous match.
        if((*mode.t2r.prev[hash])[i]!="")
        {
          const char prevlast=prev.empty()?'\0':prev[prev.size()-1];
          if(((*mode.t2r.prev[hash])[i][0]==NON_ALPHA_NUM)&&(IsTengAlphaNum(prevlast)))
            continue;
          else if((*mode.t2r.prev[hash])[i]!=prev)
            continue;
        }
        temp=(*mode.t2r.dest[hash])[i];
        letterfound=true;
        if((*mode.t2r.id[hash])[i].size()>0)
          prev=(*mode.t2r.id[hash])[i];
        else
          prev=NON_ALPHA_NUM;
        p+=l;
        break;
      }
    }
    if(!letterfound)
    {
      if(alert)
      {
        // Report the unmatched run up to the next whitespace, never scanning
        // past the end of the input.
        for(i=0;(p+i<pend)&&!IsWhiteChar(p[i]);i++);
        throw EPatternNotFound(string(p,i).c_str());
      }
      else
      {
        temp="?";
        p++;
      }
    }
    res+=temp;
#ifdef KYLIX
    Application.ProcessMessages();
#endif
    if(stop)
      throw EAbort("");
  }//while
#ifdef DEBUG
  tstop=time(NULL);
  // NOTE(review): time() has one-second resolution, so the value printed
  // under the "ms" label is really whole seconds.
  fprintf(stderr,"%d bytes: %d ms\n",static_cast<int>(len),tstop-tstart);
#endif

  return res.c_str();
}
Esempio n. 2
0
/*
@	Purpose:	Scan one token from m_lpszString, starting at m_nCurCharPos,
@			and store its text and type in m_curToken.
@	Parameters:	none
@	Returns:	false if scanning fails (illegal character, or a token too long
@			for the internal buffer); true otherwise.
@	Notes:		Uses function-local static buffers, so it is not reentrant.
@			When the input is exhausted the token type is EToken_Complete.
*/
bool CLexicalAnalyzer::GetNextToken( void )
{
	if( m_nCurCharPos >= m_nStringLen )
	{
		m_curToken.m_eTokenType = EToken_Complete;
		return true;
	}
	bool bAddToLex = false;	// whether the current character is appended to the token text
	bool bDone = false;		// whether the current token is finished
	char cCurChar = 0;		// current character
	static char szBuffer[1024];
	static char szOpt[8];
	size_t w = 0;
	size_t optW = 0;
	EToken	curTokenType = EToken_Complete;
	memset( szBuffer, 0, sizeof( szBuffer ) );
	memset( szOpt, 0, sizeof( szOpt ) );

	// Every token starts from the start state; without this the scan would
	// resume in whatever state the previous token (or a failed scan) left behind.
	m_curState = ELexState_Start;

	while( true )
	{
		cCurChar = m_lpszString[m_nCurCharPos++];
		if( cCurChar == '\0' )
			break;
		bAddToLex = true;

		switch( m_curState )
		{
			//-------- start state
		case ELexState_Start:
			{
				// skip leading whitespace
				if( IsWhiteChar( cCurChar) )
				{
					bAddToLex = false;
				}
				// start of an integer
				else if( IsNumeric( cCurChar ) )
				{
					m_curState = ELexState_Int;
				}
				// start of a floating-point number
				else if( cCurChar == '.' )
				{
					m_curState = ELexState_Float;
				}
				// start of an identifier
				else if( IsCharIdentifier( cCurChar ) )
				{
					m_curState = ELexState_Identifier;
				}
				// delimiter: a complete single-character token
				else if( GetDelimiterType( cCurChar ) != EToken_Invalid )
				{
					m_curState = ELexState_Delimiter;
					curTokenType = GetDelimiterType( cCurChar );
					bDone = true;
				}
				// opening quote of a string literal (the quote itself is not stored)
				else if( cCurChar == '"' )
				{
					m_curState = ELexState_String;
					bAddToLex = false;
				}
				// start of an operator
				else if( IsOptChar( cCurChar ) )
				{
					m_curState = ELexState_Operator;
					szOpt[optW++] = cCurChar;
				}
				// illegal character: scanning fails
				else
				{
					return false;
				}
			}
			break;

			//-------- integer state
		case ELexState_Int:
			{
				// digits keep the current state
				if( IsNumeric( cCurChar ) )
					m_curState = ELexState_Int;
				// a decimal point switches to the float state
				else if( cCurChar == '.' )
					m_curState = ELexState_Float;
				// whitespace ends the token
				else if( IsWhiteChar( cCurChar ) )
				{
					bAddToLex = false;
					bDone = true;
				}
				// an operator character ends the token and is put back
				else if( IsOptChar( cCurChar ) )
				{
					bAddToLex = false;
					bDone = true;
					-- m_nCurCharPos;
				}
				// anything else is illegal
				else
					return false;
			}
			break;

			//-------- floating-point state
		case ELexState_Float:
			{
				if( IsNumeric( cCurChar ) )
					m_curState = ELexState_Float;
				// whitespace ends the token
				else if( IsWhiteChar( cCurChar ) )
				{
					bAddToLex = false;
					bDone = true;
				}
				// an operator character ends the token and is put back
				else if( IsOptChar( cCurChar ) )
				{
					bAddToLex = false;
					bDone = true;
					-- m_nCurCharPos;
				}
				// anything else is illegal
				else
					return false;
			}
			break;

			//-------- identifier state
		case ELexState_Identifier:
			{
				// identifier characters keep the current state
				if( IsCharIdentifier( cCurChar ) )
				{
					m_curState = ELexState_Identifier;
				}
				// whitespace ends the token
				else if( IsWhiteChar( cCurChar ) )
				{
					bAddToLex = false;
					bDone = true;
				}
				// an operator character ends the token and is put back
				else if( IsOptChar( cCurChar ) )
				{
					bAddToLex = false;
					bDone = true;
					-- m_nCurCharPos;
				}
				// anything else is illegal
				else
					return false;
			}
			break;

			//--------- inside a string literal
		case ELexState_String:
			{
				// closing quote: the string is complete
				if( cCurChar == '"' )
				{
					bAddToLex = false;
					m_curState = ELexState_StringEnd;
				}
				// backslash: switch to the escape state
				else if( cCurChar == '\\' )
				{
					bAddToLex = false;
					m_curState = ELexState_StringEscape;
				}
				// every other character is added to the string
			}
			break;

			//--------- escaped character inside a string
		case ELexState_StringEscape:
			{
				// TODO translate the escaped character; for now it is stored as-is
				m_curState = ELexState_String;
			}
			break;

			//--------- string literal finished
		case ELexState_StringEnd:
			{
				// The character after the closing quote belongs to the next
				// token, so put it back instead of silently dropping it.
				bAddToLex = false;
				bDone = true;
				-- m_nCurCharPos;
			}
			break;

			//--------- operator
		case ELexState_Operator:
			{
				// Extend the operator while the result is still a known
				// operator and szOpt has room for another character plus NUL.
				bool bExtends = false;
				if( optW < sizeof( szOpt ) - 1 )
				{
					szOpt[optW++] = cCurChar;
					bExtends = ( GetOptTokenType( szOpt ) != EToken_Invalid );
				}
				if( !bExtends )
				{
					bAddToLex = false;
					bDone = true;
					-- m_nCurCharPos;
				}
			}
			break;


		}	// end switch

		if( bAddToLex )
		{
			// Reject tokens that would overflow the fixed-size buffer
			// (one byte is reserved for the terminating NUL).
			if( w >= sizeof( szBuffer ) - 1 )
				return false;
			szBuffer[w++] = cCurChar;
		}
		if( bDone )
			break;

	}
	szBuffer[w] = '\0';

	// Determine the token type from the final state.
	// NOTE(review): the string states have no case here, so a string literal
	// is reported through the default branch as EToken_Complete - confirm
	// whether a dedicated string token type should be used.
	switch( m_curState )
	{
	case ELexState_Int:
		curTokenType = GetIntType( szBuffer );
		break;

	case ELexState_Float:
		curTokenType = EToken_Float;
		break;

	case ELexState_Identifier:
		curTokenType = GetKeywordType( szBuffer );
		break;

	case ELexState_Delimiter:
		curTokenType = GetDelimiterType( szBuffer[0] );
		break;

	case ELexState_Operator:
		curTokenType = GetOptTokenType( szBuffer );
		break;

	case ELexState_Start:
	default:
		curTokenType = EToken_Complete;
		break;
	}
	m_curToken.m_strTokenString = szBuffer;
	m_curToken.m_eTokenType		= curTokenType;

	return true;
}
Esempio n. 3
0
/**
 * Transcribes a NUL-terminated Roman string into Tengwar-encoded text.
 *
 * Characters with Ord() <= 32 are copied through unchanged. When
 * TreatNumbersSeparately is set, runs of ASCII digits are converted
 * (through dec2duodec() when !decimal) and written last-character-first via
 * GetTengwarDigit(), with optional marks controlled by `lsd` and `digits`.
 * Everything else is looked up in the mode.r2t rule tables, indexed by the
 * Ord() of the current character; the first rule whose source text, "next"
 * constraint and "prev" constraint all hold is applied.
 *
 * When !mode.casesens, matching runs on lowercase(str) while error messages
 * still quote the original text.
 * NOTE(review): this assumes lowercase() preserves the string length - confirm.
 *
 * @param str  NUL-terminated input; must not be NULL (strlen() is called on it).
 * @return Pointer into a function-local static string. The storage is reused
 *         by the next call, so the function is not reentrant and callers must
 *         copy the result if they need to keep it.
 * @throws EPatternNotFound  if no rule matches and `alert` is set.
 * @throws EAbort            if `stop` is set after a token has been processed.
 */
const char *CTranscription::Roman2Tengwar(const char *str)
{
	string temp, cislo;
	static string res;
	unsigned int i;
	int hash;
	const char *p, *p2, *pend, *pp;
	char prev, next;
	bool letterfound;
#ifdef DEBUG
	int tstart, tstop;
#endif
	string testentry, str2;
	
#ifdef SPY
	fprintf(stderr,"CTranscription::Roman2Tengwar\n");
#endif
	p2=str;
	size_t len=strlen(str);
#ifdef DEBUG
	tstart=time(NULL);
#endif
	res="";
	prev=NON_ALPHA_NUM;
	// p walks the text used for matching, p2 the original text in lock-step.
	if(!mode.casesens)
	{
		str2=lowercase(str);
		p=str2.c_str();
	}
	else
		p=p2;
	pend=p+len;
	while(p<pend)
	{
		temp="";
		// Copy whitespace/control characters through unchanged.
		while((p<pend)&&(Ord(p[0])<=32))
		{
			res+=p[0];
			prev=NON_ALPHA_NUM;
			p++;
			p2++;
		}
		if(p>=pend)
			break;
		// Number conversion
		if(TreatNumbersSeparately)
		{
			pp=p;
			cislo="";
			while((pp<pend)&&(Ord(pp[0])>=Ord('0'))&&(Ord(pp[0])<=Ord('9')))
			{
				cislo+=pp[0];
				pp++;
			}
			const long consumed=pp-p;
			p+=consumed;
			p2+=consumed;
			if(cislo!="")
			{
				if(!decimal)
					cislo=dec2duodec(cislo);
				// Digits are written from the last character to the first.
				const long last=static_cast<long>(cislo.size())-1;
				for(long j=last;j>=0;j--)
				{
					const char d=cislo[j];
					temp+=GetTengwarDigit(d);
					if((lsd)&&(j==last))//least sign. digit
					{
						if((d=='0')||(d=='4')||(d=='7')||(d=='8')||(d=='a')||(d=='b'))
							temp+='\x99';//153
						else
							temp+='\x98';//152
					}
					else if(digits)
					{
						if(decimal)
						{
							if((d=='0')||(d=='1')||(d=='7')||(d=='9'))
								temp+='T';
							else if((d=='4')||(d=='8'))
								temp+='G';
							else//2,3,5,6
								temp+='%';
						}
						else//duodecimal
						{
							if((d=='3')||(d=='5')||(d=='6')||(d=='9'))
								temp+='\xc8';//200
							else//0,1,2,4,7,8,10,11
								temp+='\xc9';//201
						}
					}//if digits
				}
				res+=temp;
#ifdef KYLIX
				Application.ProcessMessages();
#endif
				if(stop)
					throw EAbort("");
				continue;
			}//if cislo
		}//if tns
		letterfound=false;
		hash=Ord(p[0]);
		const size_t remaining=static_cast<size_t>(pend-p);
		for(i=0;i<mode.r2t.src[hash]->count();i++)
		{
			const size_t l=(*mode.r2t.src[hash])[i].size();
			// A rule longer than the remaining input cannot match, and
			// comparing it would read beyond the terminator.
			if(l>remaining)
				continue;
			testentry.assign(p,l);
			if((*mode.r2t.src[hash])[i]==testentry)
			{
				// "next" constraint: either "not followed by an alphanumeric"
				// or "followed by exactly this character".
				// isalnum() is undefined for negative values, so characters
				// are converted to unsigned char first.
				if((*mode.r2t.next[hash])[i]!="")
				{
					next=(*mode.r2t.next[hash])[i][0];
					if(next==NON_ALPHA_NUM)
					{
						if((p+l<pend)&&(isalnum(static_cast<unsigned char>(p[l]))))
							continue;
					}
					else if((p+l>=pend)||(p[l]!=next))
						continue;
				}
				// "prev" constraint, checked against the id of the previous match.
				if((*mode.r2t.prev[hash])[i]!="")
				{
					if(((*mode.r2t.prev[hash])[i][0]==NON_ALPHA_NUM)&&(isalnum(static_cast<unsigned char>(prev))))
						continue;
					else if((*mode.r2t.prev[hash])[i][0]!=prev)
						continue;
				}
				temp=(*mode.r2t.dest[hash])[i];
				letterfound=true;
				if((*mode.r2t.id[hash])[i].size()>0)
					prev=(*mode.r2t.id[hash])[i][0];
				else
					prev=NON_ALPHA_NUM;
				p+=l;
				p2+=l;
				break;
			}
		}
		if(!letterfound)
		{
			if(alert)
			{
				// Report the unmatched run (quoted from the original text) up
				// to the next whitespace, never scanning past the end of input.
				for(i=0;(p+i<pend)&&!IsWhiteChar(p[i]);i++);
				throw EPatternNotFound(string(p2,i).c_str());
			}
			else
			{
				temp='\xae';//'?'
				p++;
				p2++;
			}
		}
		res+=temp;
#ifdef KYLIX
		Application.ProcessMessages();
#endif
		if(stop)
			throw EAbort("");
	}
	
#ifdef DEBUG
	tstop=time(NULL);
	// NOTE(review): time() has one-second resolution, so the value printed
	// under the "ms" label is really whole seconds.
	fprintf(stderr,"%d bytes: %d ms\n",static_cast<int>(len),tstop-tstart);
#endif
	
	return res.c_str();
}