Example #1
0
File: annot.c Project: koraa/annot
// Reader thread entry point.
//
// arg__ is treated as an ArgRead*; its stream (F) and delimiter (delim)
// drive the loop. For every pass a fresh token is created, pushed onto inQ
// while its edit lock is held, filled in by getItok(), and then unlocked.
// When feof() reports end of file, inQ->EOT is set and the thread returns
// a null pointer.
void *thr_read(void *arg__) {
  ArgRead *arg = arg__;

  for (;;) { // One pass per token (mostly lines)
    Tok *tok;

    if (feof(arg->F) != 0)
      break;

    // Create the token and hold its edit lock while it is being filled.
    tok = newTok();
    nlock_lock(tok->edit);

    // Wait until the stream has data, then queue the (still locked)
    // token right away so it can be passed on for its timestamp.
    fjoin(arg->F);
    ll_push(inQ, tok);

    // Only now read the token text and release the lock.
    getItok(arg->F, arg->delim, tok->str);
    nlock_unlock(tok->edit);
  }

  inQ->EOT = true;
  return NULL;
}
Example #2
0
// Split XML into tokens and return token information (strings, type, ...).
//
// Scans the input byte by byte from `cur` up to `end`, tracking the parser
// state in the bit set `flag`, and hands each recognised piece of text to
// newTok() as a (start pointer, length) pair in a STR_XML.
//
// Returns 0 after a token has been handed to newTok() (one token per call),
// and -1 if errorXML() was raised or newTok() itself returned -1.
// When the loop runs off the end of the input, `flag` is set to F_LAST and
// one final newTok() call is made with whatever `str` currently holds.
//
// Most branches return with `cur` still resting on the delimiter that ended
// the token; only the quote branches step past it before returning.
int CParseXml::splitXML()
{
   STR_XML str;
   
   // The token normally starts just after the byte `cur` rests on; if that
   // byte is '/', the token starts on the '/' itself.
   if (*cur != '/')
      str.s = cur+1;
   else
      str.s = cur;
   
   str.len = 0;
   
   // Number of UTF-8 continuation bytes still expected (0 = not inside a
   // multi-byte sequence).
   int futf8 = 0;
   
   for (; cur < end; cur++) {
      unsigned char  cc = (unsigned char)*cur;
      int iCur = cc;//(unsigned)*cur;
      // Lead byte of a multi-byte sequence (>= 0xC0): each further high bit
      // that is set (32, 16, 8, 4) adds one more expected continuation byte.
      if(futf8 == 0 && iCur >= 192 ) {
         futf8 = 1;
         if (iCur&32) {
            futf8++;
            if (iCur&16) {
               futf8++;
               if (iCur&8) {
                  futf8++;
                  if (iCur&4) {
                     futf8++;
                  }
               }
            }
         }
         continue;
      }
      // Continuation byte (10xxxxxx) of a sequence in progress: consume it.
      if (futf8 > 0 && iCur >= 128 && (iCur&64) == 0) {
         futf8--;
         continue;
      }
      futf8 = 0;
      
      // Fast path: every byte above 65 except the backslash is plain text.
      // All delimiters handled below ('<' '>' '=' '/' quotes, whitespace,
      // control bytes) are <= 65. Byte 65 ('A') does enter the switch but
      // matches no case.
      // NOTE(review): '`' is 96, so it is filtered out here and the
      // case '`' branch further down can never execute - confirm intent.
      if (iCur>65 && iCur != 92) //92 == '\\'
         continue;
      
      switch (iCur) {
         // Line break: an error inside "<...>"; otherwise it ends the
         // pending text (if any) or just moves the token start forward.
         case '\r': 
         case '\n':
            if (flag & F_TRI_BRACE) {
               errorXML("CR or LF found after \"<\"");
               return -1;
            }
            str.len = cur-str.s;
            if (str.len>0) { 
               if (-1 == newTok(str)) 
                  return -1;
               return 0;
            }
            str.s = cur+1; 
            continue;
            
         // Control bytes below 32 other than TAB (9), LF (10) and CR (13)
         // are rejected.
         case  0: case  1: case  2: case  3: case  4: case  5: case  6: case  7: case  8:
         case 11: case 12:          case 14: case 15: case 16: case 17: case 18: case 19:
         case 20: case 21: case 22: case 23: case 24: case 25: case 26: case 27: case 28:
         case 29: case 30: case 31:
            errorXML("found < 32");
            return -1;
            
         // Two consecutive backslashes outside of a tag and outside of
         // quotes: discard everything up to the next '\n' (or NUL).
         // Presumably a line-comment syntax - confirm.
         case '\\':
            if ((flag & (F_TRI_BRACE|F_SQUOTE|F_DQUOTE)) == 0 && *(cur+1) == '\\') {
               while(*cur && *cur != '\n')cur++;
               str.s = cur+1;
            }
            continue;
            
         // '/' is only meaningful inside a tag and outside quotes: it
         // switches the node state from "open" to "close" and, if text is
         // pending before it, emits that text first.
         case '/':
            if (flag & (F_SQUOTE|F_DQUOTE))
               continue;
            if (flag & F_TRI_BRACE) {
               str.len = cur - str.s;
               if (str.len > 0) { 
                  if (-1 == newTok(str))
                     return -1;
                  flag &= ~F_VNAME;
                  flag &= ~F_VALUE;
                  flag &= ~F_NODE_NOPEN;
                  flag |= F_NODE_NCLOSE;
                  
                  return 0;
               }
               flag &= ~F_VNAME;
               flag &= ~F_VALUE;
               flag &= ~F_NODE_NOPEN;
               flag |= F_NODE_NCLOSE;
            }
            continue;
            
         // '<' outside quotes: error if already inside a tag, otherwise
         // either a "<!-- ... -->" comment, the end of pending content, or
         // the start of a new tag.
         case '<': 
            if (flag & (F_SQUOTE|F_DQUOTE))
               continue;
            if (flag & F_TRI_BRACE) {
               // NOTE(review): this passes the remaining input text as the
               // error message - looks like a debugging aid; confirm.
               errorXML(cur);
               errorXML("\"<\" found - after \"<\"");
               return -1;
            }
            if(*(cur+1) == '!' && *(cur+2) == '-' &&  *(cur+3) == '-') {
               // Skip forward until cur rests on the '>' of "-->" (or NUL).
               cur += 3;
               while (*cur && (*(cur-2) != '-' || *(cur-1) != '-' || *(cur) != '>'))
                  cur++;
               // NOTE(review): cur is moved past the '>' here and the loop's
               // cur++ then advances once more, so the byte immediately
               // following "-->" is never examined - confirm this is intended.
               if(*cur)
                  cur++;
               str.s = cur+1; 
               continue;
            }
            // Text is pending before this '<': emit it as content. cur is
            // left on the '<'.
            if (cur > str.s) {
               flag |= F_CONTENT;
               
               str.len = cur - str.s;
               if (-1 == newTok(str))
                  return -1;
               flag &= ~F_VALUE;
               flag &= ~F_VNAME;
               return 0;
            }
            // Nothing pending: enter the tag; the node name starts after '<'.
            flag |= F_TRI_BRACE; 
            flag |= F_NODE_NOPEN;
            flag &= ~F_NODE_NCLOSE;
            str.s = cur+1; 
            continue;
            
         // '>' outside quotes: must close a tag that was opened by '<'.
         case '>':
            if (flag & (F_SQUOTE|F_DQUOTE)) 
               continue;
            if ((flag & F_TRI_BRACE) == 0) {
               errorXML("<-not found");
               return -1;
            }
            flag &= ~F_VNAME;
            flag &= ~F_CONTENT;
            
            str.len = cur-str.s;
            // Nothing pending and not the degenerate "<>": just leave the tag.
            if ((str.len == 0) && (cur[-1] != '<')) { 
               flag &= ~F_TRI_BRACE;str.s = cur+1;
               continue;
            }
            // Pending text before '>': emit it. F_TRI_BRACE stays set and
            // cur is left on the '>'.
            if (str.len > 0) {
               if (-1 == newTok(str))
                  return -1;
               return 0;
            }
            // Remaining cases (negative length, or "<>"): reset the whole
            // state to plain content.
            // NOTE(review): the &= below has no effect because flag is
            // overwritten on the next line.
            flag &= ~F_TRI_BRACE; 
            flag = F_CONTENT;
            str.s = cur+1; 
            continue;
            
         // Double and single quote are handled identically: both toggle
         // F_DQUOTE, so a value opened with one kind is closed by either.
         // Quotes are ignored outside a tag and while F_SQUOTE is set.
         case 34:        // "
         case '\'':
            if ((flag & F_TRI_BRACE) == 0 || (flag & F_SQUOTE))
               continue; 
            flag ^= F_DQUOTE;
            // Flag now clear => this was the closing quote: emit the text
            // between the quotes as a value, then step past the quote.
            if ((flag & F_DQUOTE) == 0) {
               flag |= F_VALUE;
               flag &= ~F_VNAME;
               str.len = cur - str.s;
               flag &= ~F_NODE_NCLOSE;
               flag &= ~F_NODE_NOPEN;
               if (-1 == newTok(str))
                  return -1;;
               flag &= ~F_VALUE;
               flag |= F_VNAME;
               cur++;
               return 0;
            }
            // Opening quote: the value starts right after it.
            str.s = cur+1;
            continue;
            
         // Backtick: same scheme as above but toggling F_SQUOTE.
         // NOTE(review): unreachable as written, because '`' (96) is
         // skipped by the "iCur>65" filter before the switch.
         case '`':       // `
            if ((flag & F_TRI_BRACE) == 0 || (flag & F_DQUOTE))
               continue; 
            flag ^= F_SQUOTE;
            if ((flag & F_SQUOTE) == 0) {
               flag |= F_VALUE;
               flag &= ~F_VNAME;
               str.len = cur-str.s;
               flag &= ~F_NODE_NCLOSE;
               flag &= ~F_NODE_NOPEN;
               if (-1 == newTok(str))
                  return -1;
               /*
                continue; // *** WD: Ask Janis about this continue
                
                flag |= F_VNAME;
                flag &= ~F_VALUE;
                flag &= ~F_CONTENT;
                */
               flag &= ~F_VALUE;
               flag |= F_VNAME;
               cur++;
               return 0;
            }
            str.s = cur+1;
            continue;
            
         // '=' inside a tag and outside quotes: ends an attribute name (if
         // one is pending) and switches the state to "value expected".
         case '=' :
            if ((flag & F_TRI_BRACE) == 0)
               continue; 
            if (flag & (F_SQUOTE|F_DQUOTE))
               continue;
            
            flag &= ~F_NODE_NOPEN;
            flag &= ~F_NODE_NCLOSE;
            
            if (cur[-1] == '<') {
               errorXML("= sign found after '<'");
               return -1;
            }
            if (cur > str.s){
               str.len = cur - str.s;
               if (-1 == newTok(str)) 
                  return -1;
               flag &= ~F_VNAME;
               flag |= F_VALUE;
               return 0;
            }
            str.s = cur+1; 
            
            flag &= ~F_VNAME;
            flag |= F_VALUE;
            continue;
            
         // Blank outside quotes: separates words. Inside a tag the state
         // leaves "content"; outside a tag it becomes "content".
         case '\t': 
         case ' ':
            if (flag & (F_SQUOTE|F_DQUOTE))
               continue;
            
            if (flag & F_TRI_BRACE) {
               flag &= ~F_CONTENT;
            }
            else { 
               flag &= ~F_VALUE;
               flag &= ~F_VNAME;
               flag |= F_CONTENT;
            }
            // A blank directly after '<' is rejected. Note this reads the
            // byte before cur regardless of whether we are inside a tag.
            if (cur[-1] == '<') {
               errorXML("TAB , SP or EQVAL found after '<'");
               return -1;
            }
            str.len = cur - str.s;
            // Pending word before the blank: emit it; inside a tag the next
            // word is then expected to be an attribute name.
            if (cur > str.s) {
               if (-1 == newTok(str)) 
                  return -1;
               flag &= ~F_NODE_NOPEN;
               flag &= ~F_NODE_NCLOSE;
               if (flag & F_TRI_BRACE) 
                  flag |= F_VNAME;
               return 0;
            }
            flag &= ~F_NODE_NOPEN;
            flag &= ~F_NODE_NCLOSE;
            str.s = cur+1; 
            continue;
      }
   }
   // Input exhausted: mark the final token and hand over whatever str holds
   // (str.len keeps the last value assigned above, 0 if none was).
   flag = F_LAST;
   if (-1 == newTok(str)) 
      return -1;
   return 0;
}
/**
 * Tokenizes the source stream line by line and appends one lexToken per
 * lexeme to `token`.
 *
 * Each token records the 1-based line number, its text, and a class name:
 * "Comando de Atribuicao", "Operador Relacional", "Operador Aditivo",
 * "Operador Multiplicativo", "Delimitador", "Palavra reservada",
 * "Identificador", "Numero inteiro" or "Numero real".
 *
 * Skipped input: `{ ... }` comments (may span several lines), `//` comments
 * (to end of line) and blanks. Unrecognised characters are reported on
 * standard output and skipped. If the input ends inside a `{` comment a
 * diagnostic is printed.
 *
 * @param code input stream to read; consumed until getline() fails.
 */
void LexicalAnalyzer::analyze(ifstream *code){
	string readline;
	bool commentary = false;	// inside a { ... } comment; survives line breaks
	unsigned int counter = 0;	// number of the line currently being scanned
	// Loop on getline() itself rather than on eof(): a stream that is in a
	// failed state without eofbit (e.g. a file that could not be opened)
	// would otherwise spin forever, and a trailing newline would cause one
	// extra pass over stale data.
	while (getline(*code, readline)){
		counter++;
		const unsigned int size = readline.length();
		// NUL-terminated view of the line: reading[size] == '\0', so the
		// one-character look-ahead below never leaves the buffer.
		const char *reading = readline.c_str();
		for (unsigned int i = 0; i < size; /* advanced inside the body */){
			/* Block comment */
			if (reading[i] == '{'){
				commentary = true;
			}
			if (commentary){
				if (reading[i] == '}'){
					commentary = false;
				}
				i++;
				continue;
			}
			/* Line comment: ignore the rest of the line */
			if (reading[i] == '/' && reading[i + 1] == '/'){
				break;
			}

			const char *kind = 0;		// class name of the token found at i
			unsigned int len = 0;		// number of characters the token spans
			bool identifier = false;	// token still needs the reserved-word lookup

			/* Compound symbols */
			/* Assignment */
			if (reading[i] == ':' && reading[i + 1] == '='){
				kind = "Comando de Atribuicao";
				len = 2;
			}
			/* Relational operators: <=, >=, <> */
			else if ((reading[i] == '<' && (reading[i + 1] == '=' || reading[i + 1] == '>'))
				|| (reading[i] == '>' && reading[i + 1] == '=')){
				kind = "Operador Relacional";
				len = 2;
			}
			/* Additive operator "or": must not be the prefix of a longer word */
			else if (reading[i] == 'o' && reading[i + 1] == 'r' && reading[i + 2] != '_'
				&& !isNumber(reading[i + 2]) && !isLetter(reading[i + 2])){
				kind = "Operador Aditivo";
				len = 2;
			}
			/* Multiplicative operator "and": same word-boundary rule */
			else if (reading[i] == 'a' && reading[i + 1] == 'n' && reading[i + 2] == 'd'
				&& reading[i + 3] != '_' && !isNumber(reading[i + 3]) && !isLetter(reading[i + 3])){
				kind = "Operador Multiplicativo";
				len = 3;
			}
			/* Simple symbols */
			/* Delimiters */
			else if (reading[i] == ';' || reading[i] == '.' || reading[i] == ':'
				|| reading[i] == '(' || reading[i] == ')' || reading[i] == ','){
				kind = "Delimitador";
				len = 1;
			}
			/* Relational operator */
			else if (reading[i] == '=' || reading[i] == '<' || reading[i] == '>'){
				kind = "Operador Relacional";
				len = 1;
			}
			/* Additive operator */
			else if (reading[i] == '+' || reading[i] == '-'){
				kind = "Operador Aditivo";
				len = 1;
			}
			/* Multiplicative operator */
			else if (reading[i] == '*' || reading[i] == '/'){
				kind = "Operador Multiplicativo";
				len = 1;
			}
			/* Reserved word or identifier: letter (letter | digit | '_')* */
			else if (isLetter(reading[i])){
				unsigned int stop = i + 1;
				while (stop < size && (isLetter(reading[stop]) || isNumber(reading[stop]) || reading[stop] == '_')){
					stop++;
				}
				len = stop - i;
				kind = "Identificador";
				identifier = true;
			}
			/* Integer or real number: digit+ [ '.' digit* ] */
			else if (isNumber(reading[i])){
				unsigned int stop = i + 1;
				while (stop < size && isNumber(reading[stop])){
					stop++;
				}
				kind = "Numero inteiro";
				if (reading[stop] == '.'){
					stop++;
					while (stop < size && isNumber(reading[stop])){
						stop++;
					}
					kind = "Numero real";
				}
				len = stop - i;
			}
			/* White space */
			else if (reading[i] == ' ' || reading[i] == '\t' || reading[i] == '\n'){
				i++;
				continue;
			}
			else {
				cout << "Simbolo " << reading[i] << " nao reconhecido." << endl;
				i++;
				continue;
			}

			// Single emission point for every token recognised above.
			string word(reading + i, len);
			if (identifier){
				for (unsigned int j = 0; j < restricted_word->size(); j++){
					if (word == restricted_word->at(j).c_str()){
						kind = "Palavra reservada";
						break;
					}
				}
			}
			lexToken lex(counter, word, kind);
			token->push_back(lex);
			i += len;
		}
	}
	if (commentary){
		cout << "Esperado } ao fim de um comentario." << endl;
	}
}