void *thr_read(void *arg__) { ArgRead *arg = arg__; Tok *t; while (feof(arg->F)==0) { // Each token (mostly lines) t = newTok(); nlock_lock(t->edit); fjoin(arg->F); // Wait for the stream to have data ll_push(inQ, t); // Pass on for timestamp // Now read the data getItok(arg->F, arg->delim, t->str); nlock_unlock(t->edit); } inQ->EOT = true; return 0; }
// Split XML into tokens and return token information (strings, type, ...). int CParseXml::splitXML() { STR_XML str; if (*cur != '/') str.s = cur+1; else str.s = cur; str.len = 0; int futf8 = 0; for (; cur < end; cur++) { unsigned char cc = (unsigned char)*cur; int iCur = cc;//(unsigned)*cur; if(futf8 == 0 && iCur >= 192 ) { futf8 = 1; if (iCur&32) { futf8++; if (iCur&16) { futf8++; if (iCur&8) { futf8++; if (iCur&4) { futf8++; } } } } continue; } if (futf8 > 0 && iCur >= 128 && (iCur&64) == 0) { futf8--; continue; } futf8 = 0; if (iCur>65 && iCur != 92) //92 == '\\' continue; switch (iCur) { case '\r': case '\n': if (flag & F_TRI_BRACE) { errorXML("CR or LF found after \"<\""); return -1; } str.len = cur-str.s; if (str.len>0) { if (-1 == newTok(str)) return -1; return 0; } str.s = cur+1; continue; case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 8: case 11: case 12: case 14: case 15: case 16: case 17: case 18: case 19: case 20: case 21: case 22: case 23: case 24: case 25: case 26: case 27: case 28: case 29: case 30: case 31: errorXML("found < 32"); return -1; case '\\': if ((flag & (F_TRI_BRACE|F_SQUOTE|F_DQUOTE)) == 0 && *(cur+1) == '\\') { while(*cur && *cur != '\n')cur++; str.s = cur+1; } continue; case '/': if (flag & (F_SQUOTE|F_DQUOTE)) continue; if (flag & F_TRI_BRACE) { str.len = cur - str.s; if (str.len > 0) { if (-1 == newTok(str)) return -1; flag &= ~F_VNAME; flag &= ~F_VALUE; flag &= ~F_NODE_NOPEN; flag |= F_NODE_NCLOSE; return 0; } flag &= ~F_VNAME; flag &= ~F_VALUE; flag &= ~F_NODE_NOPEN; flag |= F_NODE_NCLOSE; } continue; case '<': if (flag & (F_SQUOTE|F_DQUOTE)) continue; if (flag & F_TRI_BRACE) { errorXML(cur); errorXML("\"<\" found - after \"<\""); return -1; } if(*(cur+1) == '!' 
&& *(cur+2) == '-' && *(cur+3) == '-') { cur += 3; while (*cur && (*(cur-2) != '-' || *(cur-1) != '-' || *(cur) != '>')) cur++; if(*cur) cur++; str.s = cur+1; continue; } if (cur > str.s) { flag |= F_CONTENT; str.len = cur - str.s; if (-1 == newTok(str)) return -1; flag &= ~F_VALUE; flag &= ~F_VNAME; return 0; } flag |= F_TRI_BRACE; flag |= F_NODE_NOPEN; flag &= ~F_NODE_NCLOSE; str.s = cur+1; continue; case '>': if (flag & (F_SQUOTE|F_DQUOTE)) continue; if ((flag & F_TRI_BRACE) == 0) { errorXML("<-not found"); return -1; } flag &= ~F_VNAME; flag &= ~F_CONTENT; str.len = cur-str.s; if ((str.len == 0) && (cur[-1] != '<')) { flag &= ~F_TRI_BRACE;str.s = cur+1; continue; } if (str.len > 0) { if (-1 == newTok(str)) return -1; return 0; } flag &= ~F_TRI_BRACE; flag = F_CONTENT; str.s = cur+1; continue; case 34: // " case '\'': if ((flag & F_TRI_BRACE) == 0 || (flag & F_SQUOTE)) continue; flag ^= F_DQUOTE; if ((flag & F_DQUOTE) == 0) { flag |= F_VALUE; flag &= ~F_VNAME; str.len = cur - str.s; flag &= ~F_NODE_NCLOSE; flag &= ~F_NODE_NOPEN; if (-1 == newTok(str)) return -1;; flag &= ~F_VALUE; flag |= F_VNAME; cur++; return 0; } str.s = cur+1; continue; case '`': // ` if ((flag & F_TRI_BRACE) == 0 || (flag & F_DQUOTE)) continue; flag ^= F_SQUOTE; if ((flag & F_SQUOTE) == 0) { flag |= F_VALUE; flag &= ~F_VNAME; str.len = cur-str.s; flag &= ~F_NODE_NCLOSE; flag &= ~F_NODE_NOPEN; if (-1 == newTok(str)) return -1; /* continue; // *** WD: Ask Janis about this continue flag |= F_VNAME; flag &= ~F_VALUE; flag &= ~F_CONTENT; */ flag &= ~F_VALUE; flag |= F_VNAME; cur++; return 0; } str.s = cur+1; continue; case '=' : if ((flag & F_TRI_BRACE) == 0) continue; if (flag & (F_SQUOTE|F_DQUOTE)) continue; flag &= ~F_NODE_NOPEN; flag &= ~F_NODE_NCLOSE; if (cur[-1] == '<') { errorXML("= sign found after '<'"); return -1; } if (cur > str.s){ str.len = cur - str.s; if (-1 == newTok(str)) return -1; flag &= ~F_VNAME; flag |= F_VALUE; return 0; } str.s = cur+1; flag &= ~F_VNAME; flag |= F_VALUE; 
continue; case '\t': case ' ': if (flag & (F_SQUOTE|F_DQUOTE)) continue; if (flag & F_TRI_BRACE) { flag &= ~F_CONTENT; } else { flag &= ~F_VALUE; flag &= ~F_VNAME; flag |= F_CONTENT; } if (cur[-1] == '<') { errorXML("TAB , SP or EQVAL found after '<'"); return -1; } str.len = cur - str.s; if (cur > str.s) { if (-1 == newTok(str)) return -1; flag &= ~F_NODE_NOPEN; flag &= ~F_NODE_NCLOSE; if (flag & F_TRI_BRACE) flag |= F_VNAME; return 0; } flag &= ~F_NODE_NOPEN; flag &= ~F_NODE_NCLOSE; str.s = cur+1; continue; } } flag = F_LAST; if (-1 == newTok(str)) return -1; return 0; }
/**
 * Lexical analysis of the source text available from `code`.
 *
 * The stream is read line by line.  For every lexeme found, a lexToken built
 * from (line number, lexeme text, category) is appended to `token`.
 * Categories produced: "Comando de Atribuicao", "Operador Relacional",
 * "Operador Aditivo", "Operador Multiplicativo", "Delimitador",
 * "Palavra reservada", "Identificador", "Numero inteiro" and "Numero real".
 *
 * Text between '{' and '}' is ignored and may span several lines; "//"
 * discards the rest of the current line.  Characters that match no rule are
 * reported on cout and skipped, and a message is printed if the input ends
 * while a '{' comment is still open.
 *
 * @param code  input stream to tokenize; read until getline() fails.
 */
void LexicalAnalyzer::analyze(ifstream *code){
	string readline;
	bool commentary = false;	// inside a { ... } comment
	unsigned int counter = 0;	// number of the line being scanned, starting at 1

	// Loop on getline() itself rather than on eof(): a stream that failed
	// without reaching end-of-file (e.g. a file that could not be opened)
	// never sets eofbit, so testing eof() alone would spin forever.
	while (getline(*code, readline)){
		counter++;

		const string::size_type size = readline.length();
		// c_str() is NUL-terminated, so reading[size] is a valid '\0' sentinel
		// for the look-ahead tests below; scanning it in place also removes the
		// need for manually allocated copies of the line.
		const char *reading = readline.c_str();

		for (string::size_type i = 0; i < size; /* advanced by each branch */){
			/* Comment */
			if (reading[i] == '{'){
				commentary = true;
			}
			if (commentary){
				if (reading[i] == '}'){
					commentary = false;
				}
				i++;
				continue;
			}

			/* Line comment: ignore the remainder of the line */
			if (reading[i] == '/' && reading[i + 1] == '/'){
				break;
			}

			const char c = reading[i];
			const char next = reading[i + 1];
			string::size_type length = 0;	// lexeme length once a rule matched
			const char *category = 0;	// lexeme category, null when no token

			/* Compound symbols */
			/* Assignment command */
			if (c == ':' && next == '='){
				length = 2;
				category = "Comando de Atribuicao";
			}
			/* Relational operators <=, >=, <> */
			else if ((c == '<' && next == '=') || (c == '>' && next == '=') || (c == '<' && next == '>')){
				length = 2;
				category = "Operador Relacional";
			}
			/* Additive operator "or" (must not continue as an identifier) */
			else if (c == 'o' && next == 'r' && reading[i + 2] != '_' && !isNumber(reading[i + 2]) && !isLetter(reading[i + 2])){
				length = 2;
				category = "Operador Aditivo";
			}
			/* Multiplicative operator "and" (must not continue as an identifier) */
			else if (c == 'a' && next == 'n' && reading[i + 2] == 'd' && reading[i + 3] != '_' && !isNumber(reading[i + 3]) && !isLetter(reading[i + 3])){
				length = 3;
				category = "Operador Multiplicativo";
			}
			/* Simple symbols */
			/* Delimiters */
			else if (c == ';' || c == '.' || c == ':' || c == '(' || c == ')' || c == ','){
				length = 1;
				category = "Delimitador";
			}
			/* Relational operator */
			else if (c == '=' || c == '<' || c == '>'){
				length = 1;
				category = "Operador Relacional";
			}
			/* Additive operator */
			else if (c == '+' || c == '-'){
				length = 1;
				category = "Operador Aditivo";
			}
			/* Multiplicative operator */
			else if (c == '*' || c == '/'){
				length = 1;
				category = "Operador Multiplicativo";
			}
			/* Symbol sets */
			/* Reserved word or identifier */
			else if (isLetter(c)){
				string::size_type stop = i + 1;
				while (isLetter(reading[stop]) || isNumber(reading[stop]) || reading[stop] == '_'){
					stop++;
				}
				length = stop - i;
				category = "Identificador";

				const string candidate(reading + i, length);
				for (unsigned int j = 0; j < restricted_word->size(); j++){
					if (candidate == restricted_word->at(j).c_str()){
						category = "Palavra reservada";
						break;
					}
				}
			}
			/* Integer or real numbers */
			else if (isNumber(c)){
				string::size_type stop = i + 1;
				while (isNumber(reading[stop])){
					stop++;
				}
				if (reading[stop] == '.'){
					// The '.' and any digits after it belong to the lexeme.
					stop++;
					while (isNumber(reading[stop])){
						stop++;
					}
					category = "Numero real";
				}
				else {
					category = "Numero inteiro";
				}
				length = stop - i;
			}
			/* White space */
			else if (c == ' ' || c == '\t' || c == '\n'){
				i++;
				continue;
			}
			else {
				cout << "Simbolo " << reading[i] << " nao reconhecido." << endl;
				i++;
				continue;
			}

			// A rule matched: record the token and step over the lexeme.
			string word(reading + i, length);
			lexToken lex(counter, word, category);
			token->push_back(lex);
			i += length;
		}
	}

	if (commentary){
		cout << "Esperado } ao fim de um comentario." << endl;
	}
}