/**
 * Copies the next line of `buffer` into `out` (NUL-terminated) and advances
 * `buffer` past that line and the run of line-end characters that follows it.
 *
 * At most 4095 characters are copied so the terminator always fits inside the
 * 4096-byte `out` array. A longer line is split: the remainder stays in
 * `buffer` and is returned by the next call.
 *
 * @param buffer  In/out cursor into a NUL-terminated text buffer.
 * @param out     Destination array of 4096 chars.
 * @return false if `buffer` already points at the terminating '\0' (nothing
 *         is written to `out`), true otherwise.
 *
 * NOTE(review): the copy loop stops at the end of the input only if
 * isLineEnd('\0') is true -- confirm against isLineEnd's definition.
 */
bool getNextLine(const char*& buffer, char out[4096]) {
    if ('\0' == *buffer) {
        return false;
    }

    char* _out = out;
    // Keep the last slot free for the terminator. The previous bound of
    // `_out + 4096` allowed 4096 characters to be copied, after which the
    // terminator was written to out[4096], one past the end of the array.
    char* const end = _out + 4096 - 1;
    while (!isLineEnd(*buffer) && _out < end) {
        *_out++ = *buffer++;
    }
    *_out = '\0';

    // Consume the line break(s) but never step over the end of the input.
    while (isLineEnd(*buffer) && '\0' != *buffer) {
        ++buffer;
    }
    return true;
}
/**
 * Advances past every leading character for which isSpace() is true.
 *
 * @param in   Start of the text to scan.
 * @param out  Receives the position of the first non-space character.
 * @return true if that character is not a line end, false if it is.
 */
bool skipSpaces(const char* in, const char** out) {
    const char* cursor = in;
    for (; isSpace(*cursor); ++cursor) {
        // nothing to do: the loop header walks over the blanks
    }
    *out = cursor;
    if (isLineEnd(*cursor)) {
        return false;
    }
    return true;
}
int paraseRequest(char *buffer, size_t size, LINE_PARASE_PARAM ¶m) { size_t &wordIndex = param.wordMgr.wordCount;//数组下标,是个引用哈 for(size_t i = 0; i < size; ++i) { if(isLineEnd(buffer[i])) { param.state = HTTP_OPTION; ++wordIndex; return i+1; //已经解析了i+1字节 } else if(isSeparator(buffer[i])) //是分隔符,组成一个单词 { ++wordIndex; }else { pushToStack(param.wordMgr.word[wordIndex], buffer+i); } } return 0; }
/**
 * Tells whether `in` is either a space character (per isSpace) or a line-end
 * character (per isLineEnd).
 */
bool isSpaceOrNewLine(wchar_t in) {
    if (isSpace(in)) {
        return true;
    }
    return isLineEnd(in);
}
bool MEModel::loadTrainingExamples( const char *trainFile ) { FILE *fin = fopen(trainFile,"r"); if ( fin == NULL ) { fprintf( stderr, "open train file %s failed!\n", trainFile ); return false; } int lineNum = 0; int fieldNum = 0; int ch; bool endOfFile = false; int curFieldCnt = 0; while( !endOfFile ) { ch = fgetc(fin); curFieldCnt = 0; while( true ) { while( !isLineEnd(ch) && isSpace(ch) ) ch = fgetc(fin); if( isLineEnd(ch) ) break; while( !isLineEnd(ch) && !isSpace(ch) ) ch = fgetc(fin); curFieldCnt ++; } if ( curFieldCnt > 0 ) { lineNum ++; fieldNum += curFieldCnt; } if( ch == EOF ) endOfFile = true; } rewind(fin); M = lineNum; VarStr word; endOfFile = false; xspace = new SpElem[fieldNum]; x = new SpElem*[M]; y = new int[M]; lineNum = 0; int xIndex = 0; while( !endOfFile ) { ch = fgetc(fin); curFieldCnt = 0; int curY = 0, curX = 0; char *ystr = NULL; while( true ) { while( !isLineEnd(ch) && isSpace(ch) ) ch = fgetc(fin); if( isLineEnd(ch) ) break; word.clear(); while( !isLineEnd(ch) && !isSpace(ch) ) { word.add(ch); ch = fgetc(fin); } curFieldCnt ++; if( curFieldCnt == 1 ) { ystr = new char[word.length()+1]; strcpy(ystr,word.c_str()); } else { curX = feaIdMap.add(word.c_str()); xspace[xIndex].index = curX; xspace[xIndex].value = 1; if ( curFieldCnt == 2 ) x[lineNum] = &xspace[xIndex]; xIndex ++; } } if ( curFieldCnt > 1 ) { xspace[xIndex].index = -1; xspace[xIndex].value = 0; xIndex ++; curY = classNameIdMap.add(ystr); y[lineNum] = curY; lineNum ++; } if( ystr != NULL ) delete []ystr; if ( ch == EOF ) endOfFile = true; } fclose(fin); M = lineNum; classNumber = (int)classNameIdMap.size(); needDestroyTrain = true; return true; }
/**
 * Parses source text into a tree of DMJSON nodes and returns its root.
 *
 * The text is copied into a DMString and scanned one character at a time.
 * Parsing state lives in a stack of CodeIndicator frames (indicator_stack,
 * indexed by indicator_stack_top); `indicator` always points at the top
 * frame, and that frame's parse_status selects how the current character is
 * handled. parse_status values as they are assigned in this function:
 *
 *   11  '{' opening a new type-0 node      12  '{' with an includer of type 20
 *   13  '{' with an includer of type 30    14  root frame / '{' with includer type 40..49
 *   21  '[' opening a type-0 node          22  '[' after keyword "with"/"has" (type-61 node)
 *   23  '[' after a code name (type-71 node)
 *   31  '(' with an includer of type 30    32  '(' after a code name (type-32 node)
 *   33  frame for an includer of type 33   34  '(' opening a type-10 node
 *   35  '(' with an includer of type 40..49
 *   41  inside a string                    51  inside a run of operator characters
 *   61  inside a // comment                74  frame for an includer of type 80
 *   81  key/value frame created by ':'
 *
 * @param code_char_string  Text to parse; string_length characters are copied.
 * @param string_length     Number of characters in code_char_string.
 * @return The root node (type 34). Branches marked "syntax error" that
 *         `break` leave the loop early and return the tree built so far; no
 *         error is reported to the caller.
 *
 * NOTE(review): the scan index runs up to and including string_length, and
 * char_string[i + 1] is read as well, so up to two characters past the copied
 * text are read -- confirm newDMString()/copy() leave those bytes readable.
 * NOTE(review): indicator_stack_top is incremented and decremented without
 * bounds checks -- confirm the stack capacity and how unbalanced input
 * should be handled.
 */
DMJSON * Parser::parseCode(char* code_char_string, int string_length) {
    indicator_stack_top = 0;
    CodeIndicator * indicator = indicator_stack + indicator_stack_top; //current indicator
    DMJSON * root = this->json_pool->getDMJSON();
    root->type = 34;
    indicator->code = root;
    indicator->parse_status = 14;

    // Work on a private copy of the input.
    DMString * code_string = newDMString(string_length);
    code_string->copy(code_char_string, string_length);
    char* char_string = code_string->char_string;

    char preChar;
    char localChar = 0;
    char nextChar;
    int last_index = 0; // start of the not-yet-consumed text; characters before it are skipped
    int i = -1;
    int line_number = 0; // counted, but not otherwise used in this function

    while (true) {
        if (i >= string_length) {
            break;
        } else {
            i++;
        }
        if (last_index > i) {
            continue;
        }
        preChar = localChar;
        localChar = char_string[i];
        nextChar = char_string[i + 1];
        if (localChar == '\n') {
            line_number++;
        }

        if (indicator->parse_status == 51) { //@operator@
            // Keep consuming operator characters; the first other character
            // ends the run and is then processed normally below.
            if (isOperator(localChar)) {
                continue;
            }
            indicator->end = i;
            DMString * dm_string = parseDMString(char_string, indicator);
            dm_string->type = TYPE_CODE_OPERATOR;
            clearCodeIndicator(indicator);
            indicator_stack_top--;
            indicator = indicator_stack + indicator_stack_top;
            pushMeta((MemorySpace *) dm_string, indicator);
            last_index = i;
        }

        if (indicator->parse_status == 41) { //"string"
            // Stay inside the string until checkString() reports the same
            // state that opened it (stored in indicator->flag).
            int string_state = checkString(preChar, localChar, nextChar);
            if (string_state == 0 || indicator->flag != string_state || indicator->start + 1 > i) {
                continue;
            }
            // States ending in 1 and in 3 use different offsets; presumably a
            // one-character delimiter versus a three-character delimiter
            // detected at its middle character -- confirm against checkString.
            if (string_state % 10 == 1) {
                indicator->end = i;
                last_index = i + 1;
            } else if (string_state % 10 == 3) {
                indicator->end = i - 1;
                last_index = i + 2;
            }
            DMString * dm_string = parseDMString(char_string, indicator);
            clearCodeIndicator(indicator);
            indicator_stack_top--;
            indicator = indicator_stack + indicator_stack_top;
            pushMeta((MemorySpace *) dm_string, indicator);
            continue;
        }

        if (indicator->parse_status == 61) { // //comments
            // Everything up to the line end is ignored; the line end itself
            // pops the comment frame and is then processed normally below.
            if (isLineEnd(localChar)) {
                clearCodeIndicator(indicator);
                indicator_stack_top--;
                indicator = indicator_stack + indicator_stack_top;
                last_index = i + 2;
            } else {
                continue;
            }
        }

        if (isLetter(localChar)) {
            if (indicator->isInLine == NOT_IN_LINE) {
                indicator->isInLine = IN_LINE;
            }
            if (indicator->isInWord == NOT_IN_WORD) {
                indicator->isInWord = IN_WORD;
            }
            continue;
        } else {
            // A non-letter ends the current word: parse the text between
            // last_index and i and push it onto the current frame.
            if (indicator->isInWord == IN_WORD && indicator->isInLine == IN_LINE) {
                if (last_index < i) {
                    MemorySpace * element = parseElement(char_string, last_index, i);
                    pushMeta(element, indicator);
                }
                last_index = i + 1;
                indicator->isInWord = NOT_IN_WORD;
            }
        }

        // If the current frame has acquired an includer of type 80 or 33,
        // open a dedicated frame (status 74 or 33) for it.
        if (indicator->parse_status != 74) {
            if (indicator->includer == NULL) {
            } else if (indicator->includer->type == 80) {
                DMJSON * last_includer = indicator->includer;
                indicator_stack_top++;
                indicator = indicator_stack + indicator_stack_top;
                indicator->parse_status = 74;
                indicator->includer = last_includer;
                indicator->isInLine = IN_LINE;
            } else if (indicator->includer->type == 33) {
                DMJSON * last_includer = indicator->includer;
                indicator_stack_top++;
                indicator = indicator_stack + indicator_stack_top;
                indicator->parse_status = 33;
                indicator->code = last_includer;
                indicator->isInLine = IN_LINE;
            } else {
            }
        }

        if (isLineEnd(localChar)) {
            if (indicator->isInLine == NOT_IN_LINE) {
                last_index = i + 1;
                continue;
            } else {
                indicator->isInLine = NOT_IN_LINE;
            }
            if (indicator->parse_status == 74) {
                DMJSON * last_expression = indicator->includer;
                MemorySpace * last_meta = last_expression->get(-1, NOT_DEL);
                if (last_meta == NULL) {
                } else if (last_meta->type == TYPE_CODE_OPERATOR) {
                    // The line ends with an operator, so the expression
                    // continues on the next line.
                    continue;
                }
                clearCodeIndicator(indicator);
                indicator_stack_top--;
                indicator = indicator_stack + indicator_stack_top;
                MemorySpace * element = resolveExpression(last_expression);
                pushElement(element, indicator);
                indicator->includer = NULL;
            }
            if (indicator->parse_status == 33) {
                DMJSON* functionReturn = indicator->code;
                clearCodeIndicator(indicator);
                indicator_stack_top--;
                indicator = indicator_stack + indicator_stack_top;
                pushElement(functionReturn->self, indicator);
                indicator->includer = NULL;
            }
            if (indicator->parse_status == 81) {
                clearCodeIndicator(indicator);
                indicator_stack_top--;
                indicator = indicator_stack + indicator_stack_top;
            }
            last_index = i + 1;
            continue;
        }

        if (isBlank(localChar)) {
            if (indicator->isInWord == NOT_IN_WORD) {
                last_index = i + 1;
                continue;
            }
            if (indicator->parse_status == 31) {
                clearCodeIndicator(indicator);
                indicator_stack_top--;
                indicator = indicator_stack + indicator_stack_top;
                last_index = i + 1;
                continue;
            }
            if (indicator->parse_status == 32) {
                clearCodeIndicator(indicator);
                indicator_stack_top--;
                indicator = indicator_stack + indicator_stack_top;
                last_index = i + 1;
                continue;
            }
            if (indicator->parse_status == 33) {
                clearCodeIndicator(indicator);
                indicator_stack_top--;
                indicator = indicator_stack + indicator_stack_top;
                last_index = i + 1;
                continue;
            }
            if (indicator->parse_status == 22) {
                clearCodeIndicator(indicator);
                indicator_stack_top--;
                indicator = indicator_stack + indicator_stack_top;
                last_index = i + 1;
                continue;
            }
            last_index = i + 1;
            continue;
        }

        if (isSeparator(localChar)) {
            // A separator finishes a pending expression and a pending
            // key/value frame.
            if (indicator->parse_status == 74) {
                DMJSON * last_expression = indicator->includer;
                clearCodeIndicator(indicator);
                indicator_stack_top--;
                indicator = indicator_stack + indicator_stack_top;
                MemorySpace * element = resolveExpression(last_expression);
                pushElement(element, indicator);
                indicator->includer = NULL;
            }
            if (indicator->parse_status == 81) {
                clearCodeIndicator(indicator);
                indicator_stack_top--;
                indicator = indicator_stack + indicator_stack_top;
            }
            if (indicator->parse_status == 12) {
                DMJSON * last_function_definition = indicator->includer;
                if (last_function_definition == NULL) {
                } else if (last_function_definition->type == 30) {
                    pushElement(last_function_definition->self, indicator);
                    indicator->includer = NULL;
                } else {
                    //syntax error
                }
            }
            last_index = i + 1;
            continue;
        }

        if (localChar == ':') {
            // The expression in front of ':' becomes the key of a new
            // key/value pair.
            MemorySpace * key = NULL;
            if (indicator->parse_status == 74) {
                DMJSON * last_expression = indicator->includer;
                clearCodeIndicator(indicator);
                indicator_stack_top--;
                indicator = indicator_stack + indicator_stack_top;
                MemorySpace * element = resolveExpression(last_expression);
                if (element->type == TYPE_CODE_NAME) {
                    key = element;
                    CodeName * key_name = (CodeName *) key;
                    key_name->nameType = 0x99;
                } else if (element->type == TYPE_STRING) {
                    // A string key is retagged as a code name.
                    key = element;
                    key->type = TYPE_CODE_NAME;
                    CodeName * key_name = (CodeName *) key;
                    key_name->nameType = 0x99;
                } else {
                    //syntax error
                }
                indicator->includer = NULL;
                last_index = i + 1;
            }
            if (key != NULL && (indicator->parse_status == 11 || indicator->parse_status == 21)) {
                DMKeyValue * key_value = newDMKeyValue();
                key_value->key = key;
                DMJSON * parent = indicator->code;
                indicator_stack_top++;
                indicator = indicator_stack + indicator_stack_top;
                indicator->key_value = key_value;
                indicator->parse_status = 81;
                parent->set(-1, key_value->self, NOT_REPLACE);
            }
            last_index = i + 1;
            continue;
        }

        int string_state = checkString(preChar, localChar, nextChar);
        if (0 != string_state) {
            if (indicator->parse_status != 41) {
                // States 12 and 22 push an empty DMString immediately
                // (apparently an empty string literal -- confirm against
                // checkString).
                if (string_state == 12 || string_state == 22) {
                    DMString * dm_string = newDMString(0);
                    pushMeta((MemorySpace *) dm_string, indicator);
                    last_index = i + 1;
                    continue;
                }
                // Otherwise open a string frame and remember which state
                // opened it, so only the same state can close it.
                indicator_stack_top++;
                indicator = indicator_stack + indicator_stack_top;
                indicator->parse_status = 41;
                indicator->flag = string_state;
                if (string_state % 10 == 1) {
                    indicator->start = i + 1;
                } else if (string_state % 10 == 3) {
                    indicator->start = i + 2;
                }
                continue;
            } else {
                //logic error
            }
        }

        if (localChar == '/' && nextChar == '/') {
            indicator_stack_top++;
            indicator = indicator_stack + indicator_stack_top;
            indicator->parse_status = 61;
            last_index = i + 1;
            continue;
        }

        if (isBracket(localChar)) {
            if (localChar == '{') {
                if (indicator->includer == NULL) {
                    DMJSON* includer = this->json_pool->getDMJSON();
                    includer->type = 80;
                    indicator->includer = includer;
                }
                DMJSON * last_includer = indicator->includer;
                indicator_stack_top++;
                indicator = indicator_stack + indicator_stack_top;
                indicator->start = i;
                // The includer's type decides what kind of block '{' opens.
                if (last_includer != NULL && last_includer->type == 20) {
                    indicator->parse_status = 12;
                    indicator->code = last_includer;
                } else if (last_includer != NULL && last_includer->type == 30) {
                    indicator->parse_status = 13;
                    indicator->code = last_includer;
                } else if (last_includer != NULL && last_includer->type / 10 == 4) {
                    indicator->parse_status = 14;
                    indicator->code = last_includer;
                } else {
                    indicator->parse_status = 11;
                    DMJSON* json = this->json_pool->getDMJSON();
                    json->type = 0;
                    indicator->code = json;
                    last_includer->set(-1, json->self, NOT_REPLACE);
                }
                last_index = i + 1;
                continue;
            }

            if (localChar == '}') {
                if (indicator->parse_status == 74) {
                    DMJSON * last_expression = indicator->includer;
                    clearCodeIndicator(indicator);
                    indicator_stack_top--;
                    indicator = indicator_stack + indicator_stack_top;
                    MemorySpace * element = resolveExpression(last_expression);
                    pushElement(element, indicator);
                    indicator->includer = NULL;
                }
                if (indicator->parse_status == 81) {
                    clearCodeIndicator(indicator);
                    indicator_stack_top--;
                    indicator = indicator_stack + indicator_stack_top;
                    last_index = i + 1;
                }
                // '}' may only close a frame opened by '{' (status 10..19).
                if (indicator->parse_status / 10 != 1) {
                    //syntax error
                    break;
                }
                DMJSON * code = indicator->code;
                clearCodeIndicator(indicator);
                indicator_stack_top--;
                indicator = indicator_stack + indicator_stack_top;
                if (indicator->parse_status == 11 && code->type == 0) {
                } else if (indicator->parse_status == 12 || indicator->parse_status == 13 || indicator->parse_status == 14) {
                    pushElement(code->self, indicator);
                    indicator->includer = NULL;
                } else if (indicator->parse_status == 74) {
                    clearCodeIndicator(indicator);
                    indicator_stack_top--;
                    indicator = indicator_stack + indicator_stack_top;
                }
                last_index = i + 1;
                continue;
            }

            if (localChar == '[') {
                if (indicator->includer == NULL) {
                    DMJSON* includer = this->json_pool->getDMJSON();
                    includer->type = 80;
                    indicator->includer = includer;
                }
                CodeIndicator * last_indicator = indicator;
                DMJSON * last_includer = indicator->includer;
                indicator_stack_top++;
                indicator = indicator_stack + indicator_stack_top;
                indicator->start = i;
                // What precedes '[' decides its meaning: take the previous
                // frame's last_meta, or failing that the includer's last item.
                MemorySpace * last_meta = last_indicator->last_meta;
                if (last_meta == NULL) {
                    if (last_includer != NULL) {
                        last_meta = last_includer->get(-1, NOT_DEL);
                    }
                }
                if (last_meta == NULL) { //21:[json]
                    indicator->parse_status = 21;
                    DMJSON* json = this->json_pool->getDMJSON();
                    json->type = 0;
                    indicator->code = json;
                    last_includer->set(-1, json->self, NOT_REPLACE);
                } else if (last_meta->type == TYPE_CODE_KEYWORD) { //has with 22:[enum] in: 21:[json]
                    DMString *last_keyword = (DMString *) last_meta;
                    if (*last_keyword == "with" || *last_keyword == "has") {
                        indicator->parse_status = 22;
                        DMJSON* enumDefinition = this->json_pool->getDMJSON();
                        enumDefinition->type = 61;
                        indicator->code = enumDefinition;
                        last_includer->set(-1, enumDefinition->self, NOT_REPLACE);
                    } else if (*last_keyword == "in" || *last_keyword == "import") {
                        indicator->parse_status = 21;
                        DMJSON* json = this->json_pool->getDMJSON();
                        json->type = 0;
                        indicator->code = json;
                        last_includer->set(-1, json->self, NOT_REPLACE);
                    } else {
                        //syntax error
                        break;
                    }
                } else if (last_meta->type == TYPE_CODE_NAME) { //23:[key|index|selector],
                    indicator->parse_status = 23;
                    DMJSON* selector = this->json_pool->getDMJSON();
                    selector->type = 71;
                    indicator->code = selector;
                    last_includer->set(-1, selector->self, NOT_REPLACE);
                } else { //21:[json]
                    indicator->parse_status = 21;
                    DMJSON* json = this->json_pool->getDMJSON();
                    json->type = 0;
                    indicator->code = json;
                    last_includer->set(-1, json->self, NOT_REPLACE);
                }
                last_index = i + 1;
                continue;
            }

            if (localChar == ']') {
                if (indicator->parse_status == 74) {
                    DMJSON * last_expression = indicator->includer;
                    clearCodeIndicator(indicator);
                    indicator_stack_top--;
                    indicator = indicator_stack + indicator_stack_top;
                    MemorySpace * element = resolveExpression(last_expression);
                    pushElement(element, indicator);
                    indicator->includer = NULL;
                }
                if (indicator->parse_status == 81) {
                    clearCodeIndicator(indicator);
                    indicator_stack_top--;
                    indicator = indicator_stack + indicator_stack_top;
                    last_index = i + 1;
                }
                // ']' may only close a frame opened by '[' (status 20..29).
                if (indicator->parse_status / 10 != 2) {
                    //syntax error
                    break;
                }
                DMJSON * code = indicator->code;
                CodeIndicator * last_indicator = indicator;
                indicator_stack_top--;
                indicator = indicator_stack + indicator_stack_top;
                clearCodeIndicator(last_indicator);
                if (indicator->parse_status == 21 && code->type == 0) {
                } else if (indicator->parse_status == 12 || indicator->parse_status == 13 || indicator->parse_status == 14) {
                    pushElement(code->self, indicator);
                    indicator->includer = NULL;
                } else if (indicator->parse_status == 74) {
                    clearCodeIndicator(indicator);
                    indicator_stack_top--;
                    indicator = indicator_stack + indicator_stack_top;
                }
                last_index = i + 1;
                continue;
            }

            if (localChar == '(') {
                if (indicator->includer == NULL) {
                    DMJSON* includer = this->json_pool->getDMJSON();
                    includer->type = 80;
                    indicator->includer = includer;
                }
                CodeIndicator * last_indicator = indicator;
                if (last_indicator->parse_status == 33) {
                    last_index = i + 1;
                    continue;
                }
                DMJSON * last_includer = indicator->includer;
                indicator_stack_top++;
                indicator = indicator_stack + indicator_stack_top;
                indicator->start = i;
                if (last_includer != NULL && last_includer->type == 30) {
                    indicator->parse_status = 31;
                    indicator->code = last_includer;
                } else if (last_includer != NULL && last_includer->type / 10 == 4) {
                    indicator->parse_status = 35;
                    indicator->code = last_includer;
                    // last_includer->set(-1, last_includer->self, NOT_REPLACE);
                } else if (last_includer != NULL && last_includer->type == 33) {
                    // Undo the push: the previous frame must already be the
                    // status-33 frame.
                    indicator_stack_top--;
                    indicator = indicator_stack + indicator_stack_top;
                    if (indicator->parse_status != 33) {
                        //syntax error
                        break;
                    }
                } else {
                    MemorySpace * last_meta = last_includer->get(-1, NOT_DEL);
                    if (last_meta != NULL && last_meta->type == TYPE_CODE_NAME) { //function call ()
                        if (last_includer->dm_list->length > 0) {
                            // Take the name off the includer; it is replaced
                            // by the function-call node below.
                            CodeName * function_name = (CodeName *) last_includer->get(-1, DEL);
                            // Tag the name 0x81 unless it is already tagged
                            // 0x82 or 0x83. This test used `||`, which is true
                            // for every value, so names tagged 0x82/0x83 were
                            // overwritten as well.
                            if (function_name->nameType != 0x82 && function_name->nameType != 0x83) {
                                function_name->nameType = 0x81;
                            }
                            DMJSON * functionCall = this->json_pool->getDMJSON();
                            functionCall->type = 32;
                            functionCall->name = (DMString *) function_name;
                            indicator->parse_status = 32;
                            indicator->code = functionCall;
                            last_includer->set(-1, functionCall->self, NOT_REPLACE);
                        } else {
                            //syntax error
                            break;
                        }
                    } else if (last_meta != NULL && last_meta->type == TYPE_JSON) { //Anonymous function definition
                        if (last_includer->dm_list->length > 0) {
                            MemorySpace * function_definition_space = last_includer->get(-1, NOT_DEL);
                            DMJSON * functionDefinition = (DMJSON *) function_definition_space->pointer;
                            if (functionDefinition->type != 30) {
                                //syntax error
                                break;
                            }
                            // Look for a name: first the item at position -3
                            // of the includer, then the key of a key/value
                            // frame two levels down the stack.
                            MemorySpace * name = last_includer->get(-3, NOT_DEL);
                            if (name == NULL && indicator_stack_top >= 2) {
                                CodeIndicator * last_last_indicator = indicator_stack + indicator_stack_top - 2;
                                if (last_last_indicator->parse_status == 81 && last_last_indicator->key_value != NULL) {
                                    name = last_last_indicator->key_value->key;
                                }
                            }
                            if (name == NULL) {
                                functionDefinition->name = this->helper->no_name;
                            } else if (name->type == TYPE_JSON) {
                                DMJSON * selector = (DMJSON *) name->pointer;
                                if (selector->type != 71) {
                                    //syntax error
                                }
                                DMString * function_name = (DMString *) selector->get(-1, NOT_DEL);
                                if (function_name == NULL) {
                                    functionDefinition->name = this->helper->no_name;
                                } else if (function_name->type == TYPE_CODE_NAME || function_name->type == TYPE_STRING) {
                                    functionDefinition->name = function_name;
                                } else {
                                    functionDefinition->name = this->helper->no_name;
                                }
                            } else if (name->type == TYPE_CODE_NAME) {
                                DMString * function_name = spliteFunctionName((DMString *) name);
                                functionDefinition->name = function_name;
                            } else {
                                //syntax error
                                break;
                            }
                            // The frame just pushed becomes a status-74 frame
                            // around the definition; a second frame (status
                            // 31) is pushed on top of it.
                            indicator->parse_status = 74;
                            indicator->includer = functionDefinition;
                            indicator_stack_top++;
                            indicator = indicator_stack + indicator_stack_top;
                            indicator->parse_status = 31;
                            indicator->code = functionDefinition;
                        } else {
                            //syntax error
                            break;
                        }
                    } else { //expression ()
                        DMJSON * expression = this->json_pool->getDMJSON();
                        expression->type = 10;
                        indicator->parse_status = 34;
                        indicator->includer = expression;
                        last_includer->set(-1, expression->self, NOT_REPLACE);
                    }
                }
                last_index = i + 1;
                continue;
            }

            if (localChar == ')') {
                if (indicator->parse_status == 74) {
                    DMJSON * last_expression = indicator->includer;
                    clearCodeIndicator(indicator);
                    indicator_stack_top--;
                    indicator = indicator_stack + indicator_stack_top;
                    MemorySpace * element = resolveExpression(last_expression);
                    pushElement(element, indicator);
                    indicator->includer = NULL;
                }
                // Pop the frame opened by '(' and finish it according to its
                // status.
                CodeIndicator * last_indicator = indicator;
                indicator_stack_top--;
                indicator = indicator_stack + indicator_stack_top;
                if (last_indicator->parse_status == 31) {
                    last_indicator->code->block_start = last_indicator->code->dm_list->length;
                } else if (last_indicator->parse_status == 32) {
                } else if (last_indicator->parse_status == 33) {
                    DMJSON* functionReturn = last_indicator->code;
                    pushElement(functionReturn->self, indicator);
                    indicator->includer = NULL;
                } else if (last_indicator->parse_status == 34) {
                } else if (last_indicator->parse_status == 35) {
                    last_indicator->code->block_start = last_indicator->code->dm_list->length;
                } else {
                    //syntax error
                    break;
                }
                clearCodeIndicator(last_indicator);
                last_index = i + 1;
                continue;
            }
        }

        if (isOperator(localChar)) {
            // First character of an operator run: open a status-51 frame.
            if (indicator->parse_status != 51) {
                indicator_stack_top++;
                indicator = indicator_stack + indicator_stack_top;
                indicator->parse_status = 51;
                indicator->start = i;
                last_index = i;
                continue;
            }
        }
    }
    return root;
}