// Should only be called when we actually have the start of an array // Otherwise it is an internal error bool JSON::parse_json_array() { NOT_PRODUCT(const char* prev_pos); int c; mark_pos(); // Check that we are not called in error if (expect_any("[", "array start character", INTERNAL_ERROR) <= 0) { return false; } if (!callback(JSON_ARRAY_BEGIN, NULL, level++)) { return false; } for (;;) { mark_pos(); c = skip_to_token(); if (c == 0) { error(SYNTAX_ERROR, "EOS when expecting a json value or array end"); return false; } else if (c < 0) { return false; } else if (c == ']') { // We got here from either empty array "[]" or ending comma "[1,]" next(); break; } mark_pos(); NOT_PRODUCT(prev_pos = pos); if (parse_json_value() == false) { return false; } assert(pos > prev_pos, "parsing stalled"); c = skip_to_token(); mark_pos(); if (expect_any(",]", "value separator or array end") <= 0) { return false; } if (c == ']') { break; } } assert(c == ']', "array parsing ended without array end token (']')"); return callback(JSON_ARRAY_END, NULL, --level); }
/* * Should only be called when we actually have a block comment to skip. * Otherwise it is an internal error. * * Returns the first token after the block comment without consuming it. * Returns -1 if EOS is encountered in the middle of a comment. */ int JSON::skip_block_comment() { const char* current; // Check that we are not called in error. if (peek() != '/' || peek(1) != '*') { // Let expect handle EOS. expect_string("/*", "block comment start", INTERNAL_ERROR); return 0; } current = pos; for (;;) { current = strchrnul_(current, '*'); if (current[0] == 0 || current[1] == 0) { // Advance error marker to start of block comment mark_pos(); error(SYNTAX_ERROR, "Block comment started here never ended. Expected \"*/\" before EOS."); return -1; } if (current[1] == '/') { pos = current; if (expect_string("*/", "block comment end", INTERNAL_ERROR) == false) { return -1; } // Found block comment end return peek(); } current++; } }
// Consumes the literal 'name' from the input and reports it to the callback as 'symbol'
// at the current level.
// If the input does not match, the error marker is moved to the current position
// and false is returned. Otherwise the callback's result is returned.
bool JSON::parse_json_symbol(const char* name, JSON_TYPE symbol) {
  const bool matched = expect_string(name, "maybe you forgot to quote your strings?");
  if (matched) {
    return callback(symbol, NULL, level);
  }

  mark_pos();
  return false;
}
bool JSON::parse_json_number() { double double_value; int tokens, read; JSON_VAL v; mark_pos(); // Parsing number - for simplicity ints are limited to 2**53 // sscanf as a double and check if part is 0. tokens = sscanf(pos, "%lf%n", &double_value, &read); assert(tokens <= 1, "scanf implementation that counts as a token, parsing json numbers will always fail"); if (tokens == 1) { assert(read > 0, "sanity"); if (floor(double_value) == double_value) { // No exponent - treat as an int v.int_value = (int)double_value; if (!callback(JSON_NUMBER_INT, &v, level)) { return false; } } else { v.double_value = double_value; if (!callback(JSON_NUMBER_FLOAT, &v, level)) { return false; } } skip(read); return true; } error(SYNTAX_ERROR, "Couldn't parse json number (note that exponents are not supported)."); return false; }
bool JSON::parse_json_string(bool key) { const char* end; JSON_VAL v; mark_pos(); if (expect_any("\"", "string start character", INTERNAL_ERROR) <= 0) { return false; } end = strchr(pos, '"'); // TODO: escapes if (end == NULL) { error(SYNTAX_ERROR, "String started here never ended. Expected \'\"\' before EOS."); return false; } v.str.start = pos; v.str.length = end - pos; skip(end - pos); if (expect_any("\"", "string end character", INTERNAL_ERROR) <= 0) { return false; } if (key == true) { return callback(JSON_KEY, &v, level); } else { return callback(JSON_STRING, &v, level); } }
// Tokenizes one sentence.
//
// The sentence is trimmed with trim_ws() and handed to splitter(), which fills 'data'.
// mark_pos() is then called with the original (untrimmed) sentence, 'data' and
// 'init_offset'.
//
// Returns the value returned by splitter(), or 0 without touching 'data' when the
// trimmed sentence is empty.
int Tokenizer::tokenize( const string &raw_sent, V2_STR &data, const size_t init_offset ) {
    string trimmed_sent = trim_ws( raw_sent );

    // Nothing left after trimming - no tokens
    if( trimmed_sent.empty() ) {
        return 0;
    }

    const int n_tokens = splitter( trimmed_sent, data );
    mark_pos( raw_sent, data, init_offset );
    return n_tokens;
}
// Allow object keys to be without quotation, // but then restrict to ([a-zA-Z0-9_])+ bool JSON::parse_json_key() { const char* begin; JSON_VAL v; u_char c; mark_pos(); c = peek(); if (c == '"') { return parse_json_string(true); } begin = pos; c = peek(); if (c == 0) { error(SYNTAX_ERROR, "Got EOS when expecting an object key."); return false; } else if (is_word(c) == false) { error(SYNTAX_ERROR, "Expected an object key, which can be a double-quoted (\") string or a simple string (only alphanumeric characters and underscore, separated by whitespace) that doesn't need to be quoted."); return false; } for (;;) { c = peek(); // Allow the key to be delimited by control characters and the object key-value separator ':' if (c <= ' ' || c == ':') { break; } else if (is_word(c) == false) { error(SYNTAX_ERROR, "Object key need to be quoted, or consist entirely of alphanumeric characters and underscores."); return false; } next(); } v.str.start = begin; v.str.length = pos - begin; return callback(JSON_KEY, &v, level); }
bool JSON::parse_json_value() { int c; c = skip_to_token(); if (c == -1) { return false; } // Must start with object or array if (level == 0) { switch (c) { case '{': if (parse_json_object() == false) { return false; } c = skip_to_token(); if (c > 0) { mark_pos(); error(SYNTAX_ERROR, "Only one top level object/array is allowed."); return false; } else if (c < 0) { return false; } return true; case '[': if (parse_json_array() == false) { return false; } c = skip_to_token(); if (c > 0) { mark_pos(); error(SYNTAX_ERROR, "Only one top level object/array is allowed."); return false; } else if (c < 0) { return false; } return true; case 0: error(SYNTAX_ERROR, "EOS was encountered before any json declarations"); return false; default: error(SYNTAX_ERROR, "Json must start with an object or an array."); return false; } } else { // level > 0 switch (c) { case '{': return parse_json_object(); case '[': return parse_json_array(); case '"': return parse_json_string(); case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return parse_json_number(); case 't': return parse_json_symbol("true", JSON_TRUE); case 'f': return parse_json_symbol("false", JSON_FALSE); case 'n': return parse_json_symbol("null", JSON_NULL); case 0: error(SYNTAX_ERROR, "EOS was encountered when expecting a json value."); return false; default: error(SYNTAX_ERROR, "Could not parse as a json value (did you forget to quote your strings?)."); return false; } } }
// Should only be called when we actually have the start of an object // Otherwise it is an internal error bool JSON::parse_json_object() { NOT_PRODUCT(const char* prev_pos); int c; mark_pos(); // Check that we are not called in error if (expect_any("{", "object start", INTERNAL_ERROR) <= 0) { return false; } if (!callback(JSON_OBJECT_BEGIN, NULL, level++)) { return false; } for (;;) { mark_pos(); c = skip_to_token(); if (c == 0) { error(SYNTAX_ERROR, "EOS when expecting an object key or object end"); return false; } else if (c < 0) { return false; } else if (c == '}') { // We got here from either empty object "{}" or ending comma "{a:1,}" next(); break; } NOT_PRODUCT(prev_pos = pos); if (parse_json_key() == false) { return false; } assert(pos > prev_pos, "parsing stalled"); skip_to_token(); mark_pos(); if (expect_any(":", "object key-value separator") <= 0) { return false; } skip_to_token(); mark_pos(); NOT_PRODUCT(prev_pos = pos); if (parse_json_value() == false) { return false; } assert(pos > prev_pos, "parsing stalled"); c = skip_to_token(); mark_pos(); if (expect_any(",}", "value separator or object end") <= 0) { return false; } if (c == '}') { break; } } assert(c == '}', "array parsing ended without object end token ('}')"); return callback(JSON_OBJECT_END, NULL, --level); }
void *process(void *path) { char i_char[23]; char j_char[23]; char k_char[23]; char t_char = '0'; char line[512] = "0"; int i = 0; FILE *f; getxattr(path, XATTR_ANALYZER, &t_char, 1); printf("\nProcessing file %s \n", path); printf(" attr %s is %d\n", XATTR_ANALYZER, t_char - '0'); switch(t_char - '0') { case X_A_DONE: mark_done(path); break; case X_A_INPROGRESS: case X_A_NONE: memset(i_char, 0, 23); memset(j_char, 0, 23); memset(k_char, 0, 23); getxattr(path, XATTR_ANALYZER_POS, i_char, 23); getxattr(path, XATTR_ANALYZER_POS_TS, j_char, 23); printf(" attr %s is %s\n", XATTR_ANALYZER_POS, i_char); printf(" attr %s is %s\n", XATTR_ANALYZER_POS_TS, j_char); f = fopen(path, "r"); if(f == NULL) { fprintf(stderr, "Error when openning file %s\n", path); perror("fopen"); return; } if(strlen(i_char) != 0) { fseek(f, atol(i_char), SEEK_SET); printf(" Moved to pos %d\n", atol(i_char)); if(fgets(line, 512, f) == NULL) { fclose(f); return; } if(strlen(j_char) != 0 && strncmp(line, j_char, strlen(j_char)) !=0) { printf(" The position is not matched: %s, %s", j_char, line); fclose(f); return; } } if(fgets(line, 512, f) == NULL) { printf(" End of the file or error occured, will read this file again after 1 min!\n"); for(i = 0; i < 60; i++) { sleep(1); if(killed) { fclose(f); return; } } if(fgets(line, 512, f) == NULL) { printf(" Got EOF in second read, marking this file as finished!\n"); mark_done(path); fclose(f); return; } } do { if(read_line(line, j_char) == -1) { continue; } strcpy(k_char, i_char); sprintf(i_char, "%d", ftell(f)); printf(" Finished line start with %s, location was %s, location is %s, killed is %d\n", j_char, k_char, i_char, killed); if(i++ == 600) { mark_pos(path, k_char, j_char); i = 0; } //sleep(1); } while(fgets(line, 512, f) != NULL && (killed == 0)); mark_pos(path, k_char, j_char); MYSQL *conn = pthread_getspecific(my_con); if(conn != NULL) mysql_close(conn); mysql_thread_end(); fclose(f); return; } }