/*
 * Separates multiple values in an array and ensures that the
 * line length is within the preferred value when possible.
 *
 * Strategy: while an array still fits on one line its items are buffered in
 * line_buf (states JSON_EMPTY_ARRAY -> JSON_SL_ARRAY) and nothing is written;
 * once the projected line would exceed line_cols the '[' and the buffered
 * items are flushed and the array becomes multi-line (JSON_ML_ARRAY), with
 * one wrap decision per subsequent value. In the JSON_PROPERTY state the
 * value is written directly after the already-emitted "name: ".
 * The +1/+2 terms in the width checks account for separators and the
 * closing bracket (see the matching checks in jsonwr_end_array_value).
 */
static void write_value(JSONWR_T* jsonwr) {
  int line_len, val_len;
  enforce_state(jsonwr->state, 4, JSON_PROPERTY, JSON_EMPTY_ARRAY, JSON_SL_ARRAY, JSON_ML_ARRAY);
  val_len = str_len(jsonwr->value_buf);
  if (jsonwr->state == JSON_EMPTY_ARRAY) {
    // first array item: buffer it if "[item]" would fit at this indent
    if ((jsonwr->indent + 1 + val_len + 2) < jsonwr->line_cols) {
      str_clear(jsonwr->line_buf);
      str_append(jsonwr->line_buf, str_internal(jsonwr->value_buf), val_len);
      jsonwr->state = JSON_SL_ARRAY;
      return; // don't write anything yet
    } else {
      // too long already: commit to a multi-line array
      fputc('[', jsonwr->file);
      jsonwr->column += 1;
      write_nl_indent(jsonwr);
    }
  } else if (jsonwr->state == JSON_SL_ARRAY) {
    line_len = str_len(jsonwr->line_buf);
    // would "[buffered, item]" still fit on one line?
    if ((jsonwr->indent + 1 + line_len + 2 + val_len + 2) < jsonwr->line_cols) {
      str_append(jsonwr->line_buf, ", ", 2);
      str_append(jsonwr->line_buf, str_internal(jsonwr->value_buf), val_len);
      return; // don't write anything yet
    } else {
      // overflow: flush the '[' and buffered items, switch to multi-line
      fputc('[', jsonwr->file);
      jsonwr->column += 1;
      write_nl_indent(jsonwr);
      fputs(str_internal(jsonwr->line_buf), jsonwr->file);
      jsonwr->column += line_len;
      jsonwr->state = JSON_ML_ARRAY;
    }
  }
  if (jsonwr->state == JSON_ML_ARRAY) {
    // separate from the previous item; wrap if the value wouldn't fit
    fputc(',', jsonwr->file);
    jsonwr->column += 1;
    if ((jsonwr->column + 1 + val_len + 2) < jsonwr->line_cols) {
      fputc(' ', jsonwr->file);
      jsonwr->column += 1;
    } else {
      write_nl_indent(jsonwr);
    }
  }
  // write the value itself (property values and multi-line array items)
  fputs(str_internal(jsonwr->value_buf), jsonwr->file);
  jsonwr->column += str_len(jsonwr->value_buf);
  if (jsonwr->state == JSON_PROPERTY) {
    jsonwr->state = pop_state(jsonwr->stack); // back to the enclosing object
  } else { // ARRAY
    jsonwr->state = JSON_ML_ARRAY;
  }
}
/*
 * Parser state: inside an object, after '{' or after a comma-terminated
 * member. Accepts either a string token (the next property name, moving to
 * the PS_COLON state) or TOK_ENDOBJ (closing the object and popping back to
 * the enclosing context). When the popped state shows the object was itself
 * a property value or the top-level data, the matching end_property /
 * end_data callbacks are fired. Anything else is a parse error.
 *
 * Fix: corrected the grammar of the error message ("a object" -> "an
 * object").
 */
static void expect_property_or_endobj(JSONRD_T *jsonrd) {
  if (jsonrd->token.type == TOK_STRING) {
    STR_T *property;
    property = jsonrd->token.value_string;
    if (jsonrd->callbacks.start_property) {
      jsonrd->callbacks.start_property(jsonrd->user_data, str_internal(property), str_len(property));
    }
    jsonrd->state = PS_COLON;
  } else if (jsonrd->token.type == TOK_ENDOBJ) {
    if (jsonrd->callbacks.end_object) {
      jsonrd->callbacks.end_object(jsonrd->user_data);
    }
    jsonrd->state = pop_state(jsonrd);
    if (jsonrd->state == PS_COMMA_OR_ENDOBJ) {
      // the object we just closed was the value of a property
      if (jsonrd->callbacks.end_property) {
        jsonrd->callbacks.end_property(jsonrd->user_data);
      }
    } else if (jsonrd->state == PS_FIND_LANDMARK) {
      // the object we just closed was the top-level data object
      if (jsonrd->callbacks.end_data) {
        jsonrd->callbacks.end_data(jsonrd->user_data);
      }
    }
  } else {
    error(jsonrd, "Expected an object property or the end of the object.");
  }
}
/*
 * jsonwr_property
 * Write a property. The next call must be a value.
 *
 * Emits an optional separating comma, a fresh indented line, then the
 * converted (quoted/escaped) property name followed by ": ". Pushes
 * JSON_OBJECT so the value writer can return to the object state.
 *
 * Fix: the original did not add the property name and ": " to
 * jsonwr->column, unlike every other writer in this file (write_value,
 * write_start, jsonwr_end_array_value), so later line-wrap decisions
 * underestimated the current column. The column is now kept in sync.
 */
void jsonwr_property(JSONWR_T* jsonwr, char* property) {
  enforce_state(jsonwr->state, 2, JSON_EMPTY_OBJECT, JSON_OBJECT);
  if (jsonwr->state != JSON_EMPTY_OBJECT) fputs(",", jsonwr->file);
  write_nl_indent(jsonwr);
  convert_string(jsonwr->value_buf, property);
  fputs(str_internal(jsonwr->value_buf), jsonwr->file);
  fputs(": ", jsonwr->file);
  // account for the name and the ": " separator in the column tracking
  jsonwr->column += str_len(jsonwr->value_buf) + 2;
  push_state(jsonwr->stack, JSON_OBJECT);
  jsonwr->state = JSON_PROPERTY;
}
/*
 * Parser state: inside a list, expecting either a value (atom, object or
 * nested list) or the ']' that ends the list. Atoms fire their callback and
 * leave us waiting for a comma or ']'. Containers push the return state and
 * descend. TOK_ENDLST pops the saved state and, when the list was itself a
 * property value, fires end_property.
 */
static void expect_value_or_endlst(JSONRD_T *jsonrd) {
  STR_T *value;
  switch (jsonrd->token.type) {
    case TOK_STRING:
      if (jsonrd->callbacks.atom_string) {
        value = jsonrd->token.value_string;
        jsonrd->callbacks.atom_string(jsonrd->user_data, str_internal(value), str_len(value));
      }
      jsonrd->state = PS_COMMA_OR_ENDLST;
      break;
    case TOK_NUMBER:
      if (jsonrd->callbacks.atom_number) {
        jsonrd->callbacks.atom_number(jsonrd->user_data, jsonrd->token.value_number);
      }
      jsonrd->state = PS_COMMA_OR_ENDLST;
      break;
    case TOK_TRUE:
    case TOK_FALSE:
      if (jsonrd->callbacks.atom_bool) {
        jsonrd->callbacks.atom_bool(jsonrd->user_data, jsonrd->token.value_bool);
      }
      jsonrd->state = PS_COMMA_OR_ENDLST;
      break;
    case TOK_NULL:
      if (jsonrd->callbacks.atom_null) {
        jsonrd->callbacks.atom_null(jsonrd->user_data);
      }
      jsonrd->state = PS_COMMA_OR_ENDLST;
      break;
    case TOK_STARTOBJ:
      if (jsonrd->callbacks.start_object) {
        jsonrd->callbacks.start_object(jsonrd->user_data);
      }
      push_state(jsonrd, PS_COMMA_OR_ENDLST); // where to resume after '}'
      jsonrd->state = PS_PROPERTY_OR_ENDOBJ;
      break;
    case TOK_STARTLST:
      if (jsonrd->callbacks.start_list) {
        jsonrd->callbacks.start_list(jsonrd->user_data);
      }
      push_state(jsonrd, PS_COMMA_OR_ENDLST); // where to resume after ']'
      jsonrd->state = PS_VALUE_OR_ENDLST;
      break;
    case TOK_ENDLST:
      if (jsonrd->callbacks.end_list) {
        jsonrd->callbacks.end_list(jsonrd->user_data);
      }
      jsonrd->state = pop_state(jsonrd);
      // a list that was a property value also ends that property
      if (jsonrd->state == PS_COMMA_OR_ENDOBJ && jsonrd->callbacks.end_property) {
        jsonrd->callbacks.end_property(jsonrd->user_data);
      }
      break;
    default:
      error(jsonrd, "Expected a list value or the end of the list.");
  }
}
/*
 * Parser state: expecting a property name. Only a string token is legal
 * here; it fires the start_property callback and moves to the PS_COLON
 * state. Any other token is reported as a parse error.
 */
static void expect_property(JSONRD_T *jsonrd) {
  STR_T *name;
  if (jsonrd->token.type != TOK_STRING) {
    error(jsonrd, "Expected a property.");
    return;
  }
  name = jsonrd->token.value_string;
  if (jsonrd->callbacks.start_property) {
    jsonrd->callbacks.start_property(jsonrd->user_data, str_internal(name), str_len(name));
  }
  jsonrd->state = PS_COLON;
}
/**************************************************************************
 * Generate logos for a motif
 * Warning, this may modify the path and motif arguments.
 *
 * The motif id is sanitised into a fixed-size buffer, appended to the
 * output path as "logo<NAME>" (and "logo_rc<NAME>" for the reverse
 * complement when requested), and a logo is created for each. The path is
 * truncated back to its original length before returning.
 **************************************************************************/
static void generate_motif_logos(OPTIONS_T *options, STR_T *path, MOTIF_T *motif) {
  int base_len;
  char sanitised[MAX_MOTIF_ID_LENGTH + 1];
  // sanitise the motif id for use in a file name; guarantee termination
  copy_and_sanatise_name(sanitised, get_motif_id(motif), MAX_MOTIF_ID_LENGTH);
  sanitised[MAX_MOTIF_ID_LENGTH] = '\0';
  base_len = str_len(path);
  // forward strand logo
  str_appendf(path, "logo%s", sanitised);
  CL_create1(motif, FALSE, FALSE, "MEME (no SSC)", str_internal(path), options->eps, options->png);
  // optional reverse-complement logo (modifies the motif in place)
  if (options->rc) {
    str_truncate(path, base_len);
    str_appendf(path, "logo_rc%s", sanitised);
    reverse_complement_motif(motif);
    CL_create1(motif, FALSE, FALSE, "MEME (no SSC)", str_internal(path), options->eps, options->png);
  }
  // restore the path for the caller
  str_truncate(path, base_len);
}
/*
 * Scan the chunk for the '{' that begins the JSON data. When found, arrange
 * for parsing to resume at PS_FIND_LANDMARK after this object, switch to
 * expecting properties, reset the tokenizer, and fire the start_data (with
 * the buffered text preceding the data) and start_object callbacks.
 * Returns the index just past the '{', or size if no '{' was seen.
 */
static int find_start(JSONRD_T *jsonrd, const char *chunk, size_t size) {
  int pos;
  for (pos = 0; pos < size; pos++) {
    if (chunk[pos] != '{') continue;
    push_state(jsonrd, PS_FIND_LANDMARK);
    jsonrd->state = PS_PROPERTY_OR_ENDOBJ;
    jsonrd->token.state = TS_FOUND_TOKEN; // causes the token data to be reset on next_token call
    if (jsonrd->callbacks.start_data) {
      jsonrd->callbacks.start_data(jsonrd->user_data, str_internal(jsonrd->buf), str_len(jsonrd->buf));
    }
    if (jsonrd->callbacks.start_object) {
      jsonrd->callbacks.start_object(jsonrd->user_data);
    }
    return pos + 1;
  }
  return size;
}
/*
 * Search the incoming chunk for the landmark string (Boyer-Moore search via
 * bmstr_*), coping with landmarks that straddle chunk boundaries by keeping
 * unmatched tail bytes in jsonrd->buf between calls.
 * Returns the number of chunk bytes consumed; on a full match the parser
 * state advances to PS_READ_VAR.
 * NOTE(review): this relies on bmstr_substring returning the match position
 * when >= 0 and encoding the earliest still-possible match start as
 * -(pos + 1) when negative — confirm against the bmstr API.
 */
static int find_landmark(JSONRD_T *jsonrd, const char *chunk, size_t size) {
  int len, pos;
  len = str_len(jsonrd->buf);
  // check for previous partial matches
  if (len > 0) {
    // append up to landmark size -1 characters to the buffer
    // (enough to complete a match that started in the previous chunk)
    str_append(jsonrd->buf, chunk, (size < bmstr_length(jsonrd->landmark) ? size : bmstr_length(jsonrd->landmark) - 1));
    // now look for the landmark
    pos = bmstr_substring(jsonrd->landmark, str_internal(jsonrd->buf), str_len(jsonrd->buf));
    if (pos >= 0) { //match
      // convert to the offset just past the landmark, relative to the chunk
      pos = pos + bmstr_length(jsonrd->landmark) - len;
      jsonrd->state = PS_READ_VAR;
      return pos;
    } else { //possible partial match
      pos = -(pos + 1);
      if (pos < len) {
        // apparently the chunk wasn't large enough to actually test everything
        // in the buffer. As there's nothing more to test we'll just delete the
        // bits that definately don't match at the front of the buffer
        if (pos > 0) str_delete(jsonrd->buf, 0, pos);
        return size;
      } else {
        // possible partial match, but we don't care because we'll find it when
        // we look at the chunk
        str_clear(jsonrd->buf);
      }
    }
  }
  // no carried-over state: search the chunk directly
  pos = bmstr_substring(jsonrd->landmark, chunk, size);
  if (pos >= 0) {
    // consumed up to and including the landmark
    pos = pos + bmstr_length(jsonrd->landmark);
    jsonrd->state = PS_READ_VAR;
    return pos;
  }
  // buffer any trailing partial match for the next chunk
  pos = -(pos + 1);
  if (pos < size) {
    str_append(jsonrd->buf, chunk+pos, size - pos);
  }
  return size;
}
/**************************************************************************
 * Generate logos for all motifs in a file
 *
 * Builds the output directory path (ensuring a trailing '/'), creates the
 * directory, then streams motifs from the motif file and generates logos
 * for each one in turn.
 **************************************************************************/
static void generate_file_logos(OPTIONS_T *options) {
  STR_T *path;
  MREAD_T *mread;
  // build the output directory path with a guaranteed trailing slash
  path = str_create(100);
  str_append(path, options->dir, strlen(options->dir));
  if (str_char(path, -1) != '/') str_append(path, "/", 1);
  // create output directory
  if (create_output_directory(str_internal(path), TRUE, FALSE)) exit(EXIT_FAILURE);
  // open motif file and process each motif
  mread = mread_create(options->motifs_file, OPEN_MFILE);
  while (mread_has_motif(mread)) {
    MOTIF_T *motif = mread_next_motif(mread);
    generate_motif_logos(options, path, motif);
    destroy_motif(motif);
  }
  // cleanup
  mread_destroy(mread);
  str_destroy(path, FALSE);
}
/*
 * jsonwr_end_array_value
 * Close the current array. A multi-line array gets its ']' on a fresh
 * indented line. An empty or still-buffered single-line array has not yet
 * emitted its '['; it is written here (wrapping first if the whole
 * "[items]" would overflow the line), followed by any buffered items,
 * before the closing ']'.
 */
void jsonwr_end_array_value(JSONWR_T* jsonwr) {
  int buffered;
  enforce_state(jsonwr->state, 3, JSON_EMPTY_ARRAY, JSON_SL_ARRAY, JSON_ML_ARRAY);
  jsonwr->indent -= jsonwr->tab_cols;
  if (jsonwr->state == JSON_ML_ARRAY) {
    // items already written; put ']' on its own indented line
    write_nl_indent(jsonwr);
  } else {
    // '[' was deferred; emit it now, wrapping if "[items]" won't fit
    buffered = (jsonwr->state == JSON_SL_ARRAY ? str_len(jsonwr->line_buf) : 0);
    if ((jsonwr->column + 1 + buffered + 2) >= jsonwr->line_cols) write_nl_indent(jsonwr);
    fputc('[', jsonwr->file);
    jsonwr->column += 1;
    if (jsonwr->state == JSON_SL_ARRAY) {
      // flush the buffered single-line items
      fputs(str_internal(jsonwr->line_buf), jsonwr->file);
      jsonwr->column += buffered;
    }
  }
  fputc(']', jsonwr->file);
  jsonwr->column += 1;
  jsonwr->state = pop_state(jsonwr->stack);
}
/*
 * Start either an array or object which is surrounded by brackets.
 * When the container begins inside an array, any deferred '[' and buffered
 * single-line items are flushed first and a separator is written as needed;
 * the enclosing array is recorded as multi-line on the state stack.
 * The final column increment accounts for the container's opening bracket
 * (presumably written by the caller — confirm against the callers).
 */
static void write_start(JSONWR_T* jsonwr, JSON_EN new_state) {
  enforce_state(jsonwr->state, 4, JSON_PROPERTY, JSON_EMPTY_ARRAY, JSON_SL_ARRAY, JSON_ML_ARRAY);
  if (jsonwr->state != JSON_PROPERTY) { // an array
    switch (jsonwr->state) {
      case JSON_EMPTY_ARRAY:
        // first item of the array: emit the deferred '[' on a new line
        fputc('[', jsonwr->file);
        jsonwr->column += 1;
        write_nl_indent(jsonwr);
        break;
      case JSON_SL_ARRAY:
        // flush the deferred '[' plus buffered items, then a separator
        fputc('[', jsonwr->file);
        jsonwr->column += 1;
        write_nl_indent(jsonwr);
        fputs(str_internal(jsonwr->line_buf), jsonwr->file);
        jsonwr->column += str_len(jsonwr->line_buf);
        fputs(", ", jsonwr->file);
        jsonwr->column += 2;
        break;
      default: // JSON_ML_ARRAY: just separate from the previous item
        fputs(", ", jsonwr->file);
        jsonwr->column += 2;
        break;
    }
    push_state(jsonwr->stack, JSON_ML_ARRAY);
    if ((jsonwr->column + 1) >= jsonwr->line_cols) write_nl_indent(jsonwr);
  }
  jsonwr->state = new_state;
  jsonwr->column += 1;
  jsonwr->indent += jsonwr->tab_cols;
}
/*
 * Load background file frequencies into the array.
 *
 * Reads the background file in BG_CHUNK_SIZE pieces, reassembling lines
 * across chunk boundaries (tolerating Mac '\r', Windows "\r\n" and Unix
 * '\n' endings and stripping '#' comments). Each line matching BGFREQ_RE
 * contributes a letter -> frequency entry to a case-insensitive tree.
 * If *alph is INVALID_ALPH the alphabet is guessed from the number of
 * distinct letters; the frequencies are then placed into freqs (allocated
 * here when NULL), validated for coverage and duplicates, and ambiguous
 * letter values are derived from the concrete ones.
 * Returns freqs (caller owns it). Dies on any parse or validation error.
 */
ARRAY_T* get_file_frequencies(ALPH_T *alph, char *bg_filename, ARRAY_T *freqs) {
  regmatch_t matches[4];
  STR_T *line;
  char chunk[BG_CHUNK_SIZE+1], letter[2], *key;
  int size, terminate, offset, i;
  FILE *fp;
  regex_t bgfreq;
  double freq;
  RBTREE_T *letters;
  RBNODE_T *node;
  regcomp_or_die("bg freq", &bgfreq, BGFREQ_RE, REG_EXTENDED);
  // case-insensitive letter -> frequency map
  letters = rbtree_create(rbtree_strcasecmp, rbtree_strcpy, free, rbtree_dblcpy, free);
  line = str_create(100);
  if (!(fp = fopen(bg_filename, "r"))) {
    die("Unable to open background file \"%s\" for reading.\n", bg_filename);
  }
  terminate = feof(fp);
  while (!terminate) {
    size = fread(chunk, sizeof(char), BG_CHUNK_SIZE, fp);
    chunk[size] = '\0';
    terminate = feof(fp);
    offset = 0;
    while (offset < size) {
      // skip mac newline
      if (str_len(line) == 0 && chunk[offset] == '\r') {
        offset++;
        continue;
      }
      // find next new line
      for (i = offset; i < size; ++i) {
        if (chunk[i] == '\n') break;
      }
      // append portion up to the new line or end of chunk
      str_append(line, chunk+offset, i - offset);
      // read more if we didn't find a new line
      if (i == size && !terminate) break;
      // move the offset past the new line
      offset = i + 1;
      // handle windows new line
      if (str_char(line, -1) == '\r') str_truncate(line, -1);
      // remove everything to the right of a comment character
      for (i = 0; i < str_len(line); ++i) {
        if (str_char(line, i) == '#') {
          str_truncate(line, i);
          break;
        }
      }
      // check the line for a single letter followed by a number
      // (regexec_or_die presumably returns nonzero on match — confirm)
      if (regexec_or_die("bg freq", &bgfreq, str_internal(line), 4, matches, 0)) {
        // parse the letter and frequency value
        regex_strncpy(matches+1, str_internal(line), letter, 2);
        freq = regex_dbl(matches+2, str_internal(line));
        // check the frequency is acceptable
        if (freq < 0 || freq > 1) {
          die("The background file lists the illegal probability %g for "
              "the letter %s.\n", freq, letter);
        } else if (freq == 0) {
          die("The background file lists a probability of zero for the "
              "letter %s\n", letter);
        }
        // die() above exits, so this guard is always true when reached
        if (freq >= 0 && freq <= 1) rbtree_put(letters, letter, &freq);
      }
      str_clear(line);
    }
  }
  // finished with the file so clean up file parsing stuff
  fclose(fp);
  str_destroy(line, FALSE);
  regfree(&bgfreq);
  // guess the alphabet
  if (*alph == INVALID_ALPH) {
    switch (rbtree_size(letters)) {
      case PROTEIN_ASIZE:
        *alph = PROTEIN_ALPH;
        break;
      case DNA_ASIZE:
        *alph = DNA_ALPH;
        break;
      default:
        die("Number of single character entries in background does not match "
            "an alphabet.\n");
    }
  }
  // make the background
  if (freqs == NULL) freqs = allocate_array(alph_size(*alph, ALL_SIZE));
  assert(get_array_length(freqs) >= alph_size(*alph, ALL_SIZE));
  // -1 marks "not yet set" so duplicates and gaps can be detected below
  init_array(-1, freqs);
  for (node = rbtree_first(letters); node != NULL; node = rbtree_next(node)) {
    key = (char*)rbtree_key(node);
    i = alph_index(*alph, key[0]);
    freq = *((double*)rbtree_value(node));
    if (i == -1) {
      die("Background contains letter %s which is not in the %s alphabet.\n",
          key, alph_name(*alph));
    }
    if (get_array_item(i, freqs) != -1) {
      die("Background contains letter %s which has the same meaning as an "
          "already listed letter.\n", key);
    }
    set_array_item(i, freq, freqs);
  }
  // check that all items were set
  for (i = 0; i < alph_size(*alph, ALPH_SIZE); i++) {
    if (get_array_item(i, freqs) == -1) {
      die("Background is missing letter %c.\n", alph_char(*alph, i));
    }
  }
  // disabled for backwards compatability (AMA test was failing)
  //normalize_subarray(0, ALPH_ASIZE[*alph], 0.0, freqs);
  // calculate the values of the ambiguous letters from the concrete ones
  calc_ambigs(*alph, FALSE, freqs);
  // cleanup
  rbtree_destroy(letters);
  // return result
  return freqs;
}
/*
 * Consume chunk bytes belonging to a JSON string token, appending decoded
 * characters to token.value_string. Handles three concerns:
 *   1. UTF-8 code units split across chunk boundaries (buffered in
 *      jsonrd->buf and completed on the next call);
 *   2. backslash escapes (SS_ESCAPE) including \uXXXX (SS_HEX_NUM,
 *      collecting 4 hex digits in jsonrd->buf);
 *   3. the terminating unescaped '"'.
 * Returns the index just past the last byte consumed. Any malformed UTF-8
 * or illegal escape marks the token TOK_ILLEGAL and ends it.
 * NOTE(review): relies on unicode_from_string returning the code point
 * (>= 0), -2 for a truncated-but-valid prefix, and other negatives for
 * invalid UTF-8, with *bytes set to the unit length — confirm against its
 * definition.
 */
static inline int process_string_token(JSONRD_T *jsonrd, const char *chunk, size_t size, size_t offset) {
  int i, bytes, bytes_needed;
  int32_t code_point;
  STR_T *str;
  str = jsonrd->token.value_string;
  // check to see if there are any incomplete UTF-8 code units
  if (str_len(jsonrd->buf) > 0 && jsonrd->token.str_state == SS_NORMAL) {
    // complete the code unit
    // count the bytes needed for the complete code unit
    unicode_from_string(str_internal(jsonrd->buf), str_len(jsonrd->buf), &bytes);
    bytes_needed = bytes - str_len(jsonrd->buf);
    if ((size - offset) >= bytes_needed) {
      // got enough bytes to calculate the code point
      str_append(jsonrd->buf, chunk+offset, bytes_needed);
      code_point = unicode_from_string(str_internal(jsonrd->buf), str_len(jsonrd->buf), &bytes);
      if (code_point < 0) {
        // bad UTF-8
        jsonrd->token.type = TOK_ILLEGAL;
        jsonrd->token.state = TS_FOUND_TOKEN;
        return offset + bytes_needed;
      }
      // completed unit is valid: move it into the string value
      str_append(str, str_internal(jsonrd->buf), str_len(jsonrd->buf));
      str_clear(jsonrd->buf);
      offset += bytes_needed;
    } else {
      // incomplete code unit, need to buffer it
      str_append(jsonrd->buf, chunk+offset, size - offset);
      return size;
    }
  }
  // loop over buffer; advances by the size of each UTF-8 unit
  for (i = offset; i < size; i += bytes) {
    // check for valid UTF-8
    code_point = unicode_from_string(chunk+i, size - i, &bytes);
    // characters out of the ASCII range are not involved in ending
    // the string or character escapes so they can be handled first
    if (code_point > 0x7F) {
      // non-ASCII, complete UTF-8 code unit
      if (jsonrd->token.str_state != SS_NORMAL) {
        // only ASCII allowed for escape
        jsonrd->token.type = TOK_ILLEGAL;
        jsonrd->token.state = TS_FOUND_TOKEN;
        return i + bytes;
      }
      str_append(str, chunk+i, bytes);
    } else if (code_point == -2) {
      // incomplete code unit, need to buffer it
      if (jsonrd->token.str_state != SS_NORMAL) {
        // only ASCII allowed for escape
        jsonrd->token.type = TOK_ILLEGAL;
        jsonrd->token.state = TS_FOUND_TOKEN;
        return i + bytes;
      }
      str_clear(jsonrd->buf);
      str_append(jsonrd->buf, chunk+i, size - i);
      return size;
    } else if (code_point < 0) {
      // error bad UTF-8!
      jsonrd->token.type = TOK_ILLEGAL;
      jsonrd->token.state = TS_FOUND_TOKEN;
      return i + bytes;
    }
    // now we handle the ASCII characters
    switch (jsonrd->token.str_state) {
      case SS_NORMAL:
        if (chunk[i] == '"') { // End of string
          jsonrd->token.state = TS_FOUND_TOKEN;
          return i + 1;
        } else if (chunk[i] == '\\') {
          // start of an escape sequence
          jsonrd->token.str_state = SS_ESCAPE;
        } else {
          str_append(str, chunk+i, 1);
        }
        break;
      case SS_ESCAPE:
        // translate the single-character escapes
        if (chunk[i] == '"') {
          str_append(str, "\"", 1);
        } else if (chunk[i] == '\\') {
          str_append(str, "\\", 1);
        } else if (chunk[i] == '/') {
          str_append(str, "/", 1);
        } else if (chunk[i] == 'b') {
          str_append(str, "\b", 1);
        } else if (chunk[i] == 'f') {
          str_append(str, "\f", 1);
        } else if (chunk[i] == 'n') {
          str_append(str, "\n", 1);
        } else if (chunk[i] == 'r') {
          str_append(str, "\r", 1);
        } else if (chunk[i] == 't') {
          str_append(str, "\t", 1);
        } else if (chunk[i] == 'u') {
          // \uXXXX: collect the 4 hex digits in jsonrd->buf
          jsonrd->token.str_state = SS_HEX_NUM;
          str_clear(jsonrd->buf);
          break;
        } else {
          // unrecognized escape character
          jsonrd->token.type = TOK_ILLEGAL;
          jsonrd->token.state = TS_FOUND_TOKEN;
          return i + 1;
        }
        jsonrd->token.str_state = SS_NORMAL;
        break;
      case SS_HEX_NUM:
        if ((chunk[i] >= 'a' && chunk[i] <= 'f') ||
            (chunk[i] >= 'A' && chunk[i] <= 'F') ||
            (chunk[i] >= '0' && chunk[i] <= '9')) {
          str_append(jsonrd->buf, chunk+i, 1);
        } else {
          // non-hex digit inside \uXXXX
          jsonrd->token.type = TOK_ILLEGAL;
          jsonrd->token.state = TS_FOUND_TOKEN;
          return i + 1;
        }
        if (str_len(jsonrd->buf) == 4) {
          // all 4 digits collected: decode and append the code point
          code_point = strtoll(str_internal(jsonrd->buf), NULL, 16);
          str_clear(jsonrd->buf);
          str_append_code(str, code_point);
          jsonrd->token.str_state = SS_NORMAL;
        }
        break;
    }
  }
  return size;
}
/*
 * Consume chunk bytes belonging to a JSON number token, driving a state
 * machine over the JSON number grammar:
 *   [-] ( 0 | [1-9][0-9]* ) [ . [0-9]+ ] [ (e|E) [+|-] [0-9]+ ]
 * Each accepted byte is appended to jsonrd->buf (at the bottom of the
 * loop), so when a delimiter ends the number the buffered text is parsed
 * with strtold into token.value_number. A byte that cannot extend the
 * number either ends the token (return i, byte not consumed) or, where a
 * digit was mandatory, marks it TOK_ILLEGAL (return i + 1).
 * Returns the index just past the last byte consumed; size when the chunk
 * ended mid-number (the state and buffer carry over to the next call).
 */
static inline int process_number_token(JSONRD_T *jsonrd, const char *chunk, size_t size, size_t offset) {
  int i;
  for (i = offset; i < size; i++) {
    switch (jsonrd->token.num_state) {
      case NS_BEGIN:
        // optional leading minus, or first digit
        if (chunk[i] == '-') {
          jsonrd->token.num_state = NS_LEADING_MINUS;
        } else if (chunk[i] == '0') {
          jsonrd->token.num_state = NS_LEADING_ZERO;
        } else if (chunk[i] > '0' && chunk[i] <= '9') {
          jsonrd->token.num_state = NS_LEADING_DIGITS;
        } else {
          jsonrd->token.type = TOK_ILLEGAL;
          jsonrd->token.state = TS_FOUND_TOKEN;
          return i + 1;
        }
        break;
      case NS_LEADING_MINUS:
        // a digit must follow the minus sign
        if (chunk[i] == '0') {
          jsonrd->token.num_state = NS_LEADING_ZERO;
        } else if (chunk[i] > '0' && chunk[i] <= '9') {
          jsonrd->token.num_state = NS_LEADING_DIGITS;
        } else {
          jsonrd->token.type = TOK_ILLEGAL;
          jsonrd->token.state = TS_FOUND_TOKEN;
          return i + 1;
        }
        break;
      case NS_LEADING_ZERO:
        // JSON forbids further digits after a leading zero; only a
        // fraction, an exponent, or the end of the number may follow
        if (chunk[i] == '.') {
          jsonrd->token.num_state = NS_DECIMAL_PLACE;
        } else if (chunk[i] == 'e' || chunk[i] == 'E') {
          jsonrd->token.num_state = NS_EXPONENT_CHAR;
        } else {
          jsonrd->token.value_number = strtold(str_internal(jsonrd->buf), NULL);
          jsonrd->token.state = TS_FOUND_TOKEN;
          return i; // delimiter not consumed
        }
        break;
      case NS_LEADING_DIGITS:
        if (chunk[i] >= '0' && chunk[i] <= '9') {
          // keep current state
        } else if (chunk[i] == '.') {
          jsonrd->token.num_state = NS_DECIMAL_PLACE;
        } else if (chunk[i] == 'e' || chunk[i] == 'E') {
          jsonrd->token.num_state = NS_EXPONENT_CHAR;
        } else {
          jsonrd->token.value_number = strtold(str_internal(jsonrd->buf), NULL);
          jsonrd->token.state = TS_FOUND_TOKEN;
          return i; // delimiter not consumed
        }
        break;
      case NS_DECIMAL_PLACE:
        // at least one digit must follow the decimal point
        if (chunk[i] >= '0' && chunk[i] <= '9') {
          jsonrd->token.num_state = NS_DECIMAL_DIGITS;
        } else {
          jsonrd->token.type = TOK_ILLEGAL;
          jsonrd->token.state = TS_FOUND_TOKEN;
          return i + 1;
        }
        break;
      case NS_DECIMAL_DIGITS:
        if (chunk[i] >= '0' && chunk[i] <= '9') {
          // keep current state
        } else if (chunk[i] == 'e' || chunk[i] == 'E') {
          jsonrd->token.num_state = NS_EXPONENT_CHAR;
        } else {
          jsonrd->token.value_number = strtold(str_internal(jsonrd->buf),
              NULL);
          jsonrd->token.state = TS_FOUND_TOKEN;
          return i; // delimiter not consumed
        }
        break;
      case NS_EXPONENT_CHAR:
        // optional sign or first exponent digit
        if (chunk[i] == '+' || chunk[i] == '-') {
          jsonrd->token.num_state = NS_EXPONENT_SIGN;
        } else if (chunk[i] >= '0' && chunk[i] <= '9') {
          jsonrd->token.num_state = NS_EXPONENT_DIGITS;
        } else {
          jsonrd->token.type = TOK_ILLEGAL;
          jsonrd->token.state = TS_FOUND_TOKEN;
          return i + 1;
        }
        break;
      case NS_EXPONENT_SIGN:
        // at least one digit must follow the exponent sign
        if (chunk[i] >= '0' && chunk[i] <= '9') {
          jsonrd->token.num_state = NS_EXPONENT_DIGITS;
        } else {
          jsonrd->token.type = TOK_ILLEGAL;
          jsonrd->token.state = TS_FOUND_TOKEN;
          return i + 1;
        }
        break;
      case NS_EXPONENT_DIGITS:
        if (chunk[i] >= '0' && chunk[i] <= '9') {
          // keep current state
        } else {
          jsonrd->token.value_number = strtold(str_internal(jsonrd->buf), NULL);
          jsonrd->token.state = TS_FOUND_TOKEN;
          return i; // delimiter not consumed
        }
        break;
      default:
        die("Illegal state");
        return size;
    }
    // byte accepted: append it to the number text being accumulated
    str_append(jsonrd->buf, chunk+i, 1);
  }
  return size;
}