// Read the next whitespace-delimited token and parse it as a double.
// On success stores the value in `result` and returns true; on any
// parse failure sets a PARSE_ERROR on `err` and returns false.
bool FileTokenizer::get_double_internal( double& result, MsqError& err )
{
  // Get a token
  const char *token_end, *token = get_string( err );
  if (MSQ_CHKERR(err))
    return false;

  // Check for hex value -- on some platforms (e.g. Linux), strtod
  // will accept hex values, on others (e.g. Sun) it will not.  Force
  // failure on hex numbers for consistency.
  if (token[0] && token[1] && token[0] == '0' && toupper(token[1]) == 'X')
  {
    MSQ_SETERR(err)( MsqError::PARSE_ERROR,
                     "Syntax error at line %d: expected number, got \"%s\"",
                     line_number(), token );
    return false;
  }

  // Parse token as double
  result = strtod( token, (char**)&token_end );

  // If the one past the last char read by strtod is
  // not the NULL character terminating the string,
  // then parse failed.
  if (*token_end)
  {
    MSQ_SETERR(err)( MsqError::PARSE_ERROR,
                     "Syntax error at line %d: expected number, got \"%s\"",
                     line_number(), token );
    return false;
  }

  return true;
}
// Skip past one complete JSON array (including the closing ']'),
// then any trailing separator.  Strings inside the array are skipped
// wholesale so brackets inside them are not miscounted.  Throws a
// std::string describing the problem on malformed input or EOF.
void JsonIn::skip_array()
{
    char ch;
    int brackets = 1;
    eat_whitespace();
    int startpos = tell(); // remembered so error messages can point at the '['
    stream->get(ch);
    if (ch != '[') {
        std::stringstream err;
        err << line_number(-1) << ": expected array but found '" << ch << "'";
        throw err.str();
    }
    while (brackets && stream->good()) {
        stream->get(ch);
        // ignore everything inside strings
        if (ch == '"') {
            stream->unget();
            skip_string();
        // otherwise count opening and closing brackets until they all match
        } else if (ch == '[') {
            brackets += 1;
        } else if (ch == ']') {
            brackets -= 1;
        }
    }
    if (brackets != 0) {
        // something messed up!
        std::stringstream err;
        if (stream->fail()) {
            throw (std::string)"stream failure while reading array.";
        } else if (stream->eof()) {
            // rewind to the start of the array so line_number() reports
            // where the unterminated array began
            stream->clear();
            seek(startpos);
            err << line_number() << ": ";
            err << "couldn't find end of array, reached EOF with ";
            err << brackets << " bracket(s) left.";
            throw err.str();
        } else { // this should be impossible
            err << line_number() << " ";
            seek(startpos);
            err << "(" << line_number() << "): ";
            err << "array brackets didn't match?";
            err << " " << brackets << " bracket(s) left.";
            throw err.str();
        }
    }
    skip_separator();
}
void JsonIn::skip_array() { char ch; int brackets = 1; eat_whitespace(); stream->get(ch); if (ch != '[') { std::stringstream err; err << line_number(-1) << ": expected array but found '" << ch << "'"; throw err.str(); } while (brackets && stream->good()) { stream->get(ch); // ignore everything inside strings if (ch == '"') { stream->unget(); skip_string(); // otherwise count opening and closing brackets until they all match } else if (ch == '[') { brackets += 1; } else if (ch == ']') { brackets -= 1; } } if (brackets != 0) { // something messed up! std::stringstream err; err << "couldn't find end of array!"; err << " " << brackets << " bracket(s) left."; throw err.str(); } skip_separator(); }
// Skip one JSON value of any type, dispatching on its first character.
// Returns whether a separator (e.g. ',') followed the value, as
// reported by the type-specific skip_* helper.
bool JsonIn::skip_value()
{
    char ch;
    bool foundsep;
    eat_whitespace();
    ch = peek();
    // it's either a string '"'
    if (ch == '"') {
        foundsep = skip_string();
    // or an object '{'
    } else if (ch == '{') {
        foundsep = skip_object();
    // or an array '['
    } else if (ch == '[') {
        foundsep = skip_array();
    // or a number (-0123456789)
    } else if (ch == '-' || (ch >= '0' && ch <= '9')) {
        foundsep = skip_number();
    // or "true", "false" or "null"
    } else if (ch == 't') {
        foundsep = skip_true();
    } else if (ch == 'f') {
        foundsep = skip_false();
    } else if (ch == 'n') {
        foundsep = skip_null();
    // or an error.
    } else {
        std::stringstream err;
        err << line_number() << ": expected JSON value but got '" << ch << "'";
        throw err.str();
    }
    return foundsep;//b( foundsep || skip_separator() );
}
int FileTokenizer::match_token( const char* const* list, MsqError& err ) { // Get a token const char *token = get_string( err ); if (MSQ_CHKERR(err)) return false; // Check if it matches any input string const char* const* ptr; for (ptr = list; *ptr; ++ptr) if (0 == strcmp( token, *ptr )) return ptr - list + 1; // No match, constuct error message std::string message( "Parsing error at line " ); char lineno[16]; sprintf( lineno, "%d", line_number() ); message += lineno; message += ": expected one of {"; for (ptr = list; *ptr; ++ptr) { message += " "; message += *ptr; } message += " } got \""; message += token; message += "\""; MSQ_SETERR(err)( message, MsqError::PARSE_ERROR ); return false; }
// Consume the ':' between a JSON object key and its value, throwing a
// std::string error if the next non-whitespace character is not ':'.
void JsonIn::skip_pair_separator()
{
    char c;
    eat_whitespace();
    stream->get(c);
    if (c == ':') {
        return;
    }
    std::stringstream msg;
    msg << line_number(-1) << ": expected pair separator ':', not '" << c << "'";
    throw msg.str();
}
/* Update the vertical scrollbar of an edit window to reflect the
 * current line and a new total line count `numlin`.
 * `window` points at an X Window id; we walk up two levels with
 * get_window_type() and take the second level's private data as the
 * EDITINFO.  NOTE(review): assumes the grandparent lookup always
 * yields the editor's EDITINFO -- confirm against the window
 * hierarchy set up elsewhere. */
void edit_set_number_of_lines(void *window, int numlin)
{
    EDITINFO *einf;
    void *pdata;
    Window pwin;

    (void) get_window_type(*(Window*) window, &pwin, &pdata);
    (void) get_window_type(pwin, &pwin, &pdata);
    einf = (EDITINFO*) pdata;
    scrollbar_set(einf->scrollver, line_number(einf->info), numlin);
}
bool JsonIn::skip_null() { char text[5]; eat_whitespace(); stream->get(text, 5); if (strcmp(text, "null") != 0) { std::stringstream err; err << line_number(-4) << ": expected \"null\", but found \"" << text << "\""; throw err.str(); } return skip_separator(); }
void JsonIn::skip_true() { char text[5]; eat_whitespace(); stream->get(text, 5); if (strcmp(text, "true") != 0) { std::stringstream err; err << line_number(-4) << ": expected \"true\", but found \"" << text << "\""; throw err.str(); } skip_separator(); }
bool JsonIn::skip_false() { char text[6]; eat_whitespace(); stream->get(text, 6); if (strcmp(text, "false") != 0) { std::stringstream err; err << line_number(-5) << ": expected \"false\", but found \"" << text << "\""; throw err.str(); } return skip_separator(); }
// Consume whitespace up to and including the next newline, returning
// true once a newline is consumed (and incrementing lineNumber).
// Fails with PARSE_ERROR if a non-whitespace character is found first,
// or if the file ends before a newline.
bool FileTokenizer::get_newline( MsqError& err )
{
  // The tokenizer may already have consumed the newline and cached it
  // in lastChar; account for it here.
  if (lastChar == '\n')
  {
    lastChar = ' ';
    ++lineNumber;
    return true;
  }

  // Loop until either we a) find a newline, b) find a non-whitespace
  // character or c) reach the end of the file.
  for (;;)
  {
    // If the buffer is empty, read more.
    if (nextToken == bufferEnd)
    {
      size_t count = fread( buffer, 1, sizeof(buffer), filePtr );
      if (!count)
      {
        if (eof())
          MSQ_SETERR(err)( "File truncated.", MsqError::PARSE_ERROR );
        else
          MSQ_SETERR(err)( MsqError::IO_ERROR );
        return false;
      }

      nextToken = buffer;
      bufferEnd = buffer + count;
    }

    // If the current character is not a space, then we've failed.
    if (!isspace(*nextToken))
    {
      MSQ_SETERR(err)( MsqError::PARSE_ERROR,
                       "Expected newline at line %d.", line_number() );
      return false;
    }

    // If the current space character is a newline,
    // increment the line number count.
    if (*nextToken == '\n')
    {
      ++lineNumber;
      ++nextToken;
      lastChar = ' ';
      return true;
    }
    ++nextToken;
  }

  // should never reach this
  return false;
}
// Parse and return a JSON boolean ("true"/"false").  On failure the
// stream position is restored to where the value started before
// throwing a std::string error.
bool JsonIn::get_bool()
{
    char ch;
    char text[5];
    int pos = tell();   // saved so we can rewind on error
    std::stringstream err;
    eat_whitespace();
    stream->get(ch);
    if (ch == 't') {
        stream->get(text, 4);   // read "rue" (3 chars + NUL)
        if (strcmp(text, "rue") == 0) {
            skip_separator();
            return true;
        } else {
            err << line_number(-4) << ": ";
            err << "not a boolean. expected \"true\", but got \"";
            err << ch << text << "\"";
            seek(pos);
            throw err.str();
        }
    } else if (ch == 'f') {
        stream->get(text, 5);   // read "alse" (4 chars + NUL)
        if (strcmp(text, "alse") == 0) {
            skip_separator();
            return false;
        } else {
            err << line_number(-5) << ": ";
            err << "not a boolean. expected \"false\", but got \"";
            err << ch << text << "\"";
            seek(pos);
            throw err.str();
        }
    }
    // first character was neither 't' nor 'f'
    err << line_number(-1) << ": ";
    err << "not a boolean value! expected 't' or 'f' but got '" << ch << "'";
    seek(pos);
    throw err.str();
}
void JsonIn::start_array() { eat_whitespace(); if (peek() == '[') { stream->get(); return; } else { // expecting an array, so this is an error std::stringstream err; err << line_number() << ": "; err << "tried to start array, but found '"; err << peek() << "', not '['"; throw err.str(); } }
void JsonIn::start_object() { eat_whitespace(); if (peek() == '{') { stream->get(); return; } else { // expecting an object, so fail loudly std::stringstream err; err << line_number() << ": "; err << "tried to start object, but found '"; err << peek() << "', not '{'"; throw err.str(); } }
// Read the next token as a single-precision float via
// get_double_internal().  Fails with PARSE_ERROR if the value does
// not survive the double -> float -> double round trip (i.e. it
// overflows or loses precision at float range).
bool FileTokenizer::get_float_internal( float& result, MsqError& err )
{
  double d;
  get_double_internal( d, err );
  if (MSQ_CHKERR(err))
    return false;

  result = (float)d;
  // If the narrowing conversion changed the value, report it.
  // (BUGFIX: error message previously read "Numberic overflow".)
  if (d != (double)result)
  {
    MSQ_SETERR(err)( MsqError::PARSE_ERROR,
                     "Numeric overflow at line %d.", line_number() );
    return false;
  }

  return true;
}
// Build a human-readable report of the form
// "<filename> (line <n>): <message>".
string error_handler::getErrorReport()
{
    bignum line_number(line);
    string report = filename;
    report += " (line ";
    report += line_number.getNumberString(true, false, 0);
    report += "): ";
    report += message;
    return report;
}
// Read the next token as an int via get_long_int_internal().  Fails
// with PARSE_ERROR if the value does not survive the long -> int ->
// long round trip (i.e. it overflows int range).
bool FileTokenizer::get_integer_internal( int& result, MsqError& err )
{
  long i;
  get_long_int_internal( i, err );
  if (MSQ_CHKERR(err))
    return false;

  result = (int)i;
  // If the narrowing conversion changed the value, report it.
  // (BUGFIX: error message previously read "Numberic overflow".)
  if (i != (long)result)
  {
    MSQ_SETERR(err)( MsqError::PARSE_ERROR,
                     "Numeric overflow at line %d.", line_number() );
    return false;
  }

  return true;
}
bool FileTokenizer::match_token( const char* str, MsqError& err ) { // Get a token const char *token = get_string( err ); if (MSQ_CHKERR(err)) return false; // Check if it matches if (0 == strcmp( token, str )) return true; // Construct error message MSQ_SETERR(err)( MsqError::PARSE_ERROR, "Syntax error at line %d: expected \"%s\", got \"%s\"", line_number(), str, token ); return false; } // namespace Mesquite
bool FileTokenizer::get_boolean_internal( bool& result, MsqError& err ) { // Get a token const char *token = get_string( err ); if (MSQ_CHKERR(err)) return false; if (token[1] || (token[0] != '0' && token[0] != '1')) { MSQ_SETERR(err)( MsqError::PARSE_ERROR, "Syntax error at line %d: expected 0 or 1, got \"%s\"", line_number(), token ); return false; } result = token[0] == '1'; return true; }
// Skip one complete JSON string (including the closing quote) and any
// trailing separator; returns whether a separator followed.  A
// backslash escapes the character after it, including '"'.
bool JsonIn::skip_string()
{
    char c;
    eat_whitespace();
    stream->get(c);
    if (c != '"') {
        std::stringstream msg;
        msg << line_number(-1) << ": expecting string but found '" << c << "'";
        throw msg.str();
    }
    while (stream->good()) {
        stream->get(c);
        if (c == '\\') {
            stream->get(c);   // consume the escaped character unexamined
        } else if (c == '"') {
            break;            // unescaped quote ends the string
        }
    }
    return skip_separator();
}
// Read the next token and parse it as a long integer.  strtol is
// called with base 0, so "0x…" hex and "0…" octal forms are accepted.
// On failure sets a PARSE_ERROR on `err` and returns false.
bool FileTokenizer::get_long_int_internal( long& result, MsqError& err )
{
  // Get a token
  const char *token_end, *token = get_string( err );
  if (MSQ_CHKERR(err))
    return false;

  // Parse token as long
  result = strtol( token, (char**)&token_end, 0 );

  // If the one past the last char read by strtol is
  // not the NULL character terminating the string,
  // then parse failed.
  if (*token_end)
  {
    MSQ_SETERR(err)( MsqError::PARSE_ERROR,
                     "Syntax error at line %d: expected integer, got \"%s\"",
                     line_number(), token );
    return false;
  }

  return true;
}
bool JsonIn::skip_object() { char ch; bool lastsep = false; int brackets = 1; eat_whitespace(); int startpos = tell(); stream->get(ch); if (ch != '{') { std::stringstream err; err << line_number(-1) << ": expected object but found '" << ch << "'"; throw err.str(); } while (brackets && stream->good()) { stream->get(ch); // ignore everything inside strings if (ch == '"') { stream->unget(); lastsep = skip_string(); // otherwise count opening and closing brackets until they all match } else if (ch == '{') { brackets += 1; lastsep = false; } else if (ch == '}') { brackets -= 1; if ( strict && lastsep ) { std::stringstream err; std::string txt; int errpos = tell(); err << line_number(-1) << ": trailing comma: "; stream->seekg(startpos); stream->read(&txt[0],errpos-startpos); err << txt; throw err.str(); } lastsep = false; } else if (!is_whitespace(ch)) { lastsep = false; } } if (brackets != 0) { // something messed up! std::stringstream err; if (stream->fail()) { throw (std::string)"stream failure while reading object."; } else if (stream->eof()) { stream->clear(); seek(startpos); err << line_number() << ": "; err << "couldn't find end of object, reached EOF with "; err << brackets << " bracket(s) left."; throw err.str(); } else { // this should be impossible err << line_number() << " "; seek(startpos); err << "(" << line_number() << "): "; err << "object brackets didn't match?"; err << " " << brackets << " bracket(s) left."; throw err.str(); } } return skip_separator(); }
// Parse a PepNovo de-novo sequencing result file.
// - `result_filename`: path to the PepNovo output text file.
// - `peptide_identifications`: filled with one PeptideIdentification
//   per spectrum block (">> ..." header) whose hits pass the threshold.
// - `protein_identification`: receives search-engine metadata.
// - `score_threshold`: hits with RnkScr below this are dropped.
// - `index_to_precursor`: optional map from spectrum index to
//   (RT, m/z); when empty, RT/m/z are reconstructed from the title.
// - `pnovo_modkey_to_mod_id`: maps PepNovo modification keys to
//   ModificationsDB ids for rewriting sequences.
// Throws Exception::FileNotFound / Exception::ParseError on error.
void PepNovoOutfile::load(
  const std::string & result_filename,
  vector<PeptideIdentification> & peptide_identifications,
  ProteinIdentification & protein_identification,
  const double & score_threshold,
  const IndexPosMappingType & index_to_precursor,
  const map<String, String> & pnovo_modkey_to_mod_id)
{
  // generally used variables
  StringList substrings;
  map<String, Int> columns;
  PeptideHit peptide_hit;

  String line, score_type = "PepNovo", version = "unknown", identifier,
         filename, sequence, sequence_with_mods;

  DateTime datetime = DateTime::now();  // there's no date given from PepNovo
  protein_identification.setDateTime(datetime);

  peptide_identifications.clear();
  PeptideIdentification peptide_identification;
  protein_identification = ProteinIdentification();

  // open the result
  ifstream result_file(result_filename.c_str());
  if (!result_file)
  {
    throw Exception::FileNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
                                  result_filename);
  }

  Size line_number(0); // used to report in which line an error occurred
  Size id_count(0);    // number of IDs seen (not necessarily the ones finally returned)

  getSearchEngineAndVersion(result_filename, protein_identification);
  // if information could not be retrieved from the outfile use defaults
  if (protein_identification.getSearchEngineVersion().empty())
  {
    protein_identification.setSearchEngine("PepNovo");
    protein_identification.setSearchEngineVersion(version);
  }
  identifier = protein_identification.getSearchEngine() + "_"
               + datetime.getDate();
  protein_identification.setIdentifier(identifier);

  // Build a map from each PepNovo modification key to the text it
  // should be replaced with in reported sequences.
  map<String, String> mod_mask_map;
  const vector<String> & mods =
    protein_identification.getSearchParameters().variable_modifications;
  for (vector<String>::const_iterator mod_it = mods.begin();
       mod_it != mods.end(); ++mod_it)
  {
    if (mod_it->empty())
      continue;
    //cout<<*mod_it<<endl;
    if (pnovo_modkey_to_mod_id.find(*mod_it) != pnovo_modkey_to_mod_id.end())
    {
      // Known key: render as "(id)" for terminal mods ('^'/'$' prefix)
      // or "X(id)" for residue mods.
      //cout<<keys_to_id.find(*mod_it)->second<<endl;
      ResidueModification tmp_mod =
        ModificationsDB::getInstance()->getModification(
          pnovo_modkey_to_mod_id.find(*mod_it)->second);
      if (mod_it->prefix(1) == "^" || mod_it->prefix(1) == "$")
      {
        mod_mask_map[*mod_it] = "(" + tmp_mod.getId() + ")";
      }
      else
      {
        mod_mask_map[*mod_it] = String(tmp_mod.getOrigin()) + "("
                                + tmp_mod.getId() + ")";
      }
    }
    else
    {
      // Unknown key: keep it visible in bracket notation.
      if (mod_it->prefix(1) != "^" && mod_it->prefix(1) != "$")
      {
        mod_mask_map[*mod_it] = mod_it->prefix(1) + "[" + mod_it->substr(1)
                                + "]";
        //cout<<mod_mask_map[*mod_it]<<endl;
      }
      else
      {
        mod_mask_map[*mod_it] = "[" + *mod_it + "]";
        //cout<<mod_mask_map[*mod_it]<<endl;
      }
    }
  }

  Size index;
  while (getline(result_file, line))
  {
    if (!line.empty() && (line[line.length() - 1] < 33))
      line.resize(line.length() - 1);  // remove weird EOL character
    line.trim();
    ++line_number;
    if (line.hasPrefix(">> ")) // >> 1 /home/shared/pepnovo/4611_raw_ms2_picked.mzXML.1001.2.dta
    {
      ++id_count;
      // Flush the identification accumulated for the previous spectrum.
      if (!peptide_identification.empty()
          && !peptide_identification.getHits().empty())
      {
        peptide_identifications.push_back(peptide_identification);
      }
      line.split(' ', substrings);
      //String index = File::basename(line.substr(line.find(' ', strlen(">> ")) + 1));
      if (substrings.size() < 3)
      {
        throw Exception::ParseError(__FILE__, __LINE__,
          OPENMS_PRETTY_FUNCTION,
          "Not enough columns (spectrum Id) in file in line "
          + String(line_number) + String(" (should be 2 or more)!"),
          result_filename);
      }
      try
      {
        index = substrings[2].trim().toInt();
      }
      catch (...)
      {
        throw Exception::ParseError(__FILE__, __LINE__,
          OPENMS_PRETTY_FUNCTION,
          "Expected an index number in line " + String(line_number)
          + String(" at position 2 (line was: '" + line + "')!"),
          result_filename);
      }
      //cout<<"INDEX: "<<index<<endl;
      peptide_identification = PeptideIdentification();
      bool success = false;
      // Preferred source of RT/m-z: the caller-supplied index table.
      if (index_to_precursor.size() > 0)
      {
        if (index_to_precursor.find(index) != index_to_precursor.end())
        {
          peptide_identification.setRT(index_to_precursor.find(index)->second.first);
          peptide_identification.setMZ(index_to_precursor.find(index)->second.second);
          success = true;
        }
        else
          throw Exception::ParseError(__FILE__, __LINE__,
            OPENMS_PRETTY_FUNCTION,
            "Index '" + String(index)
            + String("' in line '" + line
                     + "' not found in index table (line was: '" + line
                     + "')!"),
            result_filename);
      }

      if (!success)
      {
        // try to reconstruct from title entry (usually sensible when MGF
        // is supplied to PepNovo)
        try
        {
          if (substrings.size() >= 4)
          {
            StringList parts = ListUtils::create<String>(substrings[3], '_');
            if (parts.size() >= 2)
            {
              peptide_identification.setRT(parts[1].toDouble());
              peptide_identification.setMZ(parts[0].toDouble());
              success = true;
            }
          }
        }
        catch (...)
        {
        }
        if (!success)
          throw Exception::ParseError(__FILE__, __LINE__,
            OPENMS_PRETTY_FUNCTION,
            "Precursor could not be reconstructed from title '"
            + substrings[3]
            + String("' in line '" + line + "' (line was: '" + line
                     + "')!"),
            result_filename);
      }
      peptide_identification.setSignificanceThreshold(score_threshold);
      peptide_identification.setScoreType(score_type);
      peptide_identification.setIdentifier(identifier);
    }
    else if (line.hasPrefix("#Index")) // #Index Prob Score N-mass C-Mass [M+H] Charge Sequence
    {
      if (columns.empty()) // map the column names to their column number
      {
        line.split('\t', substrings);
        for (vector<String>::const_iterator s_i = substrings.begin();
             s_i != substrings.end(); ++s_i)
        {
          if ((*s_i) == "#Index")
            columns["Index"] = s_i - substrings.begin();
          else if ((*s_i) == "RnkScr")
            columns["RnkScr"] = s_i - substrings.begin();
          else if ((*s_i) == "PnvScr")
            columns["PnvScr"] = s_i - substrings.begin();
          else if ((*s_i) == "N-Gap")
            columns["N-Gap"] = s_i - substrings.begin();
          else if ((*s_i) == "C-Gap")
            columns["C-Gap"] = s_i - substrings.begin();
          else if ((*s_i) == "[M+H]")
            columns["[M+H]"] = s_i - substrings.begin();
          else if ((*s_i) == "Charge")
            columns["Charge"] = s_i - substrings.begin();
          else if ((*s_i) == "Sequence")
            columns["Sequence"] = s_i - substrings.begin();
        }

        if (columns.size() != 8)
        {
          result_file.close();
          result_file.clear();
          throw Exception::ParseError(__FILE__, __LINE__,
            OPENMS_PRETTY_FUNCTION,
            "Not enough columns in file in line " + String(line_number)
            + String(" (should be 8)!"), result_filename);
        }
      }
      // Read the hit rows that follow the header, until a blank line.
      while (getline(result_file, line))
      {
        ++line_number;
        if (!line.empty() && (line[line.length() - 1] < 33))
          line.resize(line.length() - 1);
        line.trim();
        if (line.empty())
          break;

        line.split('\t', substrings);
        if (!substrings.empty())
        {
          if (substrings.size() != 8)
          {
            result_file.close();
            result_file.clear();
            throw Exception::ParseError(__FILE__, __LINE__,
              OPENMS_PRETTY_FUNCTION,
              "Not enough columns in file in line " + String(line_number)
              + String(" (should be 8)!"), result_filename);
          }
          if (substrings[columns["RnkScr"]].toFloat() >= score_threshold)
          {
            peptide_hit = PeptideHit();
            peptide_hit.setCharge(substrings[columns["Charge"]].toInt());
            peptide_hit.setRank(substrings[columns["Index"]].toInt() + 1);
            peptide_hit.setScore(substrings[columns["RnkScr"]].toFloat());
            peptide_hit.setMetaValue("PnvScr", substrings[columns["PnvScr"]].toFloat());
            peptide_hit.setMetaValue("N-Gap", substrings[columns["N-Gap"]].toFloat());
            peptide_hit.setMetaValue("C-Gap", substrings[columns["C-Gap"]].toFloat());
            peptide_hit.setMetaValue("MZ", substrings[columns["[M+H]"]].toFloat());
            sequence = substrings[columns["Sequence"]];

            // Rewrite PepNovo modification keys into OpenMS notation.
            // N-terminal mods ('^' prefix) are moved to the front of
            // the sequence first.
            for (map<String, String>::iterator mask_it = mod_mask_map.begin();
                 mask_it != mod_mask_map.end(); ++mask_it)
            {
              if (mask_it->first.hasPrefix("^")
                  && sequence.hasSubstring(mask_it->first))
              {
                sequence.substitute(mask_it->first, "");
                sequence = mask_it->second + sequence;
              }
              //cout<<mask_it->first<<" "<<mask_it->second<<endl;
              sequence.substitute(mask_it->first, mask_it->second);
            }
            peptide_hit.setSequence(AASequence::fromString(sequence));
            peptide_identification.insertHit(peptide_hit);
          }
        }
      }
    }
  }
  // Flush the identification for the final spectrum block.
  if (!peptide_identifications.empty()
      || !peptide_identification.getHits().empty())
  {
    peptide_identifications.push_back(peptide_identification);
  }
  result_file.close();
  result_file.clear();
  LOG_INFO << "Parsed " << id_count << " ids, retained "
           << peptide_identifications.size() << "." << std::endl;
}
// Expand an autotype template into the literal keystroke string to send.
// Backslash escapes substitute entry fields (\g group, \i title, \u user,
// \p password, \l url, \m email, \o / \oNNN notes or one notes line),
// translate control characters (\n \r \t \s), and pass "action verbs"
// (\b \z \d \w \W) through unchanged while recording their offsets in
// `vactionverboffsets` so the UI can process them during key sending.
StringX PWSAuxParse::GetAutoTypeString(const StringX &sx_in_autotype,
                                       const StringX &sx_group,
                                       const StringX &sx_title,
                                       const StringX &sx_user,
                                       const StringX &sx_pwd,
                                       const StringX &sx_notes,
                                       const StringX &sx_url,
                                       const StringX &sx_email,
                                       std::vector<size_t> &vactionverboffsets)
{
  StringX sxtmp(_T(""));
  StringX sxNotes(sx_notes);
  TCHAR curChar;
  StringX sx_autotype(sx_in_autotype);
  StringX::size_type st_index;
  std::vector<StringX> vsxnotes_lines;

  vactionverboffsets.clear();

  // If empty, try the database default
  if (sx_autotype.empty()) {
    sx_autotype = PWSprefs::GetInstance()->
              GetPref(PWSprefs::DefaultAutotypeString);

    // If still empty, take this default
    if (sx_autotype.empty()) {
      // checking for user and password for default settings
      if (!sx_pwd.empty()){
        if (!sx_user.empty())
          sx_autotype = DEFAULT_AUTOTYPE;
        else
          sx_autotype = _T("\\p\\n");
      }
    }
  }

  // No recursive substitution (e.g. \p or \u), although '\t' will be
  // replaced by a tab
  if (!sx_notes.empty()) {
    // Use \n and \r to tokenise this line
    StringX::size_type st_start(0), st_end(0);
    const StringX sxdelim = _T("\r\n");
    StringX sxline;
    while (st_end != StringX::npos) {
      st_end = sxNotes.find_first_of(sxdelim, st_start);
      sxline = (sxNotes.substr(st_start,
                  (st_end == StringX::npos) ? StringX::npos
                                            : st_end - st_start));
      // Expand any literal "\t" in this notes line to a real tab.
      st_index = 0;
      for (;;) {
        st_index = sxline.find(_T("\\t"), st_index);
        if (st_index == StringX::npos)
          break;
        sxline.replace(st_index, 2, _T("\t"));
        st_index += 1;
      }
      vsxnotes_lines.push_back(sxline);
      // If we just hit a "\r\n", move past it.  Or else, it is a "\r"
      // without a following "\n" or a "\n", so just move past one
      // single char
      if (st_end != StringX::npos) {
        st_start = st_end
                   + (sxNotes.compare(st_end, 2, sxdelim) == 0 ? 2 : 1);
        if (st_start >= sxNotes.length())
          break;
      }
    }

    // Now change '\n' to '\r' in the complete notes field
    st_index = 0;
    for (;;) {
      st_index = sxNotes.find(sxdelim, st_index);
      if (st_index == StringX::npos)
        break;
      sxNotes.replace(st_index, 2, _T("\r"));
      st_index += 1;
    }
    // ...and expand literal "\t" in the complete notes field too.
    st_index = 0;
    for (;;) {
      st_index = sxNotes.find(_T("\\t"), st_index);
      if (st_index == StringX::npos)
        break;
      sxNotes.replace(st_index, 2, _T("\t"));
      st_index += 1;
    }
  }

  const size_t N = sx_autotype.length();
  const StringX sxZeroes = _T("000");
  int gNumIts;

  for (size_t n = 0; n < N; n++){
    curChar = sx_autotype[n];
    if (curChar == TCHAR('\\')) {
      n++;
      if (n < N)
        curChar = sx_autotype[n];

      switch (curChar){
        case TCHAR('\\'):
          sxtmp += TCHAR('\\');
          break;
        case TCHAR('n'):
        case TCHAR('r'):
          sxtmp += TCHAR('\r');
          break;
        case TCHAR('t'):
          sxtmp += TCHAR('\t');
          break;
        case TCHAR('s'):
          sxtmp += TCHAR('\v');
          break;
        case TCHAR('g'):
          sxtmp += sx_group;
          break;
        case TCHAR('i'):
          sxtmp += sx_title;
          break;
        case TCHAR('u'):
          sxtmp += sx_user;
          break;
        case TCHAR('p'):
          sxtmp += sx_pwd;
          break;
        case TCHAR('l'):
          sxtmp += sx_url;
          break;
        case TCHAR('m'):
          sxtmp += sx_email;
          break;

        case TCHAR('o'):
        {
          if (n == (N - 1)) {
            // This was the last character - send the lot!
            sxtmp += sxNotes;
            break;
          }
          // Parse up to three digits: the 1-based notes line to send.
          size_t line_number(0);
          gNumIts = 0;
          for (n++; n < N && (gNumIts < 3); ++gNumIts, n++) {
            if (_istdigit(sx_autotype[n])) {
              line_number *= 10;
              line_number += (sx_autotype[n] - TCHAR('0'));
            } else
              break; // for loop
          }
          if (line_number == 0) {
            // Send the lot
            sxtmp += sx_notes;
          } else if (line_number <= vsxnotes_lines.size()) {
            // Send just the requested notes line.  (If the user
            // specifies too big a line number, no branch matches and
            // the \oNNN is silently ignored.)
            sxtmp += vsxnotes_lines[line_number - 1];
          }

          // Backup the extra character that delimited the \oNNN string
          n--;
          break; // case 'o'
        }

        // Action Verbs:
        // These are the only ones processed specially by the UI as they
        // involve actions it performs whilst doing the key sending.
        // Copy them to output string unchanged.
        case TCHAR('b'): // backspace!
        case TCHAR('z'): // Use older method
          vactionverboffsets.push_back(sxtmp.length());
          sxtmp += _T("\\");
          sxtmp += curChar;
          break; // case 'b' & 'z'

        case TCHAR('d'): // Delay
        case TCHAR('w'): // Wait milli-seconds
        case TCHAR('W'): // Wait seconds
        {
          // Need to ensure that the field length is 3, even if it wasn't
          vactionverboffsets.push_back(sxtmp.length());
          sxtmp += _T("\\");
          sxtmp += curChar;
          gNumIts = 0;
          size_t i = n;
          for (i++; i < N && (gNumIts < 3); ++gNumIts, i++) {
            if (!_istdigit(sx_autotype[i]))
              break;
          }
          // Insert sufficient zeroes to ensure field is 3 characters long
          sxtmp += sxZeroes.substr(0, 3 - gNumIts);
          break; // case 'd', 'w' & 'W'
        }

        // Also copy explicit control characters to output string unchanged.
        case TCHAR('a'): // bell (can't hear it during testing!)
        case TCHAR('v'): // vertical tab
        case TCHAR('f'): // form feed
        case TCHAR('e'): // escape
        case TCHAR('x'): // hex digits (\xNN)
        // and any others we have forgotten!
        // '\cC', '\uXXXX', '\OOO', '\<any other character not recognised above>'
        default:
          sxtmp += L'\\';
          sxtmp += curChar;
          break;
      }
    } else
      sxtmp += curChar;
  }
  vsxnotes_lines.clear();
  return sxtmp;
}
void *read_rows(FILE *f, int *nrows, char *fmt, char delimiter, char quote, char comment, char sci, char decimal, int allow_embedded_newline, char *datetime_fmt, int tz_offset, int32_t *usecols, int num_usecols, int skiprows, void *data_array, int *p_error_type, int *p_error_lineno) { void *fb; char *data_ptr; int num_fields, current_num_fields; char **result; int fmt_nfields; field_type *ftypes; int size; int row_count; int j; int *valid_usecols; char word_buffer[WORD_BUFFER_SIZE]; int tok_error_type; *p_error_type = 0; *p_error_lineno = 0; if (datetime_fmt == NULL || strlen(datetime_fmt) == 0) { datetime_fmt = "%Y-%m-%d %H:%M:%S"; } size = (*nrows) * calc_size(fmt, &fmt_nfields); ftypes = enumerate_fields(fmt); /* Must free this when finished. */ if (ftypes == NULL) { /* Out of memory. */ *p_error_type = READ_ERROR_OUT_OF_MEMORY; return NULL; } /* for (k = 0; k < fmt_nfields; ++k) { printf("k = %d typechar = '%c' size = %d\n", k, ftypes[k].typechar, ftypes[k].size); } printf("size = %d\n", size); printf("-----\n"); */ if (data_array == NULL) { /* XXX The case where data_ptr is allocated here is untested. */ data_ptr = malloc(size); } else { data_ptr = data_array; } fb = new_file_buffer(f, -1); if (fb == NULL) { free(ftypes); *p_error_type = ERROR_OUT_OF_MEMORY; return NULL; } /* XXX Check interaction of skiprows with comments. */ while ((skiprows > 0) && ((result = tokenize(fb, word_buffer, WORD_BUFFER_SIZE, delimiter, quote, comment, &num_fields, TRUE, &tok_error_type)) != NULL)) { if (result == NULL) { break; } free(result); --skiprows; } if (skiprows > 0) { /* There were fewer rows in the file than skiprows. */ /* This is not treated as an error. The result should be an empty array. */ *nrows = 0; free(ftypes); del_file_buffer(fb, RESTORE_FINAL); return data_ptr; } /* XXX Assume *nrows > 0! */ /* * Read the first row to get the number of fields in the file. * We'll then use this to pre-validate the values in usecols. 
* (It might be easier to do this in the Python wrapper, but that * would require refactoring the C interface a bit to expose more * to Python.) */ row_count = 0; result = tokenize(fb, word_buffer, WORD_BUFFER_SIZE, delimiter, quote, comment, &num_fields, TRUE, &tok_error_type); if (result == NULL) { *p_error_type = tok_error_type; *p_error_lineno = 1; free(ftypes); del_file_buffer(fb, RESTORE_FINAL); return NULL; } valid_usecols = (int *) malloc(num_usecols * sizeof(int)); if (valid_usecols == NULL) { /* Out of memory. */ *p_error_type = ERROR_OUT_OF_MEMORY; free(result); free(ftypes); del_file_buffer(fb, RESTORE_FINAL); return NULL; } /* * Validate the column indices in usecols, and put the validated * column indices in valid_usecols. */ for (j = 0; j < num_usecols; ++j) { int32_t k; k = usecols[j]; if (k < -num_fields || k >= num_fields) { /* Invalid column index. */ *p_error_type = ERROR_INVALID_COLUMN_INDEX; *p_error_lineno = j; /* Abuse 'lineno' and put the bad column index there. */ free(valid_usecols); free(result); free(ftypes); del_file_buffer(fb, RESTORE_FINAL); return NULL; } if (k < 0) { k += num_fields; } valid_usecols[j] = k; } current_num_fields = num_fields; row_count = 0; do { int j, k; if (current_num_fields != num_fields) { *p_error_type = ERROR_CHANGED_NUMBER_OF_FIELDS; *p_error_lineno = line_number(fb); break; } for (j = 0; j < num_usecols; ++j) { int error; char typ = ftypes[j].typechar; /* k is the column index of the field in the file. */ k = valid_usecols[j]; /* XXX Handle error != 0 in the following cases. 
*/ if (typ == 'b') { int8_t x = (int8_t) str_to_int64(result[k], INT8_MIN, INT8_MAX, &error); *(int8_t *) data_ptr = x; data_ptr += ftypes[j].size; } else if (typ == 'B') { uint8_t x = (uint8_t) str_to_uint64(result[k], UINT8_MAX, &error); *(uint8_t *) data_ptr = x; data_ptr += ftypes[j].size; } else if (typ == 'h') { int16_t x = (int16_t) str_to_int64(result[k], INT16_MIN, INT16_MAX, &error); *(int16_t *) data_ptr = x; data_ptr += ftypes[j].size; } else if (typ == 'H') { uint16_t x = (uint16_t) str_to_uint64(result[k], UINT16_MAX, &error); *(uint16_t *) data_ptr = x; data_ptr += ftypes[j].size; } else if (typ == 'i') { int32_t x = (int32_t) str_to_int64(result[k], INT32_MIN, INT32_MAX, &error); *(int32_t *) data_ptr = x; data_ptr += ftypes[j].size; } else if (typ == 'I') { uint32_t x = (uint32_t) str_to_uint64(result[k], UINT32_MAX, &error); *(uint32_t *) data_ptr = x; data_ptr += ftypes[j].size; } else if (typ == 'q') { int64_t x = (int64_t) str_to_int64(result[k], INT64_MIN, INT64_MAX, &error); *(int64_t *) data_ptr = x; data_ptr += ftypes[j].size; } else if (typ == 'Q') { uint64_t x = (uint64_t) str_to_uint64(result[k], UINT64_MAX, &error); *(uint64_t *) data_ptr = x; data_ptr += ftypes[j].size; } else if (typ == 'f' || typ == 'd') { // Convert to float. double x; if ((strlen(result[k]) == 0) || !to_double(result[k], &x, sci, decimal)) { // XXX Find the canonical platform-independent method to assign nan. x = 0.0 / 0.0; } if (typ == 'f') *(float *) data_ptr = (float) x; else *(double *) data_ptr = x; data_ptr += ftypes[j].size; } else if (typ == 'c' || typ == 'z') { // Convert to complex. double x, y; if ((strlen(result[k]) == 0) || !to_complex(result[k], &x, &y, sci, decimal)) { // XXX Find the canonical platform-independent method to assign nan. 
x = 0.0 / 0.0; y = x; } if (typ == 'c') { *(float *) data_ptr = (float) x; data_ptr += ftypes[j].size / 2; *(float *) data_ptr = (float) y; } else { *(double *) data_ptr = x; data_ptr += ftypes[j].size / 2; *(double *) data_ptr = y; } data_ptr += ftypes[j].size / 2; } else if (typ == 'U') { // Datetime64, microseconds. struct tm tm = {0,0,0,0,0,0,0,0,0}; time_t t; if (strptime(result[k], datetime_fmt, &tm) == NULL) { memset(data_ptr, 0, 8); } else { tm.tm_isdst = -1; t = mktime(&tm); if (t == -1) { memset(data_ptr, 0, 8); } else { *(uint64_t *) data_ptr = (long long) (t - tz_offset) * 1000000L; } } data_ptr += 8; } else { // String strncpy(data_ptr, result[k], ftypes[j].size); data_ptr += ftypes[j].size; } } free(result); ++row_count; } while ((row_count < *nrows) && (result = tokenize(fb, word_buffer, WORD_BUFFER_SIZE, delimiter, quote, comment, ¤t_num_fields, TRUE, &tok_error_type)) != NULL); del_file_buffer(fb, RESTORE_FINAL); *nrows = row_count; free(valid_usecols); return (void *) data_ptr; }
void JsonIn::error(std::string message, int offset) { std::ostringstream err; err << line_number(offset) << ": " << message; // if we can't get more info from the stream don't try if (!stream->good()) { throw err.str(); } // also print surrounding few lines of context, if not too large err << "\n\n"; stream->seekg(offset, std::istream::cur); size_t pos = tell(); rewind(3, 240); size_t startpos = tell(); char buffer[241]; stream->read(&buffer[0], pos-startpos); buffer[pos-startpos] = '\0'; err << buffer; if (!is_whitespace(peek())) { err << peek(); } // display a pointer to the position rewind(1, 240); startpos = tell(); err << '\n'; for (int i=0; i < pos-startpos-1; ++i) { err << ' '; } err << "^\n"; seek(pos); // if that wasn't the end of the line, continue underneath pointer char ch = stream->get(); if (ch == '\r') { if (peek() == '\n') { stream->get(); } } else if (ch == '\n') { // pass } else if (peek() != '\r' && peek() != '\n') { for (int i=0; i < pos-startpos; ++i) { err << ' '; } } // print the next couple lines as well int line_count = 0; for (int i=0; i < 240; ++i) { stream->get(ch); err << ch; if (ch == '\r') { ++line_count; if (peek() == '\n') { err << stream->get(); } } else if (ch == '\n') { ++line_count; } if (line_count > 2) { break; } } throw err.str(); }
// Parse and return a JSON string, decoding the standard escape
// sequences (\" \\ \/ \b \f \n \r \t and \uXXXX).  \uXXXX is decoded
// to UTF-8 via utf32_to_utf8(); NOTE(review): surrogate pairs are not
// combined here -- each \uXXXX is decoded independently.
std::string JsonIn::get_string()
{
    std::string s = "";
    char ch;
    bool backslash = false;
    char unihex[5] = "0000";
    eat_whitespace();
    int startpos = tell();   // saved so EOF errors can point at the start
    // the first character had better be a '"'
    stream->get(ch);
    if (ch != '"') {
        std::stringstream err;
        err << line_number(-1) << ": expecting string but got '" << ch << "'";
        throw err.str();
    }
    // add chars to the string, one at a time, converting:
    // \", \\, \/, \b, \f, \n, \r, \t and \uxxxx according to JSON spec.
    while (stream->good()) {
        stream->get(ch);
        if (ch == '\\') {
            if (backslash) {
                // second backslash of an escaped "\\"
                s += '\\';
                backslash = false;
            } else {
                backslash = true;
                continue;
            }
        } else if (backslash) {
            backslash = false;
            if (ch == '"') {
                s += '"';
            } else if (ch == '/') {
                s += '/';
            } else if (ch == 'b') {
                s += '\b';
            } else if (ch == 'f') {
                s += '\f';
            } else if (ch == 'n') {
                s += '\n';
            } else if (ch == 'r') {
                s += '\r';
            } else if (ch == 't') {
                s += '\t';
            } else if (ch == 'u') {
                // get the next four characters as hexadecimal
                stream->get(unihex, 5);
                // insert the appropriate unicode character in utf8
                // TODO: verify that unihex is in fact 4 hex digits.
                char** endptr = 0;   // null end-pointer: not inspected
                unsigned u = (unsigned)strtoul(unihex, endptr, 16);
                s += utf32_to_utf8(u);
            } else {
                // for anything else, just add the character, i suppose
                s += ch;
            }
        } else if (ch == '"') {
            // end of the string
            skip_separator();
            return s;
        } else {
            s += ch;
        }
    }
    // if we get to here, probably hit a premature EOF?
    if (stream->fail()) {
        throw (std::string)"stream failure while reading string.";
    } else if (stream->eof()) {
        // rewind so line_number() reports where the string began
        stream->clear();
        seek(startpos);
        std::stringstream err;
        err << line_number() << ": ";
        err << "couldn't find end of string, reached EOF.";
        throw err.str();
    }
    throw (std::string)"something went wrong D:";
}
// Thin adapter: resolve a position in the reader's text buffer to its
// line number via the text-based line_number() overload.
static int32_t line_number(Reader *reader, int32_t pos)
{
    return line_number(reader->text, pos);
}