/**
 * Reads a matrix whose cells are listed one per line (the format reported
 * as "OR" in the progress message).
 *
 * Layout consumed, each item taken from getNextNonCommentLine():
 *   - optional vehicle type line (only if matrixHasVehType is set),
 *   - the time period (see readTime()),
 *   - the factor (see readFactor()),
 *   - any number of "<origin> <destination> <amount>" lines.
 *
 * @param lr               reader positioned at the start of the matrix
 * @param scale            additional scaling handed on to readFactor()
 * @param vehType          vehicle type to assign; if empty and the matrix
 *                         carries a type line, the integer read from that
 *                         line is used instead
 * @param matrixHasVehType whether a vehicle type line precedes the period
 * @throws ProcessError    if a cell line has fewer than three fields or its
 *                         amount is not numeric
 */
void ODMatrix::readO(LineReader& lr, double scale, std::string vehType, bool matrixHasVehType) {
    PROGRESS_BEGIN_MESSAGE("Reading matrix '" + lr.getFileName() + "' stored as OR");

    // Optional header: the type line is always parsed as an integer, but it
    // only replaces vehType when the caller did not supply one.
    if (matrixHasVehType) {
        std::string typeLine = getNextNonCommentLine(lr);
        int typeID = TplConvert::_2int(StringUtils::prune(typeLine).c_str());
        if (vehType.empty()) {
            vehType = toString(typeID);
        }
    }

    // Period covered by the matrix.
    std::pair<SUMOTime, SUMOTime> period = readTime(lr);
    SUMOTime periodBegin = period.first;
    SUMOTime periodEnd = period.second;

    // Multiplier applied to every cell (already combined with scale).
    double factor = readFactor(lr, scale);

    // Remaining lines: one origin/destination/amount triple each.
    while (lr.hasMore()) {
        std::string row = getNextNonCommentLine(lr);
        if (row.empty()) {
            continue;
        }
        StringTokenizer fields(row, StringTokenizer::WHITECHARS);
        if (fields.size() == 0) {
            continue;
        }
        try {
            std::string origin = fields.next();
            std::string destination = fields.next();
            double amount = TplConvert::_2double(fields.next().c_str()) * factor;
            // cells that scale to exactly zero are not stored
            if (amount != 0) {
                add(amount, periodBegin, periodEnd, origin, destination, vehType);
            }
        } catch (OutOfBoundsException&) {
            throw ProcessError("Missing at least one information in line '" + row + "'.");
        } catch (NumberFormatException&) {
            throw ProcessError("Not numeric vehicle number in line '" + row + "'.");
        }
    }

    PROGRESS_DONE_MESSAGE();
}
/**
 * Creates a language model from the text file named by "fileName".
 *
 * File layout, as consumed below (each record is fetched through
 * getNextNonCommentLine()):
 *   1. an integer flag; non-zero means the model is stemmed,
 *   2. "<termSlotsUsed> <corpusSize> <documentCount>",
 *   3. termSlotsUsed lines of "<term> <stemmed> <termFrequency> <documentCount>".
 *
 * If "fileName" is NULL, empty, or cannot be opened, an error is logged and
 * the model is initialized empty with corpusSize = documentCount = 1.0 and
 * stemmed = false.
 *
 * NOTE(review): the "%s" conversions for term/stemmed carry no field width;
 * the sizes of those buffers are not visible here -- confirm they can hold
 * any token that fits into a 1023-character line.
 **/
LanguageModel::LanguageModel(char *fileName) {
	FILE *f = NULL;
	if ((fileName != NULL) && (fileName[0] != 0))
		f = fopen(fileName, "r");

	if (f == NULL) {
		// Passing a NULL pointer for "%s" is undefined behaviour, so substitute
		// a printable placeholder when no file name was given at all.
		snprintf(errorMessage, sizeof(errorMessage), "Unable to open file: %s",
				(fileName != NULL ? fileName : "(null)"));
		log(LOG_ERROR, LOG_ID, errorMessage);
		initialize();
		corpusSize = 1.0;
		documentCount = 1.0;
		stemmed = false;
	}
	else {
		initialize();

		// Start from defined values so that a failed read/scan never makes us
		// look at indeterminate memory.
		int s = 0;
		char line[1024] = { 0 };

		// header line 1: stemming flag
		getNextNonCommentLine(f, line, sizeof(line));
		sscanf(line, "%d", &s);
		stemmed = (s != 0);

		// header line 2: number of terms and collection statistics
		getNextNonCommentLine(f, line, sizeof(line));
		sscanf(line, "%d%lf%lf", &termSlotsUsed, &corpusSize, &documentCount);

		// leave some headroom for terms added later, but never go below the
		// initial table size
		termSlotsAllocated = termSlotsUsed + 32;
		if (termSlotsAllocated < INITIAL_TERM_SLOTS)
			termSlotsAllocated = INITIAL_TERM_SLOTS;
		terms = typed_realloc(LanguageModelTermDescriptor, terms, termSlotsAllocated);

		// one descriptor per line
		for (int i = 0; i < termSlotsUsed; i++) {
			getNextNonCommentLine(f, line, sizeof(line));
			long long tf = 0, df = 0;
			int status = sscanf(line, "%s%s%lld%lld", terms[i].term, terms[i].stemmed, &tf, &df);
			// validate the record before its values are stored
			assert(status == 4);
			terms[i].termFrequency = tf;
			terms[i].documentCount = df;
			assert(strlen(terms[i].term) > 0);
			assert(strlen(terms[i].stemmed) > 0);
		}

		resizeHashTable(termSlotsAllocated);
		fclose(f);
	}
} // end of LanguageModel(char*)
/**
 * Reads the matrix factor from the next line delivered by
 * getNextNonCommentLine() and combines it with the given scale.
 *
 * @param lr    reader positioned at the factor line
 * @param scale value the parsed factor is multiplied with
 * @return parsed factor * scale
 * @throws ProcessError if the line cannot be converted to a number
 */
double ODMatrix::readFactor(LineReader& lr, double scale) {
    std::string factorLine = getNextNonCommentLine(lr);
    try {
        return TplConvert::_2double(factorLine.c_str()) * scale;
    } catch (NumberFormatException&) {
        throw ProcessError("Broken factor: '" + factorLine + "'.");
    }
}
/**
 * Reads the period covered by a matrix: two whitespace-separated time
 * values on the next line delivered by getNextNonCommentLine(), each
 * converted with parseSingleTime().
 *
 * @param lr reader positioned at the period line
 * @return (begin, end) of the period
 * @throws ProcessError if a value is missing or not numeric, or if begin
 *                      is not strictly smaller than end
 */
std::pair<SUMOTime, SUMOTime> ODMatrix::readTime(LineReader& lr) {
    std::string periodLine = getNextNonCommentLine(lr);
    try {
        StringTokenizer tokens(periodLine, StringTokenizer::WHITECHARS);
        SUMOTime from = parseSingleTime(tokens.next());
        SUMOTime to = parseSingleTime(tokens.next());
        // an empty or reversed period is rejected
        if (from >= to) {
            throw ProcessError("Begin time is larger than end time.");
        }
        return std::pair<SUMOTime, SUMOTime>(from, to);
    } catch (NumberFormatException&) {
        // a token was not a valid time
        throw ProcessError("Broken period definition '" + periodLine + "'.");
    } catch (OutOfBoundsException&) {
        // fewer than two tokens on the line
        throw ProcessError("Broken period definition '" + periodLine + "'.");
    }
}
/*
 *  searchForProtoSignature()
 *
 *      Input:  sa (output from cpp, by line)
 *              begin (index of the line at which to start searching)
 *              &start (<return> index of first line of the function definition)
 *              &stop (<return> index of the line on which the proto is completed)
 *              &charindex (<return> char index of the closing ')' in line 'stop')
 *              &found (<return> 1 if a valid signature is found; 0 otherwise)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) found == 0 on return means that no further function
 *          definitions exist in the file.  The caller must test this
 *          and leave its loop over the cpp output.
 *      (2) This implements plan 3 (see above).  Leading comment and
 *          blank lines are skipped, and no keywords are examined.
 *          Instead we locate the first occurrence of each of four
 *          tokens -- left parenthesis (lp), right parenthesis (rp),
 *          left brace (lb) and semicolon (sc) -- and reason about
 *          their relative positions.
 *      (3) A function definition has a signature of the form
 *               ....  '('  ....  ')'  '{'
 *          with lp and rp both ahead of lb and nothing but whitespace
 *          between rp and lb.  The '....' token runs contain no braces.
 *      (4) On success, found = 1, 'start' is the first line of the
 *          definition, and 'charindex' locates, within line 'stop',
 *          the ')' that closes the argument list.
 */
static l_int32
searchForProtoSignature(SARRAY   *sa,
                        l_int32   begin,
                        l_int32  *pstart,
                        l_int32  *pstop,
                        l_int32  *pcharindex,
                        l_int32  *pfound)
{
l_int32  cur, closeline, closeindex, semiline;
    /* For each token: '*_line' is the line offset relative to 'cur',
     * '*_char' the char index within that line, and '*_total' the
     * offset used below to order the tokens against each other. */
l_int32  lp_line, rp_line, lb_line, sc_line;
l_int32  lp_char, rp_char, lb_char, sc_char;
l_int32  lp_total, rp_total, lb_total, sc_total;

    PROCNAME("searchForProtoSignature");

    if (!sa)
        return ERROR_INT("sa not defined", procName, 1);
    if (!pstart)
        return ERROR_INT("&start not defined", procName, 1);
    if (!pstop)
        return ERROR_INT("&stop not defined", procName, 1);
    if (!pcharindex)
        return ERROR_INT("&charindex not defined", procName, 1);
    if (!pfound)
        return ERROR_INT("&found not defined", procName, 1);

    *pfound = FALSE;

    for (;;) {

            /* Advance past a run of '#' comment lines; restart the
             * scan from the new position if anything was skipped */
        getNextNonCommentLine(sa, begin, &cur);
        if (cur == -1)
            return 0;
        if (cur != begin) {
            begin = cur;
            continue;
        }

            /* Advance past a run of blank lines */
        getNextNonBlankLine(sa, begin, &cur);
        if (cur == -1)
            return 0;
        if (cur != begin) {
            begin = cur;
            continue;
        }

            /* Advance past a run of lines that start with '//' */
        getNextNonDoubleSlashLine(sa, begin, &cur);
        if (cur == -1)
            return 0;
        if (cur != begin) {
            begin = cur;
            continue;
        }

            /* Locate the lp, its matching rp, the first lb and the
             * first semicolon.  Without a lp there is nothing left
             * to find. */
        getOffsetForCharacter(sa, cur, '(', &lp_line, &lp_char, &lp_total);
        if (lp_line == -1)
            return 0;
        getOffsetForMatchingRP(sa, cur, lp_line, lp_char, lp_total,
                               &rp_line, &rp_char, &rp_total);
        getOffsetForCharacter(sa, cur, '{', &lb_line, &lb_char, &lb_total);
        getOffsetForCharacter(sa, cur, ';', &sc_line, &sc_char, &sc_total);

            /* A lp exists; give up unless both a matching rp and
             * a lb were found as well */
        if (rp_line == -1 || lb_line == -1)
            return 0;

            /* A lb that comes ahead of the lp does not open a function
             * body: jump over the braced region and the semicolon
             * that follows it */
        if (lb_total < lp_total) {
            skipToMatchingBrace(sa, cur + lb_line, lb_char,
                                &closeline, &closeindex);
            skipToSemicolon(sa, closeline, closeindex, &semiline);
            begin = semiline + 1;
            continue;
        }

            /* A semicolon ahead of either the lb or the lp ends some
             * other statement first: resume after that semicolon */
        if ((sc_line != -1) &&
            (sc_total < lb_total || sc_total < lp_total)) {
            skipToSemicolon(sa, cur, 0, &semiline);
            begin = semiline + 1;
            continue;
        }

            /* Accept this as a function definition.  Whether only
             * whitespace separates the rp from the lb is not verified
             * here; the only trouble seen so far is two extern inlines
             * in sys/stat.h, and those are removed later by discarding
             * every prototype that begins with 'extern'. */
        *pstart = cur;
        *pstop = cur + rp_line;
        *pcharindex = rp_char;
        *pfound = TRUE;
        return 0;
    }
}
void ODMatrix::readV(LineReader& lr, double scale, std::string vehType, bool matrixHasVehType) { PROGRESS_BEGIN_MESSAGE("Reading matrix '" + lr.getFileName() + "' stored as VMR"); // parse first defs std::string line; if (matrixHasVehType) { line = getNextNonCommentLine(lr); if (vehType == "") { vehType = StringUtils::prune(line); } } // parse time std::pair<SUMOTime, SUMOTime> times = readTime(lr); SUMOTime begin = times.first; SUMOTime end = times.second; // factor double factor = readFactor(lr, scale); // districts line = getNextNonCommentLine(lr); const int numDistricts = TplConvert::_2int(StringUtils::prune(line).c_str()); // parse district names (normally ints) std::vector<std::string> names; while ((int)names.size() != numDistricts) { line = getNextNonCommentLine(lr); StringTokenizer st2(line, StringTokenizer::WHITECHARS); while (st2.hasNext()) { names.push_back(st2.next()); } } // parse the cells for (std::vector<std::string>::iterator si = names.begin(); si != names.end(); ++si) { std::vector<std::string>::iterator di = names.begin(); // do { line = getNextNonCommentLine(lr); if (line.length() == 0) { continue; } try { StringTokenizer st2(line, StringTokenizer::WHITECHARS); while (st2.hasNext()) { assert(di != names.end()); double vehNumber = TplConvert::_2double(st2.next().c_str()) * factor; if (vehNumber != 0) { add(vehNumber, begin, end, *si, *di, vehType); } if (di == names.end()) { throw ProcessError("More entries than districts found."); } ++di; } } catch (NumberFormatException&) { throw ProcessError("Not numeric vehicle number in line '" + line + "'."); } if (!lr.hasMore()) { break; } } while (di != names.end()); } PROGRESS_DONE_MESSAGE(); }