/*
 * Locate the '>' that terminates an XML marker.
 *
 * Scans str from index p1 up to (not including) the string terminator,
 * tracking whether the scan is currently inside a quoted run (a run opened
 * by a character accepted by IsQuoteChar() and closed by the same character).
 *
 * Returns the index of the first '>' found outside any quoted run, or 0 if
 * the terminator is reached first.
 */
static uint_t FindXMLMarker(const AString& str, uint_t p1)
{
    int openQuote = 0;      // quote character currently open, 0 when none

    for (uint_t pos = p1; str[pos]; pos++) {
        if (openQuote) {
            // inside quotes only the matching quote character is significant
            if (str[pos] == openQuote) openQuote = 0;
        }
        else if (IsQuoteChar(str[pos])) {
            openQuote = str[pos];
        }
        else if (str[pos] == '>') {
            return pos;
        }
    }

    return 0;
}
/*
 * Cut the first word or quoted string out of the buffer at s, in place.
 *
 * Unquoted input: the word runs up to the first ' ' (or the terminator).
 * A terminating ' ' is overwritten with 0. Returns s.
 *
 * Quoted input (first character accepted by IsQuoteChar()): the matching
 * closing quote is overwritten with 0 and a pointer to the character after
 * the opening quote is returned. If no closing quote exists, NULL is
 * returned and len is left untouched.
 *
 * On success len receives the number of characters consumed from s,
 * including the quotes or the overwritten space.
 */
static char *FindQuotedEnd(char *s, int& len)
{
    char *const origin = s;
    char *cursor = s;

    if (!IsQuoteChar(*cursor)) {
        // plain word: stop at the first space or at the end of the buffer
        while (*cursor && *cursor != ' ') ++cursor;
        if (*cursor == ' ') *cursor++ = 0;
        len = static_cast<int>(cursor - origin);
        return origin;
    }

    const char delimiter = *cursor++;
    char *const content = cursor;

    while (*cursor && *cursor != delimiter) ++cursor;
    if (!*cursor) return NULL;      // unterminated quote

    *cursor++ = 0;
    len = static_cast<int>(cursor - origin);
    return content;
}
/** Split given line into a certain number of tokens. Data might be * split across multiple lines. * \param NexpectedCols Number of expected data cols. * \param infile File being read. * \param isSerial true if inside a serial data block. */ int CIFfile::DataBlock::GetColumnData(int NexpectedCols, BufferedLine& infile, bool isSerial) { const char* SEP = " \t"; // Allocate for a line of data columnData_.push_back( Sarray() ); // Tokenize the initial line int nReadCols = 0; int Ncols = infile.TokenizeLine(SEP); int idx = 0; bool insideQuote = false; bool insideSemi = false; while (nReadCols < NexpectedCols) { // Load up the next line if needed if (idx == Ncols) { if (infile.Line() == 0) break; Ncols = infile.TokenizeLine(SEP); idx = 0; } const char *tkn = infile.NextToken(); // Skip blanks if (tkn == 0) continue; idx++; //mprintf("DEBUG: Token %i '%s'\n", idx, tkn); if (isSerial && nReadCols == 0) { // First column for serial data is header.id std::string ID, Header; if (ParseData( std::string(tkn), Header, ID )) return 1; //mprintf(" Ndata=%i Data=%s\n", serialData.Nargs(), serialData[1].c_str()); if (AddHeader( Header )) return 1; columnHeaders_.push_back( ID ); nReadCols++; } else if (insideQuote) { // Append this to the current data column. columnData_.back().back().append( " " + std::string(tkn) ); // Check for an end quote. if (HasEndQuote( columnData_.back().back() )) { // Remove that end quote. columnData_.back().back() = RemoveEndQuote( columnData_.back().back() ); insideQuote = false; nReadCols++; } } else if (insideSemi) { // End if line begins with semicolon, otherwise append. if (tkn[0] == ';') { insideSemi = false; nReadCols++; } else columnData_.back().back().append( std::string(tkn) ); } else { // Add new data column if (idx == 1 && tkn[0] == ';') { // Semicolon indicates more lines to be read. 
columnData_.back().push_back( std::string(tkn+1) ); insideSemi = true; } else { columnData_.back().push_back( std::string(tkn) ); // Check if column began and did not end with a quote. if (IsQuoteChar(columnData_.back().back()[0])) { // Remove leading quote. std::string tmps = columnData_.back().back().substr(1); columnData_.back().back() = tmps; if ( !HasEndQuote((columnData_.back().back())) ) { // Still need to look for the end quote. insideQuote = true; } else { // We have the end quote. Remove it. columnData_.back().back() = RemoveEndQuote( columnData_.back().back() ); } } } if (!insideSemi && !insideQuote) nReadCols++; } } if (nReadCols != NexpectedCols) { mprinterr("Error: Line %i: '%s': Read %i columns, expected %i\n", infile.LineNumber(), dataHeader_.c_str(), nReadCols, NexpectedCols); return 1; } return 0; }
/// \return true if string has end quote. Skip any terminal whitespace. static inline bool HasEndQuote(std::string const& strIn) { std::string::const_reverse_iterator it = strIn.rbegin(); while (it != strIn.rend() && isspace(*it)) --it; if ( IsQuoteChar(*it) ) return true; return false; }
/*
 * Decode XML text in str into a tree of AStructuredNode objects under root.
 *
 * The string is walked with index p1. Each iteration of the main loop
 * expects str[p1] to be '<' and handles one of:
 *   - a close marker "</name>": must match the key of the current node,
 *     which is then popped off the node stack;
 *   - a comment "<!-- ... -->": skipped entirely;
 *   - an open marker "<name attr=value ...>": a new node is created, its
 *     attributes decoded, it is added as a child of the node on top of the
 *     stack, and any text up to the next '<' (trailing whitespace trimmed)
 *     becomes its value. Markers ending in "/>" , "<? ... ?>" markers and
 *     "<! ...>" markers are complete and are popped immediately.
 *
 * Problems are reported through debug() and stop the decode.
 *
 * Returns true only when the whole string was consumed and the current node
 * is root again.
 */
bool DecodeXML(AStructuredNode& root, const AString& str)
{
    AStructuredNode *pNode = &root;
    AKeyValuePair *pAttr;
    ADataList stack;
    uint_t p1 = 0, p2;

    while (IsXMLWhiteSpace(str[p1])) p1++;

    while (!HasQuit() && pNode && (str[p1] == '<')) {
        bool popnode = false;

        p1++;
        if (str[p1] == '/') {
            // close marker: name must match the node currently being built
            p1++;
            if ((pNode != &root) && ((p2 = FindXMLMarker(str, p1)) > p1)) {
                uint_t p3 = p1;

                if (IsSymbolStart(str[p1])) p1++;
                while (IsXMLNameChar(str[p1])) p1++;

                AString name = str.Mid(p3, p1 - p3);
                if (name == pNode->Key) {
                    if (debug_decode) debug("Closing node '%s'\n", pNode->Key.str());
                    popnode = true;
                    p1 = p2 + 1;
                }
                else {
                    debug("Unknown close object '%s' at %u\n", name.str(), p1);
                    break;
                }
            }
            else {
                debug("Unknown close marker at %u\n", p1);
                break;
            }
        }
        else if (str.Mid(p1, 3) == "!--") {
            // comment: resume just past the three characters of "-->".
            // (Pos() is taken to return the index of the first character of
            // the match; stopping at p2 + 2 would leave p1 on the '>' and
            // end the decode, since the main loop requires '<'.)
            if ((p2 = str.Pos("-->", p1 + 3)) > p1) p1 = p2 + 3;
            else {
                debug("Unterminated comment marker at %u\n", p1);
                break;
            }
        }
        else if ((p2 = FindXMLMarker(str, p1)) > p1) {
            // open marker: p2 is the index of the closing '>'
            if (pNode) stack.Push((void *)pNode);

            if ((pNode = new AStructuredNode) != NULL) {
                uint_t p3 = p2;     // end of the attribute area
                bool complete = ((str[p3 - 1] == '/') || ((str[p1] == '?') && (str[p3 - 1] == '?')));

                // exclude the trailing '/' or '?' from the attribute area
                if (complete) p3--;

                complete |= (str[p1] == '!');

                if ((str[p1] == '?') || (str[p1] == '!')) pNode->SetType(str[p1++]);

                while (IsXMLWhiteSpace(str[p1])) p1++;

                // node name
                uint_t p4 = p1;
                if (IsSymbolStart(str[p1])) p1++;
                while (IsXMLNameChar(str[p1])) p1++;

                pNode->Key = str.Mid(p4, p1 - p4).DeHTMLify();
                if (debug_decode) debug("Created new node '%s' (%s)\n", pNode->Key.str(), complete ? "complete" : "open");

                while (IsXMLWhiteSpace(str[p1])) p1++;

                // attributes: name [= value], value optionally quoted
                while (p1 < p3) {
                    p4 = p1;                    // attribute name start
                    if (IsSymbolStart(str[p1])) p1++;
                    while (IsXMLNameChar(str[p1])) p1++;
                    uint_t p5 = p1;             // attribute name end

                    while (IsXMLWhiteSpace(str[p1])) p1++;
                    if (str[p1] == '=') p1++;
                    while (IsXMLWhiteSpace(str[p1])) p1++;

                    int quote = 0;
                    if (IsQuoteChar(str[p1])) quote = str[p1++];

                    uint_t p6 = p1;             // value start
                    while (str[p1] && ((quote && (str[p1] != quote)) || (!quote && !IsXMLWhiteSpace(str[p1])))) p1++;
                    uint_t p7 = p1;             // value end

                    if (quote && (str[p1] == quote)) p1++;
                    while (IsXMLWhiteSpace(str[p1])) p1++;

                    // an empty attribute name ends attribute decoding
                    if ((p5 > p4) && ((pAttr = new AKeyValuePair) != NULL)) {
                        pAttr->Key = str.Mid(p4, p5 - p4).DeHTMLify();
                        pAttr->Value = str.Mid(p6, p7 - p6).DeHTMLify();
                        pNode->AddAttribute(pAttr);
                        if (debug_decode) debug("Added attribute '%s'='%s' to '%s'\n", pAttr->Key.str(), pAttr->Value.str(), pNode->Key.str());
                    }
                    else break;
                }

                AStructuredNode *pParent = (AStructuredNode *)stack.Last();
                if (pParent) pParent->AddChild(pNode);

                // text following the marker, up to the next '<';
                // p3 tracks the end of the last non-whitespace character
                p2++;
                while (IsXMLWhiteSpace(str[p2])) p2++;

                p3 = p1 = p2;
                while (str[p2] && (str[p2] != '<')) {
                    p2++;
                    if (!IsXMLWhiteSpace(str[p2 - 1])) p3 = p2;
                }

                if (p3 > p1) {
                    pNode->Value = str.Mid(p1, p3 - p1).DeHTMLify();
                    if (debug_decode) debug("Set value of '%s' to '%s'\n", pNode->Key.str(), pNode->Value.str());
                }

                p1 = p2;
                popnode = complete;
            }
        }

        if (popnode) {
            if (stack.Count()) {
                pNode = (AStructuredNode *)stack.EndPop();
                if (debug_decode) debug("Back to node '%s'\n", pNode->Key.str());
            }
            else {
                debug("Stack empty at %u\n", p1);
                break;
            }
        }

        while (IsXMLWhiteSpace(str[p1])) p1++;
    }

    if (stack.Count()) debug("Unterminated XML entries at %u\n", p1);
    if (!pNode) debug("Extra XML termination at %u\n", p1);

    return (!str[p1] && (pNode == &root));
}
AString ADVBPatterns::ParsePattern(const AString& _line, PATTERN& pattern, const AString& user) { const ADVBConfig& config = ADVBConfig::Get(); ADataList& list = pattern.list; AString& errors = pattern.errors; AString line = config.ReplaceTerms(user, _line); TERM *term; uint_t i; pattern.exclude = false; pattern.enabled = true; pattern.scorebased = false; pattern.pri = 0; pattern.user = user; pattern.pattern = line; if (pattern.user.Valid()) { pattern.pri = (int)config.GetUserConfigItem(pattern.user, "pri"); } list.DeleteList(); list.SetDestructor(&__DeleteTerm); i = 0; while (IsWhiteSpace(line[i])) i++; if (line[i] == '#') { pattern.enabled = false; i++; } else if (line[i] == ';') { return errors; } while (IsWhiteSpace(line[i])) i++; if (line[i]) { while (line[i] && errors.Empty()) { if (!IsSymbolStart(line[i])) { errors.printf("Character '%c' (at %u) is not a legal field start character (term %u)", line[i], i, list.Count() + 1); break; } uint_t fieldstart = i++; while (IsSymbolChar(line[i])) i++; AString field = line.Mid(fieldstart, i - fieldstart).ToLower(); while (IsWhiteSpace(line[i])) i++; if (field == "exclude") { pattern.exclude = true; continue; } const FIELD *fieldptr = (const FIELD *)ADVBProg::fieldhash.Read(field); if (!fieldptr) { uint_t nfields; const FIELD *fields = ADVBProg::GetFields(nfields); errors.printf("'%s' (at %u) is not a valid search field (term %u), valid search fields are: ", field.str(), fieldstart, list.Count() + 1); for (i = 0; i < nfields; i++) { const FIELD& field = fields[i]; if (i) errors.printf(", "); errors.printf("'%s'", field.name); } break; } uint_t opstart = i; const char *str = line.str() + i; bool isassign = fieldptr->assignable; uint_t j; uint_t opindex = 0, opcode = Operator_EQ; for (j = 0; j < NUMBEROF(operators); j++) { if (((isassign == operators[j].assign) || (isassign && !operators[j].assign)) && (operators[j].fieldtypes & (1U << fieldptr->type)) && (strncmp(str, operators[j].str, operators[j].len) == 0)) { i += 
operators[j].len; opindex = j; opcode = operators[j].opcode; break; } } while (IsWhiteSpace(line[i])) i++; AString value; bool implicitvalue = false; if (j == NUMBEROF(operators)) { if (!line[i] || IsSymbolStart(line[i])) { if (fieldptr->assignable) { switch (fieldptr->type) { case FieldType_string: break; case FieldType_date: value = "now"; break; default: value = "1"; break; } opcode = Operator_Assign; } else opcode = Operator_NE; for (j = 0; j < NUMBEROF(operators); j++) { if ((opcode == operators[j].opcode) && ((isassign == operators[j].assign) || (isassign && !operators[j].assign)) && (operators[j].fieldtypes & (1U << fieldptr->type))) { opindex = j; break; } } implicitvalue = true; } else { errors.printf("Symbols at %u do not represent a legal operator (term %u), legal operators for the field '%s' are: ", opstart, list.Count() + 1, field.str()); bool flag = false; for (j = 0; j < NUMBEROF(operators); j++) { if (((isassign == operators[j].assign) || (isassign && !operators[j].assign)) && (operators[j].fieldtypes & (1U << fieldptr->type))) { if (flag) errors.printf(", "); errors.printf("'%s'", operators[j].str); flag = true; } } break; } } if (!implicitvalue) { char quote = 0; if (IsQuoteChar(line[i])) quote = line[i++]; uint_t valuestart = i; while (line[i] && ((!quote && !IsWhiteSpace(line[i])) || (quote && (line[i] != quote)))) { if (line[i] == '\\') i++; i++; } value = line.Mid(valuestart, i - valuestart).DeEscapify(); if (quote && (line[i] == quote)) i++; while (IsWhiteSpace(line[i])) i++; } bool orflag = false; if ((line.Mid(i, 2).ToLower() == "or") && IsWhiteSpace(line[i + 2])) { orflag = true; i += 2; while (IsWhiteSpace(line[i])) i++; } else if ((line[i] == '|') && IsWhiteSpace(line[i + 1])) { orflag = true; i += 1; while (IsWhiteSpace(line[i])) i++; } if ((term = new TERM) != NULL) { term->data.start = fieldstart; term->data.length = i - fieldstart; term->data.field = fieldptr - ADVBProg::fields; term->data.opcode = opcode; term->data.opindex = 
opindex; term->data.value = value; term->data.orflag = (orflag && !RANGE(opcode, Operator_First_Assignable, Operator_Last_Assignable)); term->field = fieldptr; term->datetype = DateType_none; switch (term->field->type) { case FieldType_string: #if DVBDATVERSION > 1 if (fieldptr->offset == ADVBProg::GetTagsDataOffset()) { value = "|" + value + "|"; } #endif if ((opcode & ~Operator_Inverted) == Operator_Regex) { AString regexerrors; AString rvalue; rvalue = ParseRegex(value, regexerrors); if (regexerrors.Valid()) { errors.printf("Regex error in value '%s' (term %u): %s", value.str(), list.Count() + 1, regexerrors.str()); } value = rvalue; } term->value.str = value.Steal(); break; case FieldType_date: { ADateTime dt; uint_t specified; dt.StrToDate(value, ADateTime::Time_Relative_Local, &specified); //debug("Value '%s', specified %u\n", value.str(), specified); if (!specified) { errors.printf("Failed to parse date '%s' (term %u)", value.str(), list.Count() + 1); break; } else if (((specified == ADateTime::Specified_Day) && (stricmp(term->field->name, "on") == 0)) || (stricmp(term->field->name, "day") == 0)) { //debug("Date from '%s' is '%s' (week day only)\n", value.str(), dt.DateToStr().str()); term->value.u64 = dt.GetWeekDay(); term->datetype = DateType_weekday; } else if (specified & ADateTime::Specified_Day) { specified |= ADateTime::Specified_Date; } if (term->datetype == DateType_none) { specified &= ADateTime::Specified_Date | ADateTime::Specified_Time; if (specified == (ADateTime::Specified_Date | ADateTime::Specified_Time)) { //debug("Date from '%s' is '%s' (full date and time)\n", value.str(), dt.DateToStr().str()); term->value.u64 = (uint64_t)dt; term->datetype = DateType_fulldate; } else if (specified == ADateTime::Specified_Date) { //debug("Date from '%s' is '%s' (date only)\n", value.str(), dt.DateToStr().str()); term->value.u64 = dt.GetDays(); term->datetype = DateType_date; } else if (specified == ADateTime::Specified_Time) { //debug("Date from '%s' is 
'%s' (time only)\n", value.str(), dt.DateToStr().str()); term->value.u64 = dt.GetMS(); term->datetype = DateType_time; } else { errors.printf("Unknown date specifier '%s' (term %u)", value.str(), list.Count() + 1); } } break; } case FieldType_span: case FieldType_age: { ADateTime dt; //ADateTime::EnableDebugStrToDate(true); term->value.u64 = (uint64_t)ADateTime(value, ADateTime::Time_Absolute); //ADateTime::EnableDebugStrToDate(false); break; } case FieldType_uint32_t: case FieldType_external_uint32_t: term->value.u32 = (uint32_t)value; break; case FieldType_sint32_t: case FieldType_external_sint32_t: term->value.s32 = (sint32_t)value; break; case FieldType_uint16_t: term->value.u16 = (uint16_t)value; break; case FieldType_sint16_t: term->value.s16 = (sint16_t)value; break; case FieldType_uint8_t: term->value.u8 = (uint8_t)(uint16_t)value; break; case FieldType_sint8_t: term->value.s8 = (sint8_t)(sint16_t)value; break; case FieldType_flag...FieldType_lastflag: term->value.u8 = ((uint32_t)value != 0); //debug("Setting test of flag to %u\n", (uint_t)term->value.u8); break; case FieldType_prog: { ADVBProg *prog; if ((prog = new ADVBProg) != NULL) { if (prog->Base64Decode(value)) { term->value.prog = prog; } else { errors.printf("Failed to decode base64 programme ('%s') for %s at %u (term %u)", value.str(), field.str(), fieldstart, list.Count() + 1); delete prog; } } else { errors.printf("Failed to allocate memory for base64 programme ('%s') for %s at %u (term %u)", value.str(), field.str(), fieldstart, list.Count() + 1); } break; } default: errors.printf("Unknown field '%s' type (%u) (term %u)", field.str(), (uint_t)term->field->type, list.Count() + 1); break; } //debug("term: field {name '%s', type %u, assignable %u, offset %u} type %u dateflags %u value '%s'\n", term->field->name, (uint_t)term->field->type, (uint_t)term->field->assignable, term->field->offset, (uint_t)term->data.opcode, (uint_t)term->dateflags, term->value.str); if (errors.Empty()) { 
pattern.scorebased |= (term->field->offset == ADVBProg::GetScoreDataOffset()); list.Add((uptr_t)term); } else { __DeleteTerm((uptr_t)term, NULL); break; } } } }