/**
 * Parses a verse-list expression and renders every resulting element as text.
 *
 * The default locale of the system LocaleMgr is switched to @p locale while
 * the list is parsed and is restored before returning.
 *
 * @param keyValue verse-list text handed to VerseKey::ParseVerseList()
 * @param locale   locale name to use while parsing
 * @return the elements separated by single spaces; VerseKey elements are
 *         written as "lower - upper;", all other elements as "text;"
 */
SWBuf parseRangeKey(const char* keyValue, const char* locale) {
	// Keep a private copy of the current name: getDefaultLocaleName() returns a
	// raw pointer, and the setDefaultLocaleName() call below may invalidate it
	// before we get the chance to restore the old locale.
	const SWBuf oldLocale = LocaleMgr::getSystemLocaleMgr()->getDefaultLocaleName();
	LocaleMgr::getSystemLocaleMgr()->setDefaultLocaleName(locale);

	SWBuf ret;

	VerseKey DefaultVSKey;
	DefaultVSKey = "jas3:1";

	ListKey verses = DefaultVSKey.ParseVerseList(keyValue, DefaultVSKey, true);

	for (int i = 0; i < verses.Count(); i++) {
		VerseKey *element = dynamic_cast<VerseKey *>(verses.GetElement(i));

		// entries are separated by one blank
		if (ret.length()) {
			ret.append(" ");
		}

		if (element) {
			ret.appendFormatted(
				"%s - %s;",
				(const char*)element->LowerBound(),
				(const char*)element->UpperBound()
			);
		}
		else {
			ret.appendFormatted("%s;", (const char *)*verses.GetElement(i));
		}
	}

	LocaleMgr::getSystemLocaleMgr()->setDefaultLocaleName(oldLocale.c_str());
	return ret;
}
/**
 * Flattens papyri transcription text into plain running text.
 *
 *  - a single hyphen followed only by blanks/tabs/newlines (with at least one
 *    line break among them) is dropped together with that whitespace, so the
 *    two word halves are joined;
 *  - CR and LF characters are removed, sometimes leaving a blank in their place;
 *  - the bracket characters ( ) [ ] { } < > are removed.
 *
 * @param text   buffer that is rewritten in place
 * @param key    unused
 * @param module unused
 * @return always 0
 */
char PapyriPlain::processText (SWBuf &text, const SWKey *key, const SWModule *module) {
	SWBuf source = text;
	text = "";

	for (const char *cur = source.c_str(); *cur; ++cur) {
		const char ch = *cur;

		// A hyphen that is not the second one of a "--" pair may be a line-end hyphen.
		if (ch == '-' && text.length() > 0 && text[text.length()-1] != '-') {
			bool sawLineBreak = false;
			bool joinWords = false;
			const char *ahead = cur + 1;
			while (*ahead) {
				if (*ahead == 10 || *ahead == 13) {
					sawLineBreak = true;
				}
				if (!strchr(" \t\n", *ahead)) {
					// first character that is not skippable whitespace
					joinWords = sawLineBreak;
					break;
				}
				++ahead;
			}
			if (joinWords) {
				// resume at 'ahead' (the loop increment steps onto it)
				cur = ahead - 1;
				continue;
			}
		}

		// Line breaks never reach the output.
		if (ch == 10 || ch == 13) {
			// note: looks at the second-to-last output character
			const bool precededByText = (text.length() > 1) && (text[text.length()-2] != ' ');
			if (precededByText && cur[1] != ' ') {
				text.append(' ');
			}
			continue;
		}

		// Editorial brackets are stripped.
		if (strchr("()[]{}<>", ch)) {
			continue;
		}

		text.append(ch);
	}
	return 0;
}
/**
 * Removes the who="Jesus" attribute from OSIS <q> start tags when the filter
 * option is off; with the option on the text is left untouched.
 *
 * Every other tag and all character data are copied through unchanged.
 *
 * @param text   buffer that is rewritten in place
 * @param key    unused
 * @param module unused
 * @return always 0
 */
char OSISRedLetterWords::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	if (option) {
		// red-letter markup is wanted: nothing to strip
		return 0;
	}

	static const char whoJesus[] = " who=\"Jesus\"";
	const int whoJesusLen = (int)sizeof(whoJesus) - 1;	// 12 characters

	SWBuf source = text;
	SWBuf token;
	bool insideTag = false;

	text = "";
	for (const char *cur = source.c_str(); *cur; cur++) {
		switch (*cur) {
		case '<':
			insideTag = true;
			token = "";
			break;

		case '>': {
			insideTag = false;

			// only <q ...> tags are searched for the attribute
			const char *attr = 0;
			if ((token[0] == 'q') && (token[1] == ' ')) {
				attr = strstr(token.c_str(), whoJesus);
			}

			text.append('<');
			if (attr) {
				const char *rest = attr + whoJesusLen;
				text.append(token, attr - (token.c_str()));			// part before the attribute
				text.append(rest, token.c_str() + token.length() - rest);	// part after the attribute
			}
			else {
				text.append(token);
			}
			text.append('>');
			break;
		}

		default:
			if (insideTag) {
				token.append(*cur);
			}
			else {
				text.append(*cur);
			}
			break;
		}
	}
	return 0;
}
/**
 * Filters ThML variant readings when option is 0 or 1; any other option value
 * leaves the text untouched.
 *
 * A <div type="variant" class="N"> block whose class matches the selected
 * comparison string is dropped completely (markup and content). For any other
 * variant div only the opening tag and its closing </div> are dropped.
 *
 * @param text   buffer that is rewritten in place
 * @param key    unused
 * @param module unused
 * @return always 0
 */
char ThMLVariants::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	if (option != 0 && option != 1) {
		return 0;
	}

	// Opening tag of the variant that gets hidden; both candidates are 28 characters long.
	const char *hiddenVariant = (option == 0)
		? "div type=\"variant\" class=\"1\""
		: "div type=\"variant\" class=\"2\"";

	SWBuf source = text;
	SWBuf token;
	bool insideTag = false;
	bool hiding = false;
	bool inVariant = false;

	text = "";
	for (const char *cur = source.c_str(); *cur; cur++) {
		const char ch = *cur;

		if (ch == '<') {
			insideTag = true;
			token = "";
			continue;
		}

		if (ch != '>') {
			// ordinary character: part of a tag or visible text
			if (insideTag) {
				token += ch;
			}
			else if (!hiding) {
				text += ch;
			}
			continue;
		}

		// a complete tag is available
		insideTag = false;
		const char *tagText = token.c_str();

		if (!strncmp(tagText, hiddenVariant, 28)) {
			inVariant = true;
			hiding = true;
			continue;
		}
		if (!strncmp(tagText, "div type=\"variant\"", 18)) {
			inVariant = true;
			continue;
		}
		if (!strncmp(tagText, "/div", 4)) {
			hiding = false;
			if (inVariant) {
				inVariant = false;
				continue;
			}
		}
		if (!hiding) {
			text += '<';
			text.append(token);
			text += '>';
		}
	}
	return 0;
}
/**
 * Removes the "gloss" attribute from OSIS <w> tags when the filter option is
 * off; with the option on the text is left untouched.
 *
 * @param text   buffer that is rewritten in place
 * @param key    unused
 * @param module unused
 * @return always 0
 */
char OSISGlosses::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	if (option) {
		return 0;
	}

	const SWBuf source = text;
	SWBuf token;
	bool insideTag = false;

	text = "";
	for (const char *cur = source.c_str(); *cur; ++cur) {
		const char ch = *cur;

		if (ch == '<') {
			insideTag = true;
			token = "";
		}
		else if (ch == '>') {
			insideTag = false;

			if (token.startsWith("w ")) {
				XMLTag wtag(token);
				if (wtag.getAttribute("gloss")) {
					wtag.setAttribute("gloss", 0);
					// re-serialise the tag, then strip the surrounding '<' and '>'
					token = wtag;
					token.trim();
					token << 1;
					token--;
				}
			}

			// the (possibly rewritten) tag always stays in the text
			text.append('<');
			text.append(token);
			text.append('>');
		}
		else if (insideTag) {
			token += ch;
		}
		else {
			text.append(ch);
		}
	}
	return 0;
}
/**
 * Removes the morph="..." attribute from OSIS <w> tags when the filter option
 * is off; with the option on the text is left untouched.
 *
 * The tag text is collected in an SWBuf, so tags of any length are handled
 * and the token is always well defined, even when a '>' shows up before the
 * first '<'.
 *
 * @param text   buffer that is rewritten in place
 * @param key    unused
 * @param module unused
 * @return always 0
 */
char OSISMorph::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	if (option) {
		// morph tags are wanted: nothing to strip
		return 0;
	}

	SWBuf token;
	bool intoken = false;
	const SWBuf orig = text;

	text = "";
	for (const char *from = orig.c_str(); *from; ++from) {
		if (*from == '<') {
			intoken = true;
			token = "";
			continue;
		}

		if (*from == '>') {	// process tokens
			intoken = false;

			const char *tok = token.c_str();
			// tok[1] is safe to read: if tok[0] is 'w' there is at least the terminator behind it
			if ((tok[0] == 'w') && (tok[1] == ' ')) {
				const char *start = strstr(tok + 2, "morph=\"");		// skip the leading "w "
				const char *end = start ? strchr(start + 7, '"') : 0;	// closing quote of the value

				if (start && end) {
					text.append('<');
					text.append(tok, start - tok);	// everything before the morph attribute
					text.append(end + 1);		// everything after the morph attribute
					text.append('>');
					continue;
				}
			}

			// not a <w> tag with a morph attribute: copy it unchanged
			text.append('<');
			text.append(token);
			text.append('>');
			continue;
		}

		if (intoken) {
			token.append(*from);
		}
		else {
			text.append(*from);
		}
	}
	return 0;
}
/**
 * Decodes a URL-encoded string.
 *
 *  - '+' becomes a blank;
 *  - "%XY" with two hexadecimal digits becomes the byte with that value;
 *  - a '%' that is not followed by two hexadecimal digits is kept literally
 *    instead of being dropped.
 *
 * @param encoded the encoded text
 * @return the decoded text
 */
const SWBuf URL::decode(const char *encoded) {
	SWBuf text;
	text = encoded;

	SWBuf decoded;
	const int length = text.length();
	int i = 0;

	while (i < length) {
		const char a = text[i];

		if (a == '+') {
			decoded.append(' ');
		}
		else if ((a == '%') && (i+2 < length)) {
			// The <cctype> functions require a value representable as unsigned char,
			// so convert before calling them (plain char may be negative).
			const int b = toupper((unsigned char)text[i+1]);
			const int c = toupper((unsigned char)text[i+2]);

			if (isxdigit(b) && isxdigit(c)) {	// valid %XY escape
				unsigned int dec = 16 * ((b >= 'A' && b <= 'F') ? (b - 'A' + 10) : (b - '0'));	// high nibble
				dec += (c >= 'A' && c <= 'F') ? (c - 'A' + 10) : (c - '0');			// low nibble

				decoded.append((char)dec);
				i += 2;	// skip the two digits; the increment below skips the '%'
			}
			else {
				// malformed escape: keep the '%' so no input is lost
				decoded.append(a);
			}
		}
		else {
			decoded.append(a);
		}

		i++;
	}

	if (decoded.length()) {
		text = decoded;
	}
	return text;
}
/**
 * URL-encodes a string using the file-level lookup table m.
 *
 * Each input character is replaced by its table entry when that entry is
 * non-empty; otherwise the character is copied as it is.
 *
 * @param urlText the text to encode
 * @return the encoded text
 */
const SWBuf URL::encode(const char *urlText) {
	SWBuf source;
	source = urlText;

	SWBuf encoded;
	const int total = source.length();

	int pos = 0;
	while (pos < total) {
		const char& ch = source[pos];
		if (m[ch].length()) {
			encoded.append(m[ch]);
		}
		else {
			encoded.append(SWBuf(ch));
		}
		++pos;
	}

	source = encoded;
	return source;
}
/**
 * Fetches a remote directory listing and parses it into DirEntry records.
 *
 * The listing is downloaded with getURL() into a buffer, split into lines in
 * place (the first CR/LF of each line break is overwritten with NUL) and each
 * line is handed to ftpparse(). Entries named "." and ".." and lines ftpparse
 * rejects are skipped.
 *
 * @param dirURL URL of the directory to list
 * @return the parsed entries; empty when the download fails
 */
vector<struct DirEntry> RemoteTransport::getDirList(const char *dirURL) {

	SWLog::getSystemLog()->logDebug("RemoteTransport::getDirList(%s)", dirURL);
	vector<struct DirEntry> dirList;

	SWBuf dirBuf;
	if (getURL("", dirURL, &dirBuf)) {
		SWLog::getSystemLog()->logWarning("getDirList: failed to get dir %s\n", dirURL);
		return dirList;
	}

	char *start = dirBuf.getRawData();
	char *const bufEnd = start + dirBuf.size();

	while (start < bufEnd) {
		struct ftpparse item;
		bool looking = true;
		char *end;

		// Find the end of the current line and step over the whole CR/LF run behind it.
		for (end = start; (end < bufEnd) && *end; end++) {
			if (looking) {
				if ((*end == 10) || (*end == 13)) {
					*end = 0;
					looking = false;
				}
			}
			else if ((*end != 10) && (*end != 13))
				break;
		}

		if (end == start) {
			// A NUL byte inside the listing: step over it, otherwise the scan
			// would never advance.
			++start;
			continue;
		}

		// pointer differences are not int; convert explicitly for "%d" and ftpparse()
		const int lineLen = (int)(end - start);

		SWLog::getSystemLog()->logDebug("getDirList: parsing item %s(%d)\n", start, lineLen);
		int status = ftpparse(&item, start, lineLen);

		// in ftpparse.h, there is a warning that name is not necessarily null terminated
		SWBuf name;
		name.append(item.name, item.namelen);
		SWLog::getSystemLog()->logDebug("getDirList: got item %s\n", name.c_str());

		if (status && name != "." && name != "..") {
			struct DirEntry i;
			i.name = name;
			i.size = item.size;
			i.isDirectory = (item.flagtrycwd == 1);
			dirList.push_back(i);
		}
		start = end;
	}

	return dirList;
}
/**
 * Replaces code points found in the converters table by their mapped text
 * when the filter option is off; with the option on the text is untouched.
 *
 * The text is decoded code point by code point. A code point decoded as 0 is
 * replaced by U+FFFD. Code points without a table entry are written back as
 * UTF-8.
 *
 * @param text   UTF-8 buffer that is rewritten in place
 * @param key    unused
 * @param module unused
 * @return always 0
 */
char UTF8GreekAccents::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	if (option) {
		// accents are wanted: nothing to do
		return 0;
	}

	SWBuf source = text;
	const unsigned char *cursor = (unsigned char*)source.c_str();
	text = "";

	while (*cursor) {
		__u32 codePoint = getUniCharFromUTF8(&cursor, true);
		if (!codePoint) {
			codePoint = 0xFFFD;	// bad input: substitute the replacement character
		}

		const map<__u32, SWBuf>::const_iterator hit = converters.find(codePoint);
		if (hit != converters.end()) {
			// the size is known, so pass it along instead of having it measured again
			text.append(hit->second.c_str(), hit->second.size());
			continue;
		}

		getUTF8FromUniChar(codePoint, &text);
	}
	return 0;
}
char GBFOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module) { char token[2048]; //cheesy, we seem to like cheese :) int tokpos = 0; bool intoken = false; bool keepToken = false; // static QuoteStack quoteStack; SWBuf orig = text; SWBuf tmp; SWBuf value; bool suspendTextPassThru = false; bool handled = false; bool newWord = false; bool newText = false; bool lastspace = false; const char *wordStart = text.c_str(); const char *wordEnd = NULL; const char *textStart = NULL; const char *textEnd = NULL; SWBuf textNode = ""; SWBuf buf; text = ""; for (const char* from = orig.c_str(); *from; ++from) { if (*from == '<') { //start of new token detected intoken = true; tokpos = 0; token[0] = 0; token[1] = 0; token[2] = 0; textEnd = from-1; //end of last text node found wordEnd = text.c_str() + text.length();//not good, instead of wordEnd = to! continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. 
:?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); text += VerseKey::convertToOSIS(tmp.c_str(), key); lastspace = false; suspendTextPassThru = false; handled = true; } // Footnote if (!strcmp(token, "RF") || !strncmp(token, "RF ", 3)) { //the GBFFootnotes filter adds the attribute "swordFootnote", we want to catch that, too // pushString(buf, "<reference work=\"Bible.KJV\" reference=\""); text += "<note type=\"x-StudyNote\">"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "Rf")) { text += "</note>"; lastspace = false; handled = true; } // hebrew titles if (!strcmp(token, "TH")) { text += "<title type=\"psalm\">"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "Th")) { text += "</title>"; lastspace = false; handled = true; } // Italics assume transchange if (!strcmp(token, "FI")) { text += "<transChange type=\"added\">"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "Fi")) { text += "</transChange>"; lastspace = false; handled = true; } // less than if (!strcmp(token, "CT")) { text += "<"; newText = true; lastspace = false; handled = true; } // greater than if (!strcmp(token, "CG")) { text += ">"; newText = true; lastspace = false; handled = true; } // Paragraph break. 
For now use empty paragraph element if (!strcmp(token, "CM")) { text += "<milestone type=\"x-p\" />"; newText = true; lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; text += "<figure src=\""; const char *c; for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic for (c++;((*c) && (*c != '"')); c++) { text += *c; } text += "\" />"; lastspace = false; handled = true; } // Strongs numbers else if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs bool divineName = false; value = token+1; // normal strongs number //strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", value.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", value.c_str()); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! 
if (!strcmp(value.c_str(), "H03068")) { //divineName buf = ""; buf.appendFormatted("<divineName><w lemma=\"strong:%s\">", value.c_str()); divineName = true; } else { buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", value.c_str()); } text.insert(wordStart - text.c_str(), buf); if (divineName) { wordStart += 12; text += "</w></divineName>"; } else text += "</w>"; lastspace = false; } handled = true; } // Morphology else if (*token == 'W' && token[1] == 'T') { if (token[2] == 'G' || token[2] == 'H') { // Strongs value = token+2; } else value = token+1; if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", "robinson", value.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", "robinson", value.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", "robinson", value.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->Chapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; }
/**
 * Strips GBF markup, keeping a plain-text rendering of a few tokens.
 *
 *  - WG / WH / WT : " <value> " (Strong's number or tense)
 *  - RF / Rf      : " [" and "] " around footnotes
 *  - CA<n>        : the character with decimal value n
 *  - CG           : '>'
 *  - CL / CN      : newline
 *  - CM           : two newlines
 * All other tokens are dropped; text outside tokens is copied.
 *
 * @param text   buffer that is rewritten in place
 * @param key    unused
 * @param module unused
 * @return always 0
 */
char GBFPlain::processText (SWBuf &text, const SWKey *key, const SWModule *module) {
	// Zero-initialised so that a '>' seen before any '<' finds an empty token
	// instead of indeterminate stack contents.
	char token[2048] = { 0 };
	int tokpos = 0;
	bool intoken = false;
	SWBuf orig = text;
	const char* from = orig.c_str();

	for (text = ""; *from; ++from) {
		if (*from == '<') {
			intoken = true;
			tokpos = 0;
			token[0] = 0;
			token[1] = 0;
			token[2] = 0;
			continue;
		}
		if (*from == '>') {
			intoken = false;
			// process desired tokens
			switch (*token) {
			case 'W':	// Strongs
				switch(token[1]) {
				case 'G':	// Greek
				case 'H':	// Hebrew
				case 'T':	// Tense
					text.append(" <");
					text.append(token+2);
					text.append("> ");
					continue;
				}
				break;
			case 'R':
				switch(token[1]) {
				case 'F':	// footnote begin
					text.append(" [");
					continue;
				case 'f':	// footnote end
					text.append("] ");
					continue;
				}
				break;
			case 'C':
				switch(token[1]) {
				case 'A':	// ASCII value
					text.append((char)atoi(&token[2]));
					continue;
				case 'G':
					text.append('>');
					continue;
				case 'L':	// treated as a line break because of a bug in WEB
				case 'N':	// new line
					text.append('\n');
					continue;
				case 'M':	// new paragraph
					text.append("\n\n");
					continue;
				}
				break;
			}
			// every other token is dropped
			continue;
		}
		if (intoken) {
			if (tokpos < 2045)
				token[tokpos++] = *from;
			// keep two terminators behind the token so token[1] and token+2 stay readable
			token[tokpos+2] = 0;
		}
		else	text.append(*from);
	}
	return 0;
}
char OSISMorphSegmentation::processText(SWBuf &text, const SWKey * /*key*/, const SWModule *module) { SWBuf token; bool intoken = false; bool hide = false; SWBuf orig( text ); const char *from = orig.c_str(); XMLTag tag; SWBuf tagText = ""; unsigned int morphemeNum = 0; bool inMorpheme = false; SWBuf buf; for (text = ""; *from; ++from) { if (*from == '<') { intoken = true; token = ""; continue; } if (*from == '>') { // process tokens intoken = false; if (!strncmp(token.c_str(), "seg ", 4) || !strncmp(token.c_str(), "/seg", 4)) { tag = token; if (!tag.isEndTag() && tag.getAttribute("type") && !strcmp("morph", tag.getAttribute("type"))) { //<seg type="morph"> start tag hide = !option; //only hide if option is Off tagText = ""; inMorpheme = true; } if (tag.isEndTag()) { buf.setFormatted("%.3d", morphemeNum++); module->getEntryAttributes()["Morpheme"][buf]["body"] = tagText; inMorpheme = false; } if (hide) { //hides start and end tags as long as hide is set if (tag.isEndTag()) { //</seg> hide = false; } continue; //leave out the current token } } //end of seg tag handling text.append('<'); text.append(token); text.append('>'); if (inMorpheme) { tagText.append('<'); tagText.append(token); tagText.append('>'); } hide = false; continue; } //end of intoken part if (intoken) { //copy token token.append(*from); } else { //copy text which is not inside of a tag text.append(*from); if (inMorpheme) { tagText.append(*from); } } } return 0; }
/**
 * Translates one TEI tag into RTF output appended to buf.
 *
 * Tokens that substituteToken() does not handle are parsed as an XMLTag and
 * dispatched on the tag name:
 *  - p                                    : paragraph spacing group on the start tag
 *  - hi / emph                            : italic, bold, super or sub group chosen by the "rend" attribute
 *  - entryFree / sense                    : the "n" attribute, in bold, followed by ". "
 *  - orth                                 : bold group
 *  - div                                  : paragraph reset on the start tag
 *  - pos, gen, case, gram, number, mood, tr : italic group
 *  - etym                                 : square brackets
 *  - note                                 : footnote marker (only when the user data key is a VerseKey);
 *                                           text pass-through is suspended until the end tag
 *
 * NOTE(review): the end of this definition (remaining branches, closing braces
 * and the return statements) lies outside the reviewed excerpt; the code below
 * is left exactly as found.
 */
bool TEIRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { // manually process if it wasn't a simple substitution if (!substituteToken(buf, token)) { MyUserData *u = (MyUserData *)userData; XMLTag tag(token); // <p> paragraph tag if (!strcmp(tag.getName(), "p")) { if (!tag.isEndTag()) { // non-empty start tag buf += "{\\sb100\\fi200\\par}"; } } // <hi> else if (!strcmp(tag.getName(), "hi") || !strcmp(tag.getName(), "emph")) { SWBuf rend = tag.getAttribute("rend"); if ((!tag.isEndTag()) && (!tag.isEmpty())) { if (rend == "italic" || rend == "ital") buf += "{\\i1 "; else if (rend == "bold") buf += "{\\b1 "; else if (rend == "super" || rend == "sup") buf += "{\\super "; else if (rend == "sub") buf += "{\\sub "; } else if (tag.isEndTag()) { buf += "}"; } } // <entryFree> else if (!strcmp(tag.getName(), "entryFree")) { SWBuf n = tag.getAttribute("n"); if ((!tag.isEndTag()) && (!tag.isEmpty())) { if (n != "") { buf += "{\\b1 "; buf += n; buf += ". }"; } } } // <sense> else if (!strcmp(tag.getName(), "sense")) { SWBuf n = tag.getAttribute("n"); if ((!tag.isEndTag()) && (!tag.isEmpty())) { if (n != "") { buf += "{\\sb100\\par\\b1 "; buf += n; buf += ". 
}"; } } } // <orth> else if (!strcmp(tag.getName(), "orth")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { buf += "{\\b1 "; } else if (tag.isEndTag()) { buf += "}"; } } // <div> else if (!strcmp(tag.getName(), "div")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { buf.append("{\\pard\\sa300}"); } else if (tag.isEndTag()) { } } // <pos>, <gen>, <case>, <gram>, <number>, <mood> else if (!strcmp(tag.getName(), "pos") || !strcmp(tag.getName(), "gen") || !strcmp(tag.getName(), "case") || !strcmp(tag.getName(), "gram") || !strcmp(tag.getName(), "number") || !strcmp(tag.getName(), "mood")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { buf += "{\\i1 "; } else if (tag.isEndTag()) { buf += "}"; } } // <tr> else if (!strcmp(tag.getName(), "tr")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { buf += "{\\i1 "; } else if (tag.isEndTag()) { buf += "}"; } } // <etym> else if (!strcmp(tag.getName(), "etym")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { buf += "["; } else if (tag.isEndTag()) { buf += "]"; } } // <note> tag else if (!strcmp(tag.getName(), "note")) { if (!tag.isEndTag()) { if (!tag.isEmpty()) { SWBuf type = tag.getAttribute("type"); SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); VerseKey *vkey = 0; // see if we have a VerseKey * or descendant SWTRY { vkey = SWDYNAMIC_CAST(VerseKey, u->key); } SWCATCH ( ... ) { } if (vkey) { buf.appendFormatted("{\\super <a href=\"\">*%s</a>} ", footnoteNumber.c_str()); } u->suspendTextPassThru = true; } } if (tag.isEndTag()) { u->suspendTextPassThru = false; } }
bool OSISHeadings::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { MyUserData *u = (MyUserData *)userData; XMLTag tag(token); SWBuf name = tag.getName(); // we only care about titles and divs or if we're already in a heading // // are we currently in a heading? if (u->currentHeadingName.size()) { u->heading.append(u->lastTextNode); if (name == u->currentHeadingName) { if (tag.isEndTag(u->sID)) { if (!u->depth-- || u->sID) { // see comment below about preverse div changed and needing to preserve the <title> container tag for old school pre-verse titles // we've just finished a heading. It's all stored up in u->heading bool canonical = (SWBuf("true") == u->currentHeadingTag.getAttribute("canonical")); bool preverse = (SWBuf("x-preverse") == u->currentHeadingTag.getAttribute("subType") || SWBuf("x-preverse") == u->currentHeadingTag.getAttribute("subtype")); // do we want to put anything in EntryAttributes? if (u->module->isProcessEntryAttributes() && (option || canonical || !preverse)) { SWBuf buf; buf.appendFormatted("%i", u->headerNum++); // leave the actual <title...> wrapper in if we're part of an old school preverse title // because now frontend have to deal with preverse as a div which may or may not include <title> elements // and they can't simply wrap all preverse material in <h1>, like they probably did previously SWBuf heading; if (u->currentHeadingName == "title") { XMLTag wrapper = u->currentHeadingTag; if (SWBuf("x-preverse") == wrapper.getAttribute("subType")) wrapper.setAttribute("subType", 0); else if (SWBuf("x-preverse") == wrapper.getAttribute("subtype")) wrapper.setAttribute("subtype", 0); heading = wrapper; heading += u->heading; heading += tag; } else heading = u->heading; u->module->getEntryAttributes()["Heading"][(preverse)?"Preverse":"Interverse"][buf] = heading; StringList attributes = u->currentHeadingTag.getAttributeNames(); for (StringList::const_iterator it = attributes.begin(); it != attributes.end(); it++) { 
u->module->getEntryAttributes()["Heading"][buf][it->c_str()] = u->currentHeadingTag.getAttribute(it->c_str()); } } // do we want the heading in the body? if (!preverse && (option || canonical)) { buf.append(u->currentHeadingTag); buf.append(u->heading); buf.append(tag); } u->suspendTextPassThru = false; u->clear(); } } else u->depth++; } u->heading.append(tag); return true; } // are we a title or a preverse div? else if ( name == "title" || (name == "div" && ( SWBuf("x-preverse") == tag.getAttribute("subType") || SWBuf("x-preverse") == tag.getAttribute("subtype")))) { u->currentHeadingName = name; u->currentHeadingTag = tag; u->heading = ""; u->sID = u->currentHeadingTag.getAttribute("sID"); u->depth = 0; u->suspendTextPassThru = true; return true; } return false; }
char OSISStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) { SWBuf token; bool intoken = false; int wordNum = 1; char wordstr[5]; const char *wordStart = 0; SWBuf page = ""; // some modules include <seg> page info, so we add these to the words const SWBuf orig = text; const char * from = orig.c_str(); for (text = ""; *from; ++from) { if (*from == '<') { intoken = true; token = ""; continue; } if (*from == '>') { // process tokens intoken = false; // possible page seg -------------------------------- if (token.startsWith("seg ")) { XMLTag stag(token); SWBuf type = stag.getAttribute("type"); if (type == "page") { SWBuf number = stag.getAttribute("subtype"); if (number.length()) { page = number; } } } // --------------------------------------------------- if (token.startsWith("w ")) { // Word XMLTag wtag(token); if (module->isProcessEntryAttributes()) { wordStart = from+1; char gh = 0; VerseKey *vkey = 0; if (key) { vkey = SWDYNAMIC_CAST(VerseKey, key); } SWBuf lemma = ""; SWBuf morph = ""; SWBuf src = ""; SWBuf morphClass = ""; SWBuf lemmaClass = ""; const char *attrib; sprintf(wordstr, "%03d", wordNum); // why is morph entry attribute processing done in here? Well, it's faster. It makes more local sense to place this code in osismorph. // easier to keep lemma and morph in same wordstr number too maybe. if ((attrib = wtag.getAttribute("morph"))) { int count = wtag.getAttributePartCount("morph", ' '); int i = (count > 1) ? 
0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 do { SWBuf mClass = ""; SWBuf mp = ""; attrib = wtag.getAttribute("morph", i, ' '); if (i < 0) i = 0; // to handle our -1 condition const char *m = strchr(attrib, ':'); if (m) { int len = m-attrib; mClass.append(attrib, len); attrib += (len+1); } if ((mClass == "x-Robinsons") || (mClass == "x-Robinson") || (mClass == "Robinson")) { mClass = "robinson"; } if (i) { morphClass += " "; morph += " "; } mp += attrib; morphClass += mClass; morph += mp; if (count > 1) { SWBuf tmp; tmp.setFormatted("Morph.%d", i+1); module->getEntryAttributes()["Word"][wordstr][tmp] = mp; tmp.setFormatted("MorphClass.%d", i+1); module->getEntryAttributes()["Word"][wordstr][tmp] = mClass; } } while (++i < count); } if ((attrib = wtag.getAttribute("lemma"))) { int count = wtag.getAttributePartCount("lemma", ' '); int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 do { gh = 0; SWBuf lClass = ""; SWBuf l = ""; attrib = wtag.getAttribute("lemma", i, ' '); if (i < 0) i = 0; // to handle our -1 condition const char *m = strchr(attrib, ':'); if (m) { int len = m-attrib; lClass.append(attrib, len); attrib += (len+1); } if ((lClass == "x-Strongs") || (lClass == "strong") || (lClass == "Strong")) { if (isdigit(attrib[0])) { if (vkey) { gh = vkey->getTestament() ? 'H' : 'G'; } } else { gh = *attrib; attrib++; } lClass = "strong"; } if (gh) l += gh; l += attrib; if (i) { lemmaClass += " "; lemma += " "; } lemma += l; lemmaClass += lClass; if (count > 1) { SWBuf tmp; tmp.setFormatted("Lemma.%d", i+1); module->getEntryAttributes()["Word"][wordstr][tmp] = l; tmp.setFormatted("LemmaClass.%d", i+1); module->getEntryAttributes()["Word"][wordstr][tmp] = lClass; } } while (++i < count); module->getEntryAttributes()["Word"][wordstr]["PartCount"].setFormatted("%d", count); } if ((attrib = wtag.getAttribute("src"))) { int count = wtag.getAttributePartCount("src", ' '); int i = (count > 1) ? 
0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 do { SWBuf mp = ""; attrib = wtag.getAttribute("src", i, ' '); if (i < 0) i = 0; // to handle our -1 condition if (i) src += " "; mp += attrib; src += mp; if (count > 1) { SWBuf tmp; tmp.setFormatted("Src.%d", i+1); module->getEntryAttributes()["Word"][wordstr][tmp] = mp; } } while (++i < count); } if (lemma.length()) module->getEntryAttributes()["Word"][wordstr]["Lemma"] = lemma; if (lemmaClass.length()) module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = lemmaClass; if (morph.length()) module->getEntryAttributes()["Word"][wordstr]["Morph"] = morph; if (morphClass.length()) module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = morphClass; if (src.length()) module->getEntryAttributes()["Word"][wordstr]["Src"] = src; if (page.length()) module->getEntryAttributes()["Word"][wordstr]["Page"] = page; if (wtag.isEmpty()) { int j; for (j = token.length()-1; ((j>0) && (strchr(" /", token[j]))); j--); token.size(j+1); } token += " wn=\""; token += wordstr; token += "\""; if (wtag.isEmpty()) { token += "/"; } wordNum++; } if (!option) { /* * Code which handles multiple lemma types. Kindof works but breaks at least WEBIF filters for strongs. 
* int count = wtag.getAttributePartCount("lemma", ' '); for (int i = 0; i < count; i++) { SWBuf a = wtag.getAttribute("lemma", i, ' '); const char *prefix = a.stripPrefix(':'); if ((prefix) && (!strcmp(prefix, "x-Strongs") || !strcmp(prefix, "strong") || !strcmp(prefix, "Strong"))) { // remove attribute part wtag.setAttribute("lemma", 0, i, ' '); i--; count--; } } * Instead the codee below just removes the lemma attribute *****/ const char *l = wtag.getAttribute("lemma"); if (l) { SWBuf savlm = l; wtag.setAttribute("lemma", 0); wtag.setAttribute("savlm", savlm); token = wtag; token.trim(); // drop <> token << 1; token--; } } } if (token.startsWith("/w")) { // Word End if (module->isProcessEntryAttributes()) { if (wordStart) { SWBuf tmp; tmp.append(wordStart, (from-wordStart)-3); sprintf(wordstr, "%03d", wordNum-1); module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; } } wordStart = 0; } // keep token in text text.append('<'); text.append(token); text.append('>'); continue; } if (intoken) { token += *from; } else { text.append(*from); } } return 0; }
char ThMLFootnotes::processText(SWBuf &text, const SWKey *key, const SWModule *module) { SWBuf token; bool intoken = false; bool hide = false; SWBuf tagText; XMLTag startTag; SWBuf refs = ""; int footnoteNum = 1; char buf[254]; SWKey *p = (module) ? module->createKey() : (key) ? key->clone() : new VerseKey(); VerseKey *parser = SWDYNAMIC_CAST(VerseKey, p); if (!parser) { delete p; parser = new VerseKey(); } *parser = key->getText(); SWBuf orig = text; const char *from = orig.c_str(); for (text = ""; *from; from++) { if (*from == '<') { intoken = true; token = ""; continue; } if (*from == '>') { // process tokens intoken = false; XMLTag tag(token); if (!strcmp(tag.getName(), "note")) { if (!tag.isEndTag()) { if (!tag.isEmpty()) { refs = ""; startTag = tag; hide = true; tagText = ""; continue; } } if (hide && tag.isEndTag()) { if (module->isProcessEntryAttributes()) { SWBuf fc = module->getEntryAttributes()["Footnote"]["count"]["value"]; footnoteNum = (fc.length()) ? atoi(fc.c_str()) : 0; sprintf(buf, "%i", ++footnoteNum); module->getEntryAttributes()["Footnote"]["count"]["value"] = buf; StringList attributes = startTag.getAttributeNames(); for (StringList::iterator it = attributes.begin(); it != attributes.end(); it++) { module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str()); } module->getEntryAttributes()["Footnote"][buf]["body"] = tagText; startTag.setAttribute("swordFootnote", buf); if ((startTag.getAttribute("type")) && (!strcmp(startTag.getAttribute("type"), "crossReference"))) { if (!refs.length()) refs = parser->parseVerseList(tagText.c_str(), *parser, true).getRangeText(); module->getEntryAttributes()["Footnote"][buf]["refList"] = refs.c_str(); } } hide = false; if ((option) || ((startTag.getAttribute("type") && (!strcmp(startTag.getAttribute("type"), "crossReference"))))) { // we want the tag in the text; crossReferences are handled by another filter text += startTag; text.append(tagText); } else continue; } } // if 
not a note token, keep token in text if ((!strcmp(tag.getName(), "scripRef")) && (!tag.isEndTag())) { SWBuf osisRef = tag.getAttribute("passage"); if (refs.length()) refs += "; "; refs += osisRef; } if (!hide) { text += '<'; text.append(token); text += '>'; } else { tagText += '<'; tagText.append(token); tagText += '>'; } continue; } if (intoken) { //copy token token += *from; } else if (!hide) { //copy text which is not inside a token text += *from; } else tagText += *from; } delete parser; return 0; }
char ThMLHeadings::processText(SWBuf &text, const SWKey *key, const SWModule *module) { SWBuf token; bool intoken = false; bool isheader = false; bool hide = false; bool preverse = false; bool withinDiv = false; SWBuf header; int headerNum = 0; int pvHeaderNum = 0; char buf[254]; XMLTag startTag; SWBuf orig = text; const char *from = orig.c_str(); XMLTag tag; for (text = ""; *from; ++from) { if (*from == '<') { intoken = true; token = ""; continue; } if (*from == '>') { // process tokens intoken = false; if (!strnicmp(token.c_str(), "div", 3) || !strnicmp(token.c_str(), "/div", 4)) { withinDiv = (!strnicmp(token.c_str(), "div", 3)); tag = token; if (hide && tag.isEndTag()) { if (module->isProcessEntryAttributes() && (option || (!preverse))) { if (preverse) { sprintf(buf, "%i", pvHeaderNum++); module->getEntryAttributes()["Heading"]["Preverse"][buf] = header; } else { sprintf(buf, "%i", headerNum++); module->getEntryAttributes()["Heading"]["Interverse"][buf] = header; if (option) { // we want the tag in the text text.append(header); } } StringList attributes = startTag.getAttributeNames(); for (StringList::const_iterator it = attributes.begin(); it != attributes.end(); it++) { module->getEntryAttributes()["Heading"][buf][it->c_str()] = startTag.getAttribute(it->c_str()); } } hide = false; if (!option || preverse) { // we don't want the tag in the text anymore preverse = false; continue; } preverse = false; } if (tag.getAttribute("class") && ((!stricmp(tag.getAttribute("class"), "sechead")) || (!stricmp(tag.getAttribute("class"), "title")))) { isheader = true; if (!tag.isEndTag()) { //start tag if (!tag.isEmpty()) { startTag = tag; /* how do we tell a ThML preverse title from one that should be in the text? probably if any text is before the title... 
just assuming all are preverse for now } if (tag.getAttribute("subtype") && !stricmp(tag.getAttribute("subtype"), "x-preverse")) { */ hide = true; preverse = true; header = ""; continue; } // move back up under startTag = tag } /* this is where non-preverse will go eventually if (!tag.isEndTag()) { //start tag hide = true; header = ""; if (option) { // we want the tag in the text text.append('<'); text.append(token); text.append('>'); } continue; } */ } else isheader = false; } if (withinDiv && isheader) { header.append('<'); header.append(token); header.append('>'); } else { // if not a heading token, keep token in text if (!hide) { text.append('<'); text.append(token); text.append('>'); } } continue; } if (intoken) { //copy token token.append(*from); } else if (!hide) { //copy text which is not inside a token text.append(*from); } else header.append(*from); } return 0; }
char ThMLWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *module) { if (option) { char token[2112]; // cheese. Fix. int tokpos = 0; bool intoken = false; int word = 1; char val[128]; char *valto; char *ch; char wordstr[5]; unsigned int textStart = 0, lastAppendLen = 0, textEnd = 0; SWBuf tmp; bool newText = false; bool needWordOut = false; AttributeValue *wordAttrs = 0; SWBuf modName = (module)?module->getName():""; SWBuf wordSrcPrefix = modName; const SWBuf orig = text; const char * from = orig.c_str(); VerseKey *vkey = 0; if (key) { vkey = SWDYNAMIC_CAST(VerseKey, key); } for (text = ""; *from; from++) { if (*from == '<') { intoken = true; tokpos = 0; token[0] = 0; token[1] = 0; token[2] = 0; textEnd = text.length(); continue; } if (*from == '>') { // process tokens intoken = false; if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number sprintf(wordstr, "%03d", word++); needWordOut = (word > 2); wordAttrs = &(module->getEntryAttributes()["Word"][wordstr]); (*wordAttrs)["Strongs"] = val; //printf("Adding: [\"Word\"][%s][\"Strongs\"] = %s\n", wordstr, val); tmp = ""; tmp.append(text.c_str()+textStart, (int)(textEnd - textStart)); (*wordAttrs)["Text"] = tmp; text.append("</span>"); SWBuf ts; ts.appendFormatted("%d", textStart); (*wordAttrs)["TextStart"] = ts; //printf("Adding: [\"Word\"][%s][\"Text\"] = %s\n", wordstr, tmp.c_str()); newText = true; } else { // verb morph (*wordAttrs)["Morph"] = val; //printf("Adding: [\"Word\"][%s][\"Morph\"] = %s\n", wordstr, val); } } if (!strncmp(token, "sync type=\"morph\"", 17)) { for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; (*wordAttrs)["MorphClass"] = val; //printf("Adding: [\"Word\"][%s][\"MorphClass\"] 
= %s\n", wordstr, val); } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; (*wordAttrs)["Morph"] = val; //printf("Adding: [\"Word\"][%s][\"Morph\"] = %s\n", wordstr, val); } } newText = true; } // if not a strongs token, keep token in text text += '<'; text += token; text += '>'; if (needWordOut) { char wstr[10]; sprintf(wstr, "%03d", word-2); AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]); needWordOut = false; SWBuf strong = (*wAttrs)["Strongs"]; SWBuf morph = (*wAttrs)["Morph"]; SWBuf morphClass = (*wAttrs)["MorphClass"]; SWBuf wordText = (*wAttrs)["Text"]; SWBuf textSt = (*wAttrs)["TextStart"]; if (strong.size()) { char gh = 0; gh = isdigit(strong[0]) ? 0:strong[0]; if (!gh) { if (vkey) { gh = vkey->getTestament() ? 'H' : 'G'; } } else strong << 1; SWModule *sLex = 0; SWModule *sMorph = 0; if (gh == 'G') { sLex = defaultGreekLex; sMorph = defaultGreekParse; } if (gh == 'H') { sLex = defaultHebLex; sMorph = defaultHebParse; } SWBuf lexName = ""; if (sLex) { // we can pass the real lex name in, but we have some // aliases in the javascript to optimize bandwidth lexName = sLex->getName(); if (lexName == "StrongsGreek") lexName = "G"; if (lexName == "StrongsHebrew") lexName = "H"; } SWBuf wordID; if (vkey) { // optimize for bandwidth and use only the verse as the unique entry id wordID.appendFormatted("%d", vkey->getVerse()); } else { wordID = key->getText(); } for (unsigned int i = 0; i < wordID.size(); i++) { if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) { wordID[i] = '_'; } } wordID.appendFormatted("_%s%d", wordSrcPrefix.c_str(), atoi(wstr)); if (textSt.size()) { int textStr = atoi(textSt.c_str()); textStr += lastAppendLen; SWBuf spanStart = ""; if (!sMorph) sMorph = 0; // avoid unused warnings for now /* if (sMorph) { SWBuf popMorph = "<a onclick=\""; popMorph.appendFormatted("p(\'%s\',\'%s\','%s','');\" >%s</a>", sMorph->getName(), morph.c_str(), 
wordID.c_str(), morph.c_str()); morph = popMorph; } */ // 'p' = 'fillpop' to save bandwidth const char *m = strchr(morph.c_str(), ':'); if (m) m++; else m = morph.c_str(); spanStart.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','','%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str()); text.insert(textStr, spanStart); lastAppendLen = spanStart.length(); } } } if (newText) { textStart = text.length(); newText = false; } continue; } if (intoken) { if (tokpos < 2045) { token[tokpos++] = *from; // TODO: why is this + 2 ? token[tokpos+2] = 0; } } else { text += *from; } } char wstr[10]; sprintf(wstr, "%03d", word-1); AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]); needWordOut = false; SWBuf strong = (*wAttrs)["Strongs"]; SWBuf morph = (*wAttrs)["Morph"]; SWBuf morphClass = (*wAttrs)["MorphClass"]; SWBuf wordText = (*wAttrs)["Text"]; SWBuf textSt = (*wAttrs)["TextStart"]; if (strong.size()) { char gh = 0; gh = isdigit(strong[0]) ? 0:strong[0]; if (!gh) { if (vkey) { gh = vkey->getTestament() ? 
'H' : 'G'; } } else strong << 1; SWModule *sLex = 0; if (gh == 'G') { sLex = defaultGreekLex; } if (gh == 'H') { sLex = defaultHebLex; } SWBuf lexName = ""; if (sLex) { // we can pass the real lex name in, but we have some // aliases in the javascript to optimize bandwidth lexName = sLex->getName(); if (lexName == "StrongsGreek") lexName = "G"; if (lexName == "StrongsHebrew") lexName = "H"; } SWBuf wordID; if (vkey) { // optimize for bandwidth and use only the verse as the unique entry id wordID.appendFormatted("%d", vkey->getVerse()); } else { wordID = key->getText(); } for (unsigned int i = 0; i < wordID.size(); i++) { if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) { wordID[i] = '_'; } } wordID.appendFormatted("_%s%d", wordSrcPrefix.c_str(), atoi(wstr)); if (textSt.size()) { int textStr = atoi(textSt.c_str()); textStr += lastAppendLen; SWBuf spanStart = ""; // 'p' = 'fillpop' to save bandwidth const char *m = strchr(morph.c_str(), ':'); if (m) m++; else m = morph.c_str(); spanStart.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','','%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str()); text.insert(textStr, spanStart); } } } return 0; }
char OSISXHTMLXS::processText(SWBuf &text, const SWKey *key, const SWModule *module) { char *from; char token[4096]; int tokpos = 0; bool intoken = false; bool inEsc = false; SWBuf lastTextNode; MyUserDataXS *userData = (MyUserDataXS *)createUserData(module, key); SWBuf orig = text; from = orig.getRawData(); text = ""; for (;*from; from++) { if (*from == '<') { intoken = true; tokpos = 0; token[0] = 0; token[1] = 0; token[2] = 0; inEsc = false; continue; } if (*from == '&') { intoken = true; tokpos = 0; token[0] = 0; token[1] = 0; token[2] = 0; inEsc = true; continue; } if (inEsc) { if (*from == ';') { intoken = inEsc = false; userData->lastTextNode = lastTextNode; if (!userData->suspendTextPassThru) { //if text through is disabled no tokens should pass, too handleEscapeString(text, token, userData); } lastTextNode = ""; continue; } } if (!inEsc) { if (*from == '>') { intoken = false; userData->lastTextNode = lastTextNode; handleToken(text, token, userData); lastTextNode = ""; continue; } } if (intoken) { if (tokpos < 4090) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { if ((!userData->supressAdjacentWhitespace) || (*from != ' ')) { if (!userData->suspendTextPassThru) { text.append(*from); userData->lastSuspendSegment.size(0); } else userData->lastSuspendSegment.append(*from); lastTextNode.append(*from); } userData->supressAdjacentWhitespace = false; } } // THE MAIN PURPOSE OF THIS OVERRIDE FUNCTION: is to insure all opened HTML tags are closed while (!userData->htmlTagStack->empty()) { text.append((SWBuf)"</" + userData->htmlTagStack->top().c_str() + ">"); userData->htmlTagStack->pop(); } delete userData; return 0; }
char OSISWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *module) { if (option) { char token[2112]; // cheese. Fix. int tokpos = 0; bool intoken = false; int wordNum = 1; char wordstr[5]; SWBuf modName = (module)?module->getName():""; // add TR to w src in KJV then remove this next line SWBuf wordSrcPrefix = (modName == "KJV")?SWBuf("TR"):modName; VerseKey *vkey = 0; if (key) { vkey = SWDYNAMIC_CAST(VerseKey, key); } const SWBuf orig = text; const char * from = orig.c_str(); for (text = ""; *from; ++from) { if (*from == '<') { intoken = true; tokpos = 0; token[0] = 0; token[1] = 0; token[2] = 0; continue; } if (*from == '>') { // process tokens intoken = false; if ((*token == 'w') && (token[1] == ' ')) { // Word XMLTag wtag(token); sprintf(wordstr, "%03d", wordNum); SWBuf lemmaClass; SWBuf lemma; SWBuf morph; SWBuf page; SWBuf src; char gh = 0; page = module->getEntryAttributes()["Word"][wordstr]["Page"].c_str(); if (page.length()) page = (SWBuf)"p:" + page; int count = atoi(module->getEntryAttributes()["Word"][wordstr]["PartCount"].c_str()); for (int i = 0; i < count; i++) { // for now, lemma class can just be equal to last lemma class in multi part word SWBuf tmp = "LemmaClass"; if (count > 1) tmp.appendFormatted(".%d", i+1); lemmaClass = module->getEntryAttributes()["Word"][wordstr][tmp]; tmp = "Lemma"; if (count > 1) tmp.appendFormatted(".%d", i+1); tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str()); // if we're strongs, if (lemmaClass == "strong") { gh = tmp[0]; tmp << 1; } if (lemma.size()) lemma += "|"; lemma += tmp; tmp = "Morph"; if (count > 1) tmp.appendFormatted(".%d", i+1); tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str()); if (morph.size()) morph += "|"; morph += tmp; tmp = "Src"; if (count > 1) tmp.appendFormatted(".%d", i+1); tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str()); if (!tmp.length()) tmp.appendFormatted("%d", wordNum); tmp.insert(0, wordSrcPrefix); if (src.size()) src += 
"|"; src += tmp; } SWBuf lexName = ""; // we can pass the real lex name in, but we have some // aliases in the javascript to optimize bandwidth if ((gh == 'G') && (defaultGreekLex)) { lexName = (!strcmp(defaultGreekLex->getName(), "StrongsGreek"))?"G":defaultGreekLex->getName(); } else if ((gh == 'H') && (defaultHebLex)) { lexName = (!strcmp(defaultHebLex->getName(), "StrongsHebrew"))?"H":defaultHebLex->getName(); } SWBuf xlit = wtag.getAttribute("xlit"); if ((lemmaClass != "strong") && (xlit.startsWith("betacode:"))) { lexName = "betacode"; // const char *m = strchr(xlit.c_str(), ':'); // strong = ++m; } SWBuf wordID; if (vkey) { // optimize for bandwidth and use only the verse as the unique entry id wordID.appendFormatted("%d", vkey->getVerse()); } else { wordID = key->getText(); } wordID.appendFormatted("_%s", src.c_str()); // clean up our word ID for XHTML for (unsigned int i = 0; i < wordID.size(); i++) { if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) { wordID[i] = '_'; } } // 'p' = 'fillpop' to save bandwidth text.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','%s','%s');\" >", lexName.c_str(), lemma.c_str(), wordID.c_str(), morph.c_str(), page.c_str(), modName.c_str()); wordNum++; if (wtag.isEmpty()) { text += "</w></span>"; } } if ((*token == '/') && (token[1] == 'w') && option) { // Word text += "</w></span>"; continue; } // if not a strongs token, keep token in text text.append('<'); text.append(token); text.append('>'); continue; } if (intoken) { if (tokpos < 2045) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { text.append(*from); } } } return 0; }
/**
 * Removes lemma information from OSIS <w> tags when the option is off.
 *
 * For every "<w " tag the complete lemma attribute is first saved in a
 * "savlm" attribute (unless one already exists).  Then every part of the
 * space separated lemma attribute that has no ':' prefix, or whose prefix
 * starts with "lemma.", is removed.  All other text and tags pass through
 * unchanged.
 *
 * @param text   entry text; rewritten in place when the option is off
 * @param key    unused
 * @param module unused
 * @return always 0
 */
char OSISLemma::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	// lemmas are wanted: nothing to strip
	if (option) {
		return 0;
	}

	const SWBuf source = text;
	SWBuf tagBuf;
	bool insideTag = false;

	text = "";
	for (const char *cur = source.c_str(); *cur; ++cur) {
		const char c = *cur;

		if (c == '<') {
			insideTag = true;
			tagBuf = "";
			continue;
		}

		if (c != '>') {
			// ordinary character: goes to the tag being collected or to the output
			if (insideTag) {
				tagBuf += c;
			}
			else {
				text.append(c);
			}
			continue;
		}

		// c == '>': a complete tag has been collected
		insideTag = false;

		if (tagBuf.startsWith("w ")) {	// Word
			XMLTag wordTag(tagBuf);

			// always save off lemma if we haven't yet
			if (!wordTag.getAttribute("savlm")) {
				const char *fullLemma = wordTag.getAttribute("lemma");
				if (fullLemma) {
					wordTag.setAttribute("savlm", fullLemma);
				}
			}

			int parts = wordTag.getAttributePartCount("lemma", ' ');
			int idx = 0;
			while (idx < parts) {
				SWBuf part = wordTag.getAttribute("lemma", idx, ' ');
				const char *prefix = part.stripPrefix(':');
				if ((!prefix) || ((SWBuf)prefix).startsWith("lemma.")) {
					// remove attribute part; the next part slides into this index
					wordTag.setAttribute("lemma", 0, idx, ' ');
					--parts;
				}
				else {
					++idx;
				}
			}

			tagBuf = wordTag;
			tagBuf.trim();
			// drop <>
			tagBuf << 1;
			tagBuf--;
		}

		// keep token in text
		text.append('<');
		text.append(tagBuf);
		text.append('>');
	}
	return 0;
}
/**
 * Processes OSIS <note> elements in an entry.
 *
 * Every non-empty <note> start tag begins a hidden section whose content is
 * collected into a side buffer.  At the matching end tag the footnote is
 * recorded in the module's entry attributes under ["Footnote"][n] (except
 * for strongsMarkup notes), and the start tag alone is put back into the
 * text when the option is on or the note is a crossReference; the body stays
 * retrievable from the entry attributes.  The osisRef values of <reference>
 * tags are accumulated into a "; "-separated reference list.
 *
 * @param text   entry text; rewritten in place
 * @param key    key of the entry; may be null
 * @param module module being filtered; may be null, in which case no entry
 *               attributes are recorded
 * @return always 0
 */
char OSISFootnotes::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	SWBuf token;
	bool intoken = false;
	bool hide = false;
	SWBuf tagText;
	XMLTag startTag;
	SWBuf refs = "";
	int footnoteNum = 1;
	char buf[254];

	// Build a VerseKey used to parse verse lists found in cross references.
	SWKey *p = (module) ? module->createKey() : (key) ? key->clone() : new VerseKey();
	VerseKey *parser = SWDYNAMIC_CAST(VerseKey, p);
	if (!parser) {
		delete p;
		parser = new VerseKey();
	}
	// key is allowed to be null (see the ternary above)
	if (key) {
		*parser = key->getText();
	}

	SWBuf orig = text;
	const char *from = orig.c_str();

	XMLTag tag;
	bool strongsMarkup = false;

	for (text = ""; *from; ++from) {

		// remove all newlines temporarily to fix kjv2003 module
		if ((*from == 10) || (*from == 13)) {
			// NOTE(review): this looks at the second-to-last output character,
			// not the last one, so a CR LF pair can yield two spaces — kept
			// as found; confirm whether length()-1 was intended.
			if ((text.length() > 1) && (text[text.length() - 2] != ' ') && (*(from + 1) != ' ')) {
				text.append(' ');
			}
			continue;
		}

		if (*from == '<') {
			intoken = true;
			token = "";
			continue;
		}

		if (*from == '>') {	// process tokens
			intoken = false;

			if (!strncmp(token.c_str(), "note", 4) || !strncmp(token.c_str(), "/note", 5)) {
				tag = token;

				if (!tag.isEndTag()) {
					if (tag.getAttribute("type") && (!strcmp("x-strongsMarkup", tag.getAttribute("type"))
											|| !strcmp("strongsMarkup", tag.getAttribute("type")))) {	// second form is deprecated
						tag.setEmpty(false);	// handle bug in KJV2003 module where some note open tags were <note ... />
						strongsMarkup = true;
					}

					if (!tag.isEmpty()) {
						// start of a footnote: begin collecting its body
						refs = "";
						startTag = tag;
						hide = true;
						tagText = "";
						continue;
					}
				}

				if (hide && tag.isEndTag()) {
					// Reset the flag on every path out of this branch.  It
					// used to stay set when the end tag was dropped below,
					// which kept later footnotes from being recorded.
					const bool wasStrongsMarkup = strongsMarkup;
					strongsMarkup = false;

					// don't parse strongsMarkup to EntryAttributes as Footnote
					if (module && module->isProcessEntryAttributes() && !wasStrongsMarkup) {
						snprintf(buf, sizeof(buf), "%i", footnoteNum++);
						StringList attributes = startTag.getAttributeNames();
						for (StringList::const_iterator it = attributes.begin(); it != attributes.end(); it++) {
							module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
						}
						module->getEntryAttributes()["Footnote"][buf]["body"] = tagText;
						startTag.setAttribute("swordFootnote", buf);

						if ((startTag.getAttribute("type")) && (!strcmp(startTag.getAttribute("type"), "crossReference"))) {
							// no explicit references seen: parse the note body instead
							if (!refs.length()) {
								refs = parser->parseVerseList(tagText.c_str(), *parser, true).getRangeText();
							}
							module->getEntryAttributes()["Footnote"][buf]["refList"] = refs.c_str();
						}
					}

					hide = false;
					if (option || (startTag.getAttribute("type") && !strcmp(startTag.getAttribute("type"), "crossReference"))) {
						// we want the tag in the text; crossReferences are handled by another filter.
						// The body is not put back: it is retrievable from the entry attributes.
						text.append(startTag);
					}
					else {
						continue;
					}
				}
				strongsMarkup = false;
			}

			// collect the osisRef of reference start tags into the reference list
			if (!strncmp(token.c_str(), "reference", 9)) {
				const char *attr = strstr(token.c_str() + 9, "osisRef=\"");
				const char *end = attr ? strchr(attr + 9, '"') : 0;
				if (attr && end) {
					// separator only when there is something to append
					if (refs.length()) {
						refs.append("; ");
					}
					refs.append(attr + 9, end - (attr + 9));
				}
			}

			// keep the token, either in the visible text or in the hidden note body
			if (!hide) {
				text.append('<');
				text.append(token);
				text.append('>');
			}
			else {
				tagText.append('<');
				tagText.append(token);
				tagText.append('>');
			}
			continue;
		}

		if (intoken) {		// copy token
			token.append(*from);
		}
		else if (!hide) {	// copy text which is not inside a token
			text.append(*from);
		}
		else {
			tagText.append(*from);
		}
	}

	delete parser;
	return 0;
}
char ThMLStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) { char token[2048]; // cheese. Fix. const char *from; int tokpos = 0; bool intoken = false; bool lastspace = false; int word = 1; char val[128]; char wordstr[5]; char *valto; char *ch; unsigned int textStart = 0, textEnd = 0; SWBuf tmp; bool newText = false; SWBuf orig = text; from = orig.c_str(); for (text = ""; *from; from++) { if (*from == '<') { intoken = true; tokpos = 0; token[0] = 0; token[1] = 0; token[2] = 0; textEnd = text.length(); continue; } if (*from == '>') { // process tokens intoken = false; if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs if (module->isProcessEntryAttributes()) { valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number sprintf(wordstr, "%03d", word); module->getEntryAttributes()["Word"][wordstr]["PartCount"] = "1"; module->getEntryAttributes()["Word"][wordstr]["Lemma"] = val; module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = "strong"; tmp = ""; tmp.append(text.c_str()+textStart, (int)(textEnd - textStart)); module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; newText = true; } else { /* // verb morph sprintf(wordstr, "%03d", word); module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "OLBMorph"; */ word--; // for now, completely ignore this word attribute. 
} word++; } if (!option) { // if we don't want strongs if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { if (lastspace) text--; } if (newText) {textStart = text.length(); newText = false; } continue; } } if (module->isProcessEntryAttributes()) { if (!strncmp(token, "sync type=\"morph\"", 17)) { for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; sprintf(wordstr, "%03d", word-1); if ((!stricmp(val, "Robinsons")) || (!stricmp(val, "Robinson"))) { strcpy(val, "robinson"); } module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; sprintf(wordstr, "%03d", word-1); module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; } } newText = true; } } // if not a strongs token, keep token in text text += '<'; text += token; text += '>'; if (newText) {textStart = text.length(); newText = false; } continue; } if (intoken) { if (tokpos < 2045) token[tokpos++] = *from; token[tokpos+2] = 0; } else { text += *from; lastspace = (*from == ' '); } } return 0; }
bool TEIPlain::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { // manually process if it wasn't a simple substitution if (!substituteToken(buf, token)) { //MyUserData *u = (MyUserData *)userData; XMLTag tag(token); // <p> paragraph tag if (!strcmp(tag.getName(), "p")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { // non-empty start tag buf += "\n"; } else if (tag.isEndTag()) { // end tag buf += "\n"; userData->supressAdjacentWhitespace = true; } else { // empty paragraph break marker buf += "\n\n"; userData->supressAdjacentWhitespace = true; } } // <entryFree> else if (!strcmp(tag.getName(), "entryFree")) { SWBuf n = tag.getAttribute("n"); if ((!tag.isEndTag()) && (!tag.isEmpty())) { if (n != "") { buf += n; buf += ". "; } } } // <sense> else if (!strcmp(tag.getName(), "sense")) { SWBuf n = tag.getAttribute("n"); if ((!tag.isEndTag()) && (!tag.isEmpty())) { if (n != "") { buf += n; buf += ". "; } } else if (tag.isEndTag()) { buf += "\n"; } } // <div> else if (!strcmp(tag.getName(), "div")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { buf.append("\n\n\n"); } else if (tag.isEndTag()) { } } // <etym> else if (!strcmp(tag.getName(), "etym")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { buf += "["; } else if (tag.isEndTag()) { buf += "]"; } } else { return false; // we still didn't handle token } } return true; }
int main(int argc, char **argv) { SWBuf program = argv[0]; fprintf(stderr, "You are running %s: $Rev: 2138 $\n", argv[0]); // Let's test our command line arguments if (argc < 3) { usage(*argv); } // variables for arguments, holding defaults SWBuf path = argv[1]; SWBuf teiDoc = argv[2]; SWBuf compType = ""; SWBuf modDrv = ""; SWBuf recommendedPath = "./modules/lexdict/"; SWBuf cipherKey = ""; SWCompress *compressor = 0; for (int i = 3; i < argc; i++) { if (!strcmp(argv[i], "-z")) { if (compType.size()) usage(*argv, "Cannot specify both -z and -Z"); if (modDrv.size()) usage(*argv, "Cannot specify both -z and -s"); compType = "ZIP"; modDrv = "zLD"; recommendedPath += "zld/"; } else if (!strcmp(argv[i], "-Z")) { if (compType.size()) usage(*argv, "Cannot specify both -z and -Z"); if (modDrv.size()) usage(*argv, "Cannot specify both -Z and -s"); compType = "LZSS"; recommendedPath += "zld/"; } else if (!strcmp(argv[i], "-s")) { if (compType.size()) usage(*argv, "Cannot specify both -s and -z or -Z"); if (i+1 < argc) { int size = atoi(argv[++i]); if (size == 2) { modDrv = "RawLD"; recommendedPath += "rawld/"; continue; } if (size == 4) { modDrv = "RawLD4"; recommendedPath += "rawld4/"; continue; } } usage(*argv, "-s requires one of <2|4>"); } else if (!strcmp(argv[i], "-N")) { normalize = false; } else if (!strcmp(argv[i], "-c")) { if (i+1 < argc) cipherKey = argv[++i]; else usage(*argv, "-c requires <cipher_key>"); } else usage(*argv, (((SWBuf)"Unknown argument: ")+ argv[i]).c_str()); } if (!modDrv.size()) { modDrv = "RawLD4"; recommendedPath += "rawld4/"; } #ifndef _ICU_ if (normalize) { normalize = false; cout << program << " is not compiled with support for ICU. Setting -N flag." 
<< endl; } #endif if (compType == "ZIP") { compressor = new ZipCompress(); } else if (compType = "LZSS") { compressor = new LZSSCompress(); } #ifdef DEBUG // cout << "path: " << path << " teiDoc: " << teiDoc << " compressType: " << compType << " ldType: " << modDrv << " cipherKey: " << cipherKey.c_str() << " normalize: " << normalize << "\n"; cout << "path: " << path << " teiDoc: " << teiDoc << " compressType: " << compType << " ldType: " << modDrv << " normalize: " << normalize << "\n"; cout << ""; // exit(-3); #endif SWBuf modName = path; int pathlen = path.length(); char lastChar = path[pathlen - 1]; if (lastChar != '/' && lastChar != '\\') { modName += "/"; } modName += "dict"; SWBuf keyBuf; SWBuf entBuf; SWBuf lineBuf; vector<string> linkBuf; if (modDrv == "zLD") { if (zLD::createModule(modName)) { fprintf(stderr, "error: %s: couldn't create module at path: %s \n", program.c_str(), modName.c_str()); exit(-3); } module = new zLD(modName, 0, 0, 30, compressor); } else if (modDrv == "RawLD") { if (RawLD::createModule(modName)) { fprintf(stderr, "error: %s: couldn't create module at path: %s \n", program.c_str(), modName.c_str()); exit(-3); } module = new RawLD(modName); } else { if (RawLD4::createModule(modName)) { fprintf(stderr, "error: %s: couldn't create module at path: %s \n", program.c_str(), modName.c_str()); exit(-3); } module = new RawLD4(modName); } SWFilter *cipherFilter = 0; if (cipherKey.size()) { fprintf(stderr, "Adding cipher filter with phrase: %s\n", cipherKey.c_str() ); cipherFilter = new CipherFilter(cipherKey.c_str()); module->AddRawFilter(cipherFilter); } if (!module->isWritable()) { fprintf(stderr, "The module is not writable. 
Writing text to it will not work.\nExiting.\n" ); exit(-1); } // Let's see if we can open our input file ifstream infile(teiDoc); if (infile.fail()) { fprintf(stderr, "error: %s: couldn't open input file: %s \n", program.c_str(), teiDoc.c_str()); exit(-2); } currentKey = module->CreateKey(); currentKey->Persist(1); module->setKey(*currentKey); (*module) = TOP; SWBuf token; SWBuf text; bool intoken = false; char curChar = '\0'; while (infile.good()) { curChar = infile.get(); // skip the character if it is bad. infile.good() will catch the problem if (curChar == -1) { continue; } if (!intoken && curChar == '<') { intoken = true; token = "<"; continue; } if (intoken && curChar == '>') { intoken = false; token.append('>'); XMLTag *t = new XMLTag(token.c_str()); if (!handleToken(text, t)) { text.append(*t); } delete t; continue; } if (intoken) token.append(curChar); else switch (curChar) { case '>' : text.append(">"); break; case '<' : text.append("<"); break; default : text.append(curChar); break; } } // Force the last entry from the text buffer. //text = ""; //writeEntry(*currentKey, text); delete module; delete currentKey; if (cipherFilter) delete cipherFilter; infile.close(); #ifdef _ICU_ if (converted) fprintf(stderr, "tei2mod converted %d verses to UTF-8\n", converted); if (normalized) fprintf(stderr, "tei2mod normalized %d verses to NFC\n", normalized); #endif /* * Suggested module name detection. * Only used for suggesting a conf. * * Various forms of path. * . and .. - no module name given, use "dict". * Or one of the following where z is the module name * and x may be . or .. 
* z * x/y/z * x/y/z/ * x/y/z/z */ SWBuf suggestedModuleName = path; if (lastChar == '/' || lastChar == '\\') { suggestedModuleName.setSize(--pathlen); } lastChar = suggestedModuleName[pathlen - 1]; if (lastChar == '.') { suggestedModuleName = "???"; } else { /* At this point the suggestion is either * what follows the last / or \ * or the entire string */ const char *m = strrchr(suggestedModuleName.c_str(), '/'); if (!m) { m = strrchr(suggestedModuleName.c_str(), '\\'); } if (m) { suggestedModuleName = m+1; } } recommendedPath += suggestedModuleName; recommendedPath += "/dict"; fprintf(stderr, "\nSuggested conf (replace ??? with appropriate values)\n\n"); fprintf(stderr, "[%s]\n", suggestedModuleName.c_str()); fprintf(stderr, "DataPath=%s\n", recommendedPath.c_str()); fprintf(stderr, "Description=???\n"); fprintf(stderr, "SourceType=TEI\n"); fprintf(stderr, "Encoding=%s\n", (normalize ? "UTF-8" : "???")); fprintf(stderr, "ModDrv=%s\n", modDrv.c_str()); if (compressor) { fprintf(stderr, "CompressType=%s\n", compType.c_str()); } if (cipherKey.size()) { fprintf(stderr, "CipherKey=%s\n", cipherKey.c_str()); } }
/**
 * Reads the next logical line from a file.
 *
 * The file is read in chunks of 254 bytes.  Leading CR, space and tab
 * characters at the beginning of the line are skipped, the line ends at the
 * first LF (10), and trailing LF/CR/space/tab characters are removed.  A
 * line whose last remaining character is a backslash is continued with the
 * following line (the backslash itself is dropped).  After each chunk the
 * file position is moved to just behind the consumed part.
 *
 * @param fDesc file to read from
 * @param line  receives the line; cleared first
 * @return non-zero when something was read or the line is not empty,
 *         0 at end of file or for an invalid descriptor
 */
char FileMgr::getLine(FileDesc *fDesc, SWBuf &line) {
	int len = 0;
	bool more = true;
	char chunk[255];

	line = "";

	// assert we have a valid file handle
	if (fDesc->getFd() < 1) {
		return 0;
	}

	while (more) {
		more = false;
		long index = fDesc->seek(0, SEEK_CUR);
		len = fDesc->read(chunk, 254);

		// assert we have a readable file (not a directory)
		if (len < 1) {
			break;
		}

		int start = 0;
		// clean up any preceding white space if we're at the beginning of line
		if (!line.length()) {
			for (; start < len; start++) {
				if ((chunk[start] != 13) && (chunk[start] != ' ') && (chunk[start] != '\t')) {
					break;
				}
			}
			if (start >= len) {
				// The whole chunk was leading white space.  There is nothing
				// to append; indexing chunk[len] here would read a byte that
				// was never written and skip one byte of the file.  Carry on
				// with the next chunk if this one was full.
				fDesc->seek(index + len, SEEK_SET);
				more = (len == 254);
				continue;
			}
		}

		// find the end
		int end;
		for (end = start; ((end < (len - 1)) && (chunk[end] != 10)); end++);

		// no line feed in a full chunk: the line goes on in the next chunk
		if ((chunk[end] != 10) && (len == 254)) {
			more = true;
		}
		index += (end + 1);

		// reposition to next valid place to read
		fDesc->seek(index, SEEK_SET);

		// clean up any trailing junk on line if we're at the end
		if (!more) {
			for (; end > start; end--) {
				if ((chunk[end] != 10) && (chunk[end] != 13) && (chunk[end] != ' ') && (chunk[end] != '\t')) {
					// a trailing backslash continues the line
					if (chunk[end] == '\\') {
						more = true;
						end--;
					}
					break;
				}
			}
		}

		int size = (end - start) + 1;

		if (size > 0) {
			line.append(chunk + start, size);
		}
	}
	return ((len > 0) || line.length());
}
char GBFStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) { char token[2048]; // cheese. Fix. int tokpos = 0; bool intoken = false; bool lastspace = false; int word = 1; char val[128]; char wordstr[5]; char *valto; unsigned int textStart = 0, textEnd = 0; bool newText = false; SWBuf tmp; const char *from; SWBuf orig = text; from = orig.c_str(); for (text = ""; *from; from++) { if (*from == '<') { intoken = true; tokpos = 0; token[0] = 0; token[1] = 0; token[2] = 0; textEnd = text.size(); continue; } if (*from == '>') { // process tokens intoken = false; if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs if (module->isProcessEntryAttributes()) { valto = val; for (unsigned int i = 1; ((token[i]) && (i < 150)); i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number sprintf(wordstr, "%03d", word++); module->getEntryAttributes()["Word"][wordstr]["PartsCount"] = "1"; module->getEntryAttributes()["Word"][wordstr]["Lemma"] = val; module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = "strong"; tmp = ""; tmp.append(text.c_str()+textStart, (int)(textEnd - textStart)); module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; newText = true; } else { // verb morph sprintf(wordstr, "%03d", word-1); module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "OLBMorph"; } } if (!option) { if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { if (lastspace) text--; } if (newText) {textStart = text.size(); newText = false; } continue; } } if (module->isProcessEntryAttributes()) { if ((*token == 'W') && (token[1] == 'T')) { // Morph valto = val; for (unsigned int i = 2; ((token[i]) && (i < 150)); i++) *valto++ = token[i]; *valto = 0; sprintf(wordstr, "%03d", word-1); 
module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "GBFMorph"; module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; newText = true; } } // if not a strongs token, keep token in text text += '<'; text += token; text += '>'; if (newText) {textStart = text.size(); newText = false; } continue; } if (intoken) { if (tokpos < 2045) token[tokpos++] = *from; token[tokpos+2] = 0; } else { text += *from; lastspace = (*from == ' '); } } return 0; }
/** Parse the URL. * Parse the URL into the protocol, the hostname, the path and the paramters with their values * */ void URL::parse() { /* format example protocol://hostname/path/path/path.pl?param1=value1&param2=value2 * we include the script name in the path, so the path would be /path/path/path.pl in this example * & could also be & */ //1. Init const char *urlPtr = url.c_str(); protocol = ""; hostname = ""; path = ""; parameterMap.clear(); // 2. Get the protocol, which is from the begining to the first :// const char *end = strchr( urlPtr, ':' ); if (end) { //protocol was found protocol.append(urlPtr, end-urlPtr); urlPtr = end + 1; //find the end of the protocol separator (e.g. "://") for (; (*urlPtr == ':') || (*urlPtr == '/'); urlPtr++); } //3.Get the hostname part. This is the part from pos up to the first slash bool checkPath = true; bool checkParams = true; bool checkAnchor = true; end = strchr(urlPtr, '/'); if (!end) { checkPath = false; end = strchr(urlPtr, '?'); } if (!end) { checkParams = false; end = strchr(urlPtr, '#'); } if (!end) { checkAnchor = false; end = urlPtr+strlen(urlPtr); } hostname.append(urlPtr, end-urlPtr); urlPtr = end + ((*end)? 1 : 0); if (checkPath) { end = strchr(urlPtr, '?'); if (!end) { checkParams = false; end = strchr(urlPtr, '#'); } if (!end) { checkAnchor = false; end = urlPtr+strlen(urlPtr); } path.append(urlPtr, end-urlPtr); urlPtr = end + ((*end)? 1 : 0); } if (checkParams) { //5. Fill the map with the parameters and their values SWBuf paramName; SWBuf paramValue; if (checkAnchor) checkAnchor = false; /* end = strchr(urlPtr, '#'); if (!end) { checkAnchor = false; end = urlPtr+strlen(urlPtr); } */ //end = (start && strchr(start, '?')) ? strchr(start, '?')+1 :0; end = urlPtr; while (end) { paramName = ""; paramValue = ""; //search for the equal sign to find the value part const char *valueStart = strchr(end, '='); if (valueStart) { const char* valueEnd = strstr(valueStart, "&") ? 
strstr(valueStart, "&") : strstr(valueStart, "&"); //try to find a new paramter part if (valueEnd) { paramName.append(end, valueStart-end); paramValue.append(valueStart+1, valueEnd-(valueStart+1)); } else { //this is the last paramter of the URL paramName.append(end, valueStart-end); paramValue.append(valueStart+1); } if (paramName.length() && paramValue.length()) {//insert the param into the map if it's valid paramName = decode(paramName.c_str()); paramValue = decode(paramValue.c_str()); parameterMap[ paramName ] = paramValue; } } else { break; //no valid parameter in the url } const char *start = end+1; end = strstr(start, "&") ? strstr(start, "&")+5 : (strstr(start, "&") ? strstr(start, "&")+1 : 0); //try to find a new paramter part } } }