char OSISLemma::processText(std::string &text, const SWKey *key, const SWModule *module) { (void) key; (void) module; std::string token; bool intoken = false; const std::string orig = text; const char * from = orig.c_str(); if (!option) { for (text = ""; *from; ++from) { if (*from == '<') { intoken = true; token = ""; continue; } if (*from == '>') { // process tokens intoken = false; if (hasPrefix(token, "w ")) { // Word XMLTag wtag(token.c_str()); int count = wtag.getAttributePartCount("lemma", ' '); for (int i = 0; i < count; i++) { std::string a = wtag.getAttribute("lemma", i, ' '); auto const prefixPos(a.find(':')); if (prefixPos == std::string::npos || (prefixPos >= sizeof("lemma.") - 1u && std::strncmp(a.c_str(), "lemma.", sizeof("lemma.") - 1u))) { // remove attribute part wtag.setAttribute("lemma", 0, i, ' '); i--; count--; } } token = wtag; trimString(token); // drop <> token.erase(0u, 1u); token.pop_back(); } // keep token in text text.push_back('<'); text.append(token); text.push_back('>'); continue; } if (intoken) { token += *from; } else { text.push_back(*from); } } } return 0; }
char OSISXlit::processText(std::string &text, const SWKey *key, const SWModule *module) { (void) key; (void) module; std::string token; bool intoken = false; const std::string orig = text; const char * from = orig.c_str(); if (!option) { for (text = ""; *from; ++from) { if (*from == '<') { intoken = true; token = ""; continue; } if (*from == '>') { // process tokens intoken = false; if (hasPrefix(token, "w ")) { // Word XMLTag wtag(token.c_str()); if (!wtag.attribute("xlit").empty()) { wtag.eraseAttribute("xlit"); token = wtag.toString(); trimString(token); // drop <> token.erase(0u, 1u); token.pop_back(); } } // keep token in text text.push_back('<'); text.append(token); text.push_back('>'); continue; } if (intoken) { token += *from; } else { text.push_back(*from); } } } return 0; }
char OSISGlosses::processText(SWBuf &text, const SWKey *key, const SWModule *module) { SWBuf token; bool intoken = false; const SWBuf orig = text; const char * from = orig.c_str(); if (!option) { for (text = ""; *from; ++from) { if (*from == '<') { intoken = true; token = ""; continue; } if (*from == '>') { // process tokens intoken = false; if (token.startsWith("w ")) { // Word XMLTag wtag(token); const char *l = wtag.getAttribute("gloss"); if (l) { wtag.setAttribute("gloss", 0); token = wtag; token.trim(); // drop <> token << 1; token--; } } // keep token in text text.append('<'); text.append(token); text.append('>'); continue; } if (intoken) { token += *from; } else { text.append(*from); } } } return 0; }
char OSISEnum::processText(std::string &text, const SWKey *key, const SWModule *module) { (void) key; (void) module; if (!option) { std::string result; std::string token; bool inToken = false; for (auto const & c : text) { if (c == '<') { inToken = true; } else if (c == '>') { // Process token: inToken = false; if (hasPrefix(token, "w ")) { // Word XMLTag wtag(token.c_str()); if (!wtag.attribute("n").empty()) { wtag.eraseAttribute("n"); token = wtag.toString(); trimString(token); result.append(token); } else { // Keep original token in text: result.push_back('<'); result.append(token); result.push_back('>'); } } else { // Keep original token in text: result.push_back('<'); result.append(token); result.push_back('>'); } token.clear(); } else { (inToken ? token : result).push_back(c); } } text = std::move(result); } return 0; }
char OSISLemma::processText(SWBuf &text, const SWKey *key, const SWModule *module) { SWBuf token; bool intoken = false; const SWBuf orig = text; const char * from = orig.c_str(); if (!option) { for (text = ""; *from; ++from) { if (*from == '<') { intoken = true; token = ""; continue; } if (*from == '>') { // process tokens intoken = false; if (token.startsWith("w ")) { // Word XMLTag wtag(token); // always save off lemma if we haven't yet if (!wtag.getAttribute("savlm")) { const char *l = wtag.getAttribute("lemma"); if (l) { wtag.setAttribute("savlm", l); } } int count = wtag.getAttributePartCount("lemma", ' '); for (int i = 0; i < count; i++) { SWBuf a = wtag.getAttribute("lemma", i, ' '); const char *prefix = a.stripPrefix(':'); if ((!prefix) || ((SWBuf)prefix).startsWith("lemma.")) { // remove attribute part wtag.setAttribute("lemma", 0, i, ' '); i--; count--; } } token = wtag; token.trim(); // drop <> token << 1; token--; } // keep token in text text.append('<'); text.append(token); text.append('>'); continue; } if (intoken) { token += *from; } else { text.append(*from); } } } return 0; }
char OSISWordJS::processText(std::string &text, const SWKey *key, const SWModule *module) { if (option) { char token[2112]; // cheese. Fix. int tokpos = 0; bool intoken = false; int wordNum = 1; char wordstr[5]; std::string modName = (module)?module->getName():""; // add TR to w src in KJV then remove this next line std::string wordSrcPrefix = (modName == "KJV")?std::string("TR"):modName; VerseKey const * vkey = 0; if (key) { vkey = dynamic_cast<VerseKey const *>(key); } const std::string orig = text; const char * from = orig.c_str(); for (text = ""; *from; ++from) { if (*from == '<') { intoken = true; tokpos = 0; token[0] = 0; token[1] = 0; token[2] = 0; continue; } if (*from == '>') { // process tokens intoken = false; if ((*token == 'w') && (token[1] == ' ')) { // Word XMLTag wtag(token); sprintf(wordstr, "%03d", wordNum); std::string lemmaClass; std::string lemma; std::string morph; std::string page; std::string src; char gh = 0; page = module->getEntryAttributes()["Word"][wordstr]["Page"].c_str(); if (page.length()) page = (std::string)"p:" + page; int count = atoi(module->getEntryAttributes()["Word"][wordstr]["PartCount"].c_str()); for (int i = 0; i < count; i++) { // for now, lemma class can just be equal to last lemma class in multi part word std::string tmp = "LemmaClass"; if (count > 1) tmp += formatted(".%d", i+1); lemmaClass = module->getEntryAttributes()["Word"][wordstr][tmp]; tmp = "Lemma"; if (count > 1) tmp += formatted(".%d", i+1); tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str()); // if we're strongs, if (lemmaClass == "strong") { gh = tmp[0]; tmp.erase(0u, 1u); } if (lemma.size()) lemma += "|"; lemma += tmp; tmp = "Morph"; if (count > 1) tmp += formatted(".%d", i+1); tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str()); if (morph.size()) morph += "|"; morph += tmp; tmp = "Src"; if (count > 1) tmp += formatted(".%d", i+1); tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str()); if (!tmp.length()) tmp += formatted("%d", wordNum); tmp.insert(0, wordSrcPrefix); if (src.size()) src += "|"; src += tmp; } std::string lexName = ""; // we can pass the real lex name in, but we have some // aliases in the javascript to optimize bandwidth if ((gh == 'G') && (defaultGreekLex)) { lexName = (!strcmp(defaultGreekLex->getName(), "StrongsGreek"))?"G":defaultGreekLex->getName(); } else if ((gh == 'H') && (defaultHebLex)) { lexName = (!strcmp(defaultHebLex->getName(), "StrongsHebrew"))?"H":defaultHebLex->getName(); } std::string xlit = wtag.getAttribute("xlit"); if ((lemmaClass != "strong") && (hasPrefix(xlit, "betacode:"))) { lexName = "betacode"; // const char *m = strchr(xlit.c_str(), ':'); // strong = ++m; } std::string wordID; if (vkey) { // optimize for bandwidth and use only the verse as the unique entry id wordID += formatted("%d", vkey->getVerse()); } else { wordID = key->getText(); } wordID += formatted("_%s", src.c_str()); // clean up our word ID for XHTML for (unsigned int i = 0; i < wordID.size(); i++) { if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) { wordID[i] = '_'; } } // 'p' = 'fillpop' to save bandwidth text += formatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','%s','%s');\" >", lexName.c_str(), lemma.c_str(), wordID.c_str(), morph.c_str(), page.c_str(), modName.c_str()); wordNum++; if (wtag.isEmpty()) { text += "</w></span>"; } } if ((*token == '/') && (token[1] == 'w') && option) { // Word text += "</w></span>"; continue; } // if not a strongs token, keep token in text text.push_back('<'); text.append(token); text.push_back('>'); continue; } if (intoken) { if (tokpos < 2045) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { text.push_back(*from); } } } return 0; }
char OSISStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) { SWBuf token; bool intoken = false; int wordNum = 1; char wordstr[5]; const char *wordStart = 0; SWBuf page = ""; // some modules include <seg> page info, so we add these to the words const SWBuf orig = text; const char * from = orig.c_str(); for (text = ""; *from; ++from) { if (*from == '<') { intoken = true; token = ""; continue; } if (*from == '>') { // process tokens intoken = false; // possible page seg -------------------------------- if (token.startsWith("seg ")) { XMLTag stag(token); SWBuf type = stag.getAttribute("type"); if (type == "page") { SWBuf number = stag.getAttribute("subtype"); if (number.length()) { page = number; } } } // --------------------------------------------------- if (token.startsWith("w ")) { // Word XMLTag wtag(token); if (module->isProcessEntryAttributes()) { wordStart = from+1; char gh = 0; VerseKey *vkey = 0; if (key) { vkey = SWDYNAMIC_CAST(VerseKey, key); } SWBuf lemma = ""; SWBuf morph = ""; SWBuf src = ""; SWBuf morphClass = ""; SWBuf lemmaClass = ""; const char *attrib; sprintf(wordstr, "%03d", wordNum); // why is morph entry attribute processing done in here? Well, it's faster. It makes more local sense to place this code in osismorph. // easier to keep lemma and morph in same wordstr number too maybe. if ((attrib = wtag.getAttribute("morph"))) { int count = wtag.getAttributePartCount("morph", ' '); int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 do { SWBuf mClass = ""; SWBuf mp = ""; attrib = wtag.getAttribute("morph", i, ' '); if (i < 0) i = 0; // to handle our -1 condition const char *m = strchr(attrib, ':'); if (m) { int len = m-attrib; mClass.append(attrib, len); attrib += (len+1); } if ((mClass == "x-Robinsons") || (mClass == "x-Robinson") || (mClass == "Robinson")) { mClass = "robinson"; } if (i) { morphClass += " "; morph += " "; } mp += attrib; morphClass += mClass; morph += mp; if (count > 1) { SWBuf tmp; tmp.setFormatted("Morph.%d", i+1); module->getEntryAttributes()["Word"][wordstr][tmp] = mp; tmp.setFormatted("MorphClass.%d", i+1); module->getEntryAttributes()["Word"][wordstr][tmp] = mClass; } } while (++i < count); } if ((attrib = wtag.getAttribute("lemma"))) { int count = wtag.getAttributePartCount("lemma", ' '); int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 do { gh = 0; SWBuf lClass = ""; SWBuf l = ""; attrib = wtag.getAttribute("lemma", i, ' '); if (i < 0) i = 0; // to handle our -1 condition const char *m = strchr(attrib, ':'); if (m) { int len = m-attrib; lClass.append(attrib, len); attrib += (len+1); } if ((lClass == "x-Strongs") || (lClass == "strong") || (lClass == "Strong")) { if (isdigit(attrib[0])) { if (vkey) { gh = vkey->getTestament() ? 'H' : 'G'; } } else { gh = *attrib; attrib++; } lClass = "strong"; } if (gh) l += gh; l += attrib; if (i) { lemmaClass += " "; lemma += " "; } lemma += l; lemmaClass += lClass; if (count > 1) { SWBuf tmp; tmp.setFormatted("Lemma.%d", i+1); module->getEntryAttributes()["Word"][wordstr][tmp] = l; tmp.setFormatted("LemmaClass.%d", i+1); module->getEntryAttributes()["Word"][wordstr][tmp] = lClass; } } while (++i < count); module->getEntryAttributes()["Word"][wordstr]["PartCount"].setFormatted("%d", count); } if ((attrib = wtag.getAttribute("src"))) { int count = wtag.getAttributePartCount("src", ' '); int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 do { SWBuf mp = ""; attrib = wtag.getAttribute("src", i, ' '); if (i < 0) i = 0; // to handle our -1 condition if (i) src += " "; mp += attrib; src += mp; if (count > 1) { SWBuf tmp; tmp.setFormatted("Src.%d", i+1); module->getEntryAttributes()["Word"][wordstr][tmp] = mp; } } while (++i < count); } if (lemma.length()) module->getEntryAttributes()["Word"][wordstr]["Lemma"] = lemma; if (lemmaClass.length()) module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = lemmaClass; if (morph.length()) module->getEntryAttributes()["Word"][wordstr]["Morph"] = morph; if (morphClass.length()) module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = morphClass; if (src.length()) module->getEntryAttributes()["Word"][wordstr]["Src"] = src; if (page.length()) module->getEntryAttributes()["Word"][wordstr]["Page"] = page; if (wtag.isEmpty()) { int j; for (j = token.length()-1; ((j>0) && (strchr(" /", token[j]))); j--); token.size(j+1); } token += " wn=\""; token += wordstr; token += "\""; if (wtag.isEmpty()) { token += "/"; } wordNum++; } if (!option) { /* * Code which handles multiple lemma types. Kindof works but breaks at least WEBIF filters for strongs. * int count = wtag.getAttributePartCount("lemma", ' '); for (int i = 0; i < count; i++) { SWBuf a = wtag.getAttribute("lemma", i, ' '); const char *prefix = a.stripPrefix(':'); if ((prefix) && (!strcmp(prefix, "x-Strongs") || !strcmp(prefix, "strong") || !strcmp(prefix, "Strong"))) { // remove attribute part wtag.setAttribute("lemma", 0, i, ' '); i--; count--; } } * Instead the codee below just removes the lemma attribute *****/ const char *l = wtag.getAttribute("lemma"); if (l) { SWBuf savlm = l; wtag.setAttribute("lemma", 0); wtag.setAttribute("savlm", savlm); token = wtag; token.trim(); // drop <> token << 1; token--; } } } if (token.startsWith("/w")) { // Word End if (module->isProcessEntryAttributes()) { if (wordStart) { SWBuf tmp; tmp.append(wordStart, (from-wordStart)-3); sprintf(wordstr, "%03d", wordNum-1); module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; } } wordStart = 0; } // keep token in text text.append('<'); text.append(token); text.append('>'); continue; } if (intoken) { token += *from; } else { text.append(*from); } } return 0; }