char ThMLWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *module) { if (option) { char token[2112]; // cheese. Fix. int tokpos = 0; bool intoken = false; int word = 1; char val[128]; char *valto; char *ch; char wordstr[5]; unsigned int textStart = 0, lastAppendLen = 0, textEnd = 0; SWBuf tmp; bool newText = false; bool needWordOut = false; AttributeValue *wordAttrs = 0; SWBuf modName = (module)?module->getName():""; SWBuf wordSrcPrefix = modName; const SWBuf orig = text; const char * from = orig.c_str(); VerseKey *vkey = 0; if (key) { vkey = SWDYNAMIC_CAST(VerseKey, key); } for (text = ""; *from; from++) { if (*from == '<') { intoken = true; tokpos = 0; token[0] = 0; token[1] = 0; token[2] = 0; textEnd = text.length(); continue; } if (*from == '>') { // process tokens intoken = false; if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number sprintf(wordstr, "%03d", word++); needWordOut = (word > 2); wordAttrs = &(module->getEntryAttributes()["Word"][wordstr]); (*wordAttrs)["Strongs"] = val; //printf("Adding: [\"Word\"][%s][\"Strongs\"] = %s\n", wordstr, val); tmp = ""; tmp.append(text.c_str()+textStart, (int)(textEnd - textStart)); (*wordAttrs)["Text"] = tmp; text.append("</span>"); SWBuf ts; ts.appendFormatted("%d", textStart); (*wordAttrs)["TextStart"] = ts; //printf("Adding: [\"Word\"][%s][\"Text\"] = %s\n", wordstr, tmp.c_str()); newText = true; } else { // verb morph (*wordAttrs)["Morph"] = val; //printf("Adding: [\"Word\"][%s][\"Morph\"] = %s\n", wordstr, val); } } if (!strncmp(token, "sync type=\"morph\"", 17)) { for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; (*wordAttrs)["MorphClass"] = val; //printf("Adding: [\"Word\"][%s][\"MorphClass\"] = %s\n", wordstr, val); } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; (*wordAttrs)["Morph"] = val; //printf("Adding: [\"Word\"][%s][\"Morph\"] = %s\n", wordstr, val); } } newText = true; } // if not a strongs token, keep token in text text += '<'; text += token; text += '>'; if (needWordOut) { char wstr[10]; sprintf(wstr, "%03d", word-2); AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]); needWordOut = false; SWBuf strong = (*wAttrs)["Strongs"]; SWBuf morph = (*wAttrs)["Morph"]; SWBuf morphClass = (*wAttrs)["MorphClass"]; SWBuf wordText = (*wAttrs)["Text"]; SWBuf textSt = (*wAttrs)["TextStart"]; if (strong.size()) { char gh = 0; gh = isdigit(strong[0]) ? 0:strong[0]; if (!gh) { if (vkey) { gh = vkey->getTestament() ? 'H' : 'G'; } } else strong << 1; SWModule *sLex = 0; SWModule *sMorph = 0; if (gh == 'G') { sLex = defaultGreekLex; sMorph = defaultGreekParse; } if (gh == 'H') { sLex = defaultHebLex; sMorph = defaultHebParse; } SWBuf lexName = ""; if (sLex) { // we can pass the real lex name in, but we have some // aliases in the javascript to optimize bandwidth lexName = sLex->getName(); if (lexName == "StrongsGreek") lexName = "G"; if (lexName == "StrongsHebrew") lexName = "H"; } SWBuf wordID; if (vkey) { // optimize for bandwidth and use only the verse as the unique entry id wordID.appendFormatted("%d", vkey->getVerse()); } else { wordID = key->getText(); } for (unsigned int i = 0; i < wordID.size(); i++) { if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) { wordID[i] = '_'; } } wordID.appendFormatted("_%s%d", wordSrcPrefix.c_str(), atoi(wstr)); if (textSt.size()) { int textStr = atoi(textSt.c_str()); textStr += lastAppendLen; SWBuf spanStart = ""; if (!sMorph) sMorph = 0; // avoid unused warnings for now /* if (sMorph) { SWBuf popMorph = "<a onclick=\""; popMorph.appendFormatted("p(\'%s\',\'%s\','%s','');\" >%s</a>", sMorph->getName(), morph.c_str(), wordID.c_str(), morph.c_str()); morph = popMorph; } */ // 'p' = 'fillpop' to save bandwidth const char *m = strchr(morph.c_str(), ':'); if (m) m++; else m = morph.c_str(); spanStart.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','','%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str()); text.insert(textStr, spanStart); lastAppendLen = spanStart.length(); } } } if (newText) { textStart = text.length(); newText = false; } continue; } if (intoken) { if (tokpos < 2045) { token[tokpos++] = *from; // TODO: why is this + 2 ? token[tokpos+2] = 0; } } else { text += *from; } } char wstr[10]; sprintf(wstr, "%03d", word-1); AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]); needWordOut = false; SWBuf strong = (*wAttrs)["Strongs"]; SWBuf morph = (*wAttrs)["Morph"]; SWBuf morphClass = (*wAttrs)["MorphClass"]; SWBuf wordText = (*wAttrs)["Text"]; SWBuf textSt = (*wAttrs)["TextStart"]; if (strong.size()) { char gh = 0; gh = isdigit(strong[0]) ? 0:strong[0]; if (!gh) { if (vkey) { gh = vkey->getTestament() ? 'H' : 'G'; } } else strong << 1; SWModule *sLex = 0; if (gh == 'G') { sLex = defaultGreekLex; } if (gh == 'H') { sLex = defaultHebLex; } SWBuf lexName = ""; if (sLex) { // we can pass the real lex name in, but we have some // aliases in the javascript to optimize bandwidth lexName = sLex->getName(); if (lexName == "StrongsGreek") lexName = "G"; if (lexName == "StrongsHebrew") lexName = "H"; } SWBuf wordID; if (vkey) { // optimize for bandwidth and use only the verse as the unique entry id wordID.appendFormatted("%d", vkey->getVerse()); } else { wordID = key->getText(); } for (unsigned int i = 0; i < wordID.size(); i++) { if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) { wordID[i] = '_'; } } wordID.appendFormatted("_%s%d", wordSrcPrefix.c_str(), atoi(wstr)); if (textSt.size()) { int textStr = atoi(textSt.c_str()); textStr += lastAppendLen; SWBuf spanStart = ""; // 'p' = 'fillpop' to save bandwidth const char *m = strchr(morph.c_str(), ':'); if (m) m++; else m = morph.c_str(); spanStart.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','','%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str()); text.insert(textStr, spanStart); } } } return 0; }
char GBFOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module) { char token[2048]; //cheesy, we seem to like cheese :) int tokpos = 0; bool intoken = false; bool keepToken = false; // static QuoteStack quoteStack; SWBuf orig = text; SWBuf tmp; SWBuf value; bool suspendTextPassThru = false; bool handled = false; bool newWord = false; bool newText = false; bool lastspace = false; const char *wordStart = text.c_str(); const char *wordEnd = NULL; const char *textStart = NULL; const char *textEnd = NULL; SWBuf textNode = ""; SWBuf buf; text = ""; for (const char* from = orig.c_str(); *from; ++from) { if (*from == '<') { //start of new token detected intoken = true; tokpos = 0; token[0] = 0; token[1] = 0; token[2] = 0; textEnd = from-1; //end of last text node found wordEnd = text.c_str() + text.length();//not good, instead of wordEnd = to! continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); text += VerseKey::convertToOSIS(tmp.c_str(), key); lastspace = false; suspendTextPassThru = false; handled = true; } // Footnote if (!strcmp(token, "RF") || !strncmp(token, "RF ", 3)) { //the GBFFootnotes filter adds the attribute "swordFootnote", we want to catch that, too // pushString(buf, "<reference work=\"Bible.KJV\" reference=\""); text += "<note type=\"x-StudyNote\">"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "Rf")) { text += "</note>"; lastspace = false; handled = true; } // hebrew titles if (!strcmp(token, "TH")) { text += "<title type=\"psalm\">"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "Th")) { text += "</title>"; lastspace = false; handled = true; } // Italics assume transchange if (!strcmp(token, "FI")) { text += "<transChange type=\"added\">"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "Fi")) { text += "</transChange>"; lastspace = false; handled = true; } // less than if (!strcmp(token, "CT")) { text += "<"; newText = true; lastspace = false; handled = true; } // greater than if (!strcmp(token, "CG")) { text += ">"; newText = true; lastspace = false; handled = true; } // Paragraph break. For now use empty paragraph element if (!strcmp(token, "CM")) { text += "<milestone type=\"x-p\" />"; newText = true; lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; text += "<figure src=\""; const char *c; for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic for (c++;((*c) && (*c != '"')); c++) { text += *c; } text += "\" />"; lastspace = false; handled = true; } // Strongs numbers else if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs bool divineName = false; value = token+1; // normal strongs number //strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", value.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", value.c_str()); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! if (!strcmp(value.c_str(), "H03068")) { //divineName buf = ""; buf.appendFormatted("<divineName><w lemma=\"strong:%s\">", value.c_str()); divineName = true; } else { buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", value.c_str()); } text.insert(wordStart - text.c_str(), buf); if (divineName) { wordStart += 12; text += "</w></divineName>"; } else text += "</w>"; lastspace = false; } handled = true; } // Morphology else if (*token == 'W' && token[1] == 'T') { if (token[2] == 'G' || token[2] == 'H') { // Strongs value = token+2; } else value = token+1; if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", "robinson", value.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", "robinson", value.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", "robinson", value.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->Chapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; }
char OSISWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *module) { if (option) { char token[2112]; // cheese. Fix. int tokpos = 0; bool intoken = false; int wordNum = 1; char wordstr[5]; SWBuf modName = (module)?module->getName():""; // add TR to w src in KJV then remove this next line SWBuf wordSrcPrefix = (modName == "KJV")?SWBuf("TR"):modName; VerseKey *vkey = 0; if (key) { vkey = SWDYNAMIC_CAST(VerseKey, key); } const SWBuf orig = text; const char * from = orig.c_str(); for (text = ""; *from; ++from) { if (*from == '<') { intoken = true; tokpos = 0; token[0] = 0; token[1] = 0; token[2] = 0; continue; } if (*from == '>') { // process tokens intoken = false; if ((*token == 'w') && (token[1] == ' ')) { // Word XMLTag wtag(token); sprintf(wordstr, "%03d", wordNum); SWBuf lemmaClass; SWBuf lemma; SWBuf morph; SWBuf page; SWBuf src; char gh = 0; page = module->getEntryAttributes()["Word"][wordstr]["Page"].c_str(); if (page.length()) page = (SWBuf)"p:" + page; int count = atoi(module->getEntryAttributes()["Word"][wordstr]["PartCount"].c_str()); for (int i = 0; i < count; i++) { // for now, lemma class can just be equal to last lemma class in multi part word SWBuf tmp = "LemmaClass"; if (count > 1) tmp.appendFormatted(".%d", i+1); lemmaClass = module->getEntryAttributes()["Word"][wordstr][tmp]; tmp = "Lemma"; if (count > 1) tmp.appendFormatted(".%d", i+1); tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str()); // if we're strongs, if (lemmaClass == "strong") { gh = tmp[0]; tmp << 1; } if (lemma.size()) lemma += "|"; lemma += tmp; tmp = "Morph"; if (count > 1) tmp.appendFormatted(".%d", i+1); tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str()); if (morph.size()) morph += "|"; morph += tmp; tmp = "Src"; if (count > 1) tmp.appendFormatted(".%d", i+1); tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str()); if (!tmp.length()) tmp.appendFormatted("%d", wordNum); tmp.insert(0, wordSrcPrefix); if (src.size()) src += "|"; src += tmp; } SWBuf lexName = ""; // we can pass the real lex name in, but we have some // aliases in the javascript to optimize bandwidth if ((gh == 'G') && (defaultGreekLex)) { lexName = (!strcmp(defaultGreekLex->getName(), "StrongsGreek"))?"G":defaultGreekLex->getName(); } else if ((gh == 'H') && (defaultHebLex)) { lexName = (!strcmp(defaultHebLex->getName(), "StrongsHebrew"))?"H":defaultHebLex->getName(); } SWBuf xlit = wtag.getAttribute("xlit"); if ((lemmaClass != "strong") && (xlit.startsWith("betacode:"))) { lexName = "betacode"; // const char *m = strchr(xlit.c_str(), ':'); // strong = ++m; } SWBuf wordID; if (vkey) { // optimize for bandwidth and use only the verse as the unique entry id wordID.appendFormatted("%d", vkey->getVerse()); } else { wordID = key->getText(); } wordID.appendFormatted("_%s", src.c_str()); // clean up our word ID for XHTML for (unsigned int i = 0; i < wordID.size(); i++) { if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) { wordID[i] = '_'; } } // 'p' = 'fillpop' to save bandwidth text.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','%s','%s');\" >", lexName.c_str(), lemma.c_str(), wordID.c_str(), morph.c_str(), page.c_str(), modName.c_str()); wordNum++; if (wtag.isEmpty()) { text += "</w></span>"; } } if ((*token == '/') && (token[1] == 'w') && option) { // Word text += "</w></span>"; continue; } // if not a strongs token, keep token in text text.append('<'); text.append(token); text.append('>'); continue; } if (intoken) { if (tokpos < 2045) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { text.append(*from); } } } return 0; }