char OSISReferenceLinks::processText(std::string &text, const SWKey *key, const SWModule *module) { (void) key; (void) module; if (option) return 0; std::string token; bool intoken = false; bool stripThisToken = false; std::string orig = text; const char *from = orig.c_str(); for (text = ""; *from; ++from) { if (*from == '<') { intoken = true; token = ""; continue; } else if (*from == '>') { // process tokens intoken = false; if (std::strncmp(token.c_str(), "reference", 9) && std::strncmp(token.c_str(), "/reference", 10)) { text.push_back('<'); text.append(token); text.push_back('>'); } else { XMLTag tag; tag = token.c_str(); if (!tag.isEndTag() && type == tag.getAttribute("type") && (!subType.size() || subType == tag.getAttribute("subType"))) { stripThisToken = true; continue; } else if (tag.isEndTag() && stripThisToken) { stripThisToken = false; continue; } text.push_back('<'); text.append(token); text.push_back('>'); } continue; } if (intoken) { //copy token token.push_back(*from); } else { //copy text which is not inside a token text.push_back(*from); } } return 0; }
/**
 * Extracts OSIS <note> elements from the text.
 *
 * The body of each non-empty note is removed from `text` and accumulated
 * separately. When the closing </note> is reached and the module processes
 * entry attributes, the start tag's attributes and the collected body are
 * stored under EntryAttributes["Footnote"][N] (N counts from 1); for notes of
 * type "crossReference" a "refList" entry is stored as well, built from the
 * osisRef attributes of contained <reference> tags or, if none were seen, by
 * parsing the note body as a verse list. The note's start and end tags are
 * kept in the text only when the option is on or the note is a
 * crossReference.
 *
 * CR/LF characters in the input are dropped (sometimes replaced by a space).
 *
 * @param text   in/out buffer holding the markup to filter
 * @param key    key used to seed the verse-list parser; may be null
 * @param module module whose entry attributes receive the footnotes; may be
 *               null, in which case no attributes are recorded
 * @return always 0
 */
char OSISFootnotes::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
    SWBuf token;
    bool intoken = false;
    bool hide = false;
    SWBuf tagText;
    XMLTag startTag;
    SWBuf refs = "";
    int footnoteNum = 1;
    char buf[254];

    // Obtain a VerseKey to parse cross-reference lists with; fall back to a
    // plain VerseKey when the module/key does not provide one.
    SWKey *p = (module) ? module->createKey() : (key) ? key->clone() : new VerseKey();
    VerseKey *parser = SWDYNAMIC_CAST(VerseKey, p);
    if (!parser) {
        delete p;
        parser = new VerseKey();
    }
    // The key-selection above explicitly allows a null key, so do not
    // dereference it unconditionally.
    if (key)
        *parser = key->getText();

    SWBuf orig = text;
    const char *from = orig.c_str();

    XMLTag tag;
    bool strongsMarkup = false;

    for (text = ""; *from; ++from) {

        // remove all newlines temporarily to fix kjv2003 module
        // NOTE(review): this inspects the second-to-last output character
        // (length()-2), not the last one -- kept as is; confirm intent.
        if ((*from == 10) || (*from == 13)) {
            if ((text.length() > 1) && (text[text.length() - 2] != ' ') && (*(from + 1) != ' '))
                text.append(' ');
            continue;
        }

        if (*from == '<') {
            intoken = true;
            token = "";
            continue;
        }

        if (*from == '>') { // process tokens
            intoken = false;
            if (!strncmp(token, "note", 4) || !strncmp(token.c_str(), "/note", 5)) {
                tag = token;

                if (!tag.isEndTag()) {
                    if (tag.getAttribute("type")
                        && (!strcmp("x-strongsMarkup", tag.getAttribute("type"))
                            || !strcmp("strongsMarkup", tag.getAttribute("type")))) { // second form is deprecated
                        tag.setEmpty(false); // handle bug in KJV2003 module where some note open tags were <note ... />
                        strongsMarkup = true;
                    }

                    if (!tag.isEmpty()) {
                        // Start collecting the note body out of band.
                        refs = "";
                        startTag = tag;
                        hide = true;
                        tagText = "";
                        continue;
                    }
                }
                if (hide && tag.isEndTag()) {
                    // `module` may be null (see the key selection above); only
                    // record attributes when there is a module to record into.
                    // don't parse strongsMarkup to EntryAttributes as Footnote
                    if (module && module->isProcessEntryAttributes() && !strongsMarkup) {
                        sprintf(buf, "%i", footnoteNum++);
                        StringList attributes = startTag.getAttributeNames();
                        for (StringList::const_iterator it = attributes.begin(); it != attributes.end(); it++) {
                            module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
                        }
                        module->getEntryAttributes()["Footnote"][buf]["body"] = tagText;
                        startTag.setAttribute("swordFootnote", buf);
                        if ((startTag.getAttribute("type")) && (!strcmp(startTag.getAttribute("type"), "crossReference"))) {
                            if (!refs.length())
                                refs = parser->parseVerseList(tagText.c_str(), *parser, true).getRangeText();
                            module->getEntryAttributes()["Footnote"][buf]["refList"] = refs.c_str();
                        }
                    }
                    hide = false;
                    if (option || (startTag.getAttribute("type") && !strcmp(startTag.getAttribute("type"), "crossReference"))) {
                        // we want the tag in the text; crossReferences are handled by another filter
                        text.append(startTag);
                        // The body is not put back; it was stored under
                        // EntryAttributes["Footnote"][N]["body"] above.
                    }
                    else
                        continue;
                }
                strongsMarkup = false;
            }

            // Collect osisRef values of <reference> tags for the refList.
            if (!strncmp(token, "reference", 9)) {
                if (refs.length()) {
                    refs.append("; ");
                }

                const char *attr = strstr(token.c_str() + 9, "osisRef=\"");
                const char *end = attr ? strchr(attr + 9, '"') : 0;

                if (attr && end) {
                    refs.append(attr + 9, end - (attr + 9));
                }
            }

            // Tokens go to the visible text, or to the note body while hiding.
            if (!hide) {
                text.append('<');
                text.append(token);
                text.append('>');
            }
            else {
                tagText.append('<');
                tagText.append(token);
                tagText.append('>');
            }
            continue;
        }

        if (intoken) { // copy token
            token.append(*from);
        }
        else if (!hide) { // copy text which is not inside a token
            text.append(*from);
        }
        else
            tagText.append(*from);
    }
    delete parser;
    return 0;
}
char OSISScripref::processText(std::string &text, const SWKey *key, const SWModule *module) { (void) key; (void) module; std::string token; bool intoken = false; bool hide = false; std::string tagText; XMLTag startTag; std::string orig = text; const char *from = orig.c_str(); XMLTag tag; for (text = ""; *from; ++from) { if (*from == '<') { intoken = true; token = ""; continue; } if (*from == '>') { // process tokens intoken = false; tag = token.c_str(); if (!std::strncmp(token.c_str(), "note", 4) || !std::strncmp(token.c_str(), "/note", 5)) { if (!tag.isEndTag() && !tag.isEmpty()) { startTag = tag; if (tag.attribute("type") == "crossReference") { hide = true; tagText = ""; if (option) { // we want the tag in the text text.push_back('<'); text.append(token); text.push_back('>'); } continue; } } if (hide && tag.isEndTag()) { hide = false; if (option) { // we want the tag in the text text.append(tagText); // end tag gets added further down } else continue; // don't let the end tag get added to the text } } // if not a heading token, keep token in text if (!hide) { text.push_back('<'); text.append(token); text.push_back('>'); } else { tagText.push_back('<'); tagText.append(token); tagText.push_back('>'); } continue; } if (intoken) { //copy token token.push_back(*from); } else if (!hide) { //copy text which is not inside a token text.push_back(*from); } else tagText.push_back(*from); } return 0; }
char ThMLHeadings::processText(SWBuf &text, const SWKey *key, const SWModule *module) { SWBuf token; bool intoken = false; bool isheader = false; bool hide = false; bool preverse = false; bool withinDiv = false; SWBuf header; int headerNum = 0; int pvHeaderNum = 0; char buf[254]; XMLTag startTag; SWBuf orig = text; const char *from = orig.c_str(); XMLTag tag; for (text = ""; *from; ++from) { if (*from == '<') { intoken = true; token = ""; continue; } if (*from == '>') { // process tokens intoken = false; if (!strnicmp(token.c_str(), "div", 3) || !strnicmp(token.c_str(), "/div", 4)) { withinDiv = (!strnicmp(token.c_str(), "div", 3)); tag = token; if (hide && tag.isEndTag()) { if (module->isProcessEntryAttributes() && (option || (!preverse))) { if (preverse) { sprintf(buf, "%i", pvHeaderNum++); module->getEntryAttributes()["Heading"]["Preverse"][buf] = header; } else { sprintf(buf, "%i", headerNum++); module->getEntryAttributes()["Heading"]["Interverse"][buf] = header; if (option) { // we want the tag in the text text.append(header); } } StringList attributes = startTag.getAttributeNames(); for (StringList::const_iterator it = attributes.begin(); it != attributes.end(); it++) { module->getEntryAttributes()["Heading"][buf][it->c_str()] = startTag.getAttribute(it->c_str()); } } hide = false; if (!option || preverse) { // we don't want the tag in the text anymore preverse = false; continue; } preverse = false; } if (tag.getAttribute("class") && ((!stricmp(tag.getAttribute("class"), "sechead")) || (!stricmp(tag.getAttribute("class"), "title")))) { isheader = true; if (!tag.isEndTag()) { //start tag if (!tag.isEmpty()) { startTag = tag; /* how do we tell a ThML preverse title from one that should be in the text? probably if any text is before the title... 
just assuming all are preverse for now } if (tag.getAttribute("subtype") && !stricmp(tag.getAttribute("subtype"), "x-preverse")) { */ hide = true; preverse = true; header = ""; continue; } // move back up under startTag = tag } /* this is where non-preverse will go eventually if (!tag.isEndTag()) { //start tag hide = true; header = ""; if (option) { // we want the tag in the text text.append('<'); text.append(token); text.append('>'); } continue; } */ } else isheader = false; } if (withinDiv && isheader) { header.append('<'); header.append(token); header.append('>'); } else { // if not a heading token, keep token in text if (!hide) { text.append('<'); text.append(token); text.append('>'); } } continue; } if (intoken) { //copy token token.append(*from); } else if (!hide) { //copy text which is not inside a token text.append(*from); } else header.append(*from); } return 0; }
// Return true if the content was handled or is to be ignored. // false if the what has been seen is to be accumulated and considered later. bool handleToken(std::string & text, XMLTag & token) { // The start token for the current entry; static XMLTag startTag; // Flags to indicate whether we are in a entry, entryFree or superentry static bool inEntry = false; static bool inEntryFree = false; static bool inSuperEntry = false; std::string const & tokenName = token.name(); static char const * splitPtr; static char const * splitPtr2 = nullptr; static std::array<char, 4096> splitBuffer; static SWKey tmpKey; //-- START TAG ------------------------------------------------------------------------- if (!token.isEndTag()) { // If we are not in an "entry" and we see one, then enter it. if (!inEntry && !inEntryFree && !inSuperEntry) { inEntry = (tokenName == "entry"); inEntryFree = (tokenName == "entryFree"); inSuperEntry = (tokenName == "superentry"); if (inEntry || inEntryFree || inSuperEntry) { #ifdef DEBUG cout << "Entering " << tokenName << endl; #endif startTag = token; text = ""; keyStr = token.attribute("n"); // P5 with linking and/or non-URI chars if (keyStr.empty()) { keyStr = token.attribute("sortKey"); // P5 otherwise if (keyStr.empty()) { keyStr = token.attribute("key"); // P4 } } return false; // make tag be part of the output } } } //-- EMPTY and END TAG --------------------------------------------------------------------------------------------- else { // ENTRY end // If we see the end of an entry that we are in, then leave it if ((inEntry && (tokenName == "entry" )) || (inEntryFree && (tokenName == "entryFree" )) || (inSuperEntry && (tokenName == "superentry"))) { #ifdef DEBUG cout << "Leaving " << tokenName << endl; #endif // Only one is false coming into here, // but all must be on leaving. 
inEntry = false; inEntryFree = false; inSuperEntry = false; text += token.toString(); entryCount++; #ifdef DEBUG cout << "keyStr: " << keyStr << endl; #endif splitPtr = std::strchr(keyStr.c_str(), '|'); if (splitPtr) { std::strncpy (splitBuffer.data(), keyStr.c_str(), splitPtr - keyStr.c_str()); splitBuffer[splitPtr - keyStr.c_str()] = 0; currentKey->setText(splitBuffer.data()); #ifdef DEBUG cout << "splitBuffer: " << splitBuffer.data() << endl; cout << "currentKey: " << currentKey->getText() << endl; #endif writeEntry(*currentKey, text); #if 1 while (splitPtr) { splitPtr += 1; splitPtr2 = std::strstr(splitPtr, "|"); entryCount++; if (splitPtr2) { std::strncpy (splitBuffer.data(), splitPtr, splitPtr2 - splitPtr); splitBuffer[splitPtr2 - splitPtr] = 0; #ifdef DEBUG cout << "splitBuffer: " << splitBuffer.data() << endl; cout << "currentKey: " << currentKey->getText() << endl; #endif linkToEntry(currentKey->getText(), splitBuffer.data()); splitPtr = splitPtr2; } else { std::strcpy(splitBuffer.data(), splitPtr); #ifdef DEBUG cout << "splitBuffer: " << splitBuffer.data() << endl; cout << "currentKey: " << currentKey->getText() << endl; #endif linkToEntry(currentKey->getText(), splitBuffer.data()); splitPtr = nullptr; } } #endif } else { currentKey->setText(keyStr); writeEntry(*currentKey, text); } // Since we consumed the text, clear it // and tell the caller that the tag was consumed. text = ""; return true; } } return false; }
char OSISMorphSegmentation::processText(SWBuf &text, const SWKey * /*key*/, const SWModule *module) { SWBuf token; bool intoken = false; bool hide = false; SWBuf orig( text ); const char *from = orig.c_str(); XMLTag tag; SWBuf tagText = ""; unsigned int morphemeNum = 0; bool inMorpheme = false; SWBuf buf; for (text = ""; *from; ++from) { if (*from == '<') { intoken = true; token = ""; continue; } if (*from == '>') { // process tokens intoken = false; if (!strncmp(token.c_str(), "seg ", 4) || !strncmp(token.c_str(), "/seg", 4)) { tag = token; if (!tag.isEndTag() && tag.getAttribute("type") && !strcmp("morph", tag.getAttribute("type"))) { //<seg type="morph"> start tag hide = !option; //only hide if option is Off tagText = ""; inMorpheme = true; } if (tag.isEndTag()) { buf.setFormatted("%.3d", morphemeNum++); module->getEntryAttributes()["Morpheme"][buf]["body"] = tagText; inMorpheme = false; } if (hide) { //hides start and end tags as long as hide is set if (tag.isEndTag()) { //</seg> hide = false; } continue; //leave out the current token } } //end of seg tag handling text.append('<'); text.append(token); text.append('>'); if (inMorpheme) { tagText.append('<'); tagText.append(token); tagText.append('>'); } hide = false; continue; } //end of intoken part if (intoken) { //copy token token.append(*from); } else { //copy text which is not inside of a tag text.append(*from); if (inMorpheme) { tagText.append(*from); } } } return 0; }