Beispiel #1
0
char OSISReferenceLinks::processText(std::string &text, const SWKey *key, const SWModule *module) {
    (void) key;
    (void) module;
    if (option) return 0;

    std::string token;
    bool intoken        = false;
    bool stripThisToken = false;

    std::string orig = text;
    const char *from = orig.c_str();

    for (text = ""; *from; ++from) {
        if (*from == '<') {
            intoken = true;
            token = "";
            continue;
        }
        else if (*from == '>') {    // process tokens
            intoken = false;
            if (std::strncmp(token.c_str(), "reference", 9)
                && std::strncmp(token.c_str(), "/reference", 10)) {
                text.push_back('<');
                text.append(token);
                text.push_back('>');
            }
            else {
                XMLTag tag;
                tag = token.c_str();
                if (!tag.isEndTag() && type == tag.getAttribute("type") && (!subType.size() || subType == tag.getAttribute("subType"))) {
                    stripThisToken = true;
                    continue;
                }
                else if (tag.isEndTag() && stripThisToken) {
                    stripThisToken = false;
                    continue;
                }
                text.push_back('<');
                text.append(token);
                text.push_back('>');
            }
            continue;
        }

        if (intoken) { //copy token
            token.push_back(*from);
        }
        else { //copy text which is not inside a token
            text.push_back(*from);
        }
    }
    return 0;
}
Beispiel #2
0
char OSISFootnotes::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	SWBuf token;
	bool intoken    = false;
	bool hide       = false;
	SWBuf tagText;
	XMLTag startTag;
	SWBuf refs = "";
	int footnoteNum = 1;
	char buf[254];
	SWKey *p = (module) ? module->createKey() : (key) ? key->clone() : new VerseKey();
        VerseKey *parser = SWDYNAMIC_CAST(VerseKey, p);
        if (!parser) {
        	delete p;
                parser = new VerseKey();
        }
        *parser = key->getText();

	SWBuf orig = text;
	const char *from = orig.c_str();

	XMLTag tag;
	bool strongsMarkup = false;


	for (text = ""; *from; ++from) {

		// remove all newlines temporarily to fix kjv2003 module
		if ((*from == 10) || (*from == 13)) {
			if ((text.length()>1) && (text[text.length()-2] != ' ') && (*(from+1) != ' '))
				text.append(' ');
			continue;
		}


		if (*from == '<') {
			intoken = true;
			token = "";
			continue;
		}



		if (*from == '>') {	// process tokens
			intoken = false;
			if (!strncmp(token, "note", 4) || !strncmp(token.c_str(), "/note", 5)) {
				tag = token;

				if (!tag.isEndTag()) {
					if (tag.getAttribute("type") && (!strcmp("x-strongsMarkup", tag.getAttribute("type"))
											|| !strcmp("strongsMarkup", tag.getAttribute("type")))	// deprecated
							) {
						tag.setEmpty(false);  // handle bug in KJV2003 module where some note open tags were <note ... />
						strongsMarkup = true;
					}

					if (!tag.isEmpty()) {
//					if ((!tag.isEmpty()) || (SWBuf("strongsMarkup") == tag.getAttribute("type"))) {
						refs = "";
						startTag = tag;
						hide = true;
						tagText = "";
						continue;
					}
				}
				if (hide && tag.isEndTag()) {
					if (module->isProcessEntryAttributes() && !strongsMarkup) { //don`t parse strongsMarkup to EntryAttributes as Footnote
						sprintf(buf, "%i", footnoteNum++);
						StringList attributes = startTag.getAttributeNames();
						for (StringList::const_iterator it = attributes.begin(); it != attributes.end(); it++) {
							module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
						}
						module->getEntryAttributes()["Footnote"][buf]["body"] = tagText;
						startTag.setAttribute("swordFootnote", buf);
						if ((startTag.getAttribute("type")) && (!strcmp(startTag.getAttribute("type"), "crossReference"))) {
							if (!refs.length())
								refs = parser->parseVerseList(tagText.c_str(), *parser, true).getRangeText();
							module->getEntryAttributes()["Footnote"][buf]["refList"] = refs.c_str();
						}
					}
					hide = false;
					if (option || (startTag.getAttribute("type") && !strcmp(startTag.getAttribute("type"), "crossReference"))) {	// we want the tag in the text; crossReferences are handled by another filter
						text.append(startTag);
//						text.append(tagText);	// we don't put the body back in because it is retrievable from EntryAttributes["Footnotes"][]["body"].
					}
					else	continue;
				}
				strongsMarkup = false;
			}

			// if not a heading token, keep token in text
			//if ((!strcmp(tag.getName(), "reference")) && (!tag.isEndTag())) {
			//	SWBuf osisRef = tag.getAttribute("osisRef");
			if (!strncmp(token, "reference", 9)) {
				if (refs.length()) {
					refs.append("; ");
				}

				const char* attr = strstr(token.c_str() + 9, "osisRef=\"");
				const char* end  = attr ? strchr(attr+9, '"') : 0;

				if (attr && end) {
					refs.append(attr+9, end-(attr+9));
				}
			}
			if (!hide) {
				text.append('<');
				text.append(token);
				text.append('>');
			}
			else {
				tagText.append('<');
				tagText.append(token);
				tagText.append('>');
			}
			continue;
		}
		if (intoken) { //copy token
			token.append(*from);
		}
		else if (!hide) { //copy text which is not inside a token
			text.append(*from);
		}
		else tagText.append(*from);
	}
        delete parser;
	return 0;
}
Beispiel #3
0
char OSISScripref::processText(std::string &text, const SWKey *key, const SWModule *module) {
    (void) key;
    (void) module;
    std::string token;
    bool intoken    = false;
    bool hide       = false;
    std::string tagText;
    XMLTag startTag;

    std::string orig = text;
    const char *from = orig.c_str();

    XMLTag tag;

    for (text = ""; *from; ++from) {
        if (*from == '<') {
            intoken = true;
            token = "";
            continue;
        }
        if (*from == '>') {    // process tokens
            intoken = false;

            tag = token.c_str();

            if (!std::strncmp(token.c_str(), "note", 4) || !std::strncmp(token.c_str(), "/note", 5)) {
                if (!tag.isEndTag() && !tag.isEmpty()) {
                    startTag = tag;
                    if (tag.attribute("type") == "crossReference") {
                        hide = true;
                        tagText = "";
                        if (option) {    // we want the tag in the text
                            text.push_back('<');
                            text.append(token);
                            text.push_back('>');
                        }
                        continue;
                    }
                }
                if (hide && tag.isEndTag()) {
                    hide = false;
                    if (option) {    // we want the tag in the text
                        text.append(tagText);  // end tag gets added further down
                    }
                    else    continue;    // don't let the end tag get added to the text
                }
            }

            // if not a heading token, keep token in text
            if (!hide) {
                text.push_back('<');
                text.append(token);
                text.push_back('>');
            }
            else {
                tagText.push_back('<');
                tagText.append(token);
                tagText.push_back('>');
            }
            continue;
        }
        if (intoken) { //copy token
            token.push_back(*from);
        }
        else if (!hide) { //copy text which is not inside a token
            text.push_back(*from);
        }
        else tagText.push_back(*from);
    }
    return 0;
}
Beispiel #4
0
char ThMLHeadings::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	SWBuf token;
	bool intoken    = false;
	bool isheader   = false;
	bool hide       = false;
	bool preverse   = false;
	bool withinDiv  = false;
	SWBuf header;
	int headerNum   = 0;
	int pvHeaderNum = 0;
	char buf[254];
	XMLTag startTag;

	SWBuf orig = text;
	const char *from = orig.c_str();
	
	XMLTag tag;

	for (text = ""; *from; ++from) {
		if (*from == '<') {
			intoken = true;
			token = "";
			
			continue;
		}
		if (*from == '>') {	// process tokens
			intoken = false;

			if (!strnicmp(token.c_str(), "div", 3) || !strnicmp(token.c_str(), "/div", 4)) {
				withinDiv =  (!strnicmp(token.c_str(), "div", 3));
				tag = token;
				if (hide && tag.isEndTag()) {
					if (module->isProcessEntryAttributes() && (option || (!preverse))) {
						if (preverse) {
							sprintf(buf, "%i", pvHeaderNum++);
							module->getEntryAttributes()["Heading"]["Preverse"][buf] = header;
						}
						else {
							sprintf(buf, "%i", headerNum++);
							module->getEntryAttributes()["Heading"]["Interverse"][buf] = header;
							if (option) {	// we want the tag in the text
								text.append(header);
							}
						}
						
						StringList attributes = startTag.getAttributeNames();
						for (StringList::const_iterator it = attributes.begin(); it != attributes.end(); it++) {
							module->getEntryAttributes()["Heading"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
						}
					}
					
					hide = false;
					if (!option || preverse) {	// we don't want the tag in the text anymore
						preverse = false;
						continue;
					}
					preverse = false;
				}
				if (tag.getAttribute("class") && ((!stricmp(tag.getAttribute("class"), "sechead"))
										 ||  (!stricmp(tag.getAttribute("class"), "title")))) {

					isheader = true;
					
					if (!tag.isEndTag()) { //start tag
						if (!tag.isEmpty()) {
							startTag = tag;
					
/* how do we tell a ThML preverse title from one that should be in the text?  probably if any text is before the title...  just assuming all are preverse for now
					}
					if (tag.getAttribute("subtype") && !stricmp(tag.getAttribute("subtype"), "x-preverse")) {
*/
						hide = true;
						preverse = true;
						header = "";
						continue;
						}	// move back up under startTag = tag
					}
/* this is where non-preverse will go eventually
					if (!tag.isEndTag()) { //start tag
						hide = true;
						header = "";
						if (option) {	// we want the tag in the text
							text.append('<');
							text.append(token);
							text.append('>');
						}
						continue;
					}
*/
				}
				else
					isheader = false;
			}

			if (withinDiv && isheader) {
				header.append('<');
				header.append(token);
				header.append('>');
			} else {
				// if not a heading token, keep token in text
				if (!hide) {
					text.append('<');
					text.append(token);
					text.append('>');
				}
			}
			continue;
		}
		if (intoken) { //copy token
			token.append(*from);
		}
		else if (!hide) { //copy text which is not inside a token
			text.append(*from);
		}
		else header.append(*from);
	}
	return 0;
}
Beispiel #5
0
// Return true if the content was handled or is to be ignored.
//      false if the what has been seen is to be accumulated and considered later.
bool handleToken(std::string & text, XMLTag & token) {
        // The start token for the current entry;
    static XMLTag startTag;

        // Flags to indicate whether we are in a entry, entryFree or superentry
        static bool inEntry      = false;
        static bool inEntryFree  = false;
        static bool inSuperEntry = false;

        std::string const &  tokenName = token.name();

        static char const * splitPtr;
        static char const * splitPtr2 = nullptr;
        static std::array<char, 4096> splitBuffer;
    static SWKey tmpKey;
//-- START TAG -------------------------------------------------------------------------
    if (!token.isEndTag()) {

        // If we are not in an "entry" and we see one, then enter it.
        if (!inEntry && !inEntryFree && !inSuperEntry) {
            inEntry      = (tokenName == "entry");
            inEntryFree  = (tokenName == "entryFree");
            inSuperEntry = (tokenName == "superentry");
                        if (inEntry || inEntryFree || inSuperEntry) {
#ifdef DEBUG
                cout << "Entering " << tokenName << endl;
#endif
                startTag    = token;
                text        = "";

                                keyStr = token.attribute("n"); // P5 with linking and/or non-URI chars
                                if (keyStr.empty()) {
                                    keyStr = token.attribute("sortKey"); // P5 otherwise
                                    if (keyStr.empty()) {
                            keyStr = token.attribute("key"); // P4
                                        }
                                }

                return false; // make tag be part of the output
            }
        }
    }

//-- EMPTY and END TAG ---------------------------------------------------------------------------------------------
    else {

        // ENTRY end
        // If we see the end of an entry that we are in, then leave it
        if ((inEntry      && (tokenName == "entry"     )) ||
            (inEntryFree  && (tokenName == "entryFree" )) ||
            (inSuperEntry && (tokenName == "superentry"))) {
#ifdef DEBUG
            cout << "Leaving " << tokenName << endl;
#endif
            // Only one is false coming into here,
            // but all must be on leaving.
            inEntry       = false;
            inEntryFree   = false;
            inSuperEntry  = false;
            text         += token.toString();

                        entryCount++;
#ifdef DEBUG
            cout << "keyStr: " << keyStr << endl;
#endif
                        splitPtr = std::strchr(keyStr.c_str(), '|');
                        if (splitPtr) {
                                std::strncpy (splitBuffer.data(), keyStr.c_str(), splitPtr - keyStr.c_str());
                                splitBuffer[splitPtr - keyStr.c_str()] = 0;
                currentKey->setText(splitBuffer.data());
#ifdef DEBUG
                cout << "splitBuffer: " << splitBuffer.data() << endl;
                cout << "currentKey: " << currentKey->getText() << endl;
#endif
                writeEntry(*currentKey, text);
#if 1
                                while (splitPtr) {
                                    splitPtr += 1;
                                    splitPtr2 = std::strstr(splitPtr, "|");
                                        entryCount++;
                                        if (splitPtr2) {
                        std::strncpy (splitBuffer.data(), splitPtr, splitPtr2 - splitPtr);
                                                splitBuffer[splitPtr2 - splitPtr] = 0;
#ifdef DEBUG
                        cout << "splitBuffer: " << splitBuffer.data() << endl;
                        cout << "currentKey: " << currentKey->getText() << endl;
#endif
                        linkToEntry(currentKey->getText(), splitBuffer.data());
                                            splitPtr = splitPtr2;
                                        }
                                        else {
                        std::strcpy(splitBuffer.data(), splitPtr);
#ifdef DEBUG
                                      cout << "splitBuffer: " << splitBuffer.data() << endl;
                        cout << "currentKey: " << currentKey->getText() << endl;
#endif
                        linkToEntry(currentKey->getText(), splitBuffer.data());
                                                splitPtr = nullptr;
                                        }
                                }
#endif
                        }
                        else {
                currentKey->setText(keyStr);
                writeEntry(*currentKey, text);
                        }

            // Since we consumed the text, clear it
            // and tell the caller that the tag was consumed.
            text = "";
            return true;
        }
    }
    return false;
}
char OSISMorphSegmentation::processText(SWBuf &text, const SWKey * /*key*/, const SWModule *module) {
	SWBuf token;
	bool intoken    = false;
	bool hide       = false;

	SWBuf orig( text );
	const char *from = orig.c_str();

	XMLTag tag;
	SWBuf tagText = "";
	unsigned int morphemeNum = 0;
	bool inMorpheme = false;
	SWBuf buf;

	for (text = ""; *from; ++from) {
		if (*from == '<') {
			intoken = true;
			token = "";
			continue;
		}

		if (*from == '>') { // process tokens
			intoken = false;

			if (!strncmp(token.c_str(), "seg ", 4) || !strncmp(token.c_str(), "/seg", 4)) {
				tag = token;

				if (!tag.isEndTag() && tag.getAttribute("type") && !strcmp("morph", tag.getAttribute("type"))) {  //<seg type="morph"> start tag
					hide = !option; //only hide if option is Off
					tagText = "";
					inMorpheme = true;
				}

				if (tag.isEndTag()) {
						buf.setFormatted("%.3d", morphemeNum++);
						module->getEntryAttributes()["Morpheme"][buf]["body"] = tagText;
						inMorpheme = false;
				}
				if (hide) { //hides start and end tags as long as hide is set

					if (tag.isEndTag()) { //</seg>
						hide = false;
					}

					continue; //leave out the current token
				}
			} //end of seg tag handling

			text.append('<');
			text.append(token);
			text.append('>');

			if (inMorpheme) {
				tagText.append('<');
				tagText.append(token);
				tagText.append('>');
			}

			hide = false;

			continue;
		} //end of intoken part

		if (intoken) { //copy token
			token.append(*from);
		}
		else { //copy text which is not inside of a tag
			text.append(*from);
			if (inMorpheme) {
				tagText.append(*from);
			}
		}
	}
	return 0;
}