Пример #1
0
/**
 * Rewrites ThML text so that Strong's-tagged words become clickable spans.
 *
 * When the filter option is on, the text is rebuilt character by character:
 *  - a <sync type="Strongs" ...> tag whose number is below 5627 starts a new
 *    word record in the module's entry attributes (["Word"]["NNN"] with the
 *    keys Strongs, Text and TextStart) and appends "</span>" to the output;
 *    a larger number is stored as the Morph of the current word record;
 *  - a <sync type="morph" ...> tag stores its class="..." / value="..."
 *    attributes as MorphClass / Morph of the current word record;
 *  - every token is copied back into the output unchanged;
 *  - the opening <span class="clk" onclick="p(...)"> of a word is inserted
 *    at its recorded TextStart once the next word has been started, and
 *    once more after the loop for the last word.
 *
 * @param text   in/out buffer: the ThML source, replaced by the rewritten text
 * @param key    current key; used to build the word id and, when it is a
 *               VerseKey, to pick a lexicon for numbers without a G/H prefix.
 *               May be null.
 * @param module module whose entry attributes receive the word records.
 *               When null the text is left untouched, because there is
 *               nowhere to store the records.
 * @return always 0
 */
char ThMLWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	// every word record lives in module->getEntryAttributes(), so a null
	// module cannot be processed; bail out instead of dereferencing it
	if (option && module) {
		// zero-initialised so that a '>' seen before any '<' reads an empty token
		char token[2112] = { 0 }; // cheese.  Fix.
		int tokpos = 0;
		bool intoken = false;
		int word = 1;
		char val[128];
		char *valto;
		char *ch;
		// "%03d" of any int needs at most 12 bytes including the terminator
		char wordstr[16];
		unsigned int textStart = 0, lastAppendLen = 0, textEnd = 0;
		SWBuf tmp;
		bool newText = false;
		bool needWordOut = false;
		// record of the most recently started word; null until the first one
		AttributeValue *wordAttrs = 0;
		SWBuf modName = module->getName();
		SWBuf wordSrcPrefix = modName;
		
		const SWBuf orig = text;
		const char * from = orig.c_str();
		VerseKey *vkey = 0;
		if (key) {
			vkey = SWDYNAMIC_CAST(VerseKey, key);
		}

		for (text = ""; *from; from++) {
			if (*from == '<') {
				intoken = true;
				tokpos = 0;
				token[0] = 0;
				token[1] = 0;
				token[2] = 0;
				// remember where the text preceding this token ends
				textEnd = text.length();
				continue;
			}
			if (*from == '>') {	// process tokens
				intoken = false;
				if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) {	// Strongs
					// the number is read from offset 27 (presumably just after
					// `value="`); never read beyond what was actually stored
					// in token, which has length tokpos
					valto = val;
					for (int i = 27; i < tokpos && i < 150 && token[i] != '\"'; i++)
						*valto++ = token[i];
					*valto = 0;
					// skip one leading non-digit (e.g. a G/H prefix), but only
					// if there is a character to skip
					const char *number = (*val && !isdigit((unsigned char)*val)) ? val+1 : val;
					if (atoi(number) < 5627) {
						// normal strongs number
						sprintf(wordstr, "%03d", word++);
						// the previous word's span can be emitted once a
						// second word has been started
						needWordOut = (word > 2);
						wordAttrs = &(module->getEntryAttributes()["Word"][wordstr]);
						(*wordAttrs)["Strongs"] = val;
						tmp = "";
						tmp.append(text.c_str()+textStart, (int)(textEnd - textStart));
						(*wordAttrs)["Text"] = tmp;
						text.append("</span>");
						SWBuf ts;
						ts.appendFormatted("%d", textStart);
						(*wordAttrs)["TextStart"] = ts;
						newText = true;
					}
					else if (wordAttrs) {
						// verb morph; ignored when no word has been started yet
						(*wordAttrs)["Morph"] = val;
					}

				}
				if (!strncmp(token, "sync type=\"morph\"", 17)) {
					for (ch = token+17; *ch; ch++) {
						if (!strncmp(ch, "class=\"", 7)) {
							valto = val;
							// stop at the token terminator as well as at the quote
							for (unsigned int i = 7; ch[i] && ch[i] != '\"' && i < 127; i++)
								*valto++ = ch[i];
							*valto = 0;
							if (wordAttrs)
								(*wordAttrs)["MorphClass"] = val;
						}
						if (!strncmp(ch, "value=\"", 7)) {
							valto = val;
							for (unsigned int i = 7; ch[i] && ch[i] != '\"' && i < 127; i++)
								*valto++ = ch[i];
							*valto = 0;
							if (wordAttrs)
								(*wordAttrs)["Morph"] = val;
						}
					}
					newText = true;
				}
				// every token, including the sync tokens, is kept in the text
				text += '<';
				text += token;
				text += '>';
				if (needWordOut) {
					// emit the opening span of the word before the one just started
					char wstr[16];
					sprintf(wstr, "%03d", word-2);
					AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]);
					needWordOut = false;
					SWBuf strong = (*wAttrs)["Strongs"];
					SWBuf morph = (*wAttrs)["Morph"];
					SWBuf morphClass = (*wAttrs)["MorphClass"];
					SWBuf wordText = (*wAttrs)["Text"];
					SWBuf textSt = (*wAttrs)["TextStart"];
					if (strong.size()) {
						char gh = 0;
						gh = isdigit((unsigned char)strong[0]) ? 0:strong[0];
						if (!gh) {
							if (vkey) {
								// NOTE(review): any non-zero testament selects 'H';
								// confirm getTestament() is 0 for the Greek case
								gh = vkey->getTestament() ? 'H' : 'G';
							}
						}
						else strong << 1;	// presumably drops the leading G/H letter - TODO confirm

						SWModule *sLex = 0;
						SWModule *sMorph = 0;
						if (gh == 'G') {
							sLex = defaultGreekLex;
							sMorph = defaultGreekParse;
						}
						if (gh == 'H') {
							sLex = defaultHebLex;
							sMorph = defaultHebParse;
						}
						SWBuf lexName = "";
						if (sLex) {
							// we can pass the real lex name in, but we have some
							// aliases in the javascript to optimize bandwidth
							lexName = sLex->getName();
							if (lexName == "StrongsGreek")
								lexName = "G";
							if (lexName == "StrongsHebrew")
								lexName = "H";
						}
						SWBuf wordID;
						if (vkey) {
							// optimize for bandwidth and use only the verse as the unique entry id
							wordID.appendFormatted("%d", vkey->getVerse());
						}
						else if (key) {
							wordID = key->getText();
						}
						// keep only letters and digits in the id
						for (unsigned int i = 0; i < wordID.size(); i++) {
							if ((!isdigit((unsigned char)wordID[i])) && (!isalpha((unsigned char)wordID[i]))) {
								wordID[i] = '_';
							}
						}
						wordID.appendFormatted("_%s%d", wordSrcPrefix.c_str(), atoi(wstr));
						if (textSt.size()) {
							// TextStart was recorded before the previous span was
							// inserted, so shift it by that span's length
							int textStr = atoi(textSt.c_str());
							textStr += lastAppendLen;
							SWBuf spanStart = "";



							if (!sMorph) sMorph = 0;	// avoid unused warnings for now
/*
							if (sMorph) {
								SWBuf popMorph = "<a onclick=\"";
								popMorph.appendFormatted("p(\'%s\',\'%s\','%s','');\" >%s</a>", sMorph->getName(), morph.c_str(), wordID.c_str(), morph.c_str());
								morph = popMorph;
							}
*/

							// 'p' = 'fillpop' to save bandwidth
							// pass only the part of the morph code after the first ':'
							const char *m = strchr(morph.c_str(), ':');
							if (m) m++;
							else m = morph.c_str();
							spanStart.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','','%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str());
							text.insert(textStr, spanStart);
							lastAppendLen = spanStart.length();
						}
					}

				}
				if (newText) {
					textStart = text.length(); newText = false;
				}
				continue;
			}
			if (intoken) {
				if (tokpos < 2045) {
					token[tokpos++] = *from;
					// TODO: why is this + 2 ?
					token[tokpos+2] = 0;
				}
			}
			else	{
				text += *from;
			}
		}

		// emit the opening span of the last word started in the loop
		char wstr[16];
		sprintf(wstr, "%03d", word-1);
		AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]);
		needWordOut = false;
		SWBuf strong = (*wAttrs)["Strongs"];
		SWBuf morph = (*wAttrs)["Morph"];
		SWBuf morphClass = (*wAttrs)["MorphClass"];
		SWBuf wordText = (*wAttrs)["Text"];
		SWBuf textSt = (*wAttrs)["TextStart"];
		if (strong.size()) {
			char gh = 0;
			gh = isdigit((unsigned char)strong[0]) ? 0:strong[0];
			if (!gh) {
				if (vkey) {
					// NOTE(review): any non-zero testament selects 'H';
					// confirm getTestament() is 0 for the Greek case
					gh = vkey->getTestament() ? 'H' : 'G';
				}
			}
			else strong << 1;	// presumably drops the leading G/H letter - TODO confirm

			SWModule *sLex = 0;
			if (gh == 'G') {
				sLex = defaultGreekLex;
			}
			if (gh == 'H') {
				sLex = defaultHebLex;
			}
			SWBuf lexName = "";
			if (sLex) {
				// we can pass the real lex name in, but we have some
				// aliases in the javascript to optimize bandwidth
				lexName = sLex->getName();
				if (lexName == "StrongsGreek")
					lexName = "G";
				if (lexName == "StrongsHebrew")
					lexName = "H";
			}
			SWBuf wordID;
			if (vkey) {
				// optimize for bandwidth and use only the verse as the unique entry id
				wordID.appendFormatted("%d", vkey->getVerse());
			}
			else if (key) {
				wordID = key->getText();
			}
			// keep only letters and digits in the id
			for (unsigned int i = 0; i < wordID.size(); i++) {
				if ((!isdigit((unsigned char)wordID[i])) && (!isalpha((unsigned char)wordID[i]))) {
					wordID[i] = '_';
				}
			}
			wordID.appendFormatted("_%s%d", wordSrcPrefix.c_str(), atoi(wstr));
			if (textSt.size()) {
				int textStr = atoi(textSt.c_str());
				textStr += lastAppendLen;
				SWBuf spanStart = "";
				// 'p' = 'fillpop' to save bandwidth
				const char *m = strchr(morph.c_str(), ':');
				if (m) m++;
				else m = morph.c_str();
				spanStart.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','','%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str());
				text.insert(textStr, spanStart);
			}
		}
	}

	return 0;
}
Пример #2
0
/**
 * Converts GBF markup in text to OSIS markup.
 *
 * The input is scanned character by character. Text outside of <...>
 * tokens is copied to the output (unless pass-through is suspended by a
 * scripRef token); each token is translated as follows:
 *  - scripRef ... /scripRef : the enclosed text is passed through
 *    VerseKey::convertToOSIS()
 *  - RF / Rf   : <note type="x-StudyNote"> ... </note>
 *  - TH / Th   : <title type="psalm"> ... </title>
 *  - FI / Fi   : <transChange type="added"> ... </transChange>
 *  - CT, CG    : &lt; and &gt;
 *  - CM        : <milestone type="x-p" />
 *  - img       : <figure src="..." />
 *  - WG / WH   : Strong's number, added as a lemma on the preceding word
 *  - WT        : morphology, added as a morph on the preceding word
 * Any other token is dropped from the output and logged as unprocessed.
 * When key is a VerseKey with a non-zero verse, the result is wrapped in
 * <verse osisID="..."> ... </verse>.
 *
 * @param text   in/out buffer: GBF source, replaced by the OSIS result
 * @param key    current key; used for reference conversion, logging and
 *               the verse wrapper
 * @param module not used by this function
 * @return always 0
 */
char GBFOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module) { 
	// zero-initialised so that a '>' seen before any '<' reads an empty token
	char token[2048] = { 0 }; //cheesy, we seem to like cheese :)
	int tokpos = 0;
	bool intoken = false;
	bool keepToken = false;
	
//	static QuoteStack quoteStack;

	SWBuf orig = text;
	SWBuf tmp;
	SWBuf value;
	
	bool suspendTextPassThru = false;
	bool handled = false;
	bool newWord = false;
	bool newText = false;
	bool lastspace = false;
	
	// NOTE(review): wordStart and wordEnd point into text's own buffer while
	// text keeps growing below; if SWBuf reallocates on growth these pointers
	// would dangle - confirm against SWBuf before relying on them.
	const char *wordStart = text.c_str();
	const char *wordEnd = NULL;
	
	// textStart and textEnd point into orig, which is never modified
	const char *textStart = NULL;
	const char *textEnd = NULL;
	
	SWBuf buf;
		
	text = "";
	for (const char* from = orig.c_str(); *from; ++from) {
		if (*from == '<') { //start of new token detected
			intoken = true;
			tokpos = 0;
			token[0] = 0;
			token[1] = 0;
			token[2] = 0;
			textEnd = from-1; //end of last text node found
			wordEnd = text.c_str() + text.length();//not good, instead of wordEnd = to!
			
			continue;
		}
		
		if (*from == '>') {	// process tokens
			intoken = false;
			keepToken = false;
			suspendTextPassThru = false;
			newWord = true;
			handled = false;

			// skip leading punctuation of the current word
			while (wordStart < (text.c_str() + text.length())) { //hack
				if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1])
					wordStart++;
				else break;
			}			
			// back up over trailing punctuation of the current word
			while (wordEnd > wordStart) {
				if (strchr(" ,;:.?!()'\"", *wordEnd))
					wordEnd--;
				else break;
			}

			// Scripture Reference
			if (!strncmp(token, "scripRef", 8)) {
				suspendTextPassThru = true;
				newText = true;
				handled = true;
			}
			else if (!strncmp(token, "/scripRef", 9)) {
				// textStart stays NULL until some token has started a text
				// node; without one there is no reference text to convert
				if (textStart) {
					tmp = "";
					tmp.append(textStart, (int)(textEnd - textStart)+1);
					text += VerseKey::convertToOSIS(tmp.c_str(), key);
				}
				
				lastspace = false;
				suspendTextPassThru = false;
				handled = true;
			}

			// Footnote
			if (!strcmp(token, "RF") || !strncmp(token, "RF ", 3)) { //the GBFFootnotes filter adds the attribute "swordFootnote", we want to catch that, too
	//			pushString(buf, "<reference work=\"Bible.KJV\" reference=\"");
				text += "<note type=\"x-StudyNote\">";
				newText = true;
				lastspace = false;
				handled = true;
			}
			else	if (!strcmp(token, "Rf")) {
				text += "</note>";
				lastspace = false;
				handled = true;
			}
			// hebrew titles
			if (!strcmp(token, "TH")) {
				text += "<title type=\"psalm\">";
				newText = true;
				lastspace = false;
				handled = true;
			}
			else	if (!strcmp(token, "Th")) {
				text += "</title>";
				lastspace = false;
				handled = true;
			}
			// Italics assume transchange
			if (!strcmp(token, "FI")) {
				text += "<transChange type=\"added\">";
				newText = true;
				lastspace = false;
				handled = true;
			}
			else	if (!strcmp(token, "Fi")) {
				text += "</transChange>";
				lastspace = false;
				handled = true;
			}
			// less than
			if (!strcmp(token, "CT")) {
				text += "&lt;";
				newText = true;
				lastspace = false;
				handled = true;
			}
			// greater than
			if (!strcmp(token, "CG")) {
				text += "&gt;";
				newText = true;
				lastspace = false;
				handled = true;
			}
			// Paragraph break.  For now use empty paragraph element
			if (!strcmp(token, "CM")) {
				text += "<milestone type=\"x-p\" />";
				newText = true;
				lastspace = false;
				handled = true;
			}

			// Figure
			else	if (!strncmp(token, "img ", 4)) {
				const char *src = strstr(token, "src");
				if (!src)		// assert we have a src attribute
					continue;
//					return false;

				text += "<figure src=\"";
				const char *c;
				// advance to the opening quote of the src value
				for (c = src;((*c) && (*c != '"')); c++);

// uncomment for SWORD absolute path logic
//				if (*(c+1) == '/') {
//					pushString(buf, "file:");
//					pushString(buf, module->getConfigEntry("AbsoluteDataPath"));
//					if (*((*buf)-1) == '/')
//						c++;		// skip '/'
//				}
// end of uncomment for absolute path logic 

				// copy everything up to the closing quote
				for (c++;((*c) && (*c != '"')); c++) {
					text += *c;
				}
				text += "\" />";
				
				lastspace = false;
				handled = true;
			}

			// Strongs numbers
			else if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) {	// Strongs
				bool divineName = false;
				value = token+1;
			
				// normal strongs number
				//strstrip(val);
				if (!strncmp(wordStart, "<w ", 3)) {
					const char *attStart = strstr(wordStart, "lemma");
					if (attStart) {
						// existing lemma attribute: insert just after `lemma="`
						attStart += 7;
						
						buf = "";
						buf.appendFormatted("strong:%s ", value.c_str());
					}
					else { // no lemma attribute
						attStart = wordStart + 3;
						
						// the format string must be the first argument; passing
						// buf itself there made it the (empty) format, so the
						// lemma attribute was never written
						buf = "";
						buf.appendFormatted("lemma=\"strong:%s\" ", value.c_str());
					}

					text.insert(attStart - text.c_str(), buf);
				}
				else { //wordStart doesn't point to an existing <w> attribute!
					if (!strcmp(value.c_str(), "H03068")) {	//divineName
						buf = "";
						buf.appendFormatted("<divineName><w lemma=\"strong:%s\">", value.c_str());
						
						divineName = true;
					}
					else {
						buf = "";
						buf.appendFormatted("<w lemma=\"strong:%s\">", value.c_str());
					}

					text.insert(wordStart - text.c_str(), buf);

					if (divineName) {
						// step over "<divineName>" (12 characters) so that
						// wordStart points at the "<w " that follows it
						wordStart += 12;
						text += "</w></divineName>";
					}
					else	text += "</w>";

					lastspace = false;
				}
				handled = true;
			}

			// Morphology
			else if (*token == 'W' && token[1] == 'T') {
				if (token[2] == 'G' || token[2] == 'H') {	// Strongs
					value = token+2;
				}
				else value = token+1;
				
				if (!strncmp(wordStart, "<w ", 3)) {
					const char *attStart = strstr(wordStart, "morph");
					if (attStart) { //existing morph attribute, append this one to it
						attStart += 7;
						buf = "";
						buf.appendFormatted("%s:%s ", "robinson", value.c_str());
					}
					else { // no morph attribute
						attStart = wordStart + 3;
						buf = "";
						buf.appendFormatted("morph=\"%s:%s\" ", "robinson", value.c_str());
					}
					
					text.insert(attStart - text.c_str(), buf); //hack, we have to
				}
				else { //no existing <w> attribute found
					buf = "";
					buf.appendFormatted("<w morph=\"%s:%s\">", "robinson", value.c_str());
					text.insert(wordStart - text.c_str(), buf);
					text += "</w>";
					lastspace = false;

				}
				handled = true;
			}

			if (!keepToken) {	
				if (!handled) {
					SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>");
//					exit(-1);
				}
				// drop a trailing space when punctuation follows the token
				if (from[1] && strchr(" ,;.:?!()'\"", from[1])) {
					if (lastspace) {
						text--;
					}
				}
				if (newText) {
					textStart = from+1;
					newText = false; 
				}
				continue;
			}

			// keepToken is never set to true in this function, so the code
			// from here to the end of the '>' branch is currently unreachable
			// if not a strongs token, keep token in text
			text.appendFormatted("<%s>", token);
			
			if (newText) {
				textStart = text.c_str() + text.length();
				newWord = false; 
			}
			continue;
		}
		if (intoken) {
			// collect the token, ignoring line breaks inside it
			if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) {
				token[tokpos++] = *from;
				token[tokpos+2] = 0;
			}
		}
		else	{
			switch (*from) {
			case '\'':
			case '\"':
			case '`':
//				quoteStack.handleQuote(fromStart, from, &to);
				text += *from;
				//from++; //this line removes chars after an apostrophe! Needs fixing.
				break;
			default:
				// first non-space character after a token starts a new word
				if (newWord && (*from != ' ')) {
					wordStart = text.c_str() + text.length();
					newWord = false;
					
					//fix this if required?
					//memset(to, 0, 10);

				}

				if (!suspendTextPassThru) {
					text += (*from);
					lastspace = (*from == ' ');
				}
			}
		}
	}

	// wrap the result in a <verse> element when we have a real verse
	VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key);
	if (vkey) {
		SWBuf ref = "";
		if (vkey->getVerse()) {
			ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef());
		}

		if (ref.length() > 0) {

			text = ref + text;

			if (vkey->getVerse()) {
				VerseKey *tmp = (VerseKey *)vkey->clone();
				*tmp = *vkey;
				tmp->setAutoNormalize(false);
				tmp->setIntros(true);

				text += "</verse>";

				// the checks below detect the last verse of a chapter / book;
				// the output they used to produce is commented out
				*tmp = MAXVERSE;
				if (*vkey == *tmp) {
					tmp->setVerse(0);
//					sprintf(ref, "\t</div>");
//					pushString(&to, ref);
					*tmp = MAXCHAPTER;
					*tmp = MAXVERSE;
					if (*vkey == *tmp) {
						tmp->setChapter(0);
						tmp->setVerse(0);
//						sprintf(ref, "\t</div>");
//						pushString(&to, ref);
/*
						if (!quoteStack.empty()) {
							SWLog::getSystemLog()->logError("popping unclosed quote at end of book");
							quoteStack.clear();
						}
*/
					}
				}
                                delete tmp;
			}
//			else if (vkey->Chapter()) {
//				sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef());
//			}
//			else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef());
		}
	}
	return 0;
}
Пример #3
0
/**
 * Wraps every OSIS <w ...> element in a clickable span.
 *
 * When the filter option is on, the text is rebuilt character by character.
 * For each token starting with "w " the word's data (Page, PartCount and
 * the per-part LemmaClass / Lemma / Morph / Src values) is read from the
 * module's entry attributes under ["Word"]["NNN"], where NNN is the running
 * word number, and a
 * <span class="clk" onclick="p(lex, lemma, wordID, morph, page, module);">
 * is written in front of the token. A closing "/w" token is replaced by
 * "</w></span>". All other tokens and all text are copied unchanged.
 *
 * @param text   in/out buffer: OSIS source, replaced by the rewritten text
 * @param key    current key; a VerseKey contributes its verse number to the
 *               word id, any other key its text. May be null.
 * @param module module providing the entry attributes and its name. May be
 *               null, in which case words are emitted without attribute data.
 * @return always 0
 */
char OSISWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	if (option) {
		// zero-initialised so that a '>' seen before any '<' reads an empty token
		char token[2112] = { 0 }; // cheese.  Fix.
		int tokpos = 0;
		bool intoken = false;
		int wordNum = 1;
		// "%03d" of any int needs at most 12 bytes including the terminator
		char wordstr[16];
		SWBuf modName = (module)?module->getName():"";
		// add TR to w src in KJV then remove this next line
		SWBuf wordSrcPrefix = (modName == "KJV")?SWBuf("TR"):modName;

		VerseKey *vkey = 0;
		if (key) {
			vkey = SWDYNAMIC_CAST(VerseKey, key);
		}
		
		const SWBuf orig = text;
		const char * from = orig.c_str();

		for (text = ""; *from; ++from) {
			if (*from == '<') {
				intoken = true;
				tokpos = 0;
				token[0] = 0;
				token[1] = 0;
				token[2] = 0;
				continue;
			}
			if (*from == '>') {	// process tokens
				intoken = false;
				if ((*token == 'w') && (token[1] == ' ')) {	// Word
					XMLTag wtag(token);
					sprintf(wordstr, "%03d", wordNum);
					SWBuf lemmaClass;
					SWBuf lemma;
					SWBuf morph;
					SWBuf page;
					SWBuf src;
					char gh = 0;
					// attribute data is only available with a module; without
					// one the word is emitted with empty lemma/morph/page
					int count = 0;
					if (module) {
						page = module->getEntryAttributes()["Word"][wordstr]["Page"].c_str();
						if (page.length()) page = (SWBuf)"p:" + page;
						count = atoi(module->getEntryAttributes()["Word"][wordstr]["PartCount"].c_str());
					}
					// count > 0 implies module is non-null
					for (int i = 0; i < count; i++) {

						// for now, lemma class can just be equal to last lemma class in multi part word
						// multi-part words use suffixed keys: "Lemma.1", "Lemma.2", ...
						SWBuf tmp = "LemmaClass";
						if (count > 1) tmp.appendFormatted(".%d", i+1);
						lemmaClass = module->getEntryAttributes()["Word"][wordstr][tmp];

						tmp = "Lemma";
						if (count > 1) tmp.appendFormatted(".%d", i+1);
						tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str());

						// if we're strongs, split off the leading letter
						if (lemmaClass == "strong") {
							gh = tmp[0];
							tmp << 1;
						}
						if (lemma.size()) lemma += "|";
						lemma += tmp;

						tmp = "Morph";
						if (count > 1) tmp.appendFormatted(".%d", i+1);
						tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str());
						if (morph.size()) morph += "|";
						morph += tmp;

						tmp = "Src";
						if (count > 1) tmp.appendFormatted(".%d", i+1);
						tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str());
						// fall back to the running word number when no Src is stored
						if (!tmp.length()) tmp.appendFormatted("%d", wordNum);
						tmp.insert(0, wordSrcPrefix);
						if (src.size()) src += "|";
						src += tmp;
					}

					SWBuf lexName = "";
					// we can pass the real lex name in, but we have some
					// aliases in the javascript to optimize bandwidth
					if ((gh == 'G') && (defaultGreekLex)) {
						lexName = (!strcmp(defaultGreekLex->getName(), "StrongsGreek"))?"G":defaultGreekLex->getName();
					}
					else if ((gh == 'H') && (defaultHebLex)) {
						lexName = (!strcmp(defaultHebLex->getName(), "StrongsHebrew"))?"H":defaultHebLex->getName();
					}

					SWBuf xlit = wtag.getAttribute("xlit");

					if ((lemmaClass != "strong") && (xlit.startsWith("betacode:"))) {
						lexName = "betacode";
//						const char *m = strchr(xlit.c_str(), ':');
//						strong = ++m;
					}
					SWBuf wordID;
					if (vkey) {
						// optimize for bandwidth and use only the verse as the unique entry id
						wordID.appendFormatted("%d", vkey->getVerse());
					}
					else if (key) {
						wordID = key->getText();
					}
					wordID.appendFormatted("_%s", src.c_str());
					// clean up our word ID for XHTML
					for (unsigned int i = 0; i < wordID.size(); i++) {
						if ((!isdigit((unsigned char)wordID[i])) && (!isalpha((unsigned char)wordID[i]))) {
							wordID[i] = '_';
						}
					}
					// 'p' = 'fillpop' to save bandwidth
					text.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','%s','%s');\" >", lexName.c_str(), lemma.c_str(), wordID.c_str(), morph.c_str(), page.c_str(), modName.c_str());
					wordNum++;

					if (wtag.isEmpty()) {
						text += "</w></span>";
					}
				}
				if ((*token == '/') && (token[1] == 'w')) {	// Word
					text += "</w></span>";
					continue;
				}
				
				// if not a strongs token, keep token in text
				text.append('<');
				text.append(token);
				text.append('>');
				
				continue;
			}
			if (intoken) {
				if (tokpos < 2045) {
					token[tokpos++] = *from;
					token[tokpos+2] = 0;
				}
			}
			else	{
				text.append(*from);
			}
		}
	}
	return 0;
}