Ejemplo n.º 1
0
	/**
	 * Parses a verse list and renders each resulting element as text.
	 *
	 * The system default locale is switched to @p locale while the list is
	 * parsed and formatted, and switched back before returning.
	 *
	 * @param keyValue verse list text handed to VerseKey::ParseVerseList
	 * @param locale   locale name made the default for the duration of the call
	 * @return the elements separated by a single blank; VerseKey elements are
	 *         written as "lower - upper;", any other element as "text;"
	 */
	SWBuf parseRangeKey(const char* keyValue, const char* locale) {
		// Keep a private copy of the current locale name. The pointer returned by
		// getDefaultLocaleName() refers to storage owned by the LocaleMgr, and
		// setDefaultLocaleName() replaces that storage, so holding the raw pointer
		// across the call below would leave it dangling when we restore.
		const SWBuf oldLocale(LocaleMgr::getSystemLocaleMgr()->getDefaultLocaleName());
		LocaleMgr::getSystemLocaleMgr()->setDefaultLocaleName(locale);

		SWBuf ret;

		// Context key used to resolve relative references in keyValue.
		VerseKey DefaultVSKey;
		DefaultVSKey = "jas3:1";

		ListKey verses = DefaultVSKey.ParseVerseList(keyValue, DefaultVSKey, true);

		for (int i = 0; i < verses.Count(); i++) {
			// every element after the first is preceded by one blank
			if (ret.length()) {
				ret.append(" ");
			}

			VerseKey *element = dynamic_cast<VerseKey *>(verses.GetElement(i));
			if (element) {
				ret.appendFormatted( "%s - %s;", (const char*)element->LowerBound(), (const char*)element->UpperBound() );
			}
			else {
				ret.appendFormatted("%s;", (const char *)*verses.GetElement(i));
			}
		}

		LocaleMgr::getSystemLocaleMgr()->setDefaultLocaleName(oldLocale.c_str());
		return ret;
	};
Ejemplo n.º 2
0
/**
 * Flattens papyri text into plain running text, in place.
 *
 * - A hyphen followed only by whitespace that contains a line break is
 *   dropped together with that whitespace, re-joining the split word.
 * - Remaining CR/LF characters are removed (a blank may be put in their place).
 * - Bracket characters ( ) [ ] { } < > are stripped.
 *
 * @param text   buffer to filter; replaced with the filtered result
 * @param key    unused
 * @param module unused
 * @return always 0
 */
char PapyriPlain::processText (SWBuf &text, const SWKey *key, const SWModule *module)
{
	const SWBuf source = text;
	text = "";

	for (const char *cur = source.c_str(); *cur; ++cur) {
		const char ch = *cur;

		// A hyphen that does not directly follow another hyphen ("--") may be
		// the join of a word broken across lines.
		if (ch == '-' && text.length() > 0 && text[text.length()-1] != '-') {
			bool sawLineBreak = false;
			bool joinWords = false;
			const char *scan = cur + 1;
			for (; *scan; ++scan) {
				if (*scan == 10 || *scan == 13) {
					sawLineBreak = true;
				}
				if (!strchr(" \t\n", *scan)) {
					// first character that is not blank, tab or LF ends the scan;
					// the hyphen is dropped only if a line break was crossed
					joinWords = sawLineBreak;
					break;
				}
			}
			if (joinWords) {
				// the loop's ++cur lands on the character that ended the scan
				cur = scan - 1;
				continue;
			}
		}

		// CR and LF never reach the output
		if (ch == 10 || ch == 13) {
			const bool needsBlank = (text.length() > 1)
				&& (text[text.length()-2] != ' ')
				&& (cur[1] != ' ');
			if (needsBlank) {
				text.append(' ');
			}
			continue;
		}

		// editorial brackets are stripped (ch is never NUL here)
		if (strchr("()[]{}<>", ch)) {
			continue;
		}

		text.append(ch);
	}
	return 0;
}
Ejemplo n.º 3
0
/**
 * Removes the who="Jesus" attribute from <q ...> tags when the option is off.
 *
 * When `option` is set the text is left untouched. Otherwise every tag is
 * copied through unchanged except <q ...> tags that contain the exact text
 * ` who="Jesus"`, from which only that attribute is cut out.
 *
 * @param text   buffer to filter; replaced with the filtered result
 * @param key    unused
 * @param module unused
 * @return always 0
 */
char OSISRedLetterWords::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	if (option) {
		// red-letter markup is wanted: nothing to do
		return 0;
	}

	const SWBuf input = text;
	SWBuf tag;
	bool insideTag = false;

	text = "";
	for (const char *p = input.c_str(); *p; p++) {
		switch (*p) {
		case '<':
			// start collecting a new tag
			insideTag = true;
			tag = "";
			break;

		case '>': {
			insideTag = false;

			// locate the attribute, but only inside <q ...> tags
			const char *who = 0;
			if ((tag[0] == 'q') && (tag[1] == ' ')) {
				who = strstr(tag.c_str(), " who=\"Jesus\"");
				if (who && (strlen(who) < 12)) {
					who = 0;
				}
			}

			text.append('<');
			if (who) {
				const char *rest = who + 12;	// first character after the attribute
				text.append(tag, who - (tag.c_str()));
				text.append(rest, tag.c_str() + tag.length() - rest);
			}
			else {
				// any other tag is passed through unchanged
				text.append(tag);
			}
			text.append('>');
			break;
		}

		default:
			if (insideTag) {
				tag.append(*p);
			}
			else {
				text.append(*p);
			}
			break;
		}
	}
	return 0;
}
Ejemplo n.º 4
0
/**
 * Filters ThML variant <div> sections according to `option`.
 *
 * Only acts when `option` is 0 or 1. The <div type="variant" class="1">
 * section (option 0) or the class="2" section (option 1) is removed with its
 * content; the tags of any other variant div are dropped while their content
 * is kept.
 *
 * @param text   buffer to filter; replaced with the filtered result
 * @param key    unused
 * @param module unused
 * @return always 0
 */
char ThMLVariants::processText(SWBuf &text, const SWKey *key, const SWModule *module)
{
	if (!(option == 0 || option == 1)) {
		return 0;
	}

	// opening tag of the variant that gets hidden; both candidates are 28 chars long
	const char *hiddenVariant = "div type=\"variant\" class=\"2\"";
	if (option == 0) {
		hiddenVariant = "div type=\"variant\" class=\"1\"";
	}

	const SWBuf input = text;
	SWBuf token;
	bool insideTag = false;
	bool hiding = false;
	bool insideVariant = false;

	text = "";
	for (const char *p = input.c_str(); *p; p++) {
		const char ch = *p;

		if (ch == '<') {
			insideTag = true;
			token = "";
		}
		else if (ch == '>') {
			insideTag = false;
			const char *tok = token.c_str();

			if (!strncmp(tok, hiddenVariant, 28)) {
				// the variant to suppress: drop the tag and everything inside
				insideVariant = true;
				hiding = true;
				continue;
			}
			if (!strncmp(tok, "div type=\"variant\"", 18)) {
				// any other variant: drop only the tag
				insideVariant = true;
				continue;
			}
			if (!strncmp(tok, "/div", 4)) {
				hiding = false;
				if (insideVariant) {
					// closing tag of a variant div is dropped as well
					insideVariant = false;
					continue;
				}
			}
			if (!hiding) {
				text.append('<');
				text.append(token);
				text.append('>');
			}
		}
		else if (insideTag) {
			token.append(ch);
		}
		else if (!hiding) {
			text.append(ch);
		}
	}

	return 0;
}
Ejemplo n.º 5
0
/**
 * Strips the "gloss" attribute from <w ...> tags when the option is off.
 *
 * With `option` set the text is left untouched. Otherwise every tag is copied
 * through; <w ...> tags carrying a gloss attribute are re-serialised without it.
 *
 * @param text   buffer to filter; replaced with the filtered result
 * @param key    unused
 * @param module unused
 * @return always 0
 */
char OSISGlosses::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	if (option) {
		return 0;
	}

	const SWBuf input = text;
	SWBuf token;
	bool insideTag = false;

	text = "";
	for (const char *p = input.c_str(); *p; ++p) {
		switch (*p) {
		case '<':
			insideTag = true;
			token = "";
			break;

		case '>':
			insideTag = false;
			if (token.startsWith("w ")) {
				XMLTag wtag(token);
				if (wtag.getAttribute("gloss")) {
					// remove the attribute and take the tag's new text form
					wtag.setAttribute("gloss", 0);
					token = wtag;
					token.trim();
					// strip the surrounding '<' and '>' again
					token << 1;
					token--;
				}
			}
			// the (possibly rewritten) tag always stays in the text
			text.append('<');
			text.append(token);
			text.append('>');
			break;

		default:
			if (insideTag) {
				token += *p;
			}
			else {
				text.append(*p);
			}
			break;
		}
	}
	return 0;
}
Ejemplo n.º 6
0
/**
 * Removes the morph="..." attribute from <w ...> tags when the option is off.
 *
 * With `option` set the text is left untouched. Otherwise every tag is copied
 * through; for <w ...> tags the text from `morph="` up to and including the
 * closing quote is cut out.
 *
 * Tags longer than the token buffer are truncated (see the bound below).
 *
 * @param text   buffer to filter; replaced with the filtered result
 * @param key    unused
 * @param module unused
 * @return always 0
 */
char OSISMorph::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	if (!option) {	// if we don't want morph tags
		// Zero-initialised so that a stray '>' seen before any '<' is handled
		// as an empty token instead of reading indeterminate bytes.
		char token[2048] = {0};
		int tokpos = 0;
		bool intoken = false;
		SWBuf orig = text;
		const char *from = orig.c_str();

		for (text = ""; *from; ++from) {
			if (*from == '<') {
				intoken = true;
				tokpos = 0;
				token[0] = 0;
				continue;
			}
			if (*from == '>') {	// process tokens
				intoken = false;

				if ((*token == 'w') && (token[1] == ' ')) {
					const char *start = strstr(token+2, "morph=\"");	// skip the leading "w "
					const char *end = start ? strchr(start+7, '"') : 0;	// closing quote of the value

					if (start && end) {
						text.append('<');
						text.append(token, start-token);	// text before the morph attribute
						text.append(end+1);			// text after the morph attribute
						text.append('>');
						continue;
					}
				}

				// not a <w> tag, or no complete morph attribute: keep unchanged
				text.append('<');
				text.append(token);
				text.append('>');
				continue;
			}
			if (intoken) {
				// characters beyond the buffer bound are silently dropped
				if (tokpos < 2045) {
					token[tokpos++] = *from;
				}
				token[tokpos] = 0;
			}
			else	{
				text.append(*from);
			}
		}
	}
	return 0;
}
Ejemplo n.º 7
0
/**
 * Decodes a URL-encoded string.
 *
 * '+' becomes a blank and "%XY" (two hex digits, either case) becomes the
 * byte with that value. A '%' that has two following characters which are not
 * both hex digits is dropped; a '%' with fewer than two following characters
 * is copied as is.
 *
 * @param encoded the URL-encoded text
 * @return the decoded text; if decoding produced nothing, the input text is
 *         returned unchanged
 */
const SWBuf URL::decode(const char *encoded) {
	SWBuf text;
	text = encoded;

	SWBuf decoded;
	const int length = text.length();
	int i = 0;

	while (i < length) {
		const char a = text[i];

		if (a == '+') {
			decoded.append(' ');
		}
		else if ((a == '%') && (i+2 < length)) {
			// The <cctype> functions require a value representable as unsigned
			// char (or EOF); passing a plain char that is negative (bytes >= 0x80
			// where char is signed) is undefined behaviour.
			const int b = toupper((unsigned char)text[i+1]);
			const int c = toupper((unsigned char)text[i+2]);

			if (isxdigit(b) && isxdigit(c)) {
				unsigned int dec = 16 * ((b >= 'A' && b <= 'F') ? (b - 'A' + 10) : (b - '0'));	// high nibble
				dec += (c >= 'A' && c <= 'F') ? (c - 'A' + 10) : (c - '0');			// low nibble

				decoded.append((char)dec);

				i += 2;	// skip the two hex digits; the '%' itself is skipped by the i++ below
			}
		}
		else {
			decoded.append(a);
		}

		i++;
	}

	if (decoded.length()) {
		text = decoded;
	}
	return text;
}
Ejemplo n.º 8
0
/**
 * URL-encodes a string using the lookup table `m`.
 *
 * Each character with a non-empty entry in `m` is replaced by that entry;
 * every other character is copied unchanged.
 *
 * @param urlText the text to encode
 * @return the encoded text
 */
const SWBuf URL::encode(const char *urlText) {
	SWBuf url;
	url = urlText;

	SWBuf encoded;
	const int count = url.length();
	int pos = 0;
	while (pos < count) {
		const char& c = url[pos];
		if (m[c].length()) {
			encoded.append(m[c]);
		}
		else {
			encoded.append(SWBuf(c));
		}
		++pos;
	}

	url = encoded;
	return url;
}
Ejemplo n.º 9
0
/**
 * Fetches a remote directory listing and parses it into entries.
 *
 * The listing is retrieved with getURL() into a buffer, split into lines at
 * CR/LF and each line is handed to ftpparse(). Entries named "." and ".." and
 * lines ftpparse() rejects are skipped.
 *
 * @param dirURL URL of the directory to list
 * @return the parsed entries; empty if the listing could not be retrieved
 */
vector<struct DirEntry> RemoteTransport::getDirList(const char *dirURL) {

	SWLog::getSystemLog()->logDebug("RemoteTransport::getDirList(%s)", dirURL);
	vector<struct DirEntry> dirList;

	SWBuf dirBuf;
	if (!getURL("", dirURL, &dirBuf)) {
		char *start = dirBuf.getRawData();
		char *end = start;
		while (start < (dirBuf.getRawData()+dirBuf.size())) {
			struct ftpparse item;
			bool looking = true;
			// Terminate the current line at its first CR/LF, then move `end`
			// past any further CR/LF so it points at the next line.
			for (end = start; *end; end++) {
				if (looking) {
					if ((*end == 10) || (*end == 13)) {
						*end = 0;
						looking = false;
					}
				}
				else if ((*end != 10) && (*end != 13))
					break;
			}
			if (end == start) {
				// A NUL byte inside the listing: the scan above cannot advance,
				// so step over it here, otherwise this loop would never end.
				start++;
				continue;
			}
			// pointer difference is ptrdiff_t; convert explicitly for "%d"
			const int len = (int)(end - start);
			SWLog::getSystemLog()->logDebug("getDirList: parsing item %s(%d)\n", start, len);
			int status = ftpparse(&item, start, len);
			// in ftpparse.h, there is a warning that name is not necessarily null terminated
			SWBuf name;
			name.append(item.name, item.namelen);
			SWLog::getSystemLog()->logDebug("getDirList: got item %s\n", name.c_str());
			if (status && name != "." && name != "..") {
				struct DirEntry i;
				i.name = name;
				i.size = item.size;
				i.isDirectory = (item.flagtrycwd == 1);
				dirList.push_back(i);
			}
			start = end;
		}
	}
	else {
		SWLog::getSystemLog()->logWarning("getDirList: failed to get dir %s\n", dirURL);
	}
	return dirList;
}
Ejemplo n.º 10
0
/**
 * Replaces characters listed in `converters` when the option is off.
 *
 * With `option` set the text is left untouched. Otherwise the UTF-8 text is
 * read one code point at a time; a code point found in `converters` is
 * replaced by its mapped string, any other is written back as UTF-8. A code
 * point reported as 0 is replaced by U+FFFD.
 *
 * @param text   buffer to filter; replaced with the filtered result
 * @param key    unused
 * @param module unused
 * @return always 0
 */
char UTF8GreekAccents::processText(SWBuf &text, const SWKey *key, const SWModule *module) {

	if (option) {
		// accents are wanted: leave the text alone
		return 0;
	}

	const SWBuf source = text;
	const unsigned char *cursor = (const unsigned char *)source.c_str();

	text = "";
	while (*cursor) {
		// presumably advances `cursor` past the character it decodes
		__u32 codePoint = getUniCharFromUTF8(&cursor, true);
		if (!codePoint) {
			codePoint = 0xFFFD;	// replacement character for a bad sequence
		}

		const map<__u32, SWBuf>::const_iterator hit = converters.find(codePoint);
		if (hit != converters.end()) {
			// size is already known, so pass it instead of letting append measure
			text.append((const char *)hit->second, hit->second.size());
		}
		else {
			getUTF8FromUniChar(codePoint, &text);
		}
	}
	return 0;
}
Ejemplo n.º 11
0
/**
 * Converts GBF markup in @p text to OSIS markup, in place.
 *
 * Recognised GBF tokens and what they produce:
 *   scripRef ... /scripRef  enclosed text is passed to VerseKey::convertToOSIS
 *   RF / Rf                 <note type="x-StudyNote"> ... </note>
 *   TH / Th                 <title type="psalm"> ... </title>
 *   FI / Fi                 <transChange type="added"> ... </transChange>
 *   CT, CG                  &lt; and &gt;
 *   CM                      <milestone type="x-p" />
 *   img src="..."           <figure src="..." />
 *   WG / WH                 <w lemma="strong:..."> around the preceding word
 *   WT                      <w morph="robinson:..."> around the preceding word
 * Any other token is logged as unprocessed and dropped. If @p key is a
 * VerseKey with a non-zero verse, the result is wrapped in <verse osisID="...">.
 *
 * NOTE(review): wordStart/wordEnd point into `text`'s own buffer while `text`
 * keeps growing; the original marks this as a hack. Confirm these pointers
 * stay valid when the buffer is reallocated.
 *
 * @param text   buffer to convert; replaced with the OSIS result
 * @param key    current key, may be null; used for scripRef conversion,
 *               logging and the <verse> wrapper
 * @param module unused
 * @return always 0
 */
char GBFOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module) { 
	// Zero-initialised so that a stray '>' seen before any '<' is compared as
	// an empty token instead of reading indeterminate bytes.
	char token[2048] = {0};
	int tokpos = 0;
	bool intoken = false;
	bool keepToken = false;
	
//	static QuoteStack quoteStack;

	SWBuf orig = text;
	SWBuf tmp;
	SWBuf value;
	
	bool suspendTextPassThru = false;
	bool handled = false;
	bool newWord = false;
	bool newText = false;
	bool lastspace = false;
	
	const char *wordStart = text.c_str();
	const char *wordEnd = NULL;
	
	const char *textStart = NULL;
	const char *textEnd = NULL;
	
	SWBuf buf;
		
	text = "";
	for (const char* from = orig.c_str(); *from; ++from) {
		if (*from == '<') { //start of new token detected
			intoken = true;
			tokpos = 0;
			token[0] = 0;
			token[1] = 0;
			token[2] = 0;
			textEnd = from-1; //end of last text node found
			wordEnd = text.c_str() + text.length();//not good, instead of wordEnd = to!
			
			continue;
		}
		
		if (*from == '>') {	// process tokens
			intoken = false;
			keepToken = false;
			suspendTextPassThru = false;
			newWord = true;
			handled = false;

			// trim leading punctuation/blanks off the current word
			while (wordStart < (text.c_str() + text.length())) { //hack
				if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1])
					wordStart++;
				else break;
			}			
			// trim trailing punctuation/blanks off the current word
			while (wordEnd > wordStart) {
				if (strchr(" ,;:.?!()'\"", *wordEnd))
					wordEnd--;
				else break;
			}

			// Scripture Reference
			if (!strncmp(token, "scripRef", 8)) {
				suspendTextPassThru = true;
				newText = true;
				handled = true;
			}
			else if (!strncmp(token, "/scripRef", 9)) {
				tmp = "";
				// textStart is only set after an opening token; an unmatched
				// closing token must not dereference a null pointer
				if (textStart) {
					tmp.append(textStart, (int)(textEnd - textStart)+1);
				}
				text += VerseKey::convertToOSIS(tmp.c_str(), key);
				
				lastspace = false;
				suspendTextPassThru = false;
				handled = true;
			}

			// Footnote
			if (!strcmp(token, "RF") || !strncmp(token, "RF ", 3)) { //the GBFFootnotes filter adds the attribute "swordFootnote", we want to catch that, too
	//			pushString(buf, "<reference work=\"Bible.KJV\" reference=\"");
				text += "<note type=\"x-StudyNote\">";
				newText = true;
				lastspace = false;
				handled = true;
			}
			else	if (!strcmp(token, "Rf")) {
				text += "</note>";
				lastspace = false;
				handled = true;
			}
			// hebrew titles
			if (!strcmp(token, "TH")) {
				text += "<title type=\"psalm\">";
				newText = true;
				lastspace = false;
				handled = true;
			}
			else	if (!strcmp(token, "Th")) {
				text += "</title>";
				lastspace = false;
				handled = true;
			}
			// Italics assume transchange
			if (!strcmp(token, "FI")) {
				text += "<transChange type=\"added\">";
				newText = true;
				lastspace = false;
				handled = true;
			}
			else	if (!strcmp(token, "Fi")) {
				text += "</transChange>";
				lastspace = false;
				handled = true;
			}
			// less than
			if (!strcmp(token, "CT")) {
				text += "&lt;";
				newText = true;
				lastspace = false;
				handled = true;
			}
			// greater than
			if (!strcmp(token, "CG")) {
				text += "&gt;";
				newText = true;
				lastspace = false;
				handled = true;
			}
			// Paragraph break.  For now use empty paragraph element
			if (!strcmp(token, "CM")) {
				text += "<milestone type=\"x-p\" />";
				newText = true;
				lastspace = false;
				handled = true;
			}

			// Figure
			else	if (!strncmp(token, "img ", 4)) {
				const char *src = strstr(token, "src");
				if (!src)		// assert we have a src attribute
					continue;
//					return false;

				text += "<figure src=\"";
				const char *c;
				// advance to the opening quote of the src value
				for (c = src;((*c) && (*c != '"')); c++);

// uncomment for SWORD absolute path logic
//				if (*(c+1) == '/') {
//					pushString(buf, "file:");
//					pushString(buf, module->getConfigEntry("AbsoluteDataPath"));
//					if (*((*buf)-1) == '/')
//						c++;		// skip '/'
//				}
// end of uncomment for asolute path logic 

				// copy the value up to its closing quote
				for (c++;((*c) && (*c != '"')); c++) {
					text += *c;
				}
				text += "\" />";
				
				lastspace = false;
				handled = true;
			}

			// Strongs numbers
			else if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) {	// Strongs
				bool divineName = false;
				value = token+1;
			
				// normal strongs number
				//strstrip(val);
				if (!strncmp(wordStart, "<w ", 3)) {
					const char *attStart = strstr(wordStart, "lemma");
					if (attStart) {
						// existing lemma attribute: prepend this number to its value
						attStart += 7;
						
						buf = "";
						buf.appendFormatted("strong:%s ", value.c_str());
					}
					else { // no lemma attribute
						attStart = wordStart + 3;
						
						buf = "";
						// the format string is the first argument; passing `buf`
						// itself here used an empty format and produced nothing
						buf.appendFormatted("lemma=\"strong:%s\" ", value.c_str());
					}

					text.insert(attStart - text.c_str(), buf);
				}
				else { //wordStart doesn't point to an existing <w> attribute!
					if (!strcmp(value.c_str(), "H03068")) {	//divineName
						buf = "";
						buf.appendFormatted("<divineName><w lemma=\"strong:%s\">", value.c_str());
						
						divineName = true;
					}
					else {
						buf = "";
						buf.appendFormatted("<w lemma=\"strong:%s\">", value.c_str());
					}

					text.insert(wordStart - text.c_str(), buf);

					if (divineName) {
						wordStart += 12;	// length of "<divineName>"
						text += "</w></divineName>";
					}
					else	text += "</w>";

					lastspace = false;
				}
				handled = true;
			}

			// Morphology
			else if (*token == 'W' && token[1] == 'T') {
				if (token[2] == 'G' || token[2] == 'H') {	// Strongs
					value = token+2;
				}
				else value = token+1;
				
				if (!strncmp(wordStart, "<w ", 3)) {
					const char *attStart = strstr(wordStart, "morph");
					if (attStart) { //existing morph attribute, append this one to it
						attStart += 7;
						buf = "";
						buf.appendFormatted("%s:%s ", "robinson", value.c_str());
					}
					else { // no morph attribute
						attStart = wordStart + 3;
						buf = "";
						buf.appendFormatted("morph=\"%s:%s\" ", "robinson", value.c_str());
					}
					
					text.insert(attStart - text.c_str(), buf); //hack, we have to
				}
				else { //no existing <w> attribute found
					buf = "";
					buf.appendFormatted("<w morph=\"%s:%s\">", "robinson", value.c_str());
					text.insert(wordStart - text.c_str(), buf);
					text += "</w>";
					lastspace = false;

				}
				handled = true;
			}

			if (!keepToken) {	
				if (!handled) {
					SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>");
//					exit(-1);
				}
				// drop a trailing blank when punctuation follows the token
				if (from[1] && strchr(" ,;.:?!()'\"", from[1])) {
					if (lastspace) {
						text--;
					}
				}
				if (newText) {
					textStart = from+1;
					newText = false; 
				}
				continue;
			}

			// keepToken is never set in this function, so this is not reached
			// if not a strongs token, keep token in text
			text.appendFormatted("<%s>", token);
			
			if (newText) {
				textStart = text.c_str() + text.length();
				newWord = false; 
			}
			continue;
		}
		if (intoken) {
			// CR/LF inside a token and characters beyond the buffer bound are dropped
			if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) {
				token[tokpos++] = *from;
				token[tokpos+2] = 0;
			}
		}
		else	{
			switch (*from) {
			case '\'':
			case '\"':
			case '`':
//				quoteStack.handleQuote(fromStart, from, &to);
				text += *from;
				//from++; //this line removes chars after an apostrophe! Needs fixing.
				break;
			default:
				if (newWord && (*from != ' ')) {
					wordStart = text.c_str() + text.length();
					newWord = false;
					
					//fix this if required?
					//memset(to, 0, 10);

				}

				if (!suspendTextPassThru) {
					text += (*from);
					lastspace = (*from == ' ');
				}
			}
		}
	}

	VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key);
	if (vkey) {
		SWBuf ref = "";
		if (vkey->getVerse()) {
			ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef());
		}

		if (ref.length() > 0) {

			text = ref + text;

			if (vkey->getVerse()) {
				VerseKey *tmp = (VerseKey *)vkey->clone();
				*tmp = *vkey;
				tmp->setAutoNormalize(false);
				tmp->setIntros(true);

				text += "</verse>";

				// The comparisons below detect the last verse of a chapter and
				// of a book; the output they once guarded is commented out.
				*tmp = MAXVERSE;
				if (*vkey == *tmp) {
					tmp->setVerse(0);
//					sprintf(ref, "\t</div>");
//					pushString(&to, ref);
					*tmp = MAXCHAPTER;
					*tmp = MAXVERSE;
					if (*vkey == *tmp) {
						tmp->setChapter(0);
						tmp->setVerse(0);
//						sprintf(ref, "\t</div>");
//						pushString(&to, ref);
/*
						if (!quoteStack.empty()) {
							SWLog::getSystemLog()->logError("popping unclosed quote at end of book");
							quoteStack.clear();
						}
*/
					}
				}
				delete tmp;
			}
//			else if (vkey->Chapter()) {
//				sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef());
//			}
//			else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef());
		}
	}
	return 0;
}
Ejemplo n.º 12
0
/**
 * Converts GBF markup in @p text to plain text, in place.
 *
 * Tokens handled:
 *   WG / WH / WT   " <value> " (the token text after its first two characters)
 *   RF / Rf        " [" and "] "
 *   CA<n>          the character with numeric value n
 *   CG             '>'
 *   CL, CN         newline
 *   CM             two newlines
 * Every other token is dropped; text outside tokens is copied unchanged.
 * Tokens longer than the token buffer are truncated.
 *
 * @param text   buffer to convert; replaced with the plain-text result
 * @param key    unused
 * @param module unused
 * @return always 0
 */
char GBFPlain::processText (SWBuf &text, const SWKey *key, const SWModule *module)
{
	// Zero-initialised so that a stray '>' seen before any '<' is treated as
	// an empty token instead of reading indeterminate bytes.
	char token[2048] = {0};
	int tokpos = 0;
	bool intoken = false;
	SWBuf orig = text;
	const char* from = orig.c_str();
	
	for (text = ""; *from; ++from) {
		if (*from == '<') {
			intoken = true;
			tokpos = 0;
			token[0] = 0;
			token[1] = 0;
			token[2] = 0;
			continue;
		}
		if (*from == '>') {
			intoken = false;
			// process desired tokens
			switch (*token) {
			case 'W':	// Strongs
				switch(token[1]) {
				case 'G':               // Greek
				case 'H':               // Hebrew
				case 'T':               // Tense
					text.append(" <");
					text.append(token+2);
					text.append("> ");
					continue;
				}
				break;
			case 'R':
				switch(token[1]) {
				case 'F':               // footnote begin
					text.append(" [");
					continue;
				case 'f':               // footnote end
					text.append("] ");
					continue;
				}
				break;
			case 'C':
				switch(token[1]) {
				case 'A':               // ASCII value
					text.append((char)atoi(&token[2]));
					continue;
				case 'G':
					text.append('>');
					continue;
/*								Bug in WEB
				case 'L':
					*to++ = '<';
					continue;
*/
				case 'L':	//        Bug in WEB.  Use above entry when fixed
				case 'N':               // new line
					text.append('\n');
					continue;
				case 'M':               // new paragraph
					text.append("\n\n");
					continue;
				}
				break;
			}
			// unrecognised token: dropped
			continue;
		}
		if (intoken) {
			// characters beyond the buffer bound are silently dropped
			if (tokpos < 2045) {
				token[tokpos++] = *from;
			}
			// keep the token terminated two positions ahead; this runs for
			// every token character, also when the character was dropped
			token[tokpos+2] = 0;
		}
		else	text.append(*from);
	}
	return 0;
}
Ejemplo n.º 13
0
/**
 * Handles <seg type="morph"> markup: records morpheme bodies as entry
 * attributes and, when the option is off, removes the seg tags.
 *
 * For every closing </seg> tag the text collected since the last opening
 * <seg type="morph"> is stored under
 * getEntryAttributes()["Morpheme"][NNN]["body"], where NNN is a running
 * three-digit counter. The text content itself always stays in the output.
 *
 * @param text   buffer to filter; replaced with the filtered result
 * @param module module whose entry attributes receive the morpheme bodies
 * @return always 0
 */
char OSISMorphSegmentation::processText(SWBuf &text, const SWKey * /*key*/, const SWModule *module) {
	const SWBuf input( text );

	SWBuf token;
	bool insideTag = false;
	bool hideSegTags = false;

	XMLTag tag;
	SWBuf morphemeBody = "";
	unsigned int morphemeNum = 0;
	bool inMorpheme = false;
	SWBuf attrKey;

	text = "";
	for (const char *p = input.c_str(); *p; ++p) {
		const char ch = *p;

		if (ch == '<') {
			insideTag = true;
			token = "";
			continue;
		}

		if (ch != '>') {
			// ordinary character: belongs either to the tag or to the text
			if (insideTag) {
				token.append(ch);
			}
			else {
				text.append(ch);
				if (inMorpheme) {
					morphemeBody.append(ch);
				}
			}
			continue;
		}

		// a complete tag has been collected
		insideTag = false;

		const bool isSegTag = !strncmp(token.c_str(), "seg ", 4) || !strncmp(token.c_str(), "/seg", 4);
		if (isSegTag) {
			tag = token;
			const bool closing = tag.isEndTag();

			if (!closing) {
				const char *type = tag.getAttribute("type");
				if (type && !strcmp("morph", type)) {
					// <seg type="morph">: start collecting a new morpheme
					hideSegTags = !option;	// tags are hidden only while the option is off
					morphemeBody = "";
					inMorpheme = true;
				}
			}
			else {
				// </seg>: store what was collected under the next number
				attrKey.setFormatted("%.3d", morphemeNum++);
				module->getEntryAttributes()["Morpheme"][attrKey]["body"] = morphemeBody;
				inMorpheme = false;
			}

			if (hideSegTags) {
				if (closing) {
					hideSegTags = false;
				}
				continue;	// leave this seg tag out of the output
			}
		}

		text.append('<');
		text.append(token);
		text.append('>');

		if (inMorpheme) {
			morphemeBody.append('<');
			morphemeBody.append(token);
			morphemeBody.append('>');
		}

		hideSegTags = false;
	}
	return 0;
}
Ejemplo n.º 14
0
// Translates one TEI tag into RTF control sequences appended to buf.
//
// The token is first offered to substituteToken(); only when that returns
// false is it parsed as an XMLTag and dispatched on the tag name below.
//
//   buf      - output buffer the RTF text is appended to
//   token    - the tag text (parsed with XMLTag)
//   userData - filter state; cast to MyUserData, whose key and
//              suspendTextPassThru members are used by the <note> branch
bool TEIRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
  // manually process if it wasn't a simple substitution
	if (!substituteToken(buf, token)) {
		MyUserData *u = (MyUserData *)userData;
		XMLTag tag(token);

		// <p> paragraph tag
		if (!strcmp(tag.getName(), "p")) {
			if (!tag.isEndTag()) {	// non-empty start tag
				buf += "{\\sb100\\fi200\\par}";
			}
		}

		// <hi> and <emph>: open an RTF group chosen by the rend attribute
		else if (!strcmp(tag.getName(), "hi") || !strcmp(tag.getName(), "emph")) {
			SWBuf rend = tag.getAttribute("rend");
			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
				if (rend == "italic" || rend == "ital")
					buf += "{\\i1 ";
				else if (rend == "bold")
					buf += "{\\b1 ";
				else if (rend == "super" || rend == "sup")
				        buf += "{\\super ";
				else if (rend == "sub")
					buf += "{\\sub ";
				// NOTE(review): an unrecognised rend value opens no group here,
				// but the end tag below still appends "}" - confirm intended
			}
			else if (tag.isEndTag()) {
				buf += "}";
			}
		}

		// <entryFree>: emit the n attribute in bold, followed by ". "
		else if (!strcmp(tag.getName(), "entryFree")) {
			SWBuf n = tag.getAttribute("n");
			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
			        if (n != "") {
                                	buf += "{\\b1 ";
					buf += n;
					buf += ". }";				}
			}
		}

		// <sense>: like <entryFree>, but starting a new paragraph
		else if (!strcmp(tag.getName(), "sense")) {
			SWBuf n = tag.getAttribute("n");
			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
			        if (n != "") {
                                	buf += "{\\sb100\\par\\b1 ";
					buf += n;
					buf += ". }";
				}
			}
		}

 		// <orth>: bold group
 		else if (!strcmp(tag.getName(), "orth")) {
 			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
 				buf += "{\\b1 ";
 			}
 			else if (tag.isEndTag()) {
 			        buf += "}";
 			}
 		}

		// <div>: only the start tag produces output
		else if (!strcmp(tag.getName(), "div")) {

			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
				buf.append("{\\pard\\sa300}");
			}
			else if (tag.isEndTag()) {
			}
		}

		// <pos>, <gen>, <case>, <gram>, <number>, <mood>: italic group
		else if (!strcmp(tag.getName(), "pos") || !strcmp(tag.getName(), "gen") || !strcmp(tag.getName(), "case") || !strcmp(tag.getName(), "gram") || !strcmp(tag.getName(), "number") || !strcmp(tag.getName(), "mood")) {
			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
				buf += "{\\i1 ";
			}
			else if (tag.isEndTag()) {
			        buf += "}";
			}
		}

		// <tr>: italic group
		else if (!strcmp(tag.getName(), "tr")) {
			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
				buf += "{\\i1 ";
			}
			else if (tag.isEndTag()) {
			        buf += "}";
			}
		}

		// <etym>: wrapped in square brackets
		else if (!strcmp(tag.getName(), "etym")) {
			if ((!tag.isEndTag()) && (!tag.isEmpty())) {
				buf += "[";
			}
			else if (tag.isEndTag()) {
			        buf += "]";
			}
		}

       		// <note> tag: emit a footnote marker and suspend text pass-through
       		// until the matching end tag
		else if (!strcmp(tag.getName(), "note")) {
			if (!tag.isEndTag()) {
				if (!tag.isEmpty()) {
					SWBuf type = tag.getAttribute("type");

					SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
					VerseKey *vkey = 0;
					// see if we have a VerseKey * or descendant
					SWTRY {
						vkey = SWDYNAMIC_CAST(VerseKey, u->key);
					}
					SWCATCH ( ... ) {	}
					// the marker is only written when the current key is a VerseKey
					if (vkey) {
						buf.appendFormatted("{\\super <a href=\"\">*%s</a>} ", footnoteNumber.c_str());
					}
					u->suspendTextPassThru = true;
				}
			}
			if (tag.isEndTag()) {
				u->suspendTextPassThru = false;
			}
		}
Ejemplo n.º 15
0
/**
 * Collects OSIS headings (<title> elements and preverse <div>s).
 *
 * While a heading is open, every token and the preceding text node are
 * accumulated in u->heading. When the heading's closing tag arrives, the
 * heading is optionally recorded in the module's entry attributes
 * (["Heading"]["Preverse" | "Interverse"][n]) and optionally written to buf.
 *
 * @param buf      output buffer
 * @param token    the tag text (parsed with XMLTag)
 * @param userData filter state, cast to MyUserData
 * @return true if the token was consumed by heading handling, false otherwise
 */
bool OSISHeadings::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {

	MyUserData *u = (MyUserData *)userData;
	XMLTag tag(token);
	SWBuf name = tag.getName();

	// ----- already inside a heading: keep accumulating -----
	if (u->currentHeadingName.size()) {
		u->heading.append(u->lastTextNode);

		if (name == u->currentHeadingName) {
			if (!tag.isEndTag(u->sID)) {
				// nested element with the same name
				u->depth++;
			}
			else if (!u->depth-- || u->sID) {
				// the heading is complete; its content is stored in u->heading
				const bool canonical = (SWBuf("true") == u->currentHeadingTag.getAttribute("canonical"));
				const bool preverse = (SWBuf("x-preverse") == u->currentHeadingTag.getAttribute("subType") || SWBuf("x-preverse") == u->currentHeadingTag.getAttribute("subtype"));

				// record it in the entry attributes?
				if (u->module->isProcessEntryAttributes() && (option || canonical || !preverse)) {
					SWBuf headerKey;
					headerKey.appendFormatted("%i", u->headerNum++);

					// A <title> heading keeps its wrapper tags (minus the
					// preverse marker) so that frontends can tell it apart
					// from other preverse material.
					SWBuf heading;
					if (u->currentHeadingName == "title") {
						XMLTag wrapper = u->currentHeadingTag;
						if (SWBuf("x-preverse") == wrapper.getAttribute("subType")) {
							wrapper.setAttribute("subType", 0);
						}
						else if (SWBuf("x-preverse") == wrapper.getAttribute("subtype")) {
							wrapper.setAttribute("subtype", 0);
						}
						heading = wrapper;
						heading += u->heading;
						heading += tag;
					}
					else {
						heading = u->heading;
					}
					u->module->getEntryAttributes()["Heading"][(preverse)?"Preverse":"Interverse"][headerKey] = heading;

					// copy every attribute of the opening tag as well
					StringList attributes = u->currentHeadingTag.getAttributeNames();
					for (StringList::const_iterator attr = attributes.begin(); attr != attributes.end(); ++attr) {
						u->module->getEntryAttributes()["Heading"][headerKey][attr->c_str()] = u->currentHeadingTag.getAttribute(attr->c_str());
					}
				}

				// write it to the body?
				if (!preverse && (option || canonical)) {
					buf.append(u->currentHeadingTag);
					buf.append(u->heading);
					buf.append(tag);
				}
				u->suspendTextPassThru = false;
				u->clear();
			}
		}

		u->heading.append(tag);
		return true;
	}

	// ----- not in a heading: does this token open one? -----
	const bool opensHeading =
		   name == "title"
		|| (name == "div"
			&& ( SWBuf("x-preverse") == tag.getAttribute("subType")
			  || SWBuf("x-preverse") == tag.getAttribute("subtype")));
	if (!opensHeading) {
		return false;
	}

	u->currentHeadingName = name;
	u->currentHeadingTag = tag;
	u->heading = "";
	u->sID = u->currentHeadingTag.getAttribute("sID");
	u->depth = 0;
	u->suspendTextPassThru = true;

	return true;
}
Ejemplo n.º 16
0
/**
 * Walks the OSIS markup in @p text one character at a time, rebuilding it in
 * place, and gives <w> (word) elements special treatment:
 *  - when module->isProcessEntryAttributes() is true, the morph, lemma and src
 *    attributes of each <w> are recorded under
 *    module->getEntryAttributes()["Word"][NNN] (NNN is the zero padded,
 *    1-based word counter), a wn="NNN" attribute is added to the <w> token and
 *    the raw text between <w> and </w> is stored as ["Text"];
 *  - when the filter option is off, the lemma attribute of each <w> is moved
 *    into a savlm attribute.
 * A <seg type="page" subtype="..."> remembers its subtype, which is then
 * attached as ["Page"] to the words that follow.
 *
 * @param text   markup to filter; replaced by the filtered markup
 * @param key    current key; when it is a VerseKey it is consulted to prefix
 *               purely numeric Strong's numbers with a language letter
 * @param module module whose entry attributes are filled in (dereferenced
 *               without a null check)
 * @return always 0
 */
char OSISStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	SWBuf token;
	bool intoken = false;
	int wordNum = 1;
	// holds any int printed with %03d; the former 5 byte buffer overflowed from word 10000 on
	char wordstr[16];
	const char *wordStart = 0;
	SWBuf page = "";		// some modules include <seg> page info, so we add these to the words

	const SWBuf orig = text;
	const char * from = orig.c_str();

	for (text = ""; *from; ++from) {
		if (*from == '<') {
			intoken = true;
			token = "";
			continue;
		}
		if (*from == '>') {	// process tokens
			intoken = false;

			// possible page seg --------------------------------
			if (token.startsWith("seg ")) {
				XMLTag stag(token);
				SWBuf type = stag.getAttribute("type");
				if (type == "page") {
					SWBuf number = stag.getAttribute("subtype");
					if (number.length()) {
						page = number;
					}
				}
			}
			// ---------------------------------------------------

			if (token.startsWith("w ")) {	// Word
				XMLTag wtag(token);
				if (module->isProcessEntryAttributes()) {
					// the word's text starts right after this tag's closing '>'
					wordStart = from+1;
					char gh = 0;
					VerseKey *vkey = 0;
					if (key) {
						vkey = SWDYNAMIC_CAST(VerseKey, key);
					}
					SWBuf lemma      = "";
					SWBuf morph      = "";
					SWBuf src        = "";
					SWBuf morphClass = "";
					SWBuf lemmaClass = "";

					const char *attrib;
					sprintf(wordstr, "%03d", wordNum);

					// why is morph entry attribute processing done in here?  Well, it's faster.  It makes more local sense to place this code in osismorph.
					// easier to keep lemma and morph in same wordstr number too maybe.
					if ((attrib = wtag.getAttribute("morph"))) {
						int count = wtag.getAttributePartCount("morph", ' ');
						int i = (count > 1) ? 0 : -1;		// -1 for whole value cuz it's faster, but does the same thing as 0
						do {
							SWBuf mClass = "";
							SWBuf mp = "";
							attrib = wtag.getAttribute("morph", i, ' ');
							if (i < 0) i = 0;	// to handle our -1 condition

							// split "class:value" into its two halves
							const char *m = strchr(attrib, ':');
							if (m) {
								int len = m-attrib;
								mClass.append(attrib, len);
								attrib += (len+1);
							}
							if ((mClass == "x-Robinsons") || (mClass == "x-Robinson") || (mClass == "Robinson")) {
								mClass = "robinson";
							}
							if (i) { morphClass += " "; morph += " "; }
							mp += attrib;
							morphClass += mClass;
							morph += mp;
							if (count > 1) {
								SWBuf tmp;
								tmp.setFormatted("Morph.%d", i+1);
								module->getEntryAttributes()["Word"][wordstr][tmp] = mp;
								tmp.setFormatted("MorphClass.%d", i+1);
								module->getEntryAttributes()["Word"][wordstr][tmp] = mClass;
							}
						} while (++i < count);
					}

					if ((attrib = wtag.getAttribute("lemma"))) {
						int count = wtag.getAttributePartCount("lemma", ' ');
						int i = (count > 1) ? 0 : -1;		// -1 for whole value cuz it's faster, but does the same thing as 0
						do {
							gh = 0;
							SWBuf lClass = "";
							SWBuf l = "";
							attrib = wtag.getAttribute("lemma", i, ' ');
							if (i < 0) i = 0;	// to handle our -1 condition

							const char *m = strchr(attrib, ':');
							if (m) {
								int len = m-attrib;
								lClass.append(attrib, len);
								attrib += (len+1);
							}
							if ((lClass == "x-Strongs") || (lClass == "strong") || (lClass == "Strong")) {
								// cast: <cctype> functions are undefined for negative char values
								if (isdigit((unsigned char)attrib[0])) {
									if (vkey) {
										// NOTE(review): every non-zero testament selects 'H' here -- confirm this matches what getTestament() returns
										gh = vkey->getTestament() ? 'H' : 'G';
									}
								}
								else {
									// the number already carries its own language letter
									gh = *attrib;
									attrib++;
								}
								lClass = "strong";
							}
							if (gh) l += gh;
							l += attrib;
							if (i) { lemmaClass += " "; lemma += " "; }
							lemma += l;
							lemmaClass += lClass;
							if (count > 1) {
								SWBuf tmp;
								tmp.setFormatted("Lemma.%d", i+1);
								module->getEntryAttributes()["Word"][wordstr][tmp] = l;
								tmp.setFormatted("LemmaClass.%d", i+1);
								module->getEntryAttributes()["Word"][wordstr][tmp] = lClass;
							}
						} while (++i < count);
						module->getEntryAttributes()["Word"][wordstr]["PartCount"].setFormatted("%d", count);
					}

					if ((attrib = wtag.getAttribute("src"))) {
						int count = wtag.getAttributePartCount("src", ' ');
						int i = (count > 1) ? 0 : -1;		// -1 for whole value cuz it's faster, but does the same thing as 0
						do {
							SWBuf mp = "";
							attrib = wtag.getAttribute("src", i, ' ');
							if (i < 0) i = 0;	// to handle our -1 condition

							if (i) src += " ";
							mp += attrib;
							src += mp;
							if (count > 1) {
								SWBuf tmp;
								tmp.setFormatted("Src.%d", i+1);
								module->getEntryAttributes()["Word"][wordstr][tmp] = mp;
							}
						} while (++i < count);
					}


					// store the combined (space separated) values, skipping anything empty
					if (lemma.length())
						module->getEntryAttributes()["Word"][wordstr]["Lemma"] = lemma;
					if (lemmaClass.length())
						module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = lemmaClass;
					if (morph.length())
						module->getEntryAttributes()["Word"][wordstr]["Morph"] = morph;
					if (morphClass.length())
						module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = morphClass;
					if (src.length())
						module->getEntryAttributes()["Word"][wordstr]["Src"] = src;
					if (page.length())
						module->getEntryAttributes()["Word"][wordstr]["Page"] = page;

					// for an empty element, cut the trailing " /" off so wn can be appended before it
					if (wtag.isEmpty()) {
						int j;
						for (j = token.length()-1; ((j>0) && (strchr(" /", token[j]))); j--);
						token.size(j+1);
					}
					
					token += " wn=\"";
					token += wordstr;
					token += "\"";

					if (wtag.isEmpty()) {
						token += "/";
					}

					wordNum++;
				}

				if (!option) {
/*
 * Code which handles multiple lemma types.  Kindof works but breaks at least WEBIF filters for strongs.
 *
					int count = wtag.getAttributePartCount("lemma", ' ');
					for (int i = 0; i < count; i++) {
						SWBuf a = wtag.getAttribute("lemma", i, ' ');
						const char *prefix = a.stripPrefix(':');
						if ((prefix) && (!strcmp(prefix, "x-Strongs") || !strcmp(prefix, "strong") || !strcmp(prefix, "Strong"))) {
							// remove attribute part
							wtag.setAttribute("lemma", 0, i, ' ');
							i--;
							count--;
						}
					}
* Instead the code below just removes the lemma attribute
*****/
					const char *l = wtag.getAttribute("lemma");
					if (l) {
						SWBuf savlm = l;
						wtag.setAttribute("lemma", 0);
						wtag.setAttribute("savlm", savlm);
						// the token is rebuilt from wtag, which was parsed before wn was appended above
						token = wtag;
						token.trim();
						// drop <>
						token << 1;
						token--;
					}
				}
			}
			if (token.startsWith("/w")) {	// Word End
				if (module->isProcessEntryAttributes()) {
					if (wordStart) {
						SWBuf tmp;
						// from points at the '>' of "</w>", so the 3 characters before it ("</w") are not word text
						tmp.append(wordStart, (from-wordStart)-3);
						sprintf(wordstr, "%03d", wordNum-1);
						module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
					}
				}
				wordStart = 0;
			}
			
			// keep token in text
			text.append('<');
			text.append(token);
			text.append('>');
			
			continue;
		}
		if (intoken) {
			token += *from;
		}
		else	{
			text.append(*from);
		}
	}
	return 0;
}
Ejemplo n.º 17
0
/**
 * Pulls ThML <note> elements out of @p text.
 *
 * The body of every non-empty <note> is collected separately.  At the matching
 * end tag, if the module processes entry attributes, the footnote counter in
 * ["Footnote"]["count"]["value"] is bumped and the start tag's attributes plus
 * the body are stored under ["Footnote"][N]; notes of type "crossReference"
 * additionally get a "refList" built from the passage attributes of the
 * <scripRef> tags seen inside the note or, if there were none, from parsing
 * the note body as a verse list.
 * The note is written back into the text only when the filter option is on or
 * the note is a cross reference.
 *
 * @param text   markup to filter; replaced by the filtered markup
 * @param key    current key, used to position the verse parser; may be null
 * @param module module receiving the entry attributes; may be null, in which
 *               case no attributes are recorded
 * @return always 0
 */
char ThMLFootnotes::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	SWBuf token;
	bool intoken    = false;
	bool hide       = false;
	SWBuf tagText;
	XMLTag startTag;
	SWBuf refs = "";
	int footnoteNum = 1;
	char buf[254];
	// the parser must be a VerseKey; fall back to a plain one when module/key give us something else
	SWKey *p = (module) ? module->createKey() : (key) ? key->clone() : new VerseKey();
	VerseKey *parser = SWDYNAMIC_CAST(VerseKey, p);
	if (!parser) {
		delete p;
		parser = new VerseKey();
	}
	// key is optional (see the fallback chain above), so only position the parser when we have one
	if (key) {
		*parser = key->getText();
	}

	SWBuf orig = text;
	const char *from = orig.c_str();

	for (text = ""; *from; from++) {
		if (*from == '<') {
			intoken = true;
			token = "";
			continue;
		}
		if (*from == '>') {	// process tokens
			intoken = false;

			XMLTag tag(token);
			if (!strcmp(tag.getName(), "note")) {
				if (!tag.isEndTag()) {
					if (!tag.isEmpty()) {
						// start collecting the note body instead of passing it through
						refs = "";
						startTag = tag;
						hide = true;
						tagText = "";
						continue;
					}
				}
				if (hide && tag.isEndTag()) {
					// module is optional as well; without it there is nowhere to store attributes
					if (module && module->isProcessEntryAttributes()) {
						SWBuf fc = module->getEntryAttributes()["Footnote"]["count"]["value"];
						footnoteNum = (fc.length()) ? atoi(fc.c_str()) : 0;
						sprintf(buf, "%i", ++footnoteNum);
						module->getEntryAttributes()["Footnote"]["count"]["value"] = buf;
						StringList attributes = startTag.getAttributeNames();
						for (StringList::iterator it = attributes.begin(); it != attributes.end(); it++) {
							module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
						}
						module->getEntryAttributes()["Footnote"][buf]["body"] = tagText;
						startTag.setAttribute("swordFootnote", buf);
						if ((startTag.getAttribute("type")) && (!strcmp(startTag.getAttribute("type"), "crossReference"))) {
							// no <scripRef> inside the note: derive the references from the body text
							if (!refs.length())
								refs = parser->parseVerseList(tagText.c_str(), *parser, true).getRangeText();
							module->getEntryAttributes()["Footnote"][buf]["refList"] = refs.c_str();
						}
					}
					hide = false;
					if ((option) || ((startTag.getAttribute("type") && (!strcmp(startTag.getAttribute("type"), "crossReference"))))) {	// we want the tag in the text; crossReferences are handled by another filter
						text += startTag;
						text.append(tagText);
					}
					else	continue;
				}
			}

			// if not a note token, keep token in text
			if ((!strcmp(tag.getName(), "scripRef")) && (!tag.isEndTag())) {
				SWBuf osisRef = tag.getAttribute("passage");
				if (refs.length())
					refs += "; ";
				refs += osisRef;
			}
			if (!hide) {
				text += '<';
				text.append(token);
				text += '>';
			}
			else {
				tagText += '<';
				tagText.append(token);
				tagText += '>';
			}
			continue;
		}
		if (intoken) { //copy token
			token += *from;
		}
		else if (!hide) { //copy text which is not inside a token
			text += *from;
		}
		else tagText += *from;
	}
	delete parser;
	return 0;
}
Ejemplo n.º 18
0
/**
 * Extracts ThML headings -- <div class="sechead"> and <div class="title">
 * elements -- from @p text.
 *
 * A non-empty heading start tag switches on "hide" mode: the start tag itself
 * is dropped and everything up to the closing </div> is collected in `header`
 * instead of being copied to the output.  At the closing tag the collected
 * header may be stored in the module's entry attributes under
 * ["Heading"]["Preverse"|"Interverse"][n], together with the start tag's
 * attributes under ["Heading"][n].
 *
 * As currently written `hide` is only ever switched on together with
 * `preverse`, so every heading is handled as a preverse heading: it is stored
 * only when the filter option is on, and it is never written back to the text.
 *
 * @param text   markup to filter; replaced by the filtered markup
 * @param key    not used by this filter
 * @param module module receiving the entry attributes (dereferenced without a
 *               null check)
 * @return always 0
 */
char ThMLHeadings::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	SWBuf token;
	bool intoken    = false;
	bool isheader   = false;
	bool hide       = false;
	bool preverse   = false;
	bool withinDiv  = false;
	SWBuf header;
	int headerNum   = 0;
	int pvHeaderNum = 0;
	char buf[254];
	XMLTag startTag;

	SWBuf orig = text;
	const char *from = orig.c_str();
	
	XMLTag tag;

	for (text = ""; *from; ++from) {
		if (*from == '<') {
			intoken = true;
			token = "";
			
			continue;
		}
		if (*from == '>') {	// process tokens
			intoken = false;

			// only div start/end tokens (matched by prefix, case-insensitively) update the heading state
			if (!strnicmp(token.c_str(), "div", 3) || !strnicmp(token.c_str(), "/div", 4)) {
				withinDiv =  (!strnicmp(token.c_str(), "div", 3));
				tag = token;
				// closing tag of a heading we have been collecting
				if (hide && tag.isEndTag()) {
					if (module->isProcessEntryAttributes() && (option || (!preverse))) {
						if (preverse) {
							sprintf(buf, "%i", pvHeaderNum++);
							module->getEntryAttributes()["Heading"]["Preverse"][buf] = header;
						}
						else {
							sprintf(buf, "%i", headerNum++);
							module->getEntryAttributes()["Heading"]["Interverse"][buf] = header;
							if (option) {	// we want the tag in the text
								text.append(header);
							}
						}
						
						// copy the attributes of the heading's start tag next to the heading itself
						StringList attributes = startTag.getAttributeNames();
						for (StringList::const_iterator it = attributes.begin(); it != attributes.end(); it++) {
							module->getEntryAttributes()["Heading"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
						}
					}
					
					hide = false;
					if (!option || preverse) {	// we don't want the tag in the text anymore
						preverse = false;
						continue;
					}
					preverse = false;
				}
				if (tag.getAttribute("class") && ((!stricmp(tag.getAttribute("class"), "sechead"))
										 ||  (!stricmp(tag.getAttribute("class"), "title")))) {

					isheader = true;
					
					if (!tag.isEndTag()) { //start tag
						if (!tag.isEmpty()) {
							startTag = tag;
					
/* how do we tell a ThML preverse title from one that should be in the text?  probably if any text is before the title...  just assuming all are preverse for now
					}
					if (tag.getAttribute("subtype") && !stricmp(tag.getAttribute("subtype"), "x-preverse")) {
*/
						// begin collecting the heading; the start tag itself is not copied anywhere
						hide = true;
						preverse = true;
						header = "";
						continue;
						}	// move back up under startTag = tag
					}
/* this is where non-preverse will go eventually
					if (!tag.isEndTag()) { //start tag
						hide = true;
						header = "";
						if (option) {	// we want the tag in the text
							text.append('<');
							text.append(token);
							text.append('>');
						}
						continue;
					}
*/
				}
				else
					isheader = false;
			}

			// tokens met while the last div token seen was a heading start tag belong to the heading
			if (withinDiv && isheader) {
				header.append('<');
				header.append(token);
				header.append('>');
			} else {
				// if not a heading token, keep token in text
				if (!hide) {
					text.append('<');
					text.append(token);
					text.append('>');
				}
			}
			continue;
		}
		if (intoken) { //copy token
			token.append(*from);
		}
		else if (!hide) { //copy text which is not inside a token
			text.append(*from);
		}
		else header.append(*from);
	}
	return 0;
}
Ejemplo n.º 19
0
/**
 * When the filter option is on, wraps every word that is followed by a
 * <sync type="Strongs" value="..."/> tag in
 * <span class="clk" onclick="p(...)"> ... </span> and records the word's data
 * under module->getEntryAttributes()["Word"][NNN] ("Strongs", "Text",
 * "TextStart", "Morph", "MorphClass").
 *
 * The closing </span> is appended as soon as the Strongs tag is seen.  The
 * opening <span> can only be built once the word's morphology is known, so it
 * is inserted after the fact at the recorded "TextStart" position: for word N
 * when the Strongs tag of word N+1 is reached, and for the last word after the
 * loop.
 *
 * @param text   markup to filter; replaced by the filtered markup
 * @param key    current key; a VerseKey contributes its verse number to the
 *               word id, any other key its text; may be null
 * @param module module receiving the entry attributes; when null the text is
 *               left untouched
 * @return always 0
 */
char ThMLWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	// every word attribute lives in the module's entry attributes, so without a module there is nothing to do
	if (option && module) {
		char token[2112]; // cheese.  Fix.
		int tokpos = 0;
		bool intoken = false;
		int word = 1;
		char val[128];
		char *valto;
		char *ch;
		char wordstr[16];	// holds any int printed with %03d
		unsigned int textStart = 0, lastAppendLen = 0, textEnd = 0;
		SWBuf tmp;
		bool newText = false;
		bool needWordOut = false;
		AttributeValue *wordAttrs = 0;
		SWBuf modName = (module)?module->getName():"";
		SWBuf wordSrcPrefix = modName;
		
		const SWBuf orig = text;
		const char * from = orig.c_str();
		VerseKey *vkey = 0;
		if (key) {
			vkey = SWDYNAMIC_CAST(VerseKey, key);
		}

		// start with an empty token so a '>' seen before any '<' never reads uninitialized memory
		token[0] = 0;
		token[1] = 0;
		token[2] = 0;

		for (text = ""; *from; from++) {
			if (*from == '<') {
				intoken = true;
				tokpos = 0;
				token[0] = 0;
				token[1] = 0;
				token[2] = 0;
				textEnd = text.length();
				continue;
			}
			if (*from == '>') {	// process tokens
				intoken = false;
				if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) {	// Strongs
					// the value is expected at offset 27 (after value="); never read past the end of a shorter token
					const size_t tokLen = strlen(token);
					valto = val;
					for (unsigned int i = 27; i < tokLen && token[i] != '\"' && i < 150; i++)
						*valto++ = token[i];
					*valto = 0;
					// skip a leading language letter, but never step over the terminator of an empty value
					if (atoi((*val && !isdigit((unsigned char)*val))?val+1:val) < 5627) {
						// normal strongs number
						sprintf(wordstr, "%03d", word++);
						needWordOut = (word > 2);
						wordAttrs = &(module->getEntryAttributes()["Word"][wordstr]);
						(*wordAttrs)["Strongs"] = val;
	//printf("Adding: [\"Word\"][%s][\"Strongs\"] = %s\n", wordstr, val);
						tmp = "";
						tmp.append(text.c_str()+textStart, (int)(textEnd - textStart));
						(*wordAttrs)["Text"] = tmp;
						text.append("</span>");
						SWBuf ts;
						ts.appendFormatted("%d", textStart);
						(*wordAttrs)["TextStart"] = ts;
	//printf("Adding: [\"Word\"][%s][\"Text\"] = %s\n", wordstr, tmp.c_str());
						newText = true;
					}
					else {
						// verb morph; there is no word to attach it to before the first normal strongs number
						if (wordAttrs) (*wordAttrs)["Morph"] = val;
	//printf("Adding: [\"Word\"][%s][\"Morph\"] = %s\n", wordstr, val);
					}

				}
				if (!strncmp(token, "sync type=\"morph\"", 17)) {
					for (ch = token+17; *ch; ch++) {
						if (!strncmp(ch, "class=\"", 7)) {
							// stop at the terminator too, in case the closing quote is missing
							valto = val;
							for (unsigned int i = 7; ch[i] && ch[i] != '\"' && i < 127; i++)
								*valto++ = ch[i];
							*valto = 0;
							if (wordAttrs) (*wordAttrs)["MorphClass"] = val;
	//printf("Adding: [\"Word\"][%s][\"MorphClass\"] = %s\n", wordstr, val);
						}
						if (!strncmp(ch, "value=\"", 7)) {
							valto = val;
							for (unsigned int i = 7; ch[i] && ch[i] != '\"' && i < 127; i++)
								*valto++ = ch[i];
							*valto = 0;
							if (wordAttrs) (*wordAttrs)["Morph"] = val;
	//printf("Adding: [\"Word\"][%s][\"Morph\"] = %s\n", wordstr, val);
						}
					}
					newText = true;
				}
				// if not a strongs token, keep token in text
				text += '<';
				text += token;
				text += '>';
				if (needWordOut) {
					// a new word has just started, so the previous one (word-2) is complete: insert its <span>
					char wstr[16];
					sprintf(wstr, "%03d", word-2);
					AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]);
					needWordOut = false;
					SWBuf strong = (*wAttrs)["Strongs"];
					SWBuf morph = (*wAttrs)["Morph"];
					SWBuf morphClass = (*wAttrs)["MorphClass"];
					SWBuf wordText = (*wAttrs)["Text"];
					SWBuf textSt = (*wAttrs)["TextStart"];
					if (strong.size()) {
						char gh = 0;
						gh = isdigit((unsigned char)strong[0]) ? 0:strong[0];
						if (!gh) {
							if (vkey) {
								// NOTE(review): every non-zero testament selects 'H' here -- confirm this matches what getTestament() returns
								gh = vkey->getTestament() ? 'H' : 'G';
							}
						}
						else strong << 1;

						SWModule *sLex = 0;
						SWModule *sMorph = 0;
						if (gh == 'G') {
							sLex = defaultGreekLex;
							sMorph = defaultGreekParse;
						}
						if (gh == 'H') {
							sLex = defaultHebLex;
							sMorph = defaultHebParse;
						}
						SWBuf lexName = "";
						if (sLex) {
							// we can pass the real lex name in, but we have some
							// aliases in the javascript to optimize bandwidth
							lexName = sLex->getName();
							if (lexName == "StrongsGreek")
								lexName = "G";
							if (lexName == "StrongsHebrew")
								lexName = "H";
						}
						SWBuf wordID;
						if (vkey) {
							// optimize for bandwidth and use only the verse as the unique entry id
							wordID.appendFormatted("%d", vkey->getVerse());
						}
						else if (key) {
							wordID = key->getText();
						}
						// anything that is not an ASCII letter or digit becomes '_'
						for (unsigned int i = 0; i < wordID.size(); i++) {
							if ((!isdigit((unsigned char)wordID[i])) && (!isalpha((unsigned char)wordID[i]))) {
								wordID[i] = '_';
							}
						}
						wordID.appendFormatted("_%s%d", wordSrcPrefix.c_str(), atoi(wstr));
						if (textSt.size()) {
							// shift the recorded start by the length of the span inserted for the word before
							int textStr = atoi(textSt.c_str());
							textStr += lastAppendLen;
							SWBuf spanStart = "";



							if (!sMorph) sMorph = 0;	// avoid unused warnings for now
/*
							if (sMorph) {
								SWBuf popMorph = "<a onclick=\"";
								popMorph.appendFormatted("p(\'%s\',\'%s\','%s','');\" >%s</a>", sMorph->getName(), morph.c_str(), wordID.c_str(), morph.c_str());
								morph = popMorph;
							}
*/

							// 'p' = 'fillpop' to save bandwidth
							const char *m = strchr(morph.c_str(), ':');
							if (m) m++;
							else m = morph.c_str();
							spanStart.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','','%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str());
							text.insert(textStr, spanStart);
							lastAppendLen = spanStart.length();
						}
					}

				}
				if (newText) {
					textStart = text.length(); newText = false;
				}
				continue;
			}
			if (intoken) {
				if (tokpos < 2045) {
					token[tokpos++] = *from;
					// TODO: why is this + 2 ?
					token[tokpos+2] = 0;
				}
			}
			else	{
				text += *from;
			}
		}

		// the last word never sees a following Strongs tag, so its <span> is inserted here
		char wstr[16];
		sprintf(wstr, "%03d", word-1);
		AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]);
		needWordOut = false;
		SWBuf strong = (*wAttrs)["Strongs"];
		SWBuf morph = (*wAttrs)["Morph"];
		SWBuf morphClass = (*wAttrs)["MorphClass"];
		SWBuf wordText = (*wAttrs)["Text"];
		SWBuf textSt = (*wAttrs)["TextStart"];
		if (strong.size()) {
			char gh = 0;
			gh = isdigit((unsigned char)strong[0]) ? 0:strong[0];
			if (!gh) {
				if (vkey) {
					gh = vkey->getTestament() ? 'H' : 'G';
				}
			}
			else strong << 1;

			SWModule *sLex = 0;
			if (gh == 'G') {
				sLex = defaultGreekLex;
			}
			if (gh == 'H') {
				sLex = defaultHebLex;
			}
			SWBuf lexName = "";
			if (sLex) {
				// we can pass the real lex name in, but we have some
				// aliases in the javascript to optimize bandwidth
				lexName = sLex->getName();
				if (lexName == "StrongsGreek")
					lexName = "G";
				if (lexName == "StrongsHebrew")
					lexName = "H";
			}
			SWBuf wordID;
			if (vkey) {
				// optimize for bandwidth and use only the verse as the unique entry id
				wordID.appendFormatted("%d", vkey->getVerse());
			}
			else if (key) {
				wordID = key->getText();
			}
			for (unsigned int i = 0; i < wordID.size(); i++) {
				if ((!isdigit((unsigned char)wordID[i])) && (!isalpha((unsigned char)wordID[i]))) {
					wordID[i] = '_';
				}
			}
			wordID.appendFormatted("_%s%d", wordSrcPrefix.c_str(), atoi(wstr));
			if (textSt.size()) {
				int textStr = atoi(textSt.c_str());
				textStr += lastAppendLen;
				SWBuf spanStart = "";
				// 'p' = 'fillpop' to save bandwidth
				const char *m = strchr(morph.c_str(), ':');
				if (m) m++;
				else m = morph.c_str();
				spanStart.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','','%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str());
				text.insert(textStr, spanStart);
			}
		}
	}

	return 0;
}
Ejemplo n.º 20
0
/**
 * Drives the OSIS to XHTML conversion: splits @p text into tags (<...>),
 * escape sequences (&...;) and plain text, hands tags to handleToken() and
 * escapes to handleEscapeString(), and copies plain text through unless the
 * user data asks for it to be suspended.  Once the input is exhausted, every
 * HTML tag still on the user data's tag stack is closed.
 *
 * @param text   markup to convert; replaced by the converted markup
 * @param key    passed on to createUserData()
 * @param module passed on to createUserData()
 * @return always 0
 */
char OSISXHTMLXS::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	char *from;
	char token[4096];
	int tokpos = 0;
	bool intoken = false;
	bool inEsc = false;
	SWBuf lastTextNode;
	MyUserDataXS *userData = (MyUserDataXS *)createUserData(module, key);
	
	// start with an empty token so a '>' seen before any '<' or '&' never hands garbage to handleToken()
	token[0] = 0;
	token[1] = 0;
	token[2] = 0;

	SWBuf orig = text;
	from = orig.getRawData();
	text = "";

	for (;*from; from++) {

		// a tag begins
		if (*from == '<') {
			intoken = true;
			tokpos = 0;
			token[0] = 0;
			token[1] = 0;
			token[2] = 0;
			inEsc = false;
			continue;
		}

		// an escape sequence begins
		if (*from == '&') {
			intoken = true;
			tokpos = 0;
			token[0] = 0;
			token[1] = 0;
			token[2] = 0;
			inEsc = true;
			continue;
		}

		if (inEsc) {
			if (*from == ';') {
				intoken = inEsc = false;
				userData->lastTextNode = lastTextNode;
				
				if (!userData->suspendTextPassThru)  { //if text through is disabled no tokens should pass, too
					handleEscapeString(text, token, userData);
				}
				lastTextNode = "";
				continue;
			}
		}

		if (!inEsc) {
			if (*from == '>') {
				intoken = false;
				userData->lastTextNode = lastTextNode;
				handleToken(text, token, userData);
				lastTextNode = "";
				continue;
			}
		}

		if (intoken) {
			// over-long tokens are silently truncated; the terminator is kept 2 bytes ahead of the write position
			if (tokpos < 4090) {
				token[tokpos++] = *from;
				token[tokpos+2] = 0;
			}
		}
		else {
			// a handler may ask for the blanks directly following its token to be dropped
 			if ((!userData->supressAdjacentWhitespace) || (*from != ' ')) {
				if (!userData->suspendTextPassThru) {
					text.append(*from);
					userData->lastSuspendSegment.size(0);
				}
				else	userData->lastSuspendSegment.append(*from);
				lastTextNode.append(*from);
 			}
			userData->supressAdjacentWhitespace = false;
		}

	}
	
	// THE MAIN PURPOSE OF THIS OVERRIDE FUNCTION: is to insure all opened HTML tags are closed
	while (!userData->htmlTagStack->empty()) {
		text.append((SWBuf)"</" + userData->htmlTagStack->top().c_str() + ">");
		userData->htmlTagStack->pop();
	}

	delete userData;
	return 0;
}
Ejemplo n.º 21
0
/**
 * When the filter option is on, puts a clickable
 * <span class="clk" onclick="p(...)"> in front of every OSIS <w ...> tag and
 * replaces every </w> by "</w></span>".
 *
 * The arguments of p(...) are assembled from the entry attributes already
 * stored for the word under module->getEntryAttributes()["Word"][NNN]
 * ("Page", "PartCount", "Lemma", "LemmaClass", "Morph", "Src" and their
 * ".n" variants for multi part words); parts are joined with '|'.
 *
 * @param text   markup to filter; replaced by the filtered markup
 * @param key    current key; a VerseKey contributes its verse number to the
 *               word id, any other key its text; may be null
 * @param module module whose entry attributes are read (dereferenced as soon
 *               as a <w> tag is found)
 * @return always 0
 */
char OSISWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	if (option) {
		char token[2112]; // cheese.  Fix.
		int tokpos = 0;
		bool intoken = false;
		int wordNum = 1;
		char wordstr[16];	// holds any int printed with %03d; the former 5 bytes overflowed from word 10000 on
		SWBuf modName = (module)?module->getName():"";
		// add TR to w src in KJV then remove this next line
		SWBuf wordSrcPrefix = (modName == "KJV")?SWBuf("TR"):modName;

		VerseKey *vkey = 0;
		if (key) {
			vkey = SWDYNAMIC_CAST(VerseKey, key);
		}
		
		const SWBuf orig = text;
		const char * from = orig.c_str();

		// start with an empty token so a '>' seen before any '<' never reads uninitialized memory
		token[0] = 0;
		token[1] = 0;
		token[2] = 0;

		for (text = ""; *from; ++from) {
			if (*from == '<') {
				intoken = true;
				tokpos = 0;
				token[0] = 0;
				token[1] = 0;
				token[2] = 0;
				continue;
			}
			if (*from == '>') {	// process tokens
				intoken = false;
				if ((*token == 'w') && (token[1] == ' ')) {	// Word
					XMLTag wtag(token);
					sprintf(wordstr, "%03d", wordNum);
					SWBuf lemmaClass;
					SWBuf lemma;
					SWBuf morph;
					SWBuf page;
					SWBuf src;
					char gh = 0;
					page = module->getEntryAttributes()["Word"][wordstr]["Page"].c_str();
					if (page.length()) page = (SWBuf)"p:" + page;
					int count = atoi(module->getEntryAttributes()["Word"][wordstr]["PartCount"].c_str());
					for (int i = 0; i < count; i++) {

						// for now, lemma class can just be equal to last lemma class in multi part word
						SWBuf tmp = "LemmaClass";
						if (count > 1) tmp.appendFormatted(".%d", i+1);
						lemmaClass = module->getEntryAttributes()["Word"][wordstr][tmp];

						// tmp first names the attribute, then is reused to hold its value
						tmp = "Lemma";
						if (count > 1) tmp.appendFormatted(".%d", i+1);
						tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str());

						// if we're strongs, 
						if (lemmaClass == "strong") {
							// split off the leading language letter
							gh = tmp[0];
							tmp << 1;
						}
						if (lemma.size()) lemma += "|";
						lemma += tmp;

						tmp = "Morph";
						if (count > 1) tmp.appendFormatted(".%d", i+1);
						tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str());
						if (morph.size()) morph += "|";
						morph += tmp;

						tmp = "Src";
						if (count > 1) tmp.appendFormatted(".%d", i+1);
						tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str());
						// no src recorded: fall back to the running word number
						if (!tmp.length()) tmp.appendFormatted("%d", wordNum);
						tmp.insert(0, wordSrcPrefix);
						if (src.size()) src += "|";
						src += tmp;
					}

					SWBuf lexName = "";
					// we can pass the real lex name in, but we have some
					// aliases in the javascript to optimize bandwidth
					if ((gh == 'G') && (defaultGreekLex)) {
						lexName = (!strcmp(defaultGreekLex->getName(), "StrongsGreek"))?"G":defaultGreekLex->getName();
					}
					else if ((gh == 'H') && (defaultHebLex)) {
						lexName = (!strcmp(defaultHebLex->getName(), "StrongsHebrew"))?"H":defaultHebLex->getName();
					}

					SWBuf xlit = wtag.getAttribute("xlit");

					if ((lemmaClass != "strong") && (xlit.startsWith("betacode:"))) {
						lexName = "betacode";
//						const char *m = strchr(xlit.c_str(), ':');
//						strong = ++m;
					}
					SWBuf wordID;
					if (vkey) {
						// optimize for bandwidth and use only the verse as the unique entry id
						wordID.appendFormatted("%d", vkey->getVerse());
					}
					else if (key) {
						wordID = key->getText();
					}
					wordID.appendFormatted("_%s", src.c_str());
					// clean up our word ID for XHTML
					for (unsigned int i = 0; i < wordID.size(); i++) {
						// cast: <cctype> functions are undefined for negative char values
						if ((!isdigit((unsigned char)wordID[i])) && (!isalpha((unsigned char)wordID[i]))) {
							wordID[i] = '_';
						}
					}
					// 'p' = 'fillpop' to save bandwidth
					text.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','%s','%s');\" >", lexName.c_str(), lemma.c_str(), wordID.c_str(), morph.c_str(), page.c_str(), modName.c_str());
					wordNum++;

					if (wtag.isEmpty()) {
						text += "</w></span>";
					}
				}
				if ((*token == '/') && (token[1] == 'w') && option) {	// Word
					text += "</w></span>";
					continue;
				}
				
				// if not a strongs token, keep token in text
				text.append('<');
				text.append(token);
				text.append('>');
				
				continue;
			}
			if (intoken) {
				// over-long tokens are silently truncated; the terminator is kept 2 bytes ahead of the write position
				if (tokpos < 2045) {
					token[tokpos++] = *from;
					token[tokpos+2] = 0;
				}
			}
			else	{
				text.append(*from);
			}
		}
	}
	return 0;
}
Ejemplo n.º 22
0
/**
 * When the filter option is off, trims the lemma attribute of every OSIS
 * <w ...> tag: the original value is first preserved in a savlm attribute
 * (unless one exists already), then every space separated part of lemma for
 * which stripPrefix(':') yields no prefix, or a prefix starting with
 * "lemma.", is removed.  All other markup and text is copied through as is.
 * With the option on, the text is left untouched.
 *
 * @param text   markup to filter; replaced by the filtered markup
 * @param key    not used by this filter
 * @param module not used by this filter
 * @return always 0
 */
char OSISLemma::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	// lemmas are wanted: nothing to strip
	if (option) {
		return 0;
	}

	const SWBuf source = text;
	SWBuf tagBody;
	bool insideTag = false;

	text = "";
	for (const char *cursor = source.c_str(); *cursor; ++cursor) {
		const char current = *cursor;

		if (current == '<') {
			// a new tag starts: collect its content from scratch
			insideTag = true;
			tagBody = "";
		}
		else if (current == '>') {
			// tag complete: rewrite it if it is a word tag, then emit it
			insideTag = false;

			if (tagBody.startsWith("w ")) {
				XMLTag wordTag(tagBody);

				// keep a copy of the untouched lemma value, once
				if (!wordTag.getAttribute("savlm")) {
					const char *fullLemma = wordTag.getAttribute("lemma");
					if (fullLemma) {
						wordTag.setAttribute("savlm", fullLemma);
					}
				}

				// removing a part shifts the following ones down, so only
				// advance the index when the current part is kept
				int remaining = wordTag.getAttributePartCount("lemma", ' ');
				int part = 0;
				while (part < remaining) {
					SWBuf piece = wordTag.getAttribute("lemma", part, ' ');
					const char *prefix = piece.stripPrefix(':');
					if (prefix && !SWBuf(prefix).startsWith("lemma.")) {
						++part;
					}
					else {
						wordTag.setAttribute("lemma", 0, part, ' ');
						--remaining;
					}
				}

				// serialize the tag again and peel off its surrounding "<" and ">"
				tagBody = wordTag;
				tagBody.trim();
				tagBody << 1;
				tagBody--;
			}

			text.append('<');
			text.append(tagBody);
			text.append('>');
		}
		else if (insideTag) {
			tagBody += current;
		}
		else {
			text.append(current);
		}
	}
	return 0;
}
Ejemplo n.º 23
0
/**
 * Pulls OSIS <note> elements out of @p text.
 *
 * The body of every non-empty <note> is collected separately.  At the matching
 * end tag, if the module processes entry attributes and the note is not
 * strongs markup, the start tag's attributes and the body are stored under
 * ["Footnote"][N]; notes of type "crossReference" additionally get a
 * "refList" built from the osisRef values of the <reference> tags seen inside
 * the note or, if there were none, from parsing the note body as a verse list.
 * When the filter option is on or the note is a cross reference, the note's
 * start and end tags are written back to the text, without the body.
 * Line breaks in the input are dropped, in some cases replaced by a blank.
 *
 * @param text   markup to filter; replaced by the filtered markup
 * @param key    current key, used to position the verse parser; may be null
 * @param module module receiving the entry attributes; may be null, in which
 *               case no attributes are recorded
 * @return always 0
 */
char OSISFootnotes::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	SWBuf token;
	bool intoken    = false;
	bool hide       = false;
	SWBuf tagText;
	XMLTag startTag;
	SWBuf refs = "";
	int footnoteNum = 1;
	char buf[254];
	// the parser must be a VerseKey; fall back to a plain one when module/key give us something else
	SWKey *p = (module) ? module->createKey() : (key) ? key->clone() : new VerseKey();
	VerseKey *parser = SWDYNAMIC_CAST(VerseKey, p);
	if (!parser) {
		delete p;
		parser = new VerseKey();
	}
	// key is optional (see the fallback chain above), so only position the parser when we have one
	if (key) {
		*parser = key->getText();
	}

	SWBuf orig = text;
	const char *from = orig.c_str();

	XMLTag tag;
	bool strongsMarkup = false;


	for (text = ""; *from; ++from) {

		// remove all newlines temporarily to fix kjv2003 module
		if ((*from == 10) || (*from == 13)) {
			// NOTE(review): this looks at the second to last output character, not the last -- confirm that is intended
			if ((text.length()>1) && (text[text.length()-2] != ' ') && (*(from+1) != ' '))
				text.append(' ');
			continue;
		}


		if (*from == '<') {
			intoken = true;
			token = "";
			continue;
		}



		if (*from == '>') {	// process tokens
			intoken = false;
			if (!strncmp(token, "note", 4) || !strncmp(token.c_str(), "/note", 5)) {
				tag = token;

				if (!tag.isEndTag()) {
					if (tag.getAttribute("type") && (!strcmp("x-strongsMarkup", tag.getAttribute("type"))
											|| !strcmp("strongsMarkup", tag.getAttribute("type")))	// deprecated
							) {
						tag.setEmpty(false);  // handle bug in KJV2003 module where some note open tags were <note ... />
						strongsMarkup = true;
					}

					if (!tag.isEmpty()) {
//					if ((!tag.isEmpty()) || (SWBuf("strongsMarkup") == tag.getAttribute("type"))) {
						// start collecting the note body instead of passing it through
						refs = "";
						startTag = tag;
						hide = true;
						tagText = "";
						continue;
					}
				}
				if (hide && tag.isEndTag()) {
					// module is optional as well; without it there is nowhere to store attributes
					if (module && module->isProcessEntryAttributes() && !strongsMarkup) { //don`t parse strongsMarkup to EntryAttributes as Footnote
						sprintf(buf, "%i", footnoteNum++);
						StringList attributes = startTag.getAttributeNames();
						for (StringList::const_iterator it = attributes.begin(); it != attributes.end(); it++) {
							module->getEntryAttributes()["Footnote"][buf][it->c_str()] = startTag.getAttribute(it->c_str());
						}
						module->getEntryAttributes()["Footnote"][buf]["body"] = tagText;
						startTag.setAttribute("swordFootnote", buf);
						if ((startTag.getAttribute("type")) && (!strcmp(startTag.getAttribute("type"), "crossReference"))) {
							// no <reference> inside the note: derive the references from the body text
							if (!refs.length())
								refs = parser->parseVerseList(tagText.c_str(), *parser, true).getRangeText();
							module->getEntryAttributes()["Footnote"][buf]["refList"] = refs.c_str();
						}
					}
					hide = false;
					if (option || (startTag.getAttribute("type") && !strcmp(startTag.getAttribute("type"), "crossReference"))) {	// we want the tag in the text; crossReferences are handled by another filter
						text.append(startTag);
//						text.append(tagText);	// we don't put the body back in because it is retrievable from EntryAttributes["Footnotes"][]["body"].
					}
					else	continue;
				}
				strongsMarkup = false;
			}

			// if not a heading token, keep token in text
			//if ((!strcmp(tag.getName(), "reference")) && (!tag.isEndTag())) {
			//	SWBuf osisRef = tag.getAttribute("osisRef");
			if (!strncmp(token, "reference", 9)) {
				if (refs.length()) {
					refs.append("; ");
				}

				// pick the osisRef value straight out of the raw token text
				const char* attr = strstr(token.c_str() + 9, "osisRef=\"");
				const char* end  = attr ? strchr(attr+9, '"') : 0;

				if (attr && end) {
					refs.append(attr+9, end-(attr+9));
				}
			}
			if (!hide) {
				text.append('<');
				text.append(token);
				text.append('>');
			}
			else {
				tagText.append('<');
				tagText.append(token);
				tagText.append('>');
			}
			continue;
		}
		if (intoken) { //copy token
			token.append(*from);
		}
		else if (!hide) { //copy text which is not inside a token
			text.append(*from);
		}
		else tagText.append(*from);
	}
	delete parser;
	return 0;
}
Ejemplo n.º 24
0
/**
 * Filter the ThML Strong's / morphology <sync> markers of an entry.
 *
 * The incoming text is copied aside and rebuilt character by character:
 *  - <sync type="Strongs" value="..."> tokens: when the module processes entry
 *    attributes, values whose numeric part is below 5627 are recorded under
 *    ["Word"][NNN] (PartCount, Lemma, LemmaClass and the Text emitted since the
 *    previous word); larger values are ignored.  When `option` is off the token
 *    is dropped from the output, otherwise it is kept.
 *  - <sync type="morph" ...> tokens: when the module processes entry attributes,
 *    the class= and value= attributes are recorded as MorphClass / Morph of the
 *    previous word ("Robinsons"/"Robinson" are normalised to "robinson").  The
 *    token itself is always kept.
 *  - every other token is copied through unchanged.
 *
 * @param text   entry text, rewritten in place
 * @param key    unused by this filter
 * @param module module whose entry attributes are populated
 * @return always 0
 */
char ThMLStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	char token[2048] = {0}; // cheese.  Fix.  (zeroed so a stray '>' never reads garbage)
	const char *from;
	int tokpos = 0;
	bool intoken = false;
	bool lastspace = false;
	int word = 1;
	char val[128];
	char wordstr[16];	// big enough for any int printed with "%03d"
	char *valto;
	char *ch;
	unsigned int textStart = 0, textEnd = 0;
	SWBuf tmp;
	bool newText = false;

	SWBuf orig = text;
	from = orig.c_str();

	for (text = ""; *from; from++) {
		if (*from == '<') {
			intoken = true;
			tokpos = 0;
			token[0] = 0;
			token[1] = 0;
			token[2] = 0;
			textEnd = text.length();
			continue;
		}
		if (*from == '>') {	// process tokens
			intoken = false;
			if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) {	// Strongs
				if (module->isProcessEntryAttributes()) {
					// the value is expected right after: sync type="Strongs" value="
					// (27 characters); never read beyond the end of the token
					const unsigned int tokLen = (unsigned int)strlen(token);
					valto = val;
					for (unsigned int i = 27; i < tokLen && i < 150 && token[i] != '\"'; i++)
						*valto++ = token[i];
					*valto = 0;

					// skip a single leading non-digit (e.g. a G/H prefix) before
					// converting; an empty value is converted as-is
					const char *num = val;
					if (*num && !isdigit((unsigned char)*num))
						num++;

					if (atoi(num) < 5627) {
						// normal strongs number
						sprintf(wordstr, "%03d", word);
						module->getEntryAttributes()["Word"][wordstr]["PartCount"] = "1";
						module->getEntryAttributes()["Word"][wordstr]["Lemma"] = val;
						module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = "strong";
						tmp = "";
						tmp.append(text.c_str()+textStart, (int)(textEnd - textStart));
						module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
						newText = true;
					}
					else {
/*
						// verb morph
						sprintf(wordstr, "%03d", word);
						module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
						module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "OLBMorph";
*/
						word--;	// for now, completely ignore this word attribute.
					}
					word++;
				}

				if (!option) {	// if we don't want strongs
					// the marker is about to vanish: if it is followed by a space or
					// punctuation and the last emitted character was a space, take
					// that space back (presumably SWBuf::operator-- drops the last char)
					if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) {
						if (lastspace)
							text--;
					}
					if (newText) {textStart = text.length(); newText = false; }
					continue;
				}
			}
			if (module->isProcessEntryAttributes()) {
				if (!strncmp(token, "sync type=\"morph\"", 17)) {
					for (ch = token+17; *ch; ch++) {
						if (!strncmp(ch, "class=\"", 7)) {
							valto = val;
							// stop at the closing quote, the end of the token, or the size limit
							for (unsigned int i = 7; ch[i] && ch[i] != '\"' && i < 127; i++)
								*valto++ = ch[i];
							*valto = 0;
							sprintf(wordstr, "%03d", word-1);
							if ((!stricmp(val, "Robinsons")) || (!stricmp(val, "Robinson"))) {
								strcpy(val, "robinson");
							}
							module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = val;
						}
						if (!strncmp(ch, "value=\"", 7)) {
							valto = val;
							for (unsigned int i = 7; ch[i] && ch[i] != '\"' && i < 127; i++)
								*valto++ = ch[i];
							*valto = 0;
							sprintf(wordstr, "%03d", word-1);
							module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
						}
					}
					newText = true;
				}
			}
			// if not a strongs token, keep token in text
			text += '<';
			text += token;
			text += '>';
			if (newText) {textStart = text.length(); newText = false; }
			continue;
		}
		if (intoken) {
			// over-long tokens are silently truncated
			if (tokpos < 2045)
				token[tokpos++] = *from;
			// Executed for every token character (not only when one was stored):
			// together with the three zeroed bytes at token start this keeps the
			// buffer NUL terminated.
			token[tokpos+2] = 0;
		}
		else	{
			text += *from;
			lastspace = (*from == ' ');
		}
	}
	return 0;
}
Ejemplo n.º 25
0
/**
 * Turn a single TEI token into plain text.
 *
 * Tokens covered by the substitution table are handled by substituteToken().
 * Otherwise <p>, <entryFree>, <sense>, <div> and <etym> are rendered as
 * line breaks, "n. " labels and square brackets respectively.
 *
 * @param buf      output buffer the rendering is appended to
 * @param token    token text handed to XMLTag
 * @param userData filter state; supressAdjacentWhitespace is set after
 *                 closing and empty <p> tags
 * @return true when the token was consumed, false when it is not one of
 *         the tags handled here
 */
bool TEIPlain::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
	// a simple substitution already did all the work
	if (substituteToken(buf, token)) {
		return true;
	}

	//MyUserData *u = (MyUserData *)userData;
	XMLTag tag(token);
	const bool closing = tag.isEndTag();
	const bool opening = !closing && !tag.isEmpty();	// start tag that is not self-closing

	// <p> paragraph tag
	if (!strcmp(tag.getName(), "p")) {
		if (opening) {
			buf.append("\n");
		}
		else {
			// end tag: one newline; empty paragraph break marker: two
			buf.append(closing ? "\n" : "\n\n");
			userData->supressAdjacentWhitespace = true;
		}
		return true;
	}

	// <entryFree>: prefix the entry with its "n" label, if any
	if (!strcmp(tag.getName(), "entryFree")) {
		SWBuf label = tag.getAttribute("n");
		if (opening && (label != "")) {
			buf.append(label);
			buf.append(". ");
		}
		return true;
	}

	// <sense>: "n" label on open, line break on close
	if (!strcmp(tag.getName(), "sense")) {
		SWBuf label = tag.getAttribute("n");
		if (opening) {
			if (label != "") {
				buf.append(label);
				buf.append(". ");
			}
		}
		else if (closing) {
			buf.append("\n");
		}
		return true;
	}

	// <div>: only the opening tag produces output
	if (!strcmp(tag.getName(), "div")) {
		if (opening) {
			buf.append("\n\n\n");
		}
		return true;
	}

	// <etym>: wrap the etymology in square brackets
	if (!strcmp(tag.getName(), "etym")) {
		if (opening) {
			buf.append("[");
		}
		else if (closing) {
			buf.append("]");
		}
		return true;
	}

	return false;  // we still didn't handle token
}
Ejemplo n.º 26
0
int main(int argc, char **argv) {

	SWBuf program = argv[0];
	fprintf(stderr, "You are running %s: $Rev: 2138 $\n", argv[0]);

	// Let's test our command line arguments
	if (argc < 3) {
		usage(*argv);
	}

	// variables for arguments, holding defaults
	SWBuf path             = argv[1];
	SWBuf teiDoc           = argv[2];
	SWBuf compType	       = "";
	SWBuf modDrv           = "";
	SWBuf recommendedPath  = "./modules/lexdict/";
	SWBuf cipherKey        = "";
	SWCompress *compressor = 0;

	for (int i = 3; i < argc; i++) {
		if (!strcmp(argv[i], "-z")) {
			if (compType.size()) usage(*argv, "Cannot specify both -z and -Z");
			if (modDrv.size()) usage(*argv, "Cannot specify both -z and -s");
			compType = "ZIP";
			modDrv = "zLD";
			recommendedPath += "zld/";
		}
		else if (!strcmp(argv[i], "-Z")) {
			if (compType.size()) usage(*argv, "Cannot specify both -z and -Z");
			if (modDrv.size()) usage(*argv, "Cannot specify both -Z and -s");
			compType = "LZSS";
			recommendedPath += "zld/";
		}
		else if (!strcmp(argv[i], "-s")) {
			if (compType.size()) usage(*argv, "Cannot specify both -s and -z or -Z");
			if (i+1 < argc) {
				int size = atoi(argv[++i]);
				if (size == 2) {
					modDrv           = "RawLD";
					recommendedPath += "rawld/";
					continue;
				}
				if (size == 4) {
					modDrv           = "RawLD4";
					recommendedPath += "rawld4/";
					continue;
				}
			}
			usage(*argv, "-s requires one of <2|4>");
		}
		else if (!strcmp(argv[i], "-N")) {
			normalize = false;
		}
		else if (!strcmp(argv[i], "-c")) {
			if (i+1 < argc) cipherKey = argv[++i];
			else usage(*argv, "-c requires <cipher_key>");
		}
		else usage(*argv, (((SWBuf)"Unknown argument: ")+ argv[i]).c_str());
	}
	if (!modDrv.size()) {
		modDrv           = "RawLD4";
		recommendedPath += "rawld4/";
	}

#ifndef _ICU_
	if (normalize) {
		normalize = false;
		cout << program << " is not compiled with support for ICU. Setting -N flag." << endl;
	}
#endif

	if (compType == "ZIP") {
		compressor = new ZipCompress();
	}
	else if (compType = "LZSS") {
		compressor = new LZSSCompress();
	}

#ifdef DEBUG
	// cout << "path: " << path << " teiDoc: " << teiDoc << " compressType: " << compType << " ldType: " << modDrv << " cipherKey: " << cipherKey.c_str() << " normalize: " << normalize << "\n";
	cout << "path: " << path << " teiDoc: " << teiDoc << " compressType: " << compType << " ldType: " << modDrv << " normalize: " << normalize << "\n";
	cout << "";
//      exit(-3);
#endif

	SWBuf modName = path;
	int pathlen   = path.length();
	char lastChar = path[pathlen - 1];
	if (lastChar != '/' && lastChar != '\\') {
		modName += "/";
	}
	modName += "dict";

	SWBuf keyBuf;
	SWBuf entBuf;
	SWBuf lineBuf;
	vector<string> linkBuf;

	if (modDrv == "zLD") {
		if (zLD::createModule(modName)) {
			fprintf(stderr, "error: %s: couldn't create module at path: %s \n", program.c_str(), modName.c_str()); 
			exit(-3);
		}
		module = new zLD(modName, 0, 0, 30, compressor);
	}
	else if (modDrv == "RawLD") {
		if (RawLD::createModule(modName)) {
			fprintf(stderr, "error: %s: couldn't create module at path: %s \n", program.c_str(), modName.c_str()); 
			exit(-3);
		}
		module = new RawLD(modName);
	}
	else {
		if (RawLD4::createModule(modName)) {
			fprintf(stderr, "error: %s: couldn't create module at path: %s \n", program.c_str(), modName.c_str()); 
			exit(-3);
		}
		module = new RawLD4(modName);
	}

	SWFilter *cipherFilter = 0;

	if (cipherKey.size()) {
		fprintf(stderr, "Adding cipher filter with phrase: %s\n", cipherKey.c_str() );
		cipherFilter = new CipherFilter(cipherKey.c_str());
		module->AddRawFilter(cipherFilter);
	}

        if (!module->isWritable()) {
                fprintf(stderr, "The module is not writable. Writing text to it will not work.\nExiting.\n" );
                exit(-1);
        }

	// Let's see if we can open our input file
	ifstream infile(teiDoc);
	if (infile.fail()) {
		fprintf(stderr, "error: %s: couldn't open input file: %s \n", program.c_str(), teiDoc.c_str());
		exit(-2);
	}

	currentKey = module->CreateKey();
	currentKey->Persist(1);
	module->setKey(*currentKey);

	(*module) = TOP;

	SWBuf token;
	SWBuf text;
	bool intoken = false;
	char curChar = '\0';

	while (infile.good()) {

		curChar = infile.get();

		// skip the character if it is bad. infile.good() will catch the problem
		if (curChar == -1) {
			continue;
		}

		if (!intoken && curChar == '<') {
			intoken = true;
			token = "<";
			continue;
		}

		if (intoken && curChar == '>') {
			intoken = false;
			token.append('>');

			XMLTag *t = new XMLTag(token.c_str());
			if (!handleToken(text, t)) {
				text.append(*t);
			}
                        delete t;
			continue;
		}

		if (intoken)
			token.append(curChar);
		else
			switch (curChar) {
				case '>' : text.append("&gt;"); break;
				case '<' : text.append("&lt;"); break;
				default  : text.append(curChar); break;
			}
	}

	// Force the last entry from the text buffer.
	//text = "";
	//writeEntry(*currentKey, text);

	delete module;
	delete currentKey;
	if (cipherFilter)
		delete cipherFilter;
	infile.close();

#ifdef _ICU_
	if (converted)  fprintf(stderr, "tei2mod converted %d verses to UTF-8\n", converted);
	if (normalized) fprintf(stderr, "tei2mod normalized %d verses to NFC\n", normalized);
#endif

	/*
	 * Suggested module name detection.
	 * Only used for suggesting a conf.
	 *
	 * Various forms of path.
	 * . and .. - no module name given, use "dict".
	 * Or one of the following where z is the module name
	 * and x may be . or ..
	 * z 
	 * x/y/z
	 * x/y/z/
	 * x/y/z/z
	 */
	SWBuf suggestedModuleName = path;
	if (lastChar == '/' || lastChar == '\\') {
		suggestedModuleName.setSize(--pathlen);
	}

	lastChar = suggestedModuleName[pathlen - 1];
	if (lastChar == '.') {
		suggestedModuleName = "???";
	}
	else {
		/* At this point the suggestion is either
		 * what follows the last / or \
		 * or the entire string
		 */
		const char *m = strrchr(suggestedModuleName.c_str(), '/');
		if (!m) {
			m = strrchr(suggestedModuleName.c_str(), '\\');
		}
		if (m) {
			suggestedModuleName = m+1;
		}
	}

	recommendedPath += suggestedModuleName;
	recommendedPath += "/dict";

	fprintf(stderr, "\nSuggested conf (replace ??? with appropriate values)\n\n");
	fprintf(stderr, "[%s]\n", suggestedModuleName.c_str());
	fprintf(stderr, "DataPath=%s\n", recommendedPath.c_str());
	fprintf(stderr, "Description=???\n");
	fprintf(stderr, "SourceType=TEI\n");
	fprintf(stderr, "Encoding=%s\n", (normalize ? "UTF-8" : "???"));
	fprintf(stderr, "ModDrv=%s\n", modDrv.c_str());
	if (compressor) {
		fprintf(stderr, "CompressType=%s\n", compType.c_str());
	}
	if (cipherKey.size()) {
		fprintf(stderr, "CipherKey=%s\n", cipherKey.c_str());
	}
}
Ejemplo n.º 27
0
/**
 * Read the next logical line from an open file into `line`.
 *
 * The file is consumed in blocks of up to 254 bytes.  Leading CR, space and
 * tab characters are skipped at the start of the line, trailing LF / CR /
 * space / tab are trimmed at its end, and a trailing backslash makes the
 * following physical line part of the same logical line.  After each block
 * the file position is moved to just behind the part that was consumed.
 *
 * @param fDesc file to read from; a descriptor below 1 yields 0 at once
 * @param line  receives the line (cleared first)
 * @return non-zero when the last read delivered data or `line` is non-empty
 */
char FileMgr::getLine(FileDesc *fDesc, SWBuf &line) {
	char block[255];
	int got;
	bool keepReading;

	line = "";

	// assert we have a valid file handle
	if (fDesc->getFd() < 1) {
		return 0;
	}

	do {
		keepReading = false;

		long pos = fDesc->seek(0, SEEK_CUR);
		got = fDesc->read(block, 254);

		// assert we have a readable file (not a directory)
		if (got < 1) {
			break;
		}

		// skip leading white space, but only while the line is still empty
		int first = 0;
		if (!line.length()) {
			while ((first < got) && ((block[first] == 13) || (block[first] == ' ') || (block[first] == '\t'))) {
				first++;
			}
		}

		// advance to the line feed, or to the last byte of the block
		int last = first;
		while ((last < (got-1)) && (block[last] != 10)) {
			last++;
		}

		// a full block without a line feed means the line continues
		if ((block[last] != 10) && (got == 254)) {
			keepReading = true;
		}

		// reposition to next valid place to read
		pos += (last + 1);
		fDesc->seek(pos, SEEK_SET);

		// at the end of the line: strip trailing junk and honour a
		// backslash continuation
		if (!keepReading) {
			while (last > first) {
				const char c = block[last];
				if ((c != 10) && (c != 13) && (c != ' ') && (c != '\t')) {
					if (c == '\\') {
						keepReading = true;
						last--;
					}
					break;
				}
				last--;
			}
		}

		const int count = (last - first) + 1;
		if (count > 0) {
			line.append(block+first, count);
		}
	} while (keepReading);

	return ((got > 0) || line.length());
}
Ejemplo n.º 28
0
/**
 * Filter the GBF Strong's / morphology markers of an entry.
 *
 * The incoming text is copied aside and rebuilt character by character:
 *  - <WG...> / <WH...> tokens: when the module processes entry attributes,
 *    values whose numeric part is below 5627 start a new ["Word"][NNN] entry
 *    (PartsCount, Lemma, LemmaClass and the Text emitted since the previous
 *    word); larger values are stored as an "OLBMorph" Morph of the previous
 *    word.  When `option` is off the token is dropped from the output,
 *    otherwise it is kept.
 *  - <WT...> tokens: when the module processes entry attributes, the value is
 *    stored as a "GBFMorph" Morph of the previous word.  The token is kept.
 *  - every other token is copied through unchanged.
 *
 * @param text   entry text, rewritten in place
 * @param key    unused by this filter
 * @param module module whose entry attributes are populated
 * @return always 0
 */
char GBFStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	char token[2048] = {0}; // cheese.  Fix.  (zeroed so a stray '>' never reads garbage)
	int tokpos = 0;
	bool intoken = false;
	bool lastspace = false;
	int word = 1;
	char val[128];
	char wordstr[16];	// big enough for any int printed with "%03d"
	char *valto;
	unsigned int textStart = 0, textEnd = 0;
	bool newText = false;
	SWBuf tmp;
	const char *from;

	SWBuf orig = text;
	from = orig.c_str();

	for (text = ""; *from; from++) {
		if (*from == '<') {
			intoken = true;
			tokpos = 0;
			token[0] = 0;
			token[1] = 0;
			token[2] = 0;
			textEnd = text.size();
			continue;
		}
		if (*from == '>') {	// process tokens
			intoken = false;
			if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) {	// Strongs
				if (module->isProcessEntryAttributes()) {
					// copy everything after the 'W', never more than val can hold
					valto = val;
					for (unsigned int i = 1; ((token[i]) && (i < sizeof(val))); i++)
						*valto++ = token[i];
					*valto = 0;
					// val starts with the G/H prefix, which is skipped for the conversion
					if (atoi((!isdigit((unsigned char)*val))?val+1:val) < 5627) {
						// normal strongs number
						sprintf(wordstr, "%03d", word++);
						module->getEntryAttributes()["Word"][wordstr]["PartsCount"] = "1";
						module->getEntryAttributes()["Word"][wordstr]["Lemma"] = val;
						module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = "strong";
						tmp = "";
						tmp.append(text.c_str()+textStart, (int)(textEnd - textStart));
						module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
						newText = true;
					}
					else {
						// verb morph
						sprintf(wordstr, "%03d", word-1);
						module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
						module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "OLBMorph";
					}
				}

				if (!option) {
					// the marker is about to vanish: if it is followed by a space or
					// punctuation and the last emitted character was a space, take
					// that space back (presumably SWBuf::operator-- drops the last char)
					if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) {
						if (lastspace)
							text--;
					}
					if (newText) {textStart = text.size(); newText = false; }
					continue;
				}
			}
			if (module->isProcessEntryAttributes()) {
				if ((*token == 'W') && (token[1] == 'T')) {	// Morph
					// copy everything after "WT", never more than val can hold
					valto = val;
					for (unsigned int i = 2; ((token[i]) && ((i - 2) < (sizeof(val) - 1))); i++)
						*valto++ = token[i];
					*valto = 0;
					sprintf(wordstr, "%03d", word-1);
					module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "GBFMorph";
					module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
					newText = true;
				}
			}
			// if not a strongs token, keep token in text
			text += '<';
			text += token;
			text += '>';
			if (newText) {textStart = text.size(); newText = false; }
			continue;
		}
		if (intoken) {
			// over-long tokens are silently truncated
			if (tokpos < 2045)
				token[tokpos++] = *from;
			// Executed for every token character (not only when one was stored):
			// together with the three zeroed bytes at token start this keeps the
			// buffer NUL terminated.
			token[tokpos+2] = 0;
		}
		else	{
			text += *from;
			lastspace = (*from == ' ');
		}
	}
	return 0;
}
Ejemplo n.º 29
0
/** Parse the URL.
 * Splits `url` into protocol, hostname, path and the parameter map.
 *
 * Format example: protocol://hostname/path/path/path.pl?param1=value1&amp;param2=value2
 *  - the script name belongs to the path; the path is stored without its
 *    leading slash ("path/path/path.pl" in the example)
 *  - parameters may be separated by "&amp;" or by "&"; when the remainder of
 *    the URL contains an "&amp;" anywhere, that one is used in preference to
 *    an earlier plain "&"
 *  - parameters with an empty name or an empty value are not stored
 *  - a "#fragment" ends the hostname or the path, but is not split off from a
 *    query string
 */
void URL::parse() {
	// 1. Init
	const char *cursor = url.c_str();

	protocol = "";
	hostname = "";
	path     = "";
	parameterMap.clear();

	// 2. Protocol: everything up to the first ':', then skip the "://" separator
	const char *mark = strchr(cursor, ':');
	if (mark) {
		protocol.append(cursor, mark-cursor);
		cursor = mark + 1;
		while ((*cursor == ':') || (*cursor == '/')) {
			cursor++;
		}
	}

	// 3. Hostname: runs up to the first '/', else '?', else '#', else the end
	bool hasPath  = true;
	bool hasQuery = true;

	mark = strchr(cursor, '/');
	if (!mark) {
		hasPath = false;
		mark = strchr(cursor, '?');
	}
	if (!mark) {
		hasQuery = false;
		mark = strchr(cursor, '#');
	}
	if (!mark) {
		mark = cursor+strlen(cursor);
	}

	hostname.append(cursor, mark-cursor);
	cursor = (*mark) ? mark + 1 : mark;	// step over the delimiter, if there was one

	// 4. Path: runs up to the first '?', else '#', else the end
	if (hasPath) {
		mark = strchr(cursor, '?');
		if (!mark) {
			hasQuery = false;
			mark = strchr(cursor, '#');
		}
		if (!mark) {
			mark = cursor+strlen(cursor);
		}

		path.append(cursor, mark-cursor);
		cursor = (*mark) ? mark + 1 : mark;
	}

	if (!hasQuery) {
		return;
	}

	// 5. Fill the map with the parameters and their values
	SWBuf paramName;
	SWBuf paramValue;

	while (cursor) {
		// search for the equal sign to find the value part
		const char *equals = strchr(cursor, '=');
		if (!equals) {
			break; // no valid parameter in the url
		}

		paramName = "";
		paramValue = "";

		// the value ends at the next separator, "&amp;" taking precedence over "&"
		const char *valueEnd = strstr(equals, "&amp;");
		if (!valueEnd) {
			valueEnd = strstr(equals, "&");
		}

		paramName.append(cursor, equals-cursor);
		if (valueEnd) {
			paramValue.append(equals+1, valueEnd-(equals+1));
		}
		else { // this is the last parameter of the URL
			paramValue.append(equals+1);
		}

		// insert the param into the map if it's valid
		if (paramName.length() && paramValue.length()) {
			paramName = decode(paramName.c_str());
			paramValue = decode(paramValue.c_str());

			parameterMap[ paramName ] = paramValue;
		}

		// move on to the text behind the next separator, or stop
		const char *rest = cursor+1;
		const char *separator = strstr(rest, "&amp;");
		if (separator) {
			cursor = separator+5;
		}
		else {
			separator = strstr(rest, "&");
			cursor = separator ? separator+1 : 0;
		}
	}
}