Пример #1
0
RawVerse::RawVerse(const char *ipath, int fileMode)
{
	SWBuf buf;

	path = 0;
	stdstr(&path, ipath);
	
	if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
		path[strlen(path)-1] = 0;

	if (fileMode == -1) { // try read/write if possible
		fileMode = FileMgr::RDWR;
	}
		
	buf.setFormatted("%s/ot.vss", path);
	idxfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	buf.setFormatted("%s/nt.vss", path);
	idxfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	buf.setFormatted("%s/ot", path);
	textfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	buf.setFormatted("%s/nt", path);
	textfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	instance++;
}
Пример #2
0
void openfiles(char *fname)
{
	SWBuf buf;

	if ((fp = FileMgr::openFileReadOnly(fname)) < 0) {
		fprintf(stderr, "Couldn't open file: %s\n", fname);
		exit(1);
	}

	buf.setFormatted("%s.vss", fname);
	if ((vfp = FileMgr::createPathAndFile(buf.c_str())) < 0) {
		fprintf(stderr, "Couldn't open file: %s\n", buf.c_str());
		exit(1);
	}

	buf.setFormatted("%s.cps", fname);
	if ((cfp = FileMgr::createPathAndFile(buf.c_str())) < 0) {
		fprintf(stderr, "Couldn't open file: %s\n", buf.c_str());
		exit(1);
	}

	buf.setFormatted("%s.bks", fname);
	if ((bfp = FileMgr::createPathAndFile(buf.c_str())) < 0) {
		fprintf(stderr, "Couldn't open file: %s\n", buf.c_str());
		exit(1);
	}
}
Пример #3
0
void SWMgr::augmentModules(const char *ipath, bool multiMod) {
	SWBuf path = ipath;
	if ((ipath[strlen(ipath)-1] != '\\') && (ipath[strlen(ipath)-1] != '/'))
		path += "/";
	if (FileMgr::existsDir(path.c_str(), "mods.d")) {
		char *savePrefixPath = 0;
		char *saveConfigPath = 0;
		SWConfig *saveConfig = 0;
		stdstr(&savePrefixPath, prefixPath);
		stdstr(&prefixPath, path.c_str());
		path += "mods.d";
		stdstr(&saveConfigPath, configPath);
		stdstr(&configPath, path.c_str());
		saveConfig = config;
		config = myconfig = 0;
		loadConfigDir(configPath);

		if (multiMod) {
			// fix config's Section names to rename modules which are available more than once
			// find out which sections are in both config objects
			// inserting all configs first is not good because that overwrites old keys and new modules would share the same config
			for (SectionMap::iterator it = config->Sections.begin(); it != config->Sections.end();) {
				if (saveConfig->Sections.find( (*it).first ) != saveConfig->Sections.end()) { //if the new section is already present rename it
					ConfigEntMap entMap((*it).second);
					
					SWBuf name;
					int i = 1;
					do { //module name already used?
						name.setFormatted("%s_%d", (*it).first.c_str(), i);
						i++;
					} while (config->Sections.find(name) != config->Sections.end());
					
					config->Sections.insert(SectionMap::value_type(name, entMap) );
					SectionMap::iterator toErase = it++;
					config->Sections.erase(toErase);
				}
				else ++it;
			}
		}
		
		CreateMods(multiMod);

		stdstr(&prefixPath, savePrefixPath);
		delete []savePrefixPath;
		stdstr(&configPath, saveConfigPath);
		delete []saveConfigPath;

		(*saveConfig) += *config;
		
		homeConfig = myconfig;
		config = myconfig = saveConfig;
	}
}
Пример #4
0
zStr::zStr(const char *ipath, int fileMode, long blockCount, SWCompress *icomp, bool caseSensitive) : caseSensitive(caseSensitive)
{
	SWBuf buf;

	lastoff = -1;
	path = 0;
	stdstr(&path, ipath);

	compressor = (icomp) ? icomp : new SWCompress();
	this->blockCount = blockCount;

	if (fileMode == -1) { // try read/write if possible
		fileMode = FileMgr::RDWR;
	}
		
	buf.setFormatted("%s.idx", path);
	idxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	buf.setFormatted("%s.dat", path);
	datfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	buf.setFormatted("%s.zdx", path);
	zdxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	buf.setFormatted("%s.zdt", path);
	zdtfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	if (datfd <= 0) {
		SWLog::getSystemLog()->logError("%d", errno);
	}

	cacheBlock = 0;
	cacheBlockIndex = -1;
	cacheDirty = false;

	instance++;
}
Пример #5
0
RawStr::RawStr(const char *ipath, int fileMode, bool caseSensitive) : caseSensitive(caseSensitive)
{
	SWBuf buf;

	lastoff = -1;
	path = 0;
	stdstr(&path, ipath);

	if (fileMode == -1) { // try read/write if possible
		fileMode = FileMgr::RDWR;
	}
		
	buf.setFormatted("%s.idx", path);
	idxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	buf.setFormatted("%s.dat", path);
	datfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	if (datfd < 0) {
		SWLog::getSystemLog()->logError("%d", errno);
	}

	instance++;
}
Пример #6
0
TreeKeyIdx::TreeKeyIdx(const char *idxPath, int fileMode) : currentNode() {
	SWBuf buf;

	init();
	path = 0;
	stdstr(&path, idxPath);

	if (fileMode == -1) { // try read/write if possible
		fileMode = FileMgr::RDWR;
	}
		
	buf.setFormatted("%s.idx", path);
	idxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
	buf.setFormatted("%s.dat", path);
	datfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	if (datfd <= 0) {
		SWLog::getSystemLog()->logError("%d", errno);
		error = errno;
	}
	else {
		root();
	}
}
Пример #7
0
zVerse::zVerse(const char *ipath, int fileMode, int blockType, SWCompress *icomp)
{
	// this line, instead of just defaulting, to keep FileMgr out of header
	if (fileMode == -1) fileMode = FileMgr::RDONLY;

	SWBuf buf;

	path = 0;
	cacheBufIdx = -1;
	cacheTestament = 0;
	cacheBuf = 0;
	dirtyCache = false;
	stdstr(&path, ipath);

	if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
		path[strlen(path)-1] = 0;

	compressor = (icomp) ? icomp : new SWCompress();

	if (fileMode == -1) { // try read/write if possible
		fileMode = FileMgr::RDWR;
	}
		
	buf.setFormatted("%s/ot.%czs", path, uniqueIndexID[blockType]);
	idxfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	buf.setFormatted("%s/nt.%czs", path, uniqueIndexID[blockType]);
	idxfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	buf.setFormatted("%s/ot.%czz", path, uniqueIndexID[blockType]);
	textfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	buf.setFormatted("%s/nt.%czz", path, uniqueIndexID[blockType]);
	textfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	buf.setFormatted("%s/ot.%czv", path, uniqueIndexID[blockType]);
	compfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	buf.setFormatted("%s/nt.%czv", path, uniqueIndexID[blockType]);
	compfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);
	
	instance++;
}
Пример #8
0
char OSISStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	SWBuf token;
	bool intoken = false;
	int wordNum = 1;
	char wordstr[5];
	const char *wordStart = 0;
	SWBuf page = "";		// some modules include <seg> page info, so we add these to the words

	const SWBuf orig = text;
	const char * from = orig.c_str();

	for (text = ""; *from; ++from) {
		if (*from == '<') {
			intoken = true;
			token = "";
			continue;
		}
		if (*from == '>') {	// process tokens
			intoken = false;

			// possible page seg --------------------------------
			if (token.startsWith("seg ")) {
				XMLTag stag(token);
				SWBuf type = stag.getAttribute("type");
				if (type == "page") {
					SWBuf number = stag.getAttribute("subtype");
					if (number.length()) {
						page = number;
					}
				}
			}
			// ---------------------------------------------------

			if (token.startsWith("w ")) {	// Word
				XMLTag wtag(token);
				if (module->isProcessEntryAttributes()) {
					wordStart = from+1;
					char gh = 0;
					VerseKey *vkey = 0;
					if (key) {
						vkey = SWDYNAMIC_CAST(VerseKey, key);
					}
					SWBuf lemma      = "";
					SWBuf morph      = "";
					SWBuf src        = "";
					SWBuf morphClass = "";
					SWBuf lemmaClass = "";

					const char *attrib;
					sprintf(wordstr, "%03d", wordNum);

					// why is morph entry attribute processing done in here?  Well, it's faster.  It makes more local sense to place this code in osismorph.
					// easier to keep lemma and morph in same wordstr number too maybe.
					if ((attrib = wtag.getAttribute("morph"))) {
						int count = wtag.getAttributePartCount("morph", ' ');
						int i = (count > 1) ? 0 : -1;		// -1 for whole value cuz it's faster, but does the same thing as 0
						do {
							SWBuf mClass = "";
							SWBuf mp = "";
							attrib = wtag.getAttribute("morph", i, ' ');
							if (i < 0) i = 0;	// to handle our -1 condition

							const char *m = strchr(attrib, ':');
							if (m) {
								int len = m-attrib;
								mClass.append(attrib, len);
								attrib += (len+1);
							}
							if ((mClass == "x-Robinsons") || (mClass == "x-Robinson") || (mClass == "Robinson")) {
								mClass = "robinson";
							}
							if (i) { morphClass += " "; morph += " "; }
							mp += attrib;
							morphClass += mClass;
							morph += mp;
							if (count > 1) {
								SWBuf tmp;
								tmp.setFormatted("Morph.%d", i+1);
								module->getEntryAttributes()["Word"][wordstr][tmp] = mp;
								tmp.setFormatted("MorphClass.%d", i+1);
								module->getEntryAttributes()["Word"][wordstr][tmp] = mClass;
							}
						} while (++i < count);
					}

					if ((attrib = wtag.getAttribute("lemma"))) {
						int count = wtag.getAttributePartCount("lemma", ' ');
						int i = (count > 1) ? 0 : -1;		// -1 for whole value cuz it's faster, but does the same thing as 0
						do {
							gh = 0;
							SWBuf lClass = "";
							SWBuf l = "";
							attrib = wtag.getAttribute("lemma", i, ' ');
							if (i < 0) i = 0;	// to handle our -1 condition

							const char *m = strchr(attrib, ':');
							if (m) {
								int len = m-attrib;
								lClass.append(attrib, len);
								attrib += (len+1);
							}
							if ((lClass == "x-Strongs") || (lClass == "strong") || (lClass == "Strong")) {
								if (isdigit(attrib[0])) {
									if (vkey) {
										gh = vkey->getTestament() ? 'H' : 'G';
									}
								}
								else {
									gh = *attrib;
									attrib++;
								}
								lClass = "strong";
							}
							if (gh) l += gh;
							l += attrib;
							if (i) { lemmaClass += " "; lemma += " "; }
							lemma += l;
							lemmaClass += lClass;
							if (count > 1) {
								SWBuf tmp;
								tmp.setFormatted("Lemma.%d", i+1);
								module->getEntryAttributes()["Word"][wordstr][tmp] = l;
								tmp.setFormatted("LemmaClass.%d", i+1);
								module->getEntryAttributes()["Word"][wordstr][tmp] = lClass;
							}
						} while (++i < count);
						module->getEntryAttributes()["Word"][wordstr]["PartCount"].setFormatted("%d", count);
					}

					if ((attrib = wtag.getAttribute("src"))) {
						int count = wtag.getAttributePartCount("src", ' ');
						int i = (count > 1) ? 0 : -1;		// -1 for whole value cuz it's faster, but does the same thing as 0
						do {
							SWBuf mp = "";
							attrib = wtag.getAttribute("src", i, ' ');
							if (i < 0) i = 0;	// to handle our -1 condition

							if (i) src += " ";
							mp += attrib;
							src += mp;
							if (count > 1) {
								SWBuf tmp;
								tmp.setFormatted("Src.%d", i+1);
								module->getEntryAttributes()["Word"][wordstr][tmp] = mp;
							}
						} while (++i < count);
					}


					if (lemma.length())
						module->getEntryAttributes()["Word"][wordstr]["Lemma"] = lemma;
					if (lemmaClass.length())
						module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = lemmaClass;
					if (morph.length())
						module->getEntryAttributes()["Word"][wordstr]["Morph"] = morph;
					if (morphClass.length())
						module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = morphClass;
					if (src.length())
						module->getEntryAttributes()["Word"][wordstr]["Src"] = src;
					if (page.length())
						module->getEntryAttributes()["Word"][wordstr]["Page"] = page;

					if (wtag.isEmpty()) {
						int j;
						for (j = token.length()-1; ((j>0) && (strchr(" /", token[j]))); j--);
						token.size(j+1);
					}
					
					token += " wn=\"";
					token += wordstr;
					token += "\"";

					if (wtag.isEmpty()) {
						token += "/";
					}

					wordNum++;
				}

				if (!option) {
/*
 * Code which handles multiple lemma types.  Kindof works but breaks at least WEBIF filters for strongs.
 *
					int count = wtag.getAttributePartCount("lemma", ' ');
					for (int i = 0; i < count; i++) {
						SWBuf a = wtag.getAttribute("lemma", i, ' ');
						const char *prefix = a.stripPrefix(':');
						if ((prefix) && (!strcmp(prefix, "x-Strongs") || !strcmp(prefix, "strong") || !strcmp(prefix, "Strong"))) {
							// remove attribute part
							wtag.setAttribute("lemma", 0, i, ' ');
							i--;
							count--;
						}
					}
* Instead the codee below just removes the lemma attribute
*****/
					const char *l = wtag.getAttribute("lemma");
					if (l) {
						SWBuf savlm = l;
						wtag.setAttribute("lemma", 0);
						wtag.setAttribute("savlm", savlm);
						token = wtag;
						token.trim();
						// drop <>
						token << 1;
						token--;
					}
				}
			}
			if (token.startsWith("/w")) {	// Word End
				if (module->isProcessEntryAttributes()) {
					if (wordStart) {
						SWBuf tmp;
						tmp.append(wordStart, (from-wordStart)-3);
						sprintf(wordstr, "%03d", wordNum-1);
						module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
					}
				}
				wordStart = 0;
			}
			
			// keep token in text
			text.append('<');
			text.append(token);
			text.append('>');
			
			continue;
		}
		if (intoken) {
			token += *from;
		}
		else	{
			text.append(*from);
		}
	}
	return 0;
}
Пример #9
0
void writeEntry(SWModule *book, SWBuf keyBuffer, SWBuf entBuffer) {


	if (greekFilter) {
		greekAccentsFilter.processText(keyBuffer);
	}

	if (toUpper) {
		unsigned size = (keyBuffer.size()+5)*3;
		keyBuffer.setFillByte(0);
		keyBuffer.resize(size);
		StringMgr::getSystemStringMgr()->upperUTF8(keyBuffer.getRawData(), size-2);
	}

// Added for Hesychius, but this stuff should be pushed back into new StringMgr
// functionality
#ifdef _ICU_
//	if (lexLevels) {
	if (lexLevels && !keyBuffer.startsWith("/Intro")) {
		unsigned size = (keyBuffer.size()+(lexLevels*2));
		keyBuffer.setFillByte(0);
		keyBuffer.resize(size);
			
		UErrorCode err = U_ZERO_ERROR;
		
		int max = (size+5)*3;
		UChar *ubuffer = new UChar[max+10];
		int32_t len;
		
		u_strFromUTF8(ubuffer, max+9, &len, keyBuffer.c_str(), -1, &err);
		if (err == U_ZERO_ERROR) {
			UChar *upper = new UChar[(lexLevels+1)*3];
			memcpy(upper, ubuffer, lexLevels*sizeof(UChar));
			upper[lexLevels] = 0;
			len = u_strToUpper(upper, (lexLevels+1)*3, upper, -1, 0, &err);
			memmove(ubuffer+len+1, ubuffer, (max-len)*sizeof(UChar));
			memcpy(ubuffer, upper, len*sizeof(UChar));
			ubuffer[len] = '/';
			delete [] upper;

			int totalShift = 0;
			for (int i = lexLevels-1; i; i--) {
				int shift = (i < len)? i : len;
				memmove(ubuffer+(shift+1), ubuffer, (max-shift)*sizeof(UChar));
				ubuffer[shift] = '/';
				totalShift += (shift+1);
			}
			u_strToUTF8(keyBuffer.getRawData(), max, 0, ubuffer, -1, &err);
		}
		
/*
		u_strFromUTF8(ubuffer, max+9, &len, keyBuffer.c_str(), -1, &err);
		if (err == U_ZERO_ERROR) {
			int totalShift = 0;
			for (int i = lexLevels; i; i--) {
				int shift = (i < len)? i : len;
				memmove(ubuffer+(shift+1), ubuffer, (max-shift)*sizeof(UChar));
				ubuffer[shift] = '/';
				totalShift += (shift+1);
			}
			UChar *upper = new UChar[(totalShift+1)*3];
			memcpy(upper, ubuffer, totalShift*sizeof(UChar));
			upper[totalShift] = 0;
			len = u_strToUpper(upper, (totalShift+1)*3, upper, -1, 0, &err);
			memmove(ubuffer+len, ubuffer+totalShift, (max-totalShift)*sizeof(UChar));
			memcpy(ubuffer, upper, len*sizeof(UChar));
			delete [] upper;
			u_strToUTF8(keyBuffer.getRawData(), max, 0, ubuffer, -1, &err);
		}
*/
		
		delete [] ubuffer;
	}
#endif

	std::cout << keyBuffer << std::endl;

	book->setKey(keyBuffer.c_str());

	// check to see if we already have an entry
	for (int i = 2; book->getKey()->popError() != KEYERR_OUTOFBOUNDS; i++) {
		SWBuf key;
		key.setFormatted("%s {%d}", keyBuffer.c_str(), i);
		std::cout << "dup key, trying: " << key << std::endl;
		book->setKey(key.c_str());
	}

	book->setEntry(entBuffer);
}
Пример #10
0
char OSISMorphSegmentation::processText(SWBuf &text, const SWKey * /*key*/, const SWModule *module) {
	SWBuf token;
	bool intoken    = false;
	bool hide       = false;

	SWBuf orig( text );
	const char *from = orig.c_str();

	XMLTag tag;
	SWBuf tagText = "";
	unsigned int morphemeNum = 0;
	bool inMorpheme = false;
	SWBuf buf;

	for (text = ""; *from; ++from) {
		if (*from == '<') {
			intoken = true;
			token = "";
			continue;
		}

		if (*from == '>') { // process tokens
			intoken = false;

			if (!strncmp(token.c_str(), "seg ", 4) || !strncmp(token.c_str(), "/seg", 4)) {
				tag = token;

				if (!tag.isEndTag() && tag.getAttribute("type") && !strcmp("morph", tag.getAttribute("type"))) {  //<seg type="morph"> start tag
					hide = !option; //only hide if option is Off
					tagText = "";
					inMorpheme = true;
				}

				if (tag.isEndTag()) {
						buf.setFormatted("%.3d", morphemeNum++);
						module->getEntryAttributes()["Morpheme"][buf]["body"] = tagText;
						inMorpheme = false;
				}
				if (hide) { //hides start and end tags as long as hide is set

					if (tag.isEndTag()) { //</seg>
						hide = false;
					}

					continue; //leave out the current token
				}
			} //end of seg tag handling

			text.append('<');
			text.append(token);
			text.append('>');

			if (inMorpheme) {
				tagText.append('<');
				tagText.append(token);
				tagText.append('>');
			}

			hide = false;

			continue;
		} //end of intoken part

		if (intoken) { //copy token
			token.append(*from);
		}
		else { //copy text which is not inside of a tag
			text.append(*from);
			if (inMorpheme) {
				tagText.append(*from);
			}
		}
	}
	return 0;
}