/******************************************************************************
 * RawVerse Constructor - opens the index and text files of a raw verse module
 *
 * ENT:	ipath    - directory that holds 'ot', 'ot.vss', 'nt' and 'nt.vss';
 *		   a single trailing '/' or '\' is removed before use
 *	fileMode - mode handed to FileMgr::open(); -1 selects FileMgr::RDWR
 *
 * Slot 0 of idxfp/textfp receives the 'ot' files, slot 1 the 'nt' files.
 * The 'instance' counter is incremented once per constructed object.
 */
RawVerse::RawVerse(const char *ipath, int fileMode)
{
	SWBuf buf;

	path = 0;
	stdstr(&path, ipath);

	// Strip one trailing path separator.  The length is tested first so that
	// an empty path no longer indexes path[-1].
	const size_t pathLen = strlen(path);
	if ((pathLen > 0) && ((path[pathLen-1] == '/') || (path[pathLen-1] == '\\')))
		path[pathLen-1] = 0;

	if (fileMode == -1) { // try read/write if possible
		fileMode = FileMgr::RDWR;
	}

	// index files
	buf.setFormatted("%s/ot.vss", path);
	idxfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	buf.setFormatted("%s/nt.vss", path);
	idxfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	// text files
	buf.setFormatted("%s/ot", path);
	textfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	buf.setFormatted("%s/nt", path);
	textfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	instance++;
}
void openfiles(char *fname) { SWBuf buf; if ((fp = FileMgr::openFileReadOnly(fname)) < 0) { fprintf(stderr, "Couldn't open file: %s\n", fname); exit(1); } buf.setFormatted("%s.vss", fname); if ((vfp = FileMgr::createPathAndFile(buf.c_str())) < 0) { fprintf(stderr, "Couldn't open file: %s\n", buf.c_str()); exit(1); } buf.setFormatted("%s.cps", fname); if ((cfp = FileMgr::createPathAndFile(buf.c_str())) < 0) { fprintf(stderr, "Couldn't open file: %s\n", buf.c_str()); exit(1); } buf.setFormatted("%s.bks", fname); if ((bfp = FileMgr::createPathAndFile(buf.c_str())) < 0) { fprintf(stderr, "Couldn't open file: %s\n", buf.c_str()); exit(1); } }
void SWMgr::augmentModules(const char *ipath, bool multiMod) { SWBuf path = ipath; if ((ipath[strlen(ipath)-1] != '\\') && (ipath[strlen(ipath)-1] != '/')) path += "/"; if (FileMgr::existsDir(path.c_str(), "mods.d")) { char *savePrefixPath = 0; char *saveConfigPath = 0; SWConfig *saveConfig = 0; stdstr(&savePrefixPath, prefixPath); stdstr(&prefixPath, path.c_str()); path += "mods.d"; stdstr(&saveConfigPath, configPath); stdstr(&configPath, path.c_str()); saveConfig = config; config = myconfig = 0; loadConfigDir(configPath); if (multiMod) { // fix config's Section names to rename modules which are available more than once // find out which sections are in both config objects // inserting all configs first is not good because that overwrites old keys and new modules would share the same config for (SectionMap::iterator it = config->Sections.begin(); it != config->Sections.end();) { if (saveConfig->Sections.find( (*it).first ) != saveConfig->Sections.end()) { //if the new section is already present rename it ConfigEntMap entMap((*it).second); SWBuf name; int i = 1; do { //module name already used? name.setFormatted("%s_%d", (*it).first.c_str(), i); i++; } while (config->Sections.find(name) != config->Sections.end()); config->Sections.insert(SectionMap::value_type(name, entMap) ); SectionMap::iterator toErase = it++; config->Sections.erase(toErase); } else ++it; } } CreateMods(multiMod); stdstr(&prefixPath, savePrefixPath); delete []savePrefixPath; stdstr(&configPath, saveConfigPath); delete []saveConfigPath; (*saveConfig) += *config; homeConfig = myconfig; config = myconfig = saveConfig; } }
/******************************************************************************
 * zStr Constructor - opens the four files "<ipath>.idx", "<ipath>.dat",
 *	"<ipath>.zdx" and "<ipath>.zdt" and resets the block cache
 *
 * ENT:	ipath         - path and base name of the files (no extension)
 *	fileMode      - mode handed to FileMgr::open(); -1 selects FileMgr::RDWR
 *	blockCount    - stored in this->blockCount
 *	icomp         - compressor to use; when null a new SWCompress is created
 *	caseSensitive - stored in the member of the same name
 *
 * If datfd compares <= 0 after opening, the current errno is logged.
 * The 'instance' counter is incremented once per constructed object.
 */
zStr::zStr(const char *ipath, int fileMode, long blockCount, SWCompress *icomp, bool caseSensitive)
	: caseSensitive(caseSensitive)
{
	SWBuf fileName;

	// plain state that does not depend on the files opened below
	lastoff = -1;
	cacheBlock = 0;
	cacheBlockIndex = -1;
	cacheDirty = false;
	this->blockCount = blockCount;

	path = 0;
	stdstr(&path, ipath);

	if (icomp) {
		compressor = icomp;
	}
	else {
		compressor = new SWCompress();
	}

	if (fileMode == -1) { // try read/write if possible
		fileMode = FileMgr::RDWR;
	}

	fileName.setFormatted("%s.idx", path);
	idxfd = FileMgr::getSystemFileMgr()->open(fileName, fileMode, true);

	fileName.setFormatted("%s.dat", path);
	datfd = FileMgr::getSystemFileMgr()->open(fileName, fileMode, true);

	fileName.setFormatted("%s.zdx", path);
	zdxfd = FileMgr::getSystemFileMgr()->open(fileName, fileMode, true);

	fileName.setFormatted("%s.zdt", path);
	zdtfd = FileMgr::getSystemFileMgr()->open(fileName, fileMode, true);

	if (datfd <= 0) {
		SWLog::getSystemLog()->logError("%d", errno);
	}

	instance++;
}
/******************************************************************************
 * RawStr Constructor - opens the files "<ipath>.idx" and "<ipath>.dat"
 *
 * ENT:	ipath         - path and base name of the files (no extension)
 *	fileMode      - mode handed to FileMgr::open(); -1 selects FileMgr::RDWR
 *	caseSensitive - stored in the member of the same name
 *
 * The 'instance' counter is incremented once per constructed object.
 */
RawStr::RawStr(const char *ipath, int fileMode, bool caseSensitive)
	: caseSensitive(caseSensitive)
{
	SWBuf buf;

	lastoff = -1;
	path = 0;
	stdstr(&path, ipath);

	if (fileMode == -1) { // try read/write if possible
		fileMode = FileMgr::RDWR;
	}

	buf.setFormatted("%s.idx", path);
	idxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	buf.setFormatted("%s.dat", path);
	datfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	// Same failure test as the zStr and TreeKeyIdx constructors, which check
	// the result of the identical open() call with '<= 0'.  The former '< 0'
	// never reported a zero (null) handle.
	// NOTE(review): presumably datfd is a pointer-like handle -- confirm.
	if (datfd <= 0) {
		SWLog::getSystemLog()->logError("%d", errno);
	}

	instance++;
}
/******************************************************************************
 * TreeKeyIdx Constructor - opens "<idxPath>.idx" and "<idxPath>.dat" and
 *	positions the key with root() when the data file is available
 *
 * ENT:	idxPath  - path and base name of the files (no extension)
 *	fileMode - mode handed to FileMgr::open(); -1 selects FileMgr::RDWR
 *
 * If datfd compares <= 0 after opening, errno is logged and stored in
 * 'error'; otherwise root() is called.
 */
TreeKeyIdx::TreeKeyIdx(const char *idxPath, int fileMode)
	: currentNode()
{
	SWBuf buf;

	init();
	path = 0;
	stdstr(&path, idxPath);

	if (fileMode == -1) { // try read/write if possible
		fileMode = FileMgr::RDWR;
	}

	buf.setFormatted("%s.idx", path);
	idxfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	buf.setFormatted("%s.dat", path);
	datfd = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	if (datfd <= 0) {
		// Snapshot errno before logging: logError() may itself make library
		// calls that overwrite errno, and the value recorded in 'error' must
		// be the one produced by the failed open.
		const int openErrno = errno;
		SWLog::getSystemLog()->logError("%d", openErrno);
		error = openErrno;
	}
	else {
		root();
	}
}
/******************************************************************************
 * zVerse Constructor - opens the index, text and compressed-verse files of a
 *	compressed verse module and resets the block cache
 *
 * ENT:	ipath     - directory that holds the module files; a single trailing
 *		    '/' or '\' is removed before use
 *	fileMode  - mode handed to FileMgr::open(); -1 selects FileMgr::RDONLY
 *	blockType - index into uniqueIndexID; the selected character becomes
 *		    part of each file extension (".?zs", ".?zz", ".?zv")
 *	icomp     - compressor to use; when null a new SWCompress is created
 *
 * Slot 0 of idxfp/textfp/compfp receives the 'ot' files, slot 1 the 'nt'
 * files.  The 'instance' counter is incremented once per constructed object.
 */
zVerse::zVerse(const char *ipath, int fileMode, int blockType, SWCompress *icomp)
{
	// this line, instead of just defaulting, to keep FileMgr out of header
	if (fileMode == -1) fileMode = FileMgr::RDONLY;

	SWBuf buf;

	path = 0;
	cacheBufIdx = -1;
	cacheTestament = 0;
	cacheBuf = 0;
	dirtyCache = false;
	stdstr(&path, ipath);

	// Strip one trailing path separator.  The length is tested first so that
	// an empty path no longer indexes path[-1].
	const size_t pathLen = strlen(path);
	if ((pathLen > 0) && ((path[pathLen-1] == '/') || (path[pathLen-1] == '\\')))
		path[pathLen-1] = 0;

	compressor = (icomp) ? icomp : new SWCompress();

	// A second "fileMode == -1 -> RDWR" test used to sit here.  It could never
	// fire because -1 has already been replaced by RDONLY above, so it was
	// removed; the effective default is unchanged.

	// NOTE(review): blockType is used as an array index without a range check.
	buf.setFormatted("%s/ot.%czs", path, uniqueIndexID[blockType]);
	idxfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	buf.setFormatted("%s/nt.%czs", path, uniqueIndexID[blockType]);
	idxfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	buf.setFormatted("%s/ot.%czz", path, uniqueIndexID[blockType]);
	textfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	buf.setFormatted("%s/nt.%czz", path, uniqueIndexID[blockType]);
	textfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	buf.setFormatted("%s/ot.%czv", path, uniqueIndexID[blockType]);
	compfp[0] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	buf.setFormatted("%s/nt.%czv", path, uniqueIndexID[blockType]);
	compfp[1] = FileMgr::getSystemFileMgr()->open(buf, fileMode, true);

	instance++;
}
char OSISStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) { SWBuf token; bool intoken = false; int wordNum = 1; char wordstr[5]; const char *wordStart = 0; SWBuf page = ""; // some modules include <seg> page info, so we add these to the words const SWBuf orig = text; const char * from = orig.c_str(); for (text = ""; *from; ++from) { if (*from == '<') { intoken = true; token = ""; continue; } if (*from == '>') { // process tokens intoken = false; // possible page seg -------------------------------- if (token.startsWith("seg ")) { XMLTag stag(token); SWBuf type = stag.getAttribute("type"); if (type == "page") { SWBuf number = stag.getAttribute("subtype"); if (number.length()) { page = number; } } } // --------------------------------------------------- if (token.startsWith("w ")) { // Word XMLTag wtag(token); if (module->isProcessEntryAttributes()) { wordStart = from+1; char gh = 0; VerseKey *vkey = 0; if (key) { vkey = SWDYNAMIC_CAST(VerseKey, key); } SWBuf lemma = ""; SWBuf morph = ""; SWBuf src = ""; SWBuf morphClass = ""; SWBuf lemmaClass = ""; const char *attrib; sprintf(wordstr, "%03d", wordNum); // why is morph entry attribute processing done in here? Well, it's faster. It makes more local sense to place this code in osismorph. // easier to keep lemma and morph in same wordstr number too maybe. if ((attrib = wtag.getAttribute("morph"))) { int count = wtag.getAttributePartCount("morph", ' '); int i = (count > 1) ? 
0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 do { SWBuf mClass = ""; SWBuf mp = ""; attrib = wtag.getAttribute("morph", i, ' '); if (i < 0) i = 0; // to handle our -1 condition const char *m = strchr(attrib, ':'); if (m) { int len = m-attrib; mClass.append(attrib, len); attrib += (len+1); } if ((mClass == "x-Robinsons") || (mClass == "x-Robinson") || (mClass == "Robinson")) { mClass = "robinson"; } if (i) { morphClass += " "; morph += " "; } mp += attrib; morphClass += mClass; morph += mp; if (count > 1) { SWBuf tmp; tmp.setFormatted("Morph.%d", i+1); module->getEntryAttributes()["Word"][wordstr][tmp] = mp; tmp.setFormatted("MorphClass.%d", i+1); module->getEntryAttributes()["Word"][wordstr][tmp] = mClass; } } while (++i < count); } if ((attrib = wtag.getAttribute("lemma"))) { int count = wtag.getAttributePartCount("lemma", ' '); int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 do { gh = 0; SWBuf lClass = ""; SWBuf l = ""; attrib = wtag.getAttribute("lemma", i, ' '); if (i < 0) i = 0; // to handle our -1 condition const char *m = strchr(attrib, ':'); if (m) { int len = m-attrib; lClass.append(attrib, len); attrib += (len+1); } if ((lClass == "x-Strongs") || (lClass == "strong") || (lClass == "Strong")) { if (isdigit(attrib[0])) { if (vkey) { gh = vkey->getTestament() ? 'H' : 'G'; } } else { gh = *attrib; attrib++; } lClass = "strong"; } if (gh) l += gh; l += attrib; if (i) { lemmaClass += " "; lemma += " "; } lemma += l; lemmaClass += lClass; if (count > 1) { SWBuf tmp; tmp.setFormatted("Lemma.%d", i+1); module->getEntryAttributes()["Word"][wordstr][tmp] = l; tmp.setFormatted("LemmaClass.%d", i+1); module->getEntryAttributes()["Word"][wordstr][tmp] = lClass; } } while (++i < count); module->getEntryAttributes()["Word"][wordstr]["PartCount"].setFormatted("%d", count); } if ((attrib = wtag.getAttribute("src"))) { int count = wtag.getAttributePartCount("src", ' '); int i = (count > 1) ? 
0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 do { SWBuf mp = ""; attrib = wtag.getAttribute("src", i, ' '); if (i < 0) i = 0; // to handle our -1 condition if (i) src += " "; mp += attrib; src += mp; if (count > 1) { SWBuf tmp; tmp.setFormatted("Src.%d", i+1); module->getEntryAttributes()["Word"][wordstr][tmp] = mp; } } while (++i < count); } if (lemma.length()) module->getEntryAttributes()["Word"][wordstr]["Lemma"] = lemma; if (lemmaClass.length()) module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = lemmaClass; if (morph.length()) module->getEntryAttributes()["Word"][wordstr]["Morph"] = morph; if (morphClass.length()) module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = morphClass; if (src.length()) module->getEntryAttributes()["Word"][wordstr]["Src"] = src; if (page.length()) module->getEntryAttributes()["Word"][wordstr]["Page"] = page; if (wtag.isEmpty()) { int j; for (j = token.length()-1; ((j>0) && (strchr(" /", token[j]))); j--); token.size(j+1); } token += " wn=\""; token += wordstr; token += "\""; if (wtag.isEmpty()) { token += "/"; } wordNum++; } if (!option) { /* * Code which handles multiple lemma types. Kindof works but breaks at least WEBIF filters for strongs. 
* int count = wtag.getAttributePartCount("lemma", ' '); for (int i = 0; i < count; i++) { SWBuf a = wtag.getAttribute("lemma", i, ' '); const char *prefix = a.stripPrefix(':'); if ((prefix) && (!strcmp(prefix, "x-Strongs") || !strcmp(prefix, "strong") || !strcmp(prefix, "Strong"))) { // remove attribute part wtag.setAttribute("lemma", 0, i, ' '); i--; count--; } } * Instead the codee below just removes the lemma attribute *****/ const char *l = wtag.getAttribute("lemma"); if (l) { SWBuf savlm = l; wtag.setAttribute("lemma", 0); wtag.setAttribute("savlm", savlm); token = wtag; token.trim(); // drop <> token << 1; token--; } } } if (token.startsWith("/w")) { // Word End if (module->isProcessEntryAttributes()) { if (wordStart) { SWBuf tmp; tmp.append(wordStart, (from-wordStart)-3); sprintf(wordstr, "%03d", wordNum-1); module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; } } wordStart = 0; } // keep token in text text.append('<'); text.append(token); text.append('>'); continue; } if (intoken) { token += *from; } else { text.append(*from); } } return 0; }
/*
 * Normalises a key and writes one entry to 'book'.
 *
 * book      - module that receives the entry
 * keyBuffer - key text (taken by value and modified locally)
 * entBuffer - entry body passed to book->setEntry()
 *
 * Steps, in order:
 *   1. if greekFilter is set, greekAccentsFilter.processText() is applied;
 *   2. if toUpper is set, the key is upper-cased through StringMgr::upperUTF8();
 *   3. (ICU builds only) if lexLevels is set and the key does not start with
 *      "/Intro", upper-cased prefixes of the key are prepended as path levels,
 *      e.g. with lexLevels == 3 the key "abcdef" becomes "A/AB/ABC/abcdef";
 *   4. the key is printed and set on the book; as long as
 *      book->getKey()->popError() does not return KEYERR_OUTOFBOUNDS the key
 *      "<key> {n}" (n = 2, 3, ...) is tried instead;
 *   5. the entry is written.
 */
void writeEntry(SWModule *book, SWBuf keyBuffer, SWBuf entBuffer)
{
	if (greekFilter) {
		greekAccentsFilter.processText(keyBuffer);
	}

	if (toUpper) {
		// grow the buffer first so the upper-cased form has room
		unsigned size = (keyBuffer.size()+5)*3;
		keyBuffer.setFillByte(0);
		keyBuffer.resize(size);
		StringMgr::getSystemStringMgr()->upperUTF8(keyBuffer.getRawData(), size-2);
	}

// Added for Hesychius, but this stuff should be pushed back into new StringMgr
// functionality
#ifdef _ICU_
//	if (lexLevels) {
	if (lexLevels && !keyBuffer.startsWith("/Intro")) {
		unsigned size = (keyBuffer.size()+(lexLevels*2));
		keyBuffer.setFillByte(0);
		keyBuffer.resize(size);

		UErrorCode err = U_ZERO_ERROR;

		int max = (size+5)*3;
		UChar *ubuffer = new UChar[max+10];
		int32_t len;

		u_strFromUTF8(ubuffer, max+9, &len, keyBuffer.c_str(), -1, &err);
		if (err == U_ZERO_ERROR) {
			// upper-case the first lexLevels code units and prepend them plus '/'
			UChar *upper = new UChar[(lexLevels+1)*3];
			memcpy(upper, ubuffer, lexLevels*sizeof(UChar));
			upper[lexLevels] = 0;
			len = u_strToUpper(upper, (lexLevels+1)*3, upper, -1, 0, &err);
			memmove(ubuffer+len+1, ubuffer, (max-len)*sizeof(UChar));
			memcpy(ubuffer, upper, len*sizeof(UChar));
			ubuffer[len] = '/';
			delete [] upper;

			// prepend each shorter prefix, longest first, each followed by '/'
			for (int i = lexLevels-1; i; i--) {
				int shift = (i < len) ? i : len;
				memmove(ubuffer+(shift+1), ubuffer, (max-shift)*sizeof(UChar));
				ubuffer[shift] = '/';
			}

			// The conversion below is told the destination holds 'max' bytes,
			// but keyBuffer had only been resized to 'size'.  Grow it so the
			// advertised capacity is real (plus room for the terminator).
			keyBuffer.resize(max+1);
			u_strToUTF8(keyBuffer.getRawData(), max, 0, ubuffer, -1, &err);
		}

/*
		// disabled alternative implementation, kept for reference
		u_strFromUTF8(ubuffer, max+9, &len, keyBuffer.c_str(), -1, &err);
		if (err == U_ZERO_ERROR) {
			int totalShift = 0;
			for (int i = lexLevels; i; i--) {
				int shift = (i < len)? i : len;
				memmove(ubuffer+(shift+1), ubuffer, (max-shift)*sizeof(UChar));
				ubuffer[shift] = '/';
				totalShift += (shift+1);
			}
			UChar *upper = new UChar[(totalShift+1)*3];
			memcpy(upper, ubuffer, totalShift*sizeof(UChar));
			upper[totalShift] = 0;
			len = u_strToUpper(upper, (totalShift+1)*3, upper, -1, 0, &err);
			memmove(ubuffer+len, ubuffer+totalShift, (max-totalShift)*sizeof(UChar));
			memcpy(ubuffer, upper, len*sizeof(UChar));
			delete [] upper;
			u_strToUTF8(keyBuffer.getRawData(), max, 0, ubuffer, -1, &err);
		}
*/

		delete [] ubuffer;
	}
#endif

	std::cout << keyBuffer << std::endl;

	book->setKey(keyBuffer.c_str());

	// check to see if we already have an entry
	for (int i = 2; book->getKey()->popError() != KEYERR_OUTOFBOUNDS; i++) {
		SWBuf key;
		key.setFormatted("%s {%d}", keyBuffer.c_str(), i);
		std::cout << "dup key, trying: " << key << std::endl;
		book->setKey(key.c_str());
	}

	book->setEntry(entBuffer);
}
char OSISMorphSegmentation::processText(SWBuf &text, const SWKey * /*key*/, const SWModule *module) { SWBuf token; bool intoken = false; bool hide = false; SWBuf orig( text ); const char *from = orig.c_str(); XMLTag tag; SWBuf tagText = ""; unsigned int morphemeNum = 0; bool inMorpheme = false; SWBuf buf; for (text = ""; *from; ++from) { if (*from == '<') { intoken = true; token = ""; continue; } if (*from == '>') { // process tokens intoken = false; if (!strncmp(token.c_str(), "seg ", 4) || !strncmp(token.c_str(), "/seg", 4)) { tag = token; if (!tag.isEndTag() && tag.getAttribute("type") && !strcmp("morph", tag.getAttribute("type"))) { //<seg type="morph"> start tag hide = !option; //only hide if option is Off tagText = ""; inMorpheme = true; } if (tag.isEndTag()) { buf.setFormatted("%.3d", morphemeNum++); module->getEntryAttributes()["Morpheme"][buf]["body"] = tagText; inMorpheme = false; } if (hide) { //hides start and end tags as long as hide is set if (tag.isEndTag()) { //</seg> hide = false; } continue; //leave out the current token } } //end of seg tag handling text.append('<'); text.append(token); text.append('>'); if (inMorpheme) { tagText.append('<'); tagText.append(token); tagText.append('>'); } hide = false; continue; } //end of intoken part if (intoken) { //copy token token.append(*from); } else { //copy text which is not inside of a tag text.append(*from); if (inMorpheme) { tagText.append(*from); } } } return 0; }