void TreeKeyIdx::setText(const char *ikey) { char *buf = 0; stdstr(&buf, ikey); SWBuf leaf = strtok(buf, "/"); leaf.trim(); root(); while ((leaf.size()) && (!popError())) { bool ok, inChild = false; error = KEYERR_OUTOFBOUNDS; for (ok = firstChild(); ok; ok = nextSibling()) { inChild = true; if (leaf == getLocalName()) { error = 0; break; } } leaf = strtok(0, "/"); leaf.trim(); if (!ok) { if (inChild) { // if we didn't find a matching child node, default to first child parent(); firstChild(); } error = KEYERR_OUTOFBOUNDS; } } if (leaf.size()) error = KEYERR_OUTOFBOUNDS; delete [] buf; unsnappedKeyText = ikey; positionChanged(); }
/**
 * Re-encodes 'text' from UTF-8 into 16-bit code units stored in host byte
 * order inside the same SWBuf.
 *
 * Code points below 0x10000 become one unit; everything else is written as
 * a surrogate pair.  Sequences for which getUniCharFromUTF8() yields 0 are
 * dropped.  A 16-bit zero is written just past the reported size so the
 * buffer is terminated without the terminator counting towards size().
 *
 * @return always 0
 */
char UTF8UTF16::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	SWBuf utf8 = text;
	const unsigned char *src = (const unsigned char *)utf8.c_str();

	text = "";
	while (*src) {
		const __u32 codePoint = getUniCharFromUTF8(&src);
		if (!codePoint)
			continue;	// invalid char

		if (codePoint >= 0x10000) {
			// supplementary plane: high surrogate followed by low surrogate
			const __u32 offset = codePoint - 0x10000;
			text.setSize(text.size()+4);
			__u16 *pair = (__u16 *)(text.getRawData()+(text.size()-4));
			pair[0] = (__s16)(offset / 0x400 + 0xD800);
			pair[1] = (__s16)(offset % 0x400 + 0xDC00);
		}
		else {
			text.setSize(text.size()+2);
			*((__u16 *)(text.getRawData()+(text.size()-2))) = (__u16)codePoint;
		}
	}

	// write a 16-bit terminator, then shrink so it is not part of the payload
	text.setSize(text.size()+2);
	*((__u16 *)(text.getRawData()+(text.size()-2))) = (__u16)0;
	text.setSize(text.size()-2);

	return 0;
}
void parseParams(int argc, char **argv) { if (argc < 2) { usage(*argv); } inFile = argv[1]; for (int i = 2; i < argc; i++) { if (!strcmp(argv[i], "-o")) { if ((i+1 < argc) && (argv[i+1][0] != '-')) { outPath = argv[i+1]; i++; } else usage(*argv); } else if (!strcmp(argv[i], "-U")) { if (StringMgr::hasUTF8Support()) { toUpper = true; } else { fprintf(stderr, "Error: %s. Cannot reliably toUpper without UTF8 support\n\t(recompile with ICU enabled)\n\n", *argv); usage(*argv); } } else if (!strcmp(argv[i], "-g")) { greekFilter = true; } else if (!strcmp(argv[i], "-O")) { augEnt = false; } else if (!strcmp(argv[i], "-a")) { augMod = true; } else if (!strcmp(argv[i], "-l")) { if (i+1 < argc) { lexLevels = atoi(argv[i+1]); i++; } if (!lexLevels) usage(*argv); } } if (!outPath.size()) { outPath = inFile; unsigned int i; for (i = 0; (i < outPath.size() && outPath[i] != '.'); i++); outPath.size(i); } }
/**
 * Builds a java.lang.String from a C string by way of a byte array and the
 * String(byte[]) constructor.
 *
 * The input is first passed through assureValidUTF8().  Every JNI lookup or
 * allocation is checked before its result is used; on failure the function
 * returns 0 and leaves whatever Java exception JNI raised pending for the
 * caller.  Local references created here are released before returning.
 *
 * @param env JNI environment of the calling thread
 * @param buf text to convert
 * @return the new String, or 0 if a JNI call failed
 */
jstring newBigString(JNIEnv *env, const char *buf) {
	SWBuf str = assureValidUTF8(buf);
	// JNI array sizes are 32-bit; also keeps the %d log format correct
	const int byteCount = (int)str.size();

	jclass stringClass = env->FindClass("java/lang/String");
	if (!stringClass)
		return 0;

	jmethodID ctorID = env->GetMethodID(stringClass, "<init>", "([B)V");
	if (!ctorID) {
		env->DeleteLocalRef(stringClass);
		return 0;
	}

	SWLog::getSystemLog()->logDebug("newBigString: making byte array size: %d", byteCount);
	jbyteArray bytes = env->NewByteArray(byteCount);
	if (!bytes) {
		// allocation failed; do not touch the null array
		env->DeleteLocalRef(stringClass);
		return 0;
	}

	SWLog::getSystemLog()->logDebug("newBigString: setting array region");
	env->SetByteArrayRegion(bytes, 0, byteCount, (jbyte *)str.c_str());

	SWLog::getSystemLog()->logDebug("newBigString: newing string");
	jstring result = (jstring)env->NewObject(stringClass, ctorID, bytes);

	env->DeleteLocalRef(bytes);
	env->DeleteLocalRef(stringClass);

	SWLog::getSystemLog()->logDebug("newBigString: returning");
	return result;
}
void TreeKey::assureKeyPath(const char *keyBuffer) { if (!keyBuffer) { keyBuffer = unsnappedKeyText; //assert we have something to do before setting root if (!*keyBuffer) return; } char *keybuf = 0; stdstr(&keybuf, keyBuffer); root(); // TODO: change to NOT use strtok. strtok is dangerous. SWBuf tok = strtok(keybuf, "/"); tok.trim(); while (tok.size()) { bool foundkey = false; if (hasChildren()) { firstChild(); if (tok == getLocalName()) { foundkey = true; } else { while (nextSibling()) { if (getLocalName()) { if (tok == getLocalName()) { foundkey = true; break; } } } } if (!foundkey) { append(); setLocalName(tok); save(); } } else { appendChild(); setLocalName(tok); save(); } #ifdef DEBUG // std::cout << getLocalName() << " : " << tok << std::endl; #endif tok = strtok(0, "/"); tok.trim(); } delete [] keybuf; }
/**
 * Rewrites 'text' in Unicode Normalization Form C.
 *
 * The buffer is decoded through the converter 'conv', normalized with ICU's
 * Normalizer and encoded back through the same converter.  ICU status codes
 * are cleared before each step and are not inspected afterwards.
 *
 * @return -1 when 'key' is one of the en/deciphering sentinels (0 or 1),
 *         otherwise 0
 */
char UTF8NFC::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	// hack, we're en(1)/de(0)ciphering
	if ((unsigned long)key < 2)
		return -1;

	err = U_ZERO_ERROR;
	UnicodeString decoded(text.getRawData(), text.length(), conv, err);

	UnicodeString composed;
	err = U_ZERO_ERROR;
	Normalizer::normalize(decoded, UNORM_NFC, 0, composed, err);

	err = U_ZERO_ERROR;
	// potentially, it can grow to 2x the original size
	text.setSize(text.size()*2);
	const int32_t written = composed.extract(text.getRawData(), text.size(), conv, err);
	text.setSize(written);

	return 0;
}
/**
 * Applies Arabic letter shaping (and European-to-Arabic digit mapping) to
 * 'text' using ICU's u_shapeArabic().
 *
 * The text is converted to UChars through 'conv', shaped, and converted
 * back into the same SWBuf.
 *
 * The shared status 'err' is cleared before the first ICU call.  ICU
 * functions return immediately when handed a failure status, so a stale
 * error left over from an earlier invocation would otherwise make all three
 * calls no-ops and leave 'text' empty.
 *
 * @return -1 when 'key' is one of the en/deciphering sentinels (0 or 1),
 *         otherwise 0
 */
char UTF8arShaping::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
	UChar *ustr, *ustr2;
	if ((unsigned long)key < 2)	// hack, we're en(1)/de(0)ciphering
		return -1;

	int32_t len = text.length();
	ustr = new UChar[len];
	ustr2 = new UChar[len];

	// start from a clean status so a previous failure cannot disable this run
	err = U_ZERO_ERROR;

	// Convert UTF-8 string to UTF-16 (UChars)
	len = ucnv_toUChars(conv, ustr, len, text.c_str(), -1, &err);

	len = u_shapeArabic(ustr, len, ustr2, len, U_SHAPE_LETTERS_SHAPE | U_SHAPE_DIGITS_EN2AN, &err);

	// give the converter room to write the re-encoded result
	text.setSize(text.size()*2);
	len = ucnv_fromUChars(conv, text.getRawData(), text.size(), ustr2, len, &err);
	text.setSize(len);

	delete [] ustr2;
	delete [] ustr;
	return 0;
}
int main(int argc, char **argv) { greekAccentsFilter.setOptionValue("Off"); // off = accents off parseParams(argc, argv); // Let's see if we can open our input file FileDesc *fd = FileMgr::getSystemFileMgr()->open(inFile, FileMgr::RDONLY); if (fd->getFd() < 0) { fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], inFile.c_str()); exit(-2); } RawGenBook *book; // Do some initialization stuff if (!augMod) { RawGenBook::createModule(outPath); } book = new RawGenBook(outPath); SWBuf lineBuffer; SWBuf keyBuffer; SWBuf entBuffer; bool more = true; do { more = FileMgr::getLine(fd, lineBuffer)!=0; if (lineBuffer.startsWith("$$$")) { if ((keyBuffer.size()) && (entBuffer.size())) { writeEntry(book, keyBuffer, entBuffer); } keyBuffer = lineBuffer; keyBuffer << 3; keyBuffer.trim(); entBuffer.size(0); } else { if (keyBuffer.size()) { entBuffer += lineBuffer; entBuffer += "\n"; } } } while (more); if ((keyBuffer.size()) && (entBuffer.size())) { writeEntry(book, keyBuffer, entBuffer); } delete book; FileMgr::getSystemFileMgr()->close(fd); return 0; }
/**
 * Fetches a directory listing from 'dirURL' and parses it line by line with
 * ftpparse().
 *
 * Entries named "." and ".." are skipped.  On a transfer failure a warning
 * is logged and an empty list is returned.
 *
 * @param dirURL URL of the remote directory
 * @return one DirEntry per successfully parsed line
 */
vector<struct DirEntry> RemoteTransport::getDirList(const char *dirURL) {

	SWLog::getSystemLog()->logDebug("RemoteTransport::getDirList(%s)", dirURL);
	vector<struct DirEntry> dirList;

	SWBuf dirBuf;
	if (!getURL("", dirURL, &dirBuf)) {
		char *start = dirBuf.getRawData();
		char *end = start;
		while (start < (dirBuf.getRawData()+dirBuf.size())) {
			struct ftpparse item;
			bool looking = true;
			// terminate the current line in place, then skip the rest of its CR/LF run
			for (end = start; *end; end++) {
				if (looking) {
					if ((*end == 10) || (*end == 13)) {
						*end = 0;
						looking = false;
					}
				}
				else if ((*end != 10) && (*end != 13))
					break;
			}
			if (end == start) {
				// an embedded NUL in the listing: step over it, otherwise the loop never advances
				start++;
				continue;
			}
			// pointer difference is ptrdiff_t; both %d and ftpparse() expect an int
			const int lineLen = (int)(end - start);
			SWLog::getSystemLog()->logDebug("getDirList: parsing item %s(%d)\n", start, lineLen);
			int status = ftpparse(&item, start, lineLen);
			// in ftpparse.h, there is a warning that name is not necessarily null terminated
			SWBuf name;
			name.append(item.name, item.namelen);
			SWLog::getSystemLog()->logDebug("getDirList: got item %s\n", name.c_str());
			if (status && name != "." && name != "..") {
				struct DirEntry i;
				i.name = name;
				i.size = item.size;
				i.isDirectory = (item.flagtrycwd == 1);
				dirList.push_back(i);
			}
			start = end;
		}
	}
	else {
		SWLog::getSystemLog()->logWarning("getDirList: failed to get dir %s\n", dirURL);
	}
	return dirList;
}
/**
 * Fetches a directory listing from 'dirURL' and parses it line by line with
 * ftpparse().
 *
 * The entry name is copied using ftpparse's explicit length (namelen)
 * rather than being treated as a C string: the pointer refers into the
 * listing line and is not terminated at the end of the name, and it is only
 * used when parsing succeeded.  On a transfer failure a warning is logged
 * and an empty list is returned.
 *
 * @param dirURL URL of the remote directory
 * @return one DirEntry per successfully parsed line
 */
vector<struct DirEntry> FTPTransport::getDirList(const char *dirURL) {

	vector<struct DirEntry> dirList;

	SWBuf dirBuf;
	if (!getURL("", dirURL, &dirBuf)) {
		char *start = dirBuf.getRawData();
		char *end = start;
		while (start < (dirBuf.getRawData()+dirBuf.size())) {
			struct ftpparse item;
			bool looking = true;
			// terminate the current line in place, then skip the rest of its CR/LF run
			for (end = start; *end; end++) {
				if (looking) {
					if ((*end == 10) || (*end == 13)) {
						*end = 0;
						looking = false;
					}
				}
				else if ((*end != 10) && (*end != 13))
					break;
			}
			if (end == start) {
				// an embedded NUL in the listing: step over it, otherwise the loop never advances
				start++;
				continue;
			}
			// pointer difference is ptrdiff_t; both %d and ftpparse() expect an int
			const int lineLen = (int)(end - start);
			SWLog::getSystemLog()->logWarning("FTPURLGetDir: parsing item %s(%d)\n", start, lineLen);
			int status = ftpparse(&item, start, lineLen);

			// copy exactly namelen bytes, and only when the line was parsed
			SWBuf name;
			if (status) {
				name.append(item.name, item.namelen);
			}
			SWLog::getSystemLog()->logWarning("FTPURLGetDir: got item %s\n", name.c_str());
			if (status) {
				struct DirEntry i;
				i.name = name;
				i.size = item.size;
				i.isDirectory = (item.flagtrycwd == 1);
				dirList.push_back(i);
			}
			start = end;
		}
	}
	else {
		SWLog::getSystemLog()->logWarning("FTPURLGetDir: failed to get dir %s\n", dirURL);
	}
	return dirList;
}
int main(int argc, char **argv) { UTF8UTF16 filter; // PapyriPlain filter; // FileDesc *fd = (argc > 1) ? FileMgr::getSystemFileMgr()->open(argv[1], FileMgr::RDONLY) : 0; SWBuf lineBuffer = "This is t<e>xt which has papy-\nri markings in it.\n L[et's be] sure it gets--\n cleaned up well for s(earching)"; std::cout << "Original:\n\n"; while (!fd || FileMgr::getLine(fd, lineBuffer)) { cout << lineBuffer << "\n"; if (!fd) break; } cout << "\n\n-------\n\n"; if (fd) { FileMgr::getSystemFileMgr()->close(fd); fd = FileMgr::getSystemFileMgr()->open(argv[1], FileMgr::RDONLY); } while (!fd || FileMgr::getLine(fd, lineBuffer)) { filter.processText(lineBuffer); for (unsigned int i = 0; i < lineBuffer.size(); i++) { printf("%c", lineBuffer[i]); } cout << "\n"; if (!fd) break; } std::cout << "\n\n+++++++\n"; if (fd) { FileMgr::getSystemFileMgr()->close(fd); } return 0; }
/**
 * cURL debug callback: logs informational text and HTTP headers through the
 * SWORD system log, truncated to 120 bytes.
 *
 * Payload and SSL data notifications are deliberately not logged; those
 * cases fall through to the default label, which returns immediately.
 *
 * @return always 0
 */
static int myhttp_trace(CURL *handle, curl_infotype type, unsigned char *data, size_t size, void *userp) {
	SWBuf header;
	(void)userp; /* prevent compiler warning */
	(void)handle; /* prevent compiler warning */

	switch (type) {
	case CURLINFO_TEXT:
		header = "TEXT";
		break;
	case CURLINFO_HEADER_OUT:
		header = "=> Send header";
		break;
	case CURLINFO_HEADER_IN:
		header = "<= Recv header";
		break;

	// these we don't want to log (HUGE)
	case CURLINFO_DATA_OUT:
		header = "=> Send data";
		// fall through
	case CURLINFO_SSL_DATA_OUT:
		header = "=> Send SSL data";
		// fall through
	case CURLINFO_DATA_IN:
		header = "<= Recv data";
		// fall through
	case CURLINFO_SSL_DATA_IN:
		header = "<= Recv SSL data";
		// fall through
	default: /* in case a new one is introduced to shock us */
		return 0;
	}

	// cap what ends up in the log
	const size_t shown = (size > 120) ? 120 : size;

	SWBuf snippet;
	snippet.size(shown);
	memcpy(snippet.getRawData(), data, shown);
	SWLog::getSystemLog()->logDebug("CURLHTTPTransport: %s: %s", header.c_str(), snippet.c_str());
	return 0;
}
bool TEIHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { // manually process if it wasn't a simple substitution if (!substituteToken(buf, token)) { MyUserData *u = (MyUserData *)userData; XMLTag tag(token); if (!strcmp(tag.getName(), "p")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { // non-empty start tag buf += "<!P><br />"; } else if (tag.isEndTag()) { // end tag buf += "<!/P><br />"; //userData->supressAdjacentWhitespace = true; } else { // empty paragraph break marker buf += "<!P><br />"; //userData->supressAdjacentWhitespace = true; } } // <hi> else if (!strcmp(tag.getName(), "hi")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { SWBuf rend = tag.getAttribute("rend"); u->lastHi = rend; if (rend == "ital") buf += "<i>"; else if (rend == "bold") buf += "<b>"; else if (rend == "sup") buf += "<small><sup>"; } else if (tag.isEndTag()) { SWBuf rend = u->lastHi; if (rend == "ital") buf += "</i>"; else if (rend == "bold") buf += "</b>"; else if (rend == "sup") buf += "</sup></small>"; } } // <entryFree> else if (!strcmp(tag.getName(), "entryFree")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { SWBuf n = tag.getAttribute("n"); if (n != "") { buf += "<b>"; buf += n; buf += "</b>"; } } } // <sense> else if (!strcmp(tag.getName(), "sense")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { SWBuf n = tag.getAttribute("n"); if (n != "") { buf += "<br /><b>"; buf += n; buf += "</b>"; } } } // <div> else if (!strcmp(tag.getName(), "div")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { buf += "<!P>"; } else if (tag.isEndTag()) { } } // <pos>, <gen>, <case>, <gram>, <number>, <mood>, <pron>, <def> else if (!strcmp(tag.getName(), "pos") || !strcmp(tag.getName(), "gen") || !strcmp(tag.getName(), "case") || !strcmp(tag.getName(), "gram") || !strcmp(tag.getName(), "number") || !strcmp(tag.getName(), "pron") /*|| !strcmp(tag.getName(), "def")*/) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { buf += "<i>"; } else if (tag.isEndTag()) { buf += 
"</i>"; } } // <tr> else if (!strcmp(tag.getName(), "tr")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { buf += "<i>"; } else if (tag.isEndTag()) { buf += "</i>"; } } // orth else if (!strcmp(tag.getName(), "orth")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { buf += "<b>"; } else if (tag.isEndTag()) { buf += "</b>"; } } // <etym>, <usg> else if (!strcmp(tag.getName(), "etym") || !strcmp(tag.getName(), "usg")) { // do nothing here } else if (!strcmp(tag.getName(), "ref")) { if (!tag.isEndTag()) { u->suspendTextPassThru = true; SWBuf target; SWBuf work; SWBuf ref; int was_osisref = false; if(tag.getAttribute("osisRef")) { target += tag.getAttribute("osisRef"); was_osisref=true; } else if(tag.getAttribute("target")) target += tag.getAttribute("target"); if(target.size()) { const char* the_ref = strchr(target, ':'); if(!the_ref) { // No work ref = target; } else { // Compensate for starting : ref = the_ref + 1; int size = target.size() - ref.size() - 1; work.setSize(size); strncpy(work.getRawData(), target, size); } if(was_osisref) { buf.appendFormatted("<a href=\"passagestudy.jsp?action=showRef&type=scripRef&value=%s&module=%s\">", (ref) ? URL::encode(ref.c_str()).c_str() : "", (work.size()) ? URL::encode(work.c_str()).c_str() : ""); } else { // Dictionary link, or something buf.appendFormatted("<a href=\"sword://%s/%s\">", (work.size()) ? URL::encode(work.c_str()).c_str() : u->version.c_str(), (ref) ? 
URL::encode(ref.c_str()).c_str() : "" ); } } else { //std::cout << "TARGET WASN'T\n"; } } else { buf += u->lastTextNode.c_str(); buf += "</a>"; u->suspendTextPassThru = false; } } // <note> tag else if (!strcmp(tag.getName(), "note")) { if (!tag.isEndTag()) { if (!tag.isEmpty()) { u->suspendTextPassThru = true; } } if (tag.isEndTag()) { SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=n&value=%s&module=%s&passage=%s\"><small><sup>*n</sup></small></a>", URL::encode(footnoteNumber.c_str()).c_str(), URL::encode(u->version.c_str()).c_str(), URL::encode(u->key->getText()).c_str()); u->suspendTextPassThru = false; } } else { return false; // we still didn't handle token } } return true; }
char GBFStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) { char token[2048]; // cheese. Fix. int tokpos = 0; bool intoken = false; bool lastspace = false; int word = 1; char val[128]; char wordstr[5]; char *valto; unsigned int textStart = 0, textEnd = 0; bool newText = false; SWBuf tmp; const char *from; SWBuf orig = text; from = orig.c_str(); for (text = ""; *from; from++) { if (*from == '<') { intoken = true; tokpos = 0; token[0] = 0; token[1] = 0; token[2] = 0; textEnd = text.size(); continue; } if (*from == '>') { // process tokens intoken = false; if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs if (module->isProcessEntryAttributes()) { valto = val; for (unsigned int i = 1; ((token[i]) && (i < 150)); i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number sprintf(wordstr, "%03d", word++); module->getEntryAttributes()["Word"][wordstr]["PartsCount"] = "1"; module->getEntryAttributes()["Word"][wordstr]["Lemma"] = val; module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = "strong"; tmp = ""; tmp.append(text.c_str()+textStart, (int)(textEnd - textStart)); module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; newText = true; } else { // verb morph sprintf(wordstr, "%03d", word-1); module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "OLBMorph"; } } if (!option) { if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { if (lastspace) text--; } if (newText) {textStart = text.size(); newText = false; } continue; } } if (module->isProcessEntryAttributes()) { if ((*token == 'W') && (token[1] == 'T')) { // Morph valto = val; for (unsigned int i = 2; ((token[i]) && (i < 150)); i++) *valto++ = token[i]; *valto = 0; sprintf(wordstr, "%03d", word-1); 
module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "GBFMorph"; module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; newText = true; } } // if not a strongs token, keep token in text text += '<'; text += token; text += '>'; if (newText) {textStart = text.size(); newText = false; } continue; } if (intoken) { if (tokpos < 2045) token[tokpos++] = *from; token[tokpos+2] = 0; } else { text += *from; lastspace = (*from == ' '); } } return 0; }
int main(int argc, char **argv) { SWBuf program = argv[0]; fprintf(stderr, "You are running %s: $Rev: 2138 $\n", argv[0]); // Let's test our command line arguments if (argc < 3) { usage(*argv); } // variables for arguments, holding defaults SWBuf path = argv[1]; SWBuf teiDoc = argv[2]; SWBuf compType = ""; SWBuf modDrv = ""; SWBuf recommendedPath = "./modules/lexdict/"; SWBuf cipherKey = ""; SWCompress *compressor = 0; for (int i = 3; i < argc; i++) { if (!strcmp(argv[i], "-z")) { if (compType.size()) usage(*argv, "Cannot specify both -z and -Z"); if (modDrv.size()) usage(*argv, "Cannot specify both -z and -s"); compType = "ZIP"; modDrv = "zLD"; recommendedPath += "zld/"; } else if (!strcmp(argv[i], "-Z")) { if (compType.size()) usage(*argv, "Cannot specify both -z and -Z"); if (modDrv.size()) usage(*argv, "Cannot specify both -Z and -s"); compType = "LZSS"; recommendedPath += "zld/"; } else if (!strcmp(argv[i], "-s")) { if (compType.size()) usage(*argv, "Cannot specify both -s and -z or -Z"); if (i+1 < argc) { int size = atoi(argv[++i]); if (size == 2) { modDrv = "RawLD"; recommendedPath += "rawld/"; continue; } if (size == 4) { modDrv = "RawLD4"; recommendedPath += "rawld4/"; continue; } } usage(*argv, "-s requires one of <2|4>"); } else if (!strcmp(argv[i], "-N")) { normalize = false; } else if (!strcmp(argv[i], "-c")) { if (i+1 < argc) cipherKey = argv[++i]; else usage(*argv, "-c requires <cipher_key>"); } else usage(*argv, (((SWBuf)"Unknown argument: ")+ argv[i]).c_str()); } if (!modDrv.size()) { modDrv = "RawLD4"; recommendedPath += "rawld4/"; } #ifndef _ICU_ if (normalize) { normalize = false; cout << program << " is not compiled with support for ICU. Setting -N flag." 
<< endl; } #endif if (compType == "ZIP") { compressor = new ZipCompress(); } else if (compType = "LZSS") { compressor = new LZSSCompress(); } #ifdef DEBUG // cout << "path: " << path << " teiDoc: " << teiDoc << " compressType: " << compType << " ldType: " << modDrv << " cipherKey: " << cipherKey.c_str() << " normalize: " << normalize << "\n"; cout << "path: " << path << " teiDoc: " << teiDoc << " compressType: " << compType << " ldType: " << modDrv << " normalize: " << normalize << "\n"; cout << ""; // exit(-3); #endif SWBuf modName = path; int pathlen = path.length(); char lastChar = path[pathlen - 1]; if (lastChar != '/' && lastChar != '\\') { modName += "/"; } modName += "dict"; SWBuf keyBuf; SWBuf entBuf; SWBuf lineBuf; vector<string> linkBuf; if (modDrv == "zLD") { if (zLD::createModule(modName)) { fprintf(stderr, "error: %s: couldn't create module at path: %s \n", program.c_str(), modName.c_str()); exit(-3); } module = new zLD(modName, 0, 0, 30, compressor); } else if (modDrv == "RawLD") { if (RawLD::createModule(modName)) { fprintf(stderr, "error: %s: couldn't create module at path: %s \n", program.c_str(), modName.c_str()); exit(-3); } module = new RawLD(modName); } else { if (RawLD4::createModule(modName)) { fprintf(stderr, "error: %s: couldn't create module at path: %s \n", program.c_str(), modName.c_str()); exit(-3); } module = new RawLD4(modName); } SWFilter *cipherFilter = 0; if (cipherKey.size()) { fprintf(stderr, "Adding cipher filter with phrase: %s\n", cipherKey.c_str() ); cipherFilter = new CipherFilter(cipherKey.c_str()); module->AddRawFilter(cipherFilter); } if (!module->isWritable()) { fprintf(stderr, "The module is not writable. 
Writing text to it will not work.\nExiting.\n" ); exit(-1); } // Let's see if we can open our input file ifstream infile(teiDoc); if (infile.fail()) { fprintf(stderr, "error: %s: couldn't open input file: %s \n", program.c_str(), teiDoc.c_str()); exit(-2); } currentKey = module->CreateKey(); currentKey->Persist(1); module->setKey(*currentKey); (*module) = TOP; SWBuf token; SWBuf text; bool intoken = false; char curChar = '\0'; while (infile.good()) { curChar = infile.get(); // skip the character if it is bad. infile.good() will catch the problem if (curChar == -1) { continue; } if (!intoken && curChar == '<') { intoken = true; token = "<"; continue; } if (intoken && curChar == '>') { intoken = false; token.append('>'); XMLTag *t = new XMLTag(token.c_str()); if (!handleToken(text, t)) { text.append(*t); } delete t; continue; } if (intoken) token.append(curChar); else switch (curChar) { case '>' : text.append(">"); break; case '<' : text.append("<"); break; default : text.append(curChar); break; } } // Force the last entry from the text buffer. //text = ""; //writeEntry(*currentKey, text); delete module; delete currentKey; if (cipherFilter) delete cipherFilter; infile.close(); #ifdef _ICU_ if (converted) fprintf(stderr, "tei2mod converted %d verses to UTF-8\n", converted); if (normalized) fprintf(stderr, "tei2mod normalized %d verses to NFC\n", normalized); #endif /* * Suggested module name detection. * Only used for suggesting a conf. * * Various forms of path. * . and .. - no module name given, use "dict". * Or one of the following where z is the module name * and x may be . or .. 
* z * x/y/z * x/y/z/ * x/y/z/z */ SWBuf suggestedModuleName = path; if (lastChar == '/' || lastChar == '\\') { suggestedModuleName.setSize(--pathlen); } lastChar = suggestedModuleName[pathlen - 1]; if (lastChar == '.') { suggestedModuleName = "???"; } else { /* At this point the suggestion is either * what follows the last / or \ * or the entire string */ const char *m = strrchr(suggestedModuleName.c_str(), '/'); if (!m) { m = strrchr(suggestedModuleName.c_str(), '\\'); } if (m) { suggestedModuleName = m+1; } } recommendedPath += suggestedModuleName; recommendedPath += "/dict"; fprintf(stderr, "\nSuggested conf (replace ??? with appropriate values)\n\n"); fprintf(stderr, "[%s]\n", suggestedModuleName.c_str()); fprintf(stderr, "DataPath=%s\n", recommendedPath.c_str()); fprintf(stderr, "Description=???\n"); fprintf(stderr, "SourceType=TEI\n"); fprintf(stderr, "Encoding=%s\n", (normalize ? "UTF-8" : "???")); fprintf(stderr, "ModDrv=%s\n", modDrv.c_str()); if (compressor) { fprintf(stderr, "CompressType=%s\n", compType.c_str()); } if (cipherKey.size()) { fprintf(stderr, "CipherKey=%s\n", cipherKey.c_str()); } }
void SWMgr::findConfig(char *configType, char **prefixPath, char **configPath, std::list<SWBuf> *augPaths, SWConfig **providedSysConf) { SWBuf path; SWBuf sysConfPath; ConfigEntMap::iterator entry; ConfigEntMap::iterator lastEntry; SWConfig *sysConf = 0; SWBuf sysConfDataPath = ""; *configType = 0; SWBuf homeDir = getHomeDir(); // check for a sysConf passed in to us SWLog::getSystemLog()->logDebug("Checking for provided SWConfig(\"sword.conf\")..."); if (providedSysConf && *providedSysConf) { sysConf = *providedSysConf; SWLog::getSystemLog()->logDebug("found."); } // if we haven't been given our datapath in a sysconf, we need to track it down if (!sysConf) { // check working directory SWLog::getSystemLog()->logDebug("Checking working directory for sword.conf..."); if (FileMgr::existsFile(".", "sword.conf")) { SWLog::getSystemLog()->logDebug("Overriding any systemwide or ~/.sword/ sword.conf with one found in current directory."); sysConfPath = "./sword.conf"; sysConf = new SWConfig(sysConfPath); if ((entry = sysConf->Sections["Install"].find("DataPath")) != sysConf->Sections["Install"].end()) { sysConfDataPath = (*entry).second; } if (providedSysConf) { *providedSysConf = sysConf; } else { delete sysConf; sysConf = 0; } } if (!sysConfDataPath.size()) { SWLog::getSystemLog()->logDebug("Checking working directory for mods.conf..."); if (FileMgr::existsFile(".", "mods.conf")) { SWLog::getSystemLog()->logDebug("found."); stdstr(prefixPath, "./"); stdstr(configPath, "./mods.conf"); return; } SWLog::getSystemLog()->logDebug("Checking working directory for mods.d..."); if (FileMgr::existsDir(".", "mods.d")) { SWLog::getSystemLog()->logDebug("found."); stdstr(prefixPath, "./"); stdstr(configPath, "./mods.d"); *configType = 1; return; } // check working directory ../library/ SWLog::getSystemLog()->logDebug("Checking working directory ../library/ for mods.d..."); if (FileMgr::existsDir("../library", "mods.d")) { SWLog::getSystemLog()->logDebug("found."); stdstr(prefixPath, 
"../library/"); stdstr(configPath, "../library/mods.d"); *configType = 1; return; } // check environment variable SWORD_PATH SWLog::getSystemLog()->logDebug("Checking $SWORD_PATH..."); SWBuf envsworddir = getenv("SWORD_PATH"); if (envsworddir.length()) { SWLog::getSystemLog()->logDebug("found (%s).", envsworddir.c_str()); path = envsworddir; if ((envsworddir[envsworddir.length()-1] != '\\') && (envsworddir[envsworddir.length()-1] != '/')) path += "/"; SWLog::getSystemLog()->logDebug("Checking $SWORD_PATH for mods.conf..."); if (FileMgr::existsFile(path.c_str(), "mods.conf")) { SWLog::getSystemLog()->logDebug("found."); stdstr(prefixPath, path.c_str()); path += "mods.conf"; stdstr(configPath, path.c_str()); return; } SWLog::getSystemLog()->logDebug("Checking $SWORD_PATH for mods.d..."); if (FileMgr::existsDir(path.c_str(), "mods.d")) { SWLog::getSystemLog()->logDebug("found."); stdstr(prefixPath, path.c_str()); path += "mods.d"; stdstr(configPath, path.c_str()); *configType = 1; return; } } // check for systemwide globalConfPath SWLog::getSystemLog()->logDebug("Parsing %s...", globalConfPath); char *globPaths = 0; char *gfp; stdstr(&globPaths, globalConfPath); for (gfp = strtok(globPaths, ":"); gfp; gfp = strtok(0, ":")) { SWLog::getSystemLog()->logDebug("Checking for %s...", gfp); if (FileMgr::existsFile(gfp)) { SWLog::getSystemLog()->logDebug("found."); break; } } if (gfp) sysConfPath = gfp; delete [] globPaths; if (homeDir.length()) { SWBuf tryPath = homeDir; tryPath += ".sword/sword.conf"; if (FileMgr::existsFile(tryPath)) { SWLog::getSystemLog()->logDebug("Overriding any systemwide sword.conf with one found in users home directory (%s)", tryPath.c_str()); sysConfPath = tryPath; } else { SWBuf tryPath = homeDir; tryPath += "sword/sword.conf"; if (FileMgr::existsFile(tryPath)) { SWLog::getSystemLog()->logDebug("Overriding any systemwide sword.conf with one found in users home directory (%s)", tryPath.c_str()); sysConfPath = tryPath; } } } } } if (!sysConf && 
sysConfPath.size()) { sysConf = new SWConfig(sysConfPath); } if (sysConf) { if ((entry = sysConf->Sections["Install"].find("DataPath")) != sysConf->Sections["Install"].end()) { sysConfDataPath = (*entry).second; } if (sysConfDataPath.size()) { if ((!sysConfDataPath.endsWith("\\")) && (!sysConfDataPath.endsWith("/"))) sysConfDataPath += "/"; path = sysConfDataPath; SWLog::getSystemLog()->logDebug("DataPath in %s is set to %s.", sysConfPath.c_str(), path.c_str()); SWLog::getSystemLog()->logDebug("Checking for mods.conf in DataPath..."); if (FileMgr::existsFile(path.c_str(), "mods.conf")) { SWLog::getSystemLog()->logDebug("found."); stdstr(prefixPath, path.c_str()); path += "mods.conf"; stdstr(configPath, path.c_str()); *configType = 1; } SWLog::getSystemLog()->logDebug("Checking for mods.d in DataPath..."); if (FileMgr::existsDir(path.c_str(), "mods.d")) { SWLog::getSystemLog()->logDebug("found."); stdstr(prefixPath, path.c_str()); path += "mods.d"; stdstr(configPath, path.c_str()); *configType = 1; } } } // do some extra processing of sysConf if we have one if (sysConf) { if (augPaths) { augPaths->clear(); entry = sysConf->Sections["Install"].lower_bound("AugmentPath"); lastEntry = sysConf->Sections["Install"].upper_bound("AugmentPath"); for (;entry != lastEntry; entry++) { path = entry->second; if ((entry->second.c_str()[strlen(entry->second.c_str())-1] != '\\') && (entry->second.c_str()[strlen(entry->second.c_str())-1] != '/')) path += "/"; augPaths->push_back(path); } } if (providedSysConf) { *providedSysConf = sysConf; } else delete sysConf; } if (*configType) return; // WE STILL HAVEN'T FOUND A CONFIGURATION. LET'S LOOK IN SOME OS SPECIFIC // LOCATIONS // // for various flavors of windoze... 
// check %ALLUSERSPROFILE%/Application Data/sword/ SWLog::getSystemLog()->logDebug("Checking $ALLUSERSPROFILE/Application Data/sword/..."); SWBuf envallusersdir = getenv("ALLUSERSPROFILE"); if (envallusersdir.length()) { SWLog::getSystemLog()->logDebug("found (%s).", envallusersdir.c_str()); path = envallusersdir; if ((!path.endsWith("\\")) && (!path.endsWith("/"))) path += "/"; path += "Application Data/sword/"; SWLog::getSystemLog()->logDebug("Checking %s for mods.d...", path.c_str()); if (FileMgr::existsDir(path.c_str(), "mods.d")) { SWLog::getSystemLog()->logDebug("found."); stdstr(prefixPath, path.c_str()); path += "mods.d"; stdstr(configPath, path.c_str()); *configType = 1; return; } } // for Mac OSX... // check $HOME/Library/Application Support/Sword/ SWLog::getSystemLog()->logDebug("Checking $HOME/Library/Application Support/Sword/..."); SWBuf pathCheck = getHomeDir(); if (pathCheck.length()) { SWLog::getSystemLog()->logDebug("found (%s).", pathCheck.c_str()); path = pathCheck; if ((!path.endsWith("\\")) && (!path.endsWith("/"))) path += "/"; SWLog::getSystemLog()->logDebug("Checking %s for mods.d...", path.c_str()); if (FileMgr::existsDir(path.c_str(), "mods.d")) { SWLog::getSystemLog()->logDebug("found."); stdstr(prefixPath, path.c_str()); path += "mods.d"; stdstr(configPath, path.c_str()); *configType = 1; return; } } // FINALLY CHECK PERSONAL HOME DIRECTORY LOCATIONS // check ~/.sword/ SWLog::getSystemLog()->logDebug("Checking home directory for ~/.sword..."); if (homeDir.length()) { path = homeDir; path += ".sword/"; SWLog::getSystemLog()->logDebug(" Checking for %smods.conf...", path.c_str()); if (FileMgr::existsFile(path.c_str(), "mods.conf")) { SWLog::getSystemLog()->logDebug("found."); stdstr(prefixPath, path.c_str()); path += "mods.conf"; stdstr(configPath, path.c_str()); return; } SWLog::getSystemLog()->logDebug(" Checking for %smods.d...", path.c_str()); if (FileMgr::existsDir(path.c_str(), "mods.d")) { SWLog::getSystemLog()->logDebug("found."); 
stdstr(prefixPath, path.c_str()); path += "mods.d"; stdstr(configPath, path.c_str()); *configType = 2; return; } path = homeDir; path += "sword/"; SWLog::getSystemLog()->logDebug(" Checking for %smods.d...", path.c_str()); if (FileMgr::existsDir(path.c_str(), "mods.d")) { SWLog::getSystemLog()->logDebug("found."); stdstr(prefixPath, path.c_str()); path += "mods.d"; stdstr(configPath, path.c_str()); *configType = 2; return; } } }
int main(int argc, char **argv) { std::vector<string> linkbuffer; signed long i = 0; string keybuffer; string entbuffer; string linebuffer; char links = 0; string modname; SWBuf outPath = ""; bool append = false; long blockCount = 30; bool caseSensitive = false; SWCompress *compressor = 0; SWBuf compType = ""; bool fourByteSize = false; if (argc < 2) usage(*argv); const char *progName = argv[0]; const char *inFileName = argv[1]; for (int i = 2; i < argc; i++) { if (!strcmp(argv[i], "-a")) { append = true; } else if (!strcmp(argv[i], "-z")) { if (compType.size()) usage(*argv, "Cannot specify both -z and -Z"); if (fourByteSize) usage(*argv, "Cannot specify both -z and -4"); compType = "ZIP"; } else if (!strcmp(argv[i], "-Z")) { if (compType.size()) usage(*argv, "Cannot specify both -z and -Z"); if (fourByteSize) usage(*argv, "Cannot specify both -Z and -4"); compType = "LZSS"; } else if (!strcmp(argv[i], "-4")) { fourByteSize = true; } else if (!strcmp(argv[i], "-b")) { if (i+1 < argc) { blockCount = atoi(argv[++i]); if (blockCount > 0) continue; } usage(*argv, "-b requires in entry count integer > 0"); } else if (!strcmp(argv[i], "-o")) { if (i+1 < argc) outPath = argv[++i]; else usage(progName, "-o requires <output_path>"); } else if (!strcmp(argv[i], "-s")) { caseSensitive = true; } else usage(progName, (((SWBuf)"Unknown argument: ")+ argv[i]).c_str()); } if (outPath.size() < 1) { for (i = 0; (i < 16) && (inFileName[i]) && (inFileName[i] != '.'); i++) { outPath += inFileName[i]; } } std::ifstream infile(inFileName); SWModule *mod = 0; SWKey *key, *linkKey; if (compType == "ZIP") { #ifndef EXCLUDEZLIB compressor = new ZipCompress(); #else usage(*argv, "ERROR: SWORD library not compiled with ZIP compression support.\n\tBe sure libzip is available when compiling SWORD library"); #endif } else if (compType == "LZSS") { compressor = new LZSSCompress(); } // setup module if (!append) { if (compressor) { if (zLD::createModule(outPath)) { fprintf(stderr, "ERROR: %s: 
couldn't create module at path: %s \n", *argv, outPath.c_str()); exit(-1); } } else { if (!fourByteSize) RawLD::createModule(outPath); else RawLD4::createModule(outPath); } } if (compressor) { // Create a compressed text module allowing very large entries // Taking defaults except for first, fourth, fifth and last argument mod = new zLD(outPath, 0, 0, blockCount, compressor, 0, ENC_UNKNOWN, DIRECTION_LTR, FMT_UNKNOWN, 0, caseSensitive); } else { mod = (!fourByteSize) ? (SWModule *)new RawLD (outPath, 0, 0, 0, ENC_UNKNOWN, DIRECTION_LTR, FMT_UNKNOWN, 0, caseSensitive) : (SWModule *)new RawLD4(outPath, 0, 0, 0, ENC_UNKNOWN, DIRECTION_LTR, FMT_UNKNOWN, 0, caseSensitive); } key = mod->createKey(); linkKey = mod->createKey(); key->setPersist(true); mod->setKey(key); while (!infile.eof()) { std::getline(infile, linebuffer); if (linebuffer.size() > 3 && linebuffer.substr(0,3) == "$$$") { if (keybuffer.size() && entbuffer.size()) { std::cout << keybuffer << std::endl; *key = keybuffer.c_str(); mod->setEntry(entbuffer.c_str(), entbuffer.size()); for (i = 0; i < links; i++) { std::cout << "Linking: " << linkbuffer[i] << std::endl; *linkKey = linkbuffer[i].c_str(); mod->linkEntry(linkKey); } } if (linebuffer.size() > 3) keybuffer = linebuffer.substr(3,linebuffer.size()); entbuffer.resize(0); linkbuffer.clear(); links = 0; } else if (linebuffer.size() > 3 && linebuffer.substr(0,3) == "%%%") { linkbuffer.push_back(linebuffer.substr(3,linebuffer.size())); links++; } else { entbuffer += linebuffer; } } //handle final entry if (keybuffer.size() && entbuffer.size()) { std::cout << keybuffer << std::endl; *key = keybuffer.c_str(); mod->setEntry(entbuffer.c_str(), entbuffer.size()); for (i = 0; i < links; i++) { std::cout << "Linking: " << linkbuffer[i] << std::endl; *linkKey = linkbuffer[i].c_str(); mod->linkEntry(linkKey); } } infile.close(); delete linkKey; delete key; delete mod; return 0; }
char ThMLWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *module) { if (option) { char token[2112]; // cheese. Fix. int tokpos = 0; bool intoken = false; int word = 1; char val[128]; char *valto; char *ch; char wordstr[5]; unsigned int textStart = 0, lastAppendLen = 0, textEnd = 0; SWBuf tmp; bool newText = false; bool needWordOut = false; AttributeValue *wordAttrs = 0; SWBuf modName = (module)?module->getName():""; SWBuf wordSrcPrefix = modName; const SWBuf orig = text; const char * from = orig.c_str(); VerseKey *vkey = 0; if (key) { vkey = SWDYNAMIC_CAST(VerseKey, key); } for (text = ""; *from; from++) { if (*from == '<') { intoken = true; tokpos = 0; token[0] = 0; token[1] = 0; token[2] = 0; textEnd = text.length(); continue; } if (*from == '>') { // process tokens intoken = false; if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number sprintf(wordstr, "%03d", word++); needWordOut = (word > 2); wordAttrs = &(module->getEntryAttributes()["Word"][wordstr]); (*wordAttrs)["Strongs"] = val; //printf("Adding: [\"Word\"][%s][\"Strongs\"] = %s\n", wordstr, val); tmp = ""; tmp.append(text.c_str()+textStart, (int)(textEnd - textStart)); (*wordAttrs)["Text"] = tmp; text.append("</span>"); SWBuf ts; ts.appendFormatted("%d", textStart); (*wordAttrs)["TextStart"] = ts; //printf("Adding: [\"Word\"][%s][\"Text\"] = %s\n", wordstr, tmp.c_str()); newText = true; } else { // verb morph (*wordAttrs)["Morph"] = val; //printf("Adding: [\"Word\"][%s][\"Morph\"] = %s\n", wordstr, val); } } if (!strncmp(token, "sync type=\"morph\"", 17)) { for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; (*wordAttrs)["MorphClass"] = val; //printf("Adding: [\"Word\"][%s][\"MorphClass\"] 
= %s\n", wordstr, val); } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; (*wordAttrs)["Morph"] = val; //printf("Adding: [\"Word\"][%s][\"Morph\"] = %s\n", wordstr, val); } } newText = true; } // if not a strongs token, keep token in text text += '<'; text += token; text += '>'; if (needWordOut) { char wstr[10]; sprintf(wstr, "%03d", word-2); AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]); needWordOut = false; SWBuf strong = (*wAttrs)["Strongs"]; SWBuf morph = (*wAttrs)["Morph"]; SWBuf morphClass = (*wAttrs)["MorphClass"]; SWBuf wordText = (*wAttrs)["Text"]; SWBuf textSt = (*wAttrs)["TextStart"]; if (strong.size()) { char gh = 0; gh = isdigit(strong[0]) ? 0:strong[0]; if (!gh) { if (vkey) { gh = vkey->getTestament() ? 'H' : 'G'; } } else strong << 1; SWModule *sLex = 0; SWModule *sMorph = 0; if (gh == 'G') { sLex = defaultGreekLex; sMorph = defaultGreekParse; } if (gh == 'H') { sLex = defaultHebLex; sMorph = defaultHebParse; } SWBuf lexName = ""; if (sLex) { // we can pass the real lex name in, but we have some // aliases in the javascript to optimize bandwidth lexName = sLex->getName(); if (lexName == "StrongsGreek") lexName = "G"; if (lexName == "StrongsHebrew") lexName = "H"; } SWBuf wordID; if (vkey) { // optimize for bandwidth and use only the verse as the unique entry id wordID.appendFormatted("%d", vkey->getVerse()); } else { wordID = key->getText(); } for (unsigned int i = 0; i < wordID.size(); i++) { if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) { wordID[i] = '_'; } } wordID.appendFormatted("_%s%d", wordSrcPrefix.c_str(), atoi(wstr)); if (textSt.size()) { int textStr = atoi(textSt.c_str()); textStr += lastAppendLen; SWBuf spanStart = ""; if (!sMorph) sMorph = 0; // avoid unused warnings for now /* if (sMorph) { SWBuf popMorph = "<a onclick=\""; popMorph.appendFormatted("p(\'%s\',\'%s\','%s','');\" >%s</a>", sMorph->getName(), morph.c_str(), 
wordID.c_str(), morph.c_str()); morph = popMorph; } */ // 'p' = 'fillpop' to save bandwidth const char *m = strchr(morph.c_str(), ':'); if (m) m++; else m = morph.c_str(); spanStart.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','','%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str()); text.insert(textStr, spanStart); lastAppendLen = spanStart.length(); } } } if (newText) { textStart = text.length(); newText = false; } continue; } if (intoken) { if (tokpos < 2045) { token[tokpos++] = *from; // TODO: why is this + 2 ? token[tokpos+2] = 0; } } else { text += *from; } } char wstr[10]; sprintf(wstr, "%03d", word-1); AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]); needWordOut = false; SWBuf strong = (*wAttrs)["Strongs"]; SWBuf morph = (*wAttrs)["Morph"]; SWBuf morphClass = (*wAttrs)["MorphClass"]; SWBuf wordText = (*wAttrs)["Text"]; SWBuf textSt = (*wAttrs)["TextStart"]; if (strong.size()) { char gh = 0; gh = isdigit(strong[0]) ? 0:strong[0]; if (!gh) { if (vkey) { gh = vkey->getTestament() ? 
'H' : 'G'; } } else strong << 1; SWModule *sLex = 0; if (gh == 'G') { sLex = defaultGreekLex; } if (gh == 'H') { sLex = defaultHebLex; } SWBuf lexName = ""; if (sLex) { // we can pass the real lex name in, but we have some // aliases in the javascript to optimize bandwidth lexName = sLex->getName(); if (lexName == "StrongsGreek") lexName = "G"; if (lexName == "StrongsHebrew") lexName = "H"; } SWBuf wordID; if (vkey) { // optimize for bandwidth and use only the verse as the unique entry id wordID.appendFormatted("%d", vkey->getVerse()); } else { wordID = key->getText(); } for (unsigned int i = 0; i < wordID.size(); i++) { if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) { wordID[i] = '_'; } } wordID.appendFormatted("_%s%d", wordSrcPrefix.c_str(), atoi(wstr)); if (textSt.size()) { int textStr = atoi(textSt.c_str()); textStr += lastAppendLen; SWBuf spanStart = ""; // 'p' = 'fillpop' to save bandwidth const char *m = strchr(morph.c_str(), ':'); if (m) m++; else m = morph.c_str(); spanStart.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','','%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str()); text.insert(textStr, spanStart); } } } return 0; }
/**
 * Stores one entry in a general-book module.
 *
 * The key is optionally run through the Greek accents filter, upper-cased,
 * and (ICU builds only, when lexLevels is set) prefixed with nested
 * "<upper-cased prefix>/" path levels.  If the resulting key already exists
 * in the book, " {2}", " {3}", ... is appended until a free key is found.
 *
 * @param book       module to write to
 * @param keyBuffer  key text (taken by value; modified locally)
 * @param entBuffer  entry body
 */
void writeEntry(SWModule *book, SWBuf keyBuffer, SWBuf entBuffer) {

	if (greekFilter) {
		greekAccentsFilter.processText(keyBuffer);
	}

	if (toUpper) {
		// upper-casing may lengthen the UTF-8 text, so give it room to grow
		unsigned size = (keyBuffer.size()+5)*3;
		keyBuffer.setFillByte(0);
		keyBuffer.resize(size);
		StringMgr::getSystemStringMgr()->upperUTF8(keyBuffer.getRawData(), size-2);
	}

// Added for Hesychius, but this stuff should be pushed back into new StringMgr
// functionality
#ifdef _ICU_
//	if (lexLevels) {
	if (lexLevels && !keyBuffer.startsWith("/Intro")) {
		unsigned size = (keyBuffer.size()+(lexLevels*2));
		keyBuffer.setFillByte(0);
		keyBuffer.resize(size);

		UErrorCode err = U_ZERO_ERROR;

		int max = (size+5)*3;
		UChar *ubuffer = new UChar[max+10];
		int32_t len;

		u_strFromUTF8(ubuffer, max+9, &len, keyBuffer.c_str(), -1, &err);
		if (err == U_ZERO_ERROR) {
			// upper-case the first lexLevels code units and put them,
			// followed by '/', in front of the key
			UChar *upper = new UChar[(lexLevels+1)*3];
			memcpy(upper, ubuffer, lexLevels*sizeof(UChar));
			upper[lexLevels] = 0;
			len = u_strToUpper(upper, (lexLevels+1)*3, upper, -1, 0, &err);
			memmove(ubuffer+len+1, ubuffer, (max-len)*sizeof(UChar));
			memcpy(ubuffer, upper, len*sizeof(UChar));
			ubuffer[len] = '/';
			delete [] upper;

			// insert one more '/' for every shorter prefix level
			for (int i = lexLevels-1; i; i--) {
				int shift = (i < len)? i : len;
				memmove(ubuffer+(shift+1), ubuffer, (max-shift)*sizeof(UChar));
				ubuffer[shift] = '/';
			}

			// The conversion below is told the destination holds 'max'
			// bytes, so make sure it really does before writing into it.
			keyBuffer.resize(max);
			u_strToUTF8(keyBuffer.getRawData(), max, 0, ubuffer, -1, &err);
		}

/*
		u_strFromUTF8(ubuffer, max+9, &len, keyBuffer.c_str(), -1, &err);
		if (err == U_ZERO_ERROR) {
			int totalShift = 0;
			for (int i = lexLevels; i; i--) {
				int shift = (i < len)? i : len;
				memmove(ubuffer+(shift+1), ubuffer, (max-shift)*sizeof(UChar));
				ubuffer[shift] = '/';
				totalShift += (shift+1);
			}
			UChar *upper = new UChar[(totalShift+1)*3];
			memcpy(upper, ubuffer, totalShift*sizeof(UChar));
			upper[totalShift] = 0;
			len = u_strToUpper(upper, (totalShift+1)*3, upper, -1, 0, &err);
			memmove(ubuffer+len, ubuffer+totalShift, (max-totalShift)*sizeof(UChar));
			memcpy(ubuffer, upper, len*sizeof(UChar));
			delete [] upper;
			u_strToUTF8(keyBuffer.getRawData(), max, 0, ubuffer, -1, &err);
		}
*/

		delete [] ubuffer;
	}
#endif

	std::cout << keyBuffer << std::endl;

	book->setKey(keyBuffer.c_str());

	// check to see if we already have an entry
	// (setting a key that is not in the book yet reports KEYERR_OUTOFBOUNDS)
	for (int i = 2; book->getKey()->popError() != KEYERR_OUTOFBOUNDS; i++) {
		SWBuf key;
		key.setFormatted("%s {%d}", keyBuffer.c_str(), i);
		std::cout << "dup key, trying: " << key << std::endl;
		book->setKey(key.c_str());
	}

	book->setEntry(entBuffer);
}
char OSISWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *module) { if (option) { char token[2112]; // cheese. Fix. int tokpos = 0; bool intoken = false; int wordNum = 1; char wordstr[5]; SWBuf modName = (module)?module->getName():""; // add TR to w src in KJV then remove this next line SWBuf wordSrcPrefix = (modName == "KJV")?SWBuf("TR"):modName; VerseKey *vkey = 0; if (key) { vkey = SWDYNAMIC_CAST(VerseKey, key); } const SWBuf orig = text; const char * from = orig.c_str(); for (text = ""; *from; ++from) { if (*from == '<') { intoken = true; tokpos = 0; token[0] = 0; token[1] = 0; token[2] = 0; continue; } if (*from == '>') { // process tokens intoken = false; if ((*token == 'w') && (token[1] == ' ')) { // Word XMLTag wtag(token); sprintf(wordstr, "%03d", wordNum); SWBuf lemmaClass; SWBuf lemma; SWBuf morph; SWBuf page; SWBuf src; char gh = 0; page = module->getEntryAttributes()["Word"][wordstr]["Page"].c_str(); if (page.length()) page = (SWBuf)"p:" + page; int count = atoi(module->getEntryAttributes()["Word"][wordstr]["PartCount"].c_str()); for (int i = 0; i < count; i++) { // for now, lemma class can just be equal to last lemma class in multi part word SWBuf tmp = "LemmaClass"; if (count > 1) tmp.appendFormatted(".%d", i+1); lemmaClass = module->getEntryAttributes()["Word"][wordstr][tmp]; tmp = "Lemma"; if (count > 1) tmp.appendFormatted(".%d", i+1); tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str()); // if we're strongs, if (lemmaClass == "strong") { gh = tmp[0]; tmp << 1; } if (lemma.size()) lemma += "|"; lemma += tmp; tmp = "Morph"; if (count > 1) tmp.appendFormatted(".%d", i+1); tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str()); if (morph.size()) morph += "|"; morph += tmp; tmp = "Src"; if (count > 1) tmp.appendFormatted(".%d", i+1); tmp = (module->getEntryAttributes()["Word"][wordstr][tmp].c_str()); if (!tmp.length()) tmp.appendFormatted("%d", wordNum); tmp.insert(0, wordSrcPrefix); if (src.size()) src += 
"|"; src += tmp; } SWBuf lexName = ""; // we can pass the real lex name in, but we have some // aliases in the javascript to optimize bandwidth if ((gh == 'G') && (defaultGreekLex)) { lexName = (!strcmp(defaultGreekLex->getName(), "StrongsGreek"))?"G":defaultGreekLex->getName(); } else if ((gh == 'H') && (defaultHebLex)) { lexName = (!strcmp(defaultHebLex->getName(), "StrongsHebrew"))?"H":defaultHebLex->getName(); } SWBuf xlit = wtag.getAttribute("xlit"); if ((lemmaClass != "strong") && (xlit.startsWith("betacode:"))) { lexName = "betacode"; // const char *m = strchr(xlit.c_str(), ':'); // strong = ++m; } SWBuf wordID; if (vkey) { // optimize for bandwidth and use only the verse as the unique entry id wordID.appendFormatted("%d", vkey->getVerse()); } else { wordID = key->getText(); } wordID.appendFormatted("_%s", src.c_str()); // clean up our word ID for XHTML for (unsigned int i = 0; i < wordID.size(); i++) { if ((!isdigit(wordID[i])) && (!isalpha(wordID[i]))) { wordID[i] = '_'; } } // 'p' = 'fillpop' to save bandwidth text.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','%s','%s');\" >", lexName.c_str(), lemma.c_str(), wordID.c_str(), morph.c_str(), page.c_str(), modName.c_str()); wordNum++; if (wtag.isEmpty()) { text += "</w></span>"; } } if ((*token == '/') && (token[1] == 'w') && option) { // Word text += "</w></span>"; continue; } // if not a strongs token, keep token in text text.append('<'); text.append(token); text.append('>'); continue; } if (intoken) { if (tokpos < 2045) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { text.append(*from); } } } return 0; }
char OSISStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *module) { SWBuf token; bool intoken = false; int wordNum = 1; char wordstr[5]; const char *wordStart = 0; SWBuf page = ""; // some modules include <seg> page info, so we add these to the words const SWBuf orig = text; const char * from = orig.c_str(); for (text = ""; *from; ++from) { if (*from == '<') { intoken = true; token = ""; continue; } if (*from == '>') { // process tokens intoken = false; // possible page seg -------------------------------- if (token.startsWith("seg ")) { XMLTag stag(token); SWBuf type = stag.getAttribute("type"); if (type == "page") { SWBuf number = stag.getAttribute("subtype"); if (number.length()) { page = number; } } } // --------------------------------------------------- if (token.startsWith("w ")) { // Word XMLTag wtag(token); if (module->isProcessEntryAttributes()) { wordStart = from+1; char gh = 0; VerseKey *vkey = 0; if (key) { vkey = SWDYNAMIC_CAST(VerseKey, key); } SWBuf lemma = ""; SWBuf morph = ""; SWBuf src = ""; SWBuf morphClass = ""; SWBuf lemmaClass = ""; const char *attrib; sprintf(wordstr, "%03d", wordNum); // why is morph entry attribute processing done in here? Well, it's faster. It makes more local sense to place this code in osismorph. // easier to keep lemma and morph in same wordstr number too maybe. if ((attrib = wtag.getAttribute("morph"))) { int count = wtag.getAttributePartCount("morph", ' '); int i = (count > 1) ? 
0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 do { SWBuf mClass = ""; SWBuf mp = ""; attrib = wtag.getAttribute("morph", i, ' '); if (i < 0) i = 0; // to handle our -1 condition const char *m = strchr(attrib, ':'); if (m) { int len = m-attrib; mClass.append(attrib, len); attrib += (len+1); } if ((mClass == "x-Robinsons") || (mClass == "x-Robinson") || (mClass == "Robinson")) { mClass = "robinson"; } if (i) { morphClass += " "; morph += " "; } mp += attrib; morphClass += mClass; morph += mp; if (count > 1) { SWBuf tmp; tmp.setFormatted("Morph.%d", i+1); module->getEntryAttributes()["Word"][wordstr][tmp] = mp; tmp.setFormatted("MorphClass.%d", i+1); module->getEntryAttributes()["Word"][wordstr][tmp] = mClass; } } while (++i < count); } if ((attrib = wtag.getAttribute("lemma"))) { int count = wtag.getAttributePartCount("lemma", ' '); int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 do { gh = 0; SWBuf lClass = ""; SWBuf l = ""; attrib = wtag.getAttribute("lemma", i, ' '); if (i < 0) i = 0; // to handle our -1 condition const char *m = strchr(attrib, ':'); if (m) { int len = m-attrib; lClass.append(attrib, len); attrib += (len+1); } if ((lClass == "x-Strongs") || (lClass == "strong") || (lClass == "Strong")) { if (isdigit(attrib[0])) { if (vkey) { gh = vkey->getTestament() ? 'H' : 'G'; } } else { gh = *attrib; attrib++; } lClass = "strong"; } if (gh) l += gh; l += attrib; if (i) { lemmaClass += " "; lemma += " "; } lemma += l; lemmaClass += lClass; if (count > 1) { SWBuf tmp; tmp.setFormatted("Lemma.%d", i+1); module->getEntryAttributes()["Word"][wordstr][tmp] = l; tmp.setFormatted("LemmaClass.%d", i+1); module->getEntryAttributes()["Word"][wordstr][tmp] = lClass; } } while (++i < count); module->getEntryAttributes()["Word"][wordstr]["PartCount"].setFormatted("%d", count); } if ((attrib = wtag.getAttribute("src"))) { int count = wtag.getAttributePartCount("src", ' '); int i = (count > 1) ? 
0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 do { SWBuf mp = ""; attrib = wtag.getAttribute("src", i, ' '); if (i < 0) i = 0; // to handle our -1 condition if (i) src += " "; mp += attrib; src += mp; if (count > 1) { SWBuf tmp; tmp.setFormatted("Src.%d", i+1); module->getEntryAttributes()["Word"][wordstr][tmp] = mp; } } while (++i < count); } if (lemma.length()) module->getEntryAttributes()["Word"][wordstr]["Lemma"] = lemma; if (lemmaClass.length()) module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = lemmaClass; if (morph.length()) module->getEntryAttributes()["Word"][wordstr]["Morph"] = morph; if (morphClass.length()) module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = morphClass; if (src.length()) module->getEntryAttributes()["Word"][wordstr]["Src"] = src; if (page.length()) module->getEntryAttributes()["Word"][wordstr]["Page"] = page; if (wtag.isEmpty()) { int j; for (j = token.length()-1; ((j>0) && (strchr(" /", token[j]))); j--); token.size(j+1); } token += " wn=\""; token += wordstr; token += "\""; if (wtag.isEmpty()) { token += "/"; } wordNum++; } if (!option) { /* * Code which handles multiple lemma types. Kindof works but breaks at least WEBIF filters for strongs. 
* int count = wtag.getAttributePartCount("lemma", ' '); for (int i = 0; i < count; i++) { SWBuf a = wtag.getAttribute("lemma", i, ' '); const char *prefix = a.stripPrefix(':'); if ((prefix) && (!strcmp(prefix, "x-Strongs") || !strcmp(prefix, "strong") || !strcmp(prefix, "Strong"))) { // remove attribute part wtag.setAttribute("lemma", 0, i, ' '); i--; count--; } } * Instead the codee below just removes the lemma attribute *****/ const char *l = wtag.getAttribute("lemma"); if (l) { SWBuf savlm = l; wtag.setAttribute("lemma", 0); wtag.setAttribute("savlm", savlm); token = wtag; token.trim(); // drop <> token << 1; token--; } } } if (token.startsWith("/w")) { // Word End if (module->isProcessEntryAttributes()) { if (wordStart) { SWBuf tmp; tmp.append(wordStart, (from-wordStart)-3); sprintf(wordstr, "%03d", wordNum-1); module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp; } } wordStart = 0; } // keep token in text text.append('<'); text.append(token); text.append('>'); continue; } if (intoken) { token += *from; } else { text.append(*from); } } return 0; }
/**
 * Adds 'entry' to the module under every verse named by 'key'.
 *
 * 'key' is parsed as a verse list.  The entry text is appended to whatever
 * the first verse already holds; every further verse of the list is linked
 * to that first one.  Nothing happens if either argument is empty.
 *
 * @param module  module being built; its current key is reused for positioning
 * @param key     verse reference(s) read from the import file
 * @param entry   text to store
 */
void writeEntry(SWModule *module, const SWBuf &key, const SWBuf &entry) {
	// both a reference and some content are required
	if (!key.size() || !entry.size()) return;

	std::cout << "from file: " << key << std::endl;

	VerseKey *moduleKey  = (VerseKey *)module->getKey();
	VerseKey *linkMaster = (VerseKey *)module->createKey();

	ListKey verses = moduleKey->parseVerseList(key.c_str(), "Gen1:1", true);

	bool masterWritten = false;
	for (verses = TOP; !verses.popError(); verses++) {
		*moduleKey = verses;

		if (masterWritten) {
			// the text is already stored; just point this verse at it
			std::cout << "linking entry: " << *moduleKey << " to " << *linkMaster << std::endl;
			module->linkEntry(linkMaster);
			continue;
		}

		// first verse of the list: it receives the actual text
		*linkMaster = *moduleKey;

		SWBuf text = module->getRawEntry();
		text += entry;

		//------------------------------------------------------------
		//  Tregelles Page marking special stuff
		//------------------------------------------------------------
/*
		const char *pageMarker = "<seg type=\"page\" subtype=\"";
		int newPage = page;
		SWBuf pageData = strstr(text.c_str(), pageMarker);
		if (pageData.length()) {
			pageData << strlen(pageMarker);
			const char *pn = pageData.stripPrefix('"');
			if (pn) newPage = atoi(pn);
		}
		// add page stuff for treg
		if (text.startsWith(pageMarker)) {
			// don't add anything cuz we already start with one
		}
		else {
			SWBuf pm = pageMarker;
			pm.appendFormatted("%d\" />", page);
			text = pm + text;
		}

		page = newPage;	// when our line set a new page number
*/
		//------------------------------------------------------------

		std::cout << "adding entry: " << *moduleKey << " length " << entry.size() << "/" << (unsigned short)text.size() << std::endl;
		module->setEntry(text);
		masterWritten = true;
	}

	delete linkMaster;
}
int main(int argc, char **argv) { // handle options if (argc < 2) usage(*argv); const char *progName = argv[0]; const char *inFileName = argv[1]; SWBuf v11n = "KJV"; SWBuf outPath = "./"; SWBuf locale = "en"; bool fourByteSize = false; bool append = false; int iType = 4; SWBuf cipherKey = ""; SWCompress *compressor = 0; SWBuf compType = ""; for (int i = 2; i < argc; i++) { if (!strcmp(argv[i], "-a")) { append = true; } else if (!strcmp(argv[i], "-z")) { if (fourByteSize) usage(*argv, "Cannot specify both -z and -4"); compType = "ZIP"; if (i+1 < argc && argv[i+1][0] != '-') { switch (argv[++i][0]) { case 'l': compType = "LZSS"; break; case 'z': compType = "ZIP"; break; case 'b': compType = "BZIP2"; break; case 'x': compType = "XZ"; break; } } } else if (!strcmp(argv[i], "-Z")) { if (compType.size()) usage(*argv, "Cannot specify both -z and -Z"); if (fourByteSize) usage(*argv, "Cannot specify both -Z and -4"); compType = "LZSS"; } else if (!strcmp(argv[i], "-4")) { fourByteSize = true; } else if (!strcmp(argv[i], "-b")) { if (i+1 < argc) { iType = atoi(argv[++i]); if ((iType >= 2) && (iType <= 4)) continue; } usage(*argv, "-b requires one of <2|3|4>"); } else if (!strcmp(argv[i], "-o")) { if (i+1 < argc) outPath = argv[++i]; else usage(progName, "-o requires <output_path>"); } else if (!strcmp(argv[i], "-v")) { if (i+1 < argc) v11n = argv[++i]; else usage(progName, "-v requires <v11n>"); } else if (!strcmp(argv[i], "-l")) { if (i+1 < argc) locale = argv[++i]; else usage(progName, "-l requires <locale>"); } else if (!strcmp(argv[i], "-c")) { if (i+1 < argc) cipherKey = argv[++i]; else usage(*argv, "-c requires <cipher_key>"); } else usage(progName, (((SWBuf)"Unknown argument: ")+ argv[i]).c_str()); } // ----------------------------------------------------- const VersificationMgr::System *v = VersificationMgr::getSystemVersificationMgr()->getVersificationSystem(v11n); if (!v) std::cout << "Warning: Versification " << v11n << " not found. 
Using KJV versification...\n"; if (compType == "LZSS") { compressor = new LZSSCompress(); } else if (compType == "ZIP") { #ifndef EXCLUDEZLIB compressor = new ZipCompress(); #else usage(*argv, "ERROR: SWORD library not compiled with ZIP compression support.\n\tBe sure libz is available when compiling SWORD library"); #endif } else if (compType == "BZIP2") { #ifndef EXCLUDEBZIP2 compressor = new Bzip2Compress(); #else usage(*argv, "ERROR: SWORD library not compiled with bzip2 compression support.\n\tBe sure libbz2 is available when compiling SWORD library"); #endif } else if (compType == "XZ") { #ifndef EXCLUDEXZ compressor = new XzCompress(); #else usage(*argv, "ERROR: SWORD library not compiled with xz compression support.\n\tBe sure liblzma is available when compiling SWORD library"); #endif } // setup module if (!append) { if (compressor) { if (zText::createModule(outPath, iType, v11n)) { fprintf(stderr, "ERROR: %s: couldn't create module at path: %s \n", *argv, outPath.c_str()); exit(-1); } } else { if (!fourByteSize) RawText::createModule(outPath, v11n); else RawText4::createModule(outPath, v11n); } } SWModule *module = 0; if (compressor) { // Create a compressed text module allowing very large entries // Taking defaults except for first, fourth, fifth and last argument module = new zText( outPath, // ipath 0, // iname 0, // idesc iType, // iblockType compressor, // icomp 0, // idisp ENC_UNKNOWN, // enc DIRECTION_LTR, // dir FMT_UNKNOWN, // markup 0, // lang v11n // versification ); } else { module = (!fourByteSize) ? 
(SWModule *)new RawText(outPath, 0, 0, 0, ENC_UNKNOWN, DIRECTION_LTR, FMT_UNKNOWN, 0, v11n) : (SWModule *)new RawText4(outPath, 0, 0, 0, ENC_UNKNOWN, DIRECTION_LTR, FMT_UNKNOWN, 0, v11n); } SWFilter *cipherFilter = 0; if (cipherKey.length()) { fprintf(stderr, "Adding cipher filter with phrase: %s\n", cipherKey.c_str() ); cipherFilter = new CipherFilter(cipherKey.c_str()); module->addRawFilter(cipherFilter); } // ----------------------------------------------------- // setup locale manager LocaleMgr::getSystemLocaleMgr()->setDefaultLocaleName(locale); // setup module key to allow full range of possible values, and then some VerseKey *vkey = (VerseKey *)module->createKey(); vkey->setIntros(true); vkey->setAutoNormalize(false); vkey->setPersist(true); module->setKey(*vkey); // ----------------------------------------------------- // process input file FileDesc *fd = FileMgr::getSystemFileMgr()->open(inFileName, FileMgr::RDONLY); SWBuf lineBuffer; SWBuf keyBuffer; SWBuf entBuffer; bool more = true; do { more = FileMgr::getLine(fd, lineBuffer)!=0; if (lineBuffer.startsWith("$$$")) { if ((keyBuffer.size()) && (entBuffer.size())) { writeEntry(module, keyBuffer, entBuffer); } keyBuffer = lineBuffer; keyBuffer << 3; keyBuffer.trim(); entBuffer.size(0); } else { if (keyBuffer.size()) { entBuffer += lineBuffer; entBuffer += "\n"; } } } while (more); if ((keyBuffer.size()) && (entBuffer.size())) { writeEntry(module, keyBuffer, entBuffer); } delete module; if (cipherFilter) delete cipherFilter; delete vkey; FileMgr::getSystemFileMgr()->close(fd); return 0; }
virtual void logMessage(const char *message, int level) const { SWBuf msg = message; if (msg.size() > 512) msg.setSize(512); __android_log_write(levelMapping[level], "libsword.so", msg.c_str()); }