/**
 * Serialize the in-memory candidate-terms document (indexer->DocCterms) and
 * hand it to connbas->updatePref_cterms().
 *
 * The root element receives a "modification_date" attribute holding the
 * current local time formatted as YYYYMMDDHHMMSS. The same instant (as a
 * time_t) is stored in indexer->current_cterms_moddate once the document has
 * been passed to the database layer.
 *
 * If the timestamp cannot be computed or libxml2 cannot serialize the
 * document, nothing is written and current_cterms_moddate is left untouched,
 * so an empty/NULL dump is never passed to updatePref_cterms().
 *
 * @param indexer  indexer owning the cterms document and the connection.
 */
void saveCterms(CIndexer *indexer)
{
	CConnbas_dbox *connbas = indexer->connbas;
	xmlNodePtr root = xmlDocGetRootElement(indexer->DocCterms);

	// modification date = now (local time)
	time_t timer;
	time(&timer);
	struct tm *today = localtime(&timer);
	if(today == NULL)
	{
		// passing a NULL tm to strftime() is undefined behaviour
		zSyslog._log(CSyslog::LOGL_ERR, CSyslog::LOGC_THESAURUS, "#%d : CTERMS not saved (no local time)", connbas->sbas_id);
		return;
	}

	char moddate[16];	// 14 digits + terminating '\0'
	strftime(moddate, sizeof(moddate), "%Y%m%d%H%M%S", today);

	xmlSetProp(root, (const xmlChar*)"modification_date", (const xmlChar *)moddate);

	// dump the document as indented xml
	xmlChar *out = NULL;
	int outsize = 0;
	xmlKeepBlanksDefault(0);
	xmlDocDumpFormatMemory(indexer->DocCterms, &out, &outsize, 1);

	if(out == NULL || outsize <= 0)
	{
		// serialization failed (or there is no document) : do not save an empty dump
		zSyslog._log(CSyslog::LOGL_ERR, CSyslog::LOGC_THESAURUS, "#%d : CTERMS not saved (xml dump failed)", connbas->sbas_id);
		if(out != NULL)
			xmlFree(out);
		return;
	}

	connbas->updatePref_cterms((char *)out, outsize, moddate);
	xmlFree(out);

	zSyslog._log(CSyslog::LOGL_THESAURUS, CSyslog::LOGC_THESAURUS, "#%d : CTERMS saved", connbas->sbas_id);

	// remember the date of our own write
	indexer->current_cterms_moddate = timer;
}
bool CDOMDocument::loadXML(char *xml, unsigned long len) { bool ret = true; // void *buff; if(!this->parser) return(false); this->depth = -1; this->State = CDOMDocument::INTO_UNKNOWN; this->indexStart = 0; this->indexEnd = 0; this->tokenLen = 0; this->tokenLCLen = 0; this->tokenLCNDLen = 0; this->wordIndex = 0; this->parseText = true; if(this->path) _FREE(this->path); this->path = (char *)_MALLOC_WHY(200, "dom.cpp:loadXML:path"); if(this->path) { this->path_msize = 200; this->path[0] = '\0'; this->freepathoffset = 0; } if(this->upath) _FREE(this->upath); this->upath = (char *)_MALLOC_WHY(200, "dom.cpp:loadXML:upath"); if(this->upath) { this->upath_msize = 200; this->upath[0] = '\0'; this->freeupathoffset = 0; } if(XML_Parse(this->parser, xml, len, true) != XML_STATUS_ERROR) { } else { // handle parse error zSyslog._log(CSyslog::LOGL_ERR, CSyslog::LOGC_XMLERR, "Parse error at line %u:\n%s\n", XML_GetCurrentLineNumber(this->parser), XML_ErrorString(XML_GetErrorCode(this->parser))); ret = false; } return(ret); }
void CIndexer::flush() { if(!this->tRecord) { // printf("| nothing to flush.\n"); } else { this->connbas->execute((char *)("START TRANSACTION"), 17); int nrecs_flushed = 0; // ----------------------------------------------------------------------------- // warning : before flushing thits, we check that the thesaurus/cterms hasn't changed // else we will set the records as 'to-reindex-thesaurus' again // ----------------------------------------------------------------------------- bool thesaurusChanged = false; // --------------------------------------------------- flush xpath CXPath *xp; //start by counting unknown xpath int nNewXPath = 0; for(xp = this->tXPaths; xp; xp = xp->next) { if(xp->id == 0) nNewXPath++; } if(nNewXPath > 0) { // there is some unknown, we get a uid and we write unsigned int xpath_new_uid; xpath_new_uid = this->connbas->getID("XPATH", nNewXPath); for(xp = this->tXPaths; !this->connbas->crashed && xp; xp = xp->next) { if(xp->id == 0) { xp->new_id = xpath_new_uid++; int r; if((r = this->connbas->insertXPath(xp->upath, &(xp->new_id))) == 0) { // ok : we have created the xpath, or if it was existing the id is returned in xp->new_id xp->id = xp->new_id; } else { // err : no way to create the xpath neither finding the existing one } } } } // --------------------------------------------------- flush the record (delete idx, prop, thits) // create a list of rids CRecord *r; int lrids_len = 0; char *pbuff, *lrids_buff; for(r=this->tRecord; r; r=r->next) lrids_len += 34; // 33=lmax of itoa() + comma delimiter. 
pbuff = lrids_buff = (char *)(_MALLOC_WHY(lrids_len, "indexer.cpp:flush:pbuff")); while( (r = this->tRecord) ) { if(pbuff) { pbuff += sprintf(pbuff, "%d", r->id); if(r->next) *pbuff++ = ','; } this->tRecord = r->next; delete r; nrecs_flushed++; } lrids_len = pbuff-lrids_buff; // ajuste la longueur if(lrids_buff) { // delete idx, prop, thits for those records this->connbas->delRecRefs2(lrids_buff, lrids_len); // lock prefs and thits // if(!this->connbas->crashed && (this->connbas->lockPref() == 0)) // { // check if something has changed in the thesaurus time_t struct_moddate, thesaurus_moddate, cterms_moddate; this->connbas->selectPref_moddates(&struct_moddate, &thesaurus_moddate, &cterms_moddate); thesaurusChanged = (thesaurus_moddate > this->current_thesaurus_moddate || cterms_moddate > this->current_cterms_moddate); if(!thesaurusChanged) { // thesaurus hasn't change if(!this->connbas->crashed && this->ctermsChanged) { // cterms has changed, let's save saveCterms(this); this->ctermsChanged = false; } } // --------------------------------------------------- flush thit CTHit *th; while(!this->connbas->crashed && (th = this->firstTHit) ) { if(!thesaurusChanged) { // if the th/ct hasn't chnaged, we can flush thits this->connbas->insertTHit(th->record_id, th->pxpath->id, th->pxpath->field->name, th->value, th->hitstart, th->hitlen, th->business); } this->firstTHit = th->next; delete th; } // we can unlock // this->connbas->unlockTables(); // flag records 'to-reindex-thesaurus' if(!this->connbas->crashed && thesaurusChanged) { // this->connbas->execute(ibuf, pibuf-ibuf); // _FREE(ibuf); this->connbas->setRecordsToReindexTh2(lrids_buff, lrids_len); } // } // --------------------------------------------------- flush prop CProp *p; while(!this->connbas->crashed && (p = this->firstProp) ) { this->connbas->insertProp(p->record_id, p->pxpath->id, p->pxpath->field->uname, p->value, p->type, p->business); this->firstProp = p->next; delete p; } // 
--------------------------------------------------- flush kword and idx unsigned int kword_new_uid = 0; if(this->nNewKeywords > 0) kword_new_uid = this->connbas->getID("KEYWORDS", this->nNewKeywords); CKword *k; CHit *h; for(int hash=0; !this->connbas->crashed && (hash<KWORD_HASHSIZE); hash++) { for(k=this->tKeywords[hash]; !this->connbas->crashed && k; k=k->next) { // save new kwords if(k->id == 0) { k->new_id = kword_new_uid++; int r; if((r = this->connbas->insertKword(k->kword, k->l, &(k->new_id))) == 0) { // ok : we have created the kword with new_id, or if it was existing the id is returned in k->new_id k->id = k->new_id; } else { } } // on save les hits if(k->id > 0) { while(!this->connbas->crashed && (h = k->firsthit) ) { this->connbas->insertIdx(h->record_id, k->id, h->index, h->pxpath->id, h->pos, h->len, h->business); k->firsthit = h->next; delete h; } } } } this->nNewKeywords = 0; // flag the records as 'indexed' (status-bit 2,1,0 to '1') this->connbas->updateRecord_unlock2(lrids_buff, lrids_len); this->nrecsIndexed += nrecs_flushed; _FREE(lrids_buff); } zSyslog._log(CSyslog::LOGL_INFO, CSyslog::LOGC_INDEXING, "#%d : %d records flushed", this->connbas->sbas_id, nrecs_flushed); this->connbas->execute((char *)("COMMIT"), 6); for(xp = this->tXPaths; xp; xp = xp->next) { if(xp->id == 0 && xp->new_id != 0) { xp->id = xp->new_id; } } for(int hash=0; hash<KWORD_HASHSIZE; hash++) { for(CKword *k=this->tKeywords[hash]; k; k=k->next) { // save new kwords if(k->id == 0 && k->new_id != 0) { k->id = k->new_id; } } } } }
/**
 * (Re)load the structure, the thesaurus and the candidate terms (cterms) of
 * the databox into the indexer, but only the parts whose modification date is
 * newer than the one known to the indexer (everything on the first load).
 *
 * - thesaurus : parsed into indexer->DocThesaurus / XPathCtx_thesaurus.
 * - cterms    : parsed into indexer->DocCterms / XPathCtx_cterms; the first
 *               /cterms/te[@delbranch='1'] node (if any) is kept in
 *               xmlNodePtr_deleted / XPathCtx_deleted.
 * - structure : always rebuilt when thesaurus or cterms changed; each child
 *               of /record/description becomes one CStructField (type, index,
 *               business, candidates, tbranch) linked to the thesaurus nodes
 *               matched by its tbranch and to its branch in cterms (created
 *               in the cterms document when missing).
 *
 * Returns without touching the indexer when nothing changed or when
 * connbas->selectPrefs() reports an error.
 *
 * @param indexer  indexer to refresh.
 */
void loadThesaurus(CIndexer *indexer)
{
	CConnbas_dbox *connbas = indexer->connbas;

	// ---- read the 3 moddates and find what has changed
	time_t struct_moddate, thesaurus_moddate, cterms_moddate;
	connbas->selectPref_moddates(&struct_moddate, &thesaurus_moddate, &cterms_moddate);

	bool struct_changed    = indexer->firstLoad || (struct_moddate    > indexer->current_struct_moddate);
	bool thesaurus_changed = indexer->firstLoad || (thesaurus_moddate > indexer->current_thesaurus_moddate);
	bool cterms_changed    = indexer->firstLoad || (cterms_moddate    > indexer->current_cterms_moddate);

	indexer->firstLoad = false;

	if(!struct_changed && !thesaurus_changed && !cterms_changed)
	{
		// nothing changed in the prefs
		return;
	}

	// fix "scout" : when cterms change, links from structure may be corrupted
	if(cterms_changed || thesaurus_changed)
		struct_changed = true;

	// ---- read only the useful fields (a NULL pointer means 'do not read this one')
	// NOTE(review): ownership of the buffers returned by selectPrefs() is not visible
	// from here; they are not released in this function.
	char *xmlstruct = NULL;
	char *xmlthesaurus = NULL;
	char *xmlcterms = NULL;
	unsigned long xmlstruct_length = 0;
	unsigned long xmlthesaurus_length = 0;
	unsigned long xmlcterms_length = 0;

	char **pxmlstruct    = struct_changed    ? &xmlstruct    : NULL;
	char **pxmlthesaurus = thesaurus_changed ? &xmlthesaurus : NULL;
	char **pxmlcterms    = cterms_changed    ? &xmlcterms    : NULL;

	if(connbas->selectPrefs(pxmlstruct, &xmlstruct_length, pxmlthesaurus, &xmlthesaurus_length, pxmlcterms, &xmlcterms_length) != 0)
	{
		// sql error
		return;
	}

	std::string cstr;		// trace of the structure loading, logged at the end
	char strbuff[1000];

	// ============================ load thesaurus
	if(thesaurus_changed)
	{
		if(indexer->XPathCtx_thesaurus)
		{
			xmlXPathFreeContext(indexer->XPathCtx_thesaurus);
			indexer->XPathCtx_thesaurus = NULL;
		}
		if(indexer->DocThesaurus)
		{
			xmlFreeDoc(indexer->DocThesaurus);
			indexer->DocThesaurus = NULL;
		}

		// we have the thesaurus, load in libxml
		indexer->DocThesaurus = xmlParseMemory(xmlthesaurus, xmlthesaurus_length);
		if(indexer->DocThesaurus != NULL)
		{
			// create xpath evaluation context
			indexer->XPathCtx_thesaurus = xmlXPathNewContext(indexer->DocThesaurus);
		}

		// cast : makes the argument match %ld whatever the integer type of sbas_id
		zSyslog._log(CSyslog::LOGL_THESAURUS, CSyslog::LOGC_THESAURUS, "#%ld : thesaurus loaded", (long)connbas->sbas_id);
	}

	// ============================ load cterms
	if(cterms_changed)
	{
		// the fields hold links into the cterms document : drop them first
		if(indexer->tStructField)
		{
			delete [] (indexer->tStructField);
			indexer->tStructField = NULL;
		}
		if(indexer->DocCterms)
		{
			xmlFreeDoc(indexer->DocCterms);
			indexer->DocCterms = NULL;
		}
		if(indexer->XPathCtx_cterms)
		{
			xmlXPathFreeContext(indexer->XPathCtx_cterms);
			indexer->XPathCtx_cterms = NULL;
		}
		if(indexer->XPathCtx_deleted)
		{
			xmlXPathFreeContext(indexer->XPathCtx_deleted);
			indexer->XPathCtx_deleted = NULL;
		}
		indexer->xmlNodePtr_deleted = NULL;

		// we have the cterms, load in libxml
		indexer->DocCterms = xmlParseMemory(xmlcterms, xmlcterms_length);
		if(indexer->DocCterms != NULL)
		{
			// create xpath evaluation context
			indexer->XPathCtx_cterms = xmlXPathNewContext(indexer->DocCterms);
			if(indexer->XPathCtx_cterms != NULL)
			{
				xmlXPathObjectPtr xpathObj_cterms = xmlXPathEvalExpression((const xmlChar*)("/cterms/te[@delbranch='1']"), indexer->XPathCtx_cterms);
				if(xpathObj_cterms)
				{
					xmlNodeSetPtr nodes_cterms = xpathObj_cterms->nodesetval;
					if(nodes_cterms && nodes_cterms->nodeNr > 0)
					{
						// in the indexer, we keep the (first) node of the deleted branch
						xmlNodePtr node_deleted = nodes_cterms->nodeTab[0];
						indexer->XPathCtx_deleted = xmlXPathNewContext((xmlDocPtr)node_deleted);
						indexer->xmlNodePtr_deleted = node_deleted;
					}
					xmlXPathFreeObject(xpathObj_cterms);
				}
			}
		}
		indexer->ctermsChanged = false;

		zSyslog._log(CSyslog::LOGL_THESAURUS, CSyslog::LOGC_THESAURUS, "#%ld : cterms loaded", (long)connbas->sbas_id);
	}

	// ============================ load structure
	if(struct_changed)
	{
		if(indexer->tStructField)
		{
			delete [] (indexer->tStructField);
			indexer->tStructField = NULL;
		}

		// load in libxml
		// NOTE(review): doc_struct is never released with xmlFreeDoc() in this function.
		// Confirm that CStructField::set() copies what it receives before adding the free.
		xmlDocPtr doc_struct = xmlParseMemory(xmlstruct, xmlstruct_length);
		if(doc_struct != NULL)
		{
			// create xpath evaluation context
			xmlXPathContextPtr xpathCtx_struct = xmlXPathNewContext(doc_struct);
			if(xpathCtx_struct != NULL)
			{
				// ----- search every field of the structure
				xmlXPathObjectPtr xpathObj_struct = xmlXPathEvalExpression((const xmlChar*)"/record/description/*", xpathCtx_struct);
				if(xpathObj_struct != NULL)
				{
					if(xpathObj_struct->nodesetval)
					{
						xmlNodeSetPtr nodes_struct = xpathObj_struct->nodesetval;

						indexer->nStructFields = nodes_struct->nodeNr;
						if(indexer->nStructFields > 0)
						{
							// allocate a TABLE of fields
							indexer->tStructField = new CStructField[indexer->nStructFields];
						}

						// ---- scan every node of the result on struct
						cstr = "/-------------------------------- Loading structure -----\n";
						for(int i=0; i<indexer->nStructFields; i++)
						{
							xmlNodePtr node_struct = nodes_struct->nodeTab[i];
							CStructField &field = indexer->tStructField[i];

							cstr += "| Field '"+ std::string((const char *)(node_struct->name)) +"'";

							// ---- get attribute 'type' if it exists
							field.type = CStructField::TYPE_NONE;	// default
							xmlChar *type = xmlGetProp(node_struct, (const xmlChar *)"type");
							if(type)
							{
								if(!isWhite(type))
								{
									if(strcmp((const char *)type, "text")==0)
										field.type = CStructField::TYPE_TEXT;		// <... type="text"
									else if(strcmp((const char *)type, "number")==0)
										field.type = CStructField::TYPE_INT;		// <... type="number"
									else if(strcmp((const char *)type, "float")==0)
										field.type = CStructField::TYPE_FLOAT;		// <... type="float"
									else if(strcmp((const char *)type, "date")==0)
										field.type = CStructField::TYPE_DATE;		// <... type="date"
								}
								snprintf(strbuff, 1000, "{ type='%s' (%d) }", type, field.type);
								cstr += strbuff;
								xmlFree(type);
							}
							else
							{
								snprintf(strbuff, 1000, "{ type='' (%d) }", field.type);
								cstr += strbuff;
							}

							// ---- get attribute 'index' if it exists
							field.index = true;		// default
							xmlChar *index = xmlGetProp(node_struct, (const xmlChar *)"index");
							if(index)
							{
								if(!isWhite(index) && isno((const char *)index))
									field.index = false;
								xmlFree(index);
							}
							snprintf(strbuff, 1000, " { index=%d }", field.index );
							cstr += strbuff;

							// ---- get attribute 'business' if it exists
							field.business = false;		// default if NO attribute
							xmlChar *business = xmlGetProp(node_struct, (const xmlChar *)"business");
							if(business)
							{
								field.business = true;	// default if attribute exists
								if(!isWhite(business) && isno((const char *)business))
									field.business = false;
								xmlFree(business);
							}
							snprintf(strbuff, 1000, " { business=%d }", field.business );
							cstr += strbuff;

							// ---- get attribute 'candidates' if it exists
							field.candidatesStrings
								= field.candidatesDates
								= field.candidatesIntegers
								= field.candidatesFirstDigit
								= field.candidatesMultiDigits
								= true;		// default if NO attribute
							xmlChar *candidates = xmlGetProp(node_struct, (const xmlChar *)"candidates");
							if(candidates)
							{
								field.candidatesStrings
									= field.candidatesDates
									= field.candidatesIntegers
									= field.candidatesFirstDigit
									= field.candidatesMultiDigits
									= false;	// default if attribute exists
								if(!isWhite(candidates))
								{
									// each letter of the attribute enables one kind of candidate
									for(char *p=(char*)candidates; *p; p++)
									{
										switch(*p)
										{
											case 'S':
											case 's':
												field.candidatesStrings = true;
												break;
											case 'D':
											case 'd':
												field.candidatesDates = true;
												break;
											case 'I':
											case 'i':
												field.candidatesIntegers = true;
												break;
											case '0':
												field.candidatesFirstDigit = true;
												break;
											case '9':
												field.candidatesMultiDigits = true;
												break;
										}
									}
								}
								xmlFree(candidates);
							}

							// ---- get attribute 'tbranch' if it exists
							xmlChar *tbranch = xmlGetProp(node_struct, (const xmlChar *)"tbranch");
							if(tbranch == NULL)
							{
								// no 'tbranch' attribute
								cstr += "'\n";
								field.set("/record/description/", (const char *)(node_struct->name), NULL);
								continue;
							}
							if(isWhite(tbranch))
							{
								// 'tbranch' is white
								cstr += "'\n";
								field.set("/record/description/", (const char *)(node_struct->name), NULL);
								xmlFree(tbranch);	// was leaked in this branch
								continue;
							}

							// dump 'candidates' field attribute only if there is a tbranch
							cstr += " { candidates='";
							if(field.candidatesStrings == true)
								cstr += "S";
							if(field.candidatesDates == true)
								cstr += "D";
							if(field.candidatesIntegers == true)
								cstr += "I";
							if(field.candidatesFirstDigit == true)
								cstr += "0";
							if(field.candidatesMultiDigits == true)
								cstr += "9";
							cstr += "'}\n";

							// --- copy the full path into the field
							field.set("/record/description/", (const char *)(node_struct->name), (const char *)tbranch);
							xmlFree(tbranch);

							if(field.tbranch && indexer->XPathCtx_thesaurus != NULL)
							{
								// this field has a tbranch, it's linked to the thesaurus : build the links
								snprintf(strbuff, 1000, "|   searching tbranch ' %s ' in thesaurus \n", field.tbranch);
								cstr += strbuff;

								xmlXPathObjectPtr xpathObj_thesaurus = xmlXPathEvalExpression((const xmlChar*)(field.tbranch), indexer->XPathCtx_thesaurus);
								if(xpathObj_thesaurus != NULL)
								{
									if(xpathObj_thesaurus->nodesetval)
									{
										xmlNodeSetPtr nodes_thesaurus = xpathObj_thesaurus->nodesetval;

										// plural only when the count is not 1 (the test was inverted)
										snprintf(strbuff, 1000, "|   -> found %d node%s \n", nodes_thesaurus->nodeNr, (nodes_thesaurus->nodeNr==1 ? "" : "s"));
										cstr += strbuff;

										if(nodes_thesaurus->nodeNr > 0)
										{
											// in this field, allocate an array of xpathcontext and an array of nodes
											field.tXPathCtxThesaurus = new xmlXPathContextPtr[nodes_thesaurus->nodeNr];
											field.tNodesThesaurus = new xmlNodePtr[nodes_thesaurus->nodeNr];

											if(field.tXPathCtxThesaurus && field.tNodesThesaurus)
											{
												field.nXPathCtxThesaurus = nodes_thesaurus->nodeNr;
												field.nNodesThesaurus = nodes_thesaurus->nodeNr;

												for(int j=0; j<nodes_thesaurus->nodeNr; j++)
												{
													xmlNodePtr node_thesaurus = nodes_thesaurus->nodeTab[j];
													field.tXPathCtxThesaurus[j] = xmlXPathNewContext((xmlDocPtr)node_thesaurus);
													field.tNodesThesaurus[j] = node_thesaurus;
												}
											}
										}
									}
									xmlXPathFreeObject(xpathObj_thesaurus);
								}
							}

							if(field.cbranch && indexer->XPathCtx_cterms != NULL)
							{
								// build a link to cterms
								snprintf(strbuff, 1000, "|   searching cbranch ' %s ' in cterms \n", field.cbranch);
								cstr += strbuff;

								// check if cterms has a branch for this field
								xmlXPathObjectPtr xpathObj_cterms = xmlXPathEvalExpression((const xmlChar*)(field.cbranch), indexer->XPathCtx_cterms);
								if(xpathObj_cterms != NULL)
								{
									if(!xpathObj_cterms->nodesetval || xpathObj_cterms->nodesetval->nodeNr == 0)
									{
										// the branch does not exist, create it
										cstr += "|   -> node not found, creating \n";

										xmlNodePtr root = xmlDocGetRootElement(indexer->DocCterms);

										// get nextid
										xmlChar *nextid = xmlGetProp(root, (const xmlChar *)"nextid");
										if(nextid)
										{
											xmlNodePtr te = xmlNewChild(root, NULL, (const xmlChar*)"te", NULL);
											if(te != NULL)
											{
												// 'C' + at most 32 chars of nextid + '\0'.
												// The former 33-byte buffer overflowed by one byte
												// (and could stay unterminated) for a 32+ chars nextid.
												char ibuff[1 + 32 + 1];

												// prop 'id'
												snprintf(ibuff, sizeof(ibuff), "C%.32s", (const char *)nextid);
												xmlSetProp(te, (const xmlChar*)"id", (const xmlChar *)ibuff);

												// prop 'field'
												xmlSetProp(te, (const xmlChar*)"field", (const xmlChar *)(field.name));

												// prop 'nextid'
												xmlSetProp(te, (const xmlChar*)"nextid", (const xmlChar *)"0");

												// inc nextid on the root
												snprintf(ibuff, sizeof(ibuff), "%d", atoi((const char *)nextid) + 1);
												xmlSetProp(root, (const xmlChar*)"nextid", (const xmlChar *)ibuff );

												// put a xpathcontext into the field
												field.xmlNodeCterms = te;
												field.XPathCtxCterms = xmlXPathNewContext((xmlDocPtr)te);
											}
											xmlFree(nextid);

											// the cterms known to the indexer are now dated 'now'
											time(&cterms_moddate);
										}
									}
									else
									{
										xmlNodeSetPtr nodes_cterms = xpathObj_cterms->nodesetval;

										// plural only when the count is not 1 (the test was inverted)
										snprintf(strbuff, 1000, "|   -> found %d node%s (keeping the first) \n", nodes_cterms->nodeNr, (nodes_cterms->nodeNr==1 ? "" : "s"));
										cstr += strbuff;

										// in the field, keep the first node and a xpathcontext on it
										field.xmlNodeCterms = nodes_cterms->nodeTab[0];
										field.XPathCtxCterms = xmlXPathNewContext((xmlDocPtr)(nodes_cterms->nodeTab[0]));
									}
									xmlXPathFreeObject(xpathObj_cterms);
								}
							}
						} // END : loop on the nodes of the result on struct

						cstr += "\\-------------------------------- structure loaded ------\n";
					} // END : if(xpathObj_struct->nodesetval)
					xmlXPathFreeObject(xpathObj_struct);
				} // END : if(xpathObj_struct != NULL)
				xmlXPathFreeContext(xpathCtx_struct);
			} // END : if(xpathCtx_struct != NULL)
		}
	}

	// cstr contains attribute values read from the xml : log it as DATA through "%s",
	// never as the printf format itself.
	zSyslog._log(CSyslog::LOGL_INFO, CSyslog::LOGC_STRUCTURE, "%s", cstr.c_str());
	cstr.clear();

	// ------------------ end loading structure
	indexer->current_struct_moddate = struct_moddate;
	indexer->current_thesaurus_moddate = thesaurus_moddate;
	indexer->current_cterms_moddate = cterms_moddate;
}