Beispiel #1
0
void saveCterms(CIndexer *indexer)
{
  CConnbas_dbox *connbas = indexer->connbas;
	xmlNodePtr root = xmlDocGetRootElement(indexer->DocCterms);
	char moddate[16];
	
	// date de m�j
	time_t timer;
	time(&timer);
	struct tm *today;
	today = localtime(&timer);

	strftime((char *)moddate, 15, "%Y%m%d%H%M%S", today);

	xmlSetProp(root, (const xmlChar*)"modification_date", (const xmlChar *)moddate );

	xmlChar *out;
	int outsize;
	xmlKeepBlanksDefault(0);
	xmlDocDumpFormatMemory(indexer->DocCterms, &out, &outsize, 1);

//	zSyslog.log(CSyslog::LOG_INFO, "\n/=================================== Saving CTERMS =========================");
//	zSyslog.log(CSyslog::LOG_INFO, "root = %s", root->name);
//	for(int i=0; i<outsize && i<300; i++)
//	{
//		putchar(out[i]);
//	}
	zSyslog.log(CSyslog::LOGL_INFO, CSyslog::LOGC_THESAURUS, "#%d : CTERMS saved", connbas->sbas_id);

	connbas->updatePref_cterms((char *)out, outsize, moddate );

	xmlFree(out);

	indexer->current_cterms_moddate = timer;
}
Beispiel #2
0
/*
bool CDOMDocument::load(char *filename)
{
  FILE *fp;
	long filesize;
	int bytes_read;
	void *buff;
	bool ret = TRUE;
	
	if(!this->parser)
		return(FALSE);

	if( fp=fopen(filename, "rb" ) )
	{
		fseek(fp, 0, SEEK_END);
		filesize = ftell(fp);
		rewind(fp);

		this->depth = -1;
		this->State = CDOMDocument::INTO_UNKNOWN;
		this->indexStart = 0;
		this->indexEnd = 0;
		this->tokBinLen = 0;
		this->lowtokBinLen = 0;
		this->wordIndex = 0;
		this->parseText = true;

		buff = XML_GetBuffer(this->parser, filesize);

		if (buff != NULL)
		{
			bytes_read = fread(buff, 1, filesize, fp);
			if (bytes_read > 0)
			{
				if(XML_ParseBuffer(this->parser, bytes_read, TRUE) != XML_STATUS_ERROR)
				{
				}
				else
				{
					// handle parse error
					zSyslog.log(CSyslog::LOGL_WARNING, CSyslog::LOGC_XMLERR, "Parse error at line %u:\n%s\n",
												  XML_GetCurrentLineNumber(this->parser),
												  XML_ErrorString(XML_GetErrorCode(this->parser)));
					ret = FALSE;
				}
			}
			else
			{
				// handle error
				ret = FALSE;
			}
		}
		else
		{
			// handle error
			ret = FALSE;
		}
		fclose(fp);
	}

	return(ret);
}
*/
bool CDOMDocument::loadXML(char *xml, unsigned long len)
{
	bool ret = TRUE;
//	void *buff;
	
	if(!this->parser)
		return(FALSE);

	this->depth = -1;
	this->State = CDOMDocument::INTO_UNKNOWN;
	this->indexStart = 0;
	this->indexEnd = 0;
	this->tokBinLen = 0;
	this->lowtokBinLen = 0;
	this->wordIndex = 0;
	this->parseText = true;

	if(this->path)
		_FREE(this->path);
	this->path = (char *)_MALLOC_WHY(200, "dom.cpp:loadXML:path");
	if(this->path)
	{
		this->path_msize = 200;
		this->path[0] = '\0';
		this->freepathoffset = 0;
	}

	if(this->upath)
		_FREE(this->upath);
	this->upath = (char *)_MALLOC_WHY(200, "dom.cpp:loadXML:upath");
	if(this->upath)
	{
		this->upath_msize = 200;
		this->upath[0] = '\0';
		this->freeupathoffset = 0;
	}

	if(XML_Parse(this->parser, xml, len, TRUE) != XML_STATUS_ERROR)
	{
	}
	else
	{
		// handle parse error
		zSyslog.log(CSyslog::LOGL_WARNING, CSyslog::LOGC_XMLERR, "Parse error at line %u:\n%s\n",
												  XML_GetCurrentLineNumber(this->parser),
												  XML_ErrorString(XML_GetErrorCode(this->parser)));
		ret = FALSE;
	}

	return(ret);
}
Beispiel #3
0
void loadThesaurus(CIndexer *indexer)
{
  CConnbas_dbox *connbas = indexer->connbas;
  time_t struct_moddate, thesaurus_moddate, cterms_moddate;

  // ----------------------- load structure and thesaurus
  char *xmlstruct;
  char **pxmlstruct = NULL;
  unsigned long  xmlstruct_length;

  char *xmlthesaurus;
  char **pxmlthesaurus = NULL;
  unsigned long  xmlthesaurus_length;

  char *xmlcterms;
  char **pxmlcterms = NULL;
  unsigned long  xmlcterms_length;

  bool struct_changed, thesaurus_changed, cterms_changed;

  extern int debug_flag;
//printf("loadThesaurus ? \n");
	// read the 3 moddates
	connbas->selectPref_moddates(&struct_moddate, &thesaurus_moddate, &cterms_moddate);

	// what has changed
	struct_changed    = indexer->firstLoad || (struct_moddate    > indexer->current_struct_moddate);
	thesaurus_changed = indexer->firstLoad || (thesaurus_moddate > indexer->current_thesaurus_moddate);
	cterms_changed    = indexer->firstLoad || (cterms_moddate    > indexer->current_cterms_moddate);

	indexer->firstLoad = false;	

	if(!struct_changed && !thesaurus_changed && !cterms_changed)
	{
		// nothing changed in the prefs
		return;
	}

	if(struct_changed)
	{
		// the structure changed : reload
		pxmlstruct    = &xmlstruct;
	}
	if(thesaurus_changed)
	{
		// the thesaurus changed
		pxmlthesaurus = &xmlthesaurus;
	}
	if(cterms_changed)
	{
		// the cterms changed
		pxmlcterms    = &xmlcterms;
	}

	// read useful fields
	if(connbas->selectPrefs(pxmlstruct, &xmlstruct_length, pxmlthesaurus, &xmlthesaurus_length, pxmlcterms, &xmlcterms_length) != 0)
	{
		// erreur sql
		return;
	}


	// ============================ load thesaurus
	if(thesaurus_changed)
	{
		if(indexer->DocThesaurus)
		{
			xmlFreeDoc(indexer->DocThesaurus);
			indexer->DocThesaurus = NULL;
		}

		if(indexer->XPathCtx_thesaurus)
		{
			xmlXPathFreeContext(indexer->XPathCtx_thesaurus);
			indexer->XPathCtx_thesaurus = NULL;
		}

		// we have the thesaurus, load in libxml
		indexer->DocThesaurus = xmlParseMemory(xmlthesaurus, xmlthesaurus_length);
		if(indexer->DocThesaurus != NULL)
		{
			// Create xpath evaluation context 
			indexer->XPathCtx_thesaurus = xmlXPathNewContext(indexer->DocThesaurus);
			if(indexer->XPathCtx_thesaurus != NULL)
			{
			}
		}
		zSyslog.log(CSyslog::LOGL_INFO, CSyslog::LOGC_THESAURUS, "#%ld : thesaurus loaded", connbas->sbas_id);
	}


	// ============================ load cterms
	if(cterms_changed)
	{
		if(indexer->DocCterms)
		{
			xmlFreeDoc(indexer->DocCterms);
			indexer->DocCterms = NULL;
		}

		if(indexer->XPathCtx_cterms)
		{
			xmlXPathFreeContext(indexer->XPathCtx_cterms);
			indexer->XPathCtx_cterms = NULL;
		}

		if(indexer->XPathCtx_deleted)
		{
			xmlXPathFreeContext(indexer->XPathCtx_deleted);
			indexer->XPathCtx_deleted = NULL;
		}
		indexer->xmlNodePtr_deleted = NULL;

		// we have the cterms, load in libxml
		indexer->DocCterms = xmlParseMemory(xmlcterms, xmlcterms_length);
		if(indexer->DocCterms != NULL)
		{
			// Create xpath evaluation context 
			indexer->XPathCtx_cterms = xmlXPathNewContext(indexer->DocCterms);
			if(indexer->XPathCtx_cterms != NULL)
			{

				xmlXPathObjectPtr  xpathObj_cterms = NULL; 

//				zSyslog.log(CSyslog::LOG_DEBUG, "|    searching tbranch ' /cterms/te[@delbranch='1'] ' in cterms");
				xpathObj_cterms = xmlXPathEvalExpression((const xmlChar*)("/cterms/te[@delbranch='1']"), indexer->XPathCtx_cterms);
				if(xpathObj_cterms)
				{
					if(xpathObj_cterms->nodesetval)
					{
						xmlNodeSetPtr nodes_cterms = xpathObj_cterms->nodesetval;

						if(nodes_cterms->nodeNr > 0)
						{
//							zSyslog.log(CSyslog::LOG_DEBUG, "|    -> found %d nodes (keeping the first)", nodes_cterms->nodeNr);
							xmlNodePtr node_cterms = nodes_cterms->nodeTab[0];
							indexer->XPathCtx_deleted = xmlXPathNewContext((xmlDocPtr)node_cterms);

							// in the indexer, we keep the node to the deleted
							indexer->xmlNodePtr_deleted = nodes_cterms->nodeTab[0];
						}
						else
						{
//							zSyslog.log(CSyslog::LOG_DEBUG, "|    -> found 0 node");
						}
					}
					xmlXPathFreeObject(xpathObj_cterms);
				}

			}
		}
		indexer->ctermsChanged = false;
		zSyslog.log(CSyslog::LOGL_DEBUG, CSyslog::LOGC_THESAURUS, "#%ld : cterms loaded", connbas->sbas_id);
	}


// printf(" 0 ------------------------\n");
	// ============================ load structure
	if(struct_changed)
	{
// printf(" 1 ------------------------\n");
		xmlDocPtr          doc_struct;
		xmlXPathContextPtr xpathCtx_struct; 
		xmlXPathObjectPtr  xpathObj_struct;

		if(indexer->tStructField)
		{
			delete [] (indexer->tStructField);
			indexer->tStructField = NULL;
		}

		// load in libxml
		doc_struct = xmlParseMemory(xmlstruct, xmlstruct_length);
		if(doc_struct != NULL)
		{
// printf(" 2 ------------------------\n");
			// Create xpath evaluation context
			xpathCtx_struct = xmlXPathNewContext(doc_struct);
			if(xpathCtx_struct != NULL)
			{
// printf(" 3 ------------------------\n");
				// ----- search every fields of the structure
				// Evaluate xpath expression 
				xpathObj_struct = xmlXPathEvalExpression((const xmlChar*)"/record/description/*", xpathCtx_struct);
				if(xpathObj_struct != NULL)
				{
// printf(" 4 ------------------------\n");
					if(xpathObj_struct->nodesetval)
					{
// printf(" 5 ------------------------\n");
						xmlNodeSetPtr nodes_struct = xpathObj_struct->nodesetval;

						indexer->nStructFields = nodes_struct->nodeNr;
						if(indexer->nStructFields > 0)
						{
							// allocate a TABLE of fields
							indexer->tStructField = new CStructField[indexer->nStructFields];
						}

						// ---- scan every nodes of the result on struct
						if(debug_flag)
							printf("/-------------------------------- Loading structure  -----\n");

						for(int i=0; i<indexer->nStructFields; i++)
						{
							xmlNodePtr node_struct = nodes_struct->nodeTab[i];

							if(debug_flag)
								printf("|  Field '%s' ", node_struct->name);

							// ---- get attribute 'type' if it exists
							xmlChar *type;
							if( (type = xmlGetProp(node_struct, (const xmlChar *)"type")) )
							{
								if(strcmp((const char *)type, "text")==0)
									indexer->tStructField[i].type = CStructField::TYPE_TEXT;	// <... type="text"

								else if(strcmp((const char *)type, "number")==0)
									indexer->tStructField[i].type = CStructField::TYPE_INT;		// <... type="number"

								else if(strcmp((const char *)type, "float")==0)
									indexer->tStructField[i].type = CStructField::TYPE_FLOAT;	// <... type="float"

								else if(strcmp((const char *)type, "date")==0)
									indexer->tStructField[i].type = CStructField::TYPE_DATE;	// <... type="date"

								if(debug_flag)
									printf(" { type='%s' (%d) }", type, indexer->tStructField[i].type );

								xmlFree(type);
							}

							// ---- get attribute 'escape' if it exists
							xmlChar *escape;
							if( (escape = xmlGetProp(node_struct, (const xmlChar *)"escape")) )
							{
								if(debug_flag)
									printf(" { escape='%s' }", escape );

								xmlFree(escape);
							}

							// ---- get attribute 'index' if it exists
							xmlChar *index;
							if( (index = xmlGetProp(node_struct, (const xmlChar *)"index")) )
							{
								if( isno((const char *)index) )
									indexer->tStructField[i].index = false;

								if(debug_flag)
									printf(" { index=%d }", indexer->tStructField[i].index );

								xmlFree(index);
							}

							if(debug_flag)
								putchar('\n');

							// ---- get attribute 'tbranch' if it exists
							xmlChar *tbranch;
							if( (tbranch = xmlGetProp(node_struct, (const xmlChar *)"tbranch")) )
							{
								// --- copy the full path into the field
								indexer->tStructField[i].set("/record/description/", (const char *)(node_struct->name), (const char *)tbranch);
								xmlFree(tbranch);
							}
							else
							{
								// no 'tbranch' attribute
								indexer->tStructField[i].set("/record/description/", (const char *)(node_struct->name), NULL);
							}

							// ---- get attribute 'candidates' if it exists
							xmlChar *candidates;
							if( (candidates = xmlGetProp(node_struct, (const xmlChar *)"candidates")) )
							{
								indexer->tStructField[i].candidatesStrings
									= indexer->tStructField[i].candidatesDates
									= indexer->tStructField[i].candidatesIntegers
									= indexer->tStructField[i].candidatesFirstDigit
									= indexer->tStructField[i].candidatesMultiDigits
									= false;
								for(char *p=(char*)candidates; *p; p++)
								{
									switch(*p)
									{
										case 'S':
										case 's':
											indexer->tStructField[i].candidatesStrings = true;
											break;
										case 'D':
										case 'd':
											indexer->tStructField[i].candidatesDates = true;
											break;
										case 'I':
										case 'i':
											indexer->tStructField[i].candidatesIntegers = true;
											break;
										case '0':
											indexer->tStructField[i].candidatesFirstDigit = true;
											break;
										case '9':
											indexer->tStructField[i].candidatesMultiDigits = true;
											break;
									}
								}
								xmlFree(candidates);
							}

							//	if (nodes_struct->nodeTab[i]->type != XML_NAMESPACE_DECL)
							//		nodes_struct->nodeTab[i] = NULL;

						} // FIN : boucle sur les nodes du result sur struc


						if(debug_flag)
							printf("\\-------------------------------- structure loaded  ------\n");


					} // FIN : if(xpathObj_struct->nodesetval)

					xmlXPathFreeObject(xpathObj_struct);

				} // FIN : if(xpathObj_struct != NULL)

				xmlXPathFreeContext(xpathCtx_struct); 

			} // FIN : if(xpathCtx_struct != NULL)
		}
	}


	// search branches pointed by the tbranch into the thesaurus

	if(debug_flag)
		printf("/-------------------------------- Linking fields to thesaurus  ----- \n");

	for(int i=0; i<indexer->nStructFields; i++)
	{
		if(indexer->tStructField[i].tbranch)
		{
			// this field has a tbranch, it's linked to the thesaurus
			if(debug_flag)
					printf("|  Field '%s'\n", indexer->tStructField[i].name);

			if(indexer->XPathCtx_thesaurus != NULL)
			{
				// build links to the thesaurus
				if(debug_flag)
					printf("|    searching tbranch ' %s ' in thesaurus \n", indexer->tStructField[i].tbranch);
									
				xmlXPathObjectPtr  xpathObj_thesaurus = NULL; 
				xpathObj_thesaurus = xmlXPathEvalExpression((const xmlChar*)(indexer->tStructField[i].tbranch), indexer->XPathCtx_thesaurus);
				if(xpathObj_thesaurus != NULL)
				{
					if(xpathObj_thesaurus->nodesetval)
					{
						xmlNodeSetPtr nodes_thesaurus = xpathObj_thesaurus->nodesetval;

						if(debug_flag)
							printf("|    -> found %d nodes \n", nodes_thesaurus->nodeNr);

						if(nodes_thesaurus->nodeNr > 0)
						{
							// in this field, allocate an array of xpathcontext
							indexer->tStructField[i].tXPathCtxThesaurus = new xmlXPathContextPtr[nodes_thesaurus->nodeNr];
							// in this field, allocate an array of nodes
							indexer->tStructField[i].tNodesThesaurus = new xmlNodePtr[nodes_thesaurus->nodeNr];

							if(indexer->tStructField[i].tXPathCtxThesaurus && indexer->tStructField[i].tNodesThesaurus)
							{
								indexer->tStructField[i].nXPathCtxThesaurus = nodes_thesaurus->nodeNr;
								indexer->tStructField[i].nNodesThesaurus    = nodes_thesaurus->nodeNr;
								for(int j=0; j<nodes_thesaurus->nodeNr; j++)
								{
									xmlNodePtr node_thesaurus = nodes_thesaurus->nodeTab[j];
									indexer->tStructField[i].tXPathCtxThesaurus[j] = xmlXPathNewContext((xmlDocPtr)node_thesaurus);
									indexer->tStructField[i].tNodesThesaurus[j]    = node_thesaurus;
								}
							}
						}
					}
					xmlXPathFreeObject(xpathObj_thesaurus);
				}
			}
		}

		if(indexer->tStructField[i].cbranch)
		{
			// this field has a cbranch: it's linked to cterms

			if(indexer->XPathCtx_cterms != NULL)
			{
				// build a link to cterms
				if(debug_flag)
					printf("|    searching cbranch ' %s ' in cterms \n", indexer->tStructField[i].cbranch);
									
				
				// check if cterms has a branch '...field='..zfname..'...
				xmlXPathObjectPtr  xpathObj_cterms = NULL; 

				xpathObj_cterms = xmlXPathEvalExpression((const xmlChar*)(indexer->tStructField[i].cbranch), indexer->XPathCtx_cterms);
				if(xpathObj_cterms != NULL)
				{
					if(!xpathObj_cterms->nodesetval || xpathObj_cterms->nodesetval->nodeNr == 0)
					{
						// the branch does not exists, create it

						if(debug_flag)
							printf("|    -> nodes not found, creating \n");

						xmlNodePtr root = xmlDocGetRootElement(indexer->DocCterms);

						// get nextid
						xmlChar *nextid;
						if( (nextid = xmlGetProp(root, (const xmlChar *)"nextid")) )
						{
							int l = strlen((const char *)nextid);
							if(l > 32)
								l = 32;
							xmlNodePtr te;
							if((te = xmlNewChild(root, NULL, (const xmlChar*)"te", NULL)) != NULL)
							{
								char ibuff[33];

								// prop 'id'
								ibuff[0] = 'C';
								memcpy(ibuff+1, nextid, l+1);
								xmlSetProp(te, (const xmlChar*)"id", (const xmlChar *)ibuff);

								// prop 'field'
								xmlSetProp(te, (const xmlChar*)"field", (const xmlChar *)(indexer->tStructField[i].name));

								// prop 'nextid'
								xmlSetProp(te, (const xmlChar*)"nextid", (const xmlChar *)"0");

								// inc nextid
								sprintf(ibuff, "%d", atoi((const char *)nextid) + 1);
								xmlSetProp(root, (const xmlChar*)"nextid", (const xmlChar *)ibuff );

								// put a xpathcontext into the field
								indexer->tStructField[i].xmlNodeCterms = te;
								indexer->tStructField[i].XPathCtxCterms = xmlXPathNewContext((xmlDocPtr)te);
							}
							xmlFree(nextid);

							time(&cterms_moddate);
						}
					}
					else
					{
						xmlNodeSetPtr nodes_cterms = xpathObj_cterms->nodesetval;

						if(debug_flag)
							printf("|    -> found %d nodes (keeping the first) \n", nodes_cterms->nodeNr);

						// in the field, keep the first xpathcontext
						indexer->tStructField[i].xmlNodeCterms = nodes_cterms->nodeTab[0];
						indexer->tStructField[i].XPathCtxCterms = xmlXPathNewContext((xmlDocPtr)(nodes_cterms->nodeTab[0]));
					}

					xmlXPathFreeObject(xpathObj_cterms);
				}
			}
		}
	}
	if(debug_flag)
		printf("\\-------------------------------- fields linked to thesaurus  ------ \n");



	// ------------------ end loading structure
	indexer->current_struct_moddate = struct_moddate;
	indexer->current_thesaurus_moddate = thesaurus_moddate;
	indexer->current_cterms_moddate = cterms_moddate;
}