Esempio n. 1
0
    /**
     * Loads a previously built index from one or more file(s) named after
     * `filename`; each component loader called below receives `filename` and
     * is expected to add its own extension.
     *
     * @param filename  base name handed to every component loader.
     * @param index     out: receives the newly allocated twcsa (as a void *)
     *                  once every component has been loaded.
     * @return 0 on success; 1 if the index structure cannot be allocated.
     *
     * NOTE(review): 1 is a generic failure value -- map it to the project's
     * out-of-memory error code if one exists.
     * NOTE(review): IFERRORIL is a project macro not visible here; if it
     * returns early on error, `wcsa` is leaked and `*index` is never set --
     * confirm against the macro definition.
     */
int load_index(char *filename, void **index){

	twcsa *wcsa = (twcsa *) malloc (sizeof (*wcsa));
	if (wcsa == NULL) {
		fprintf(stderr,"\n load_index: unable to allocate the index structure\n");
		return 1;
	}
	void *Index = (void *) wcsa;
	int error;
	wcsa->text = NULL;

	// Initializes the arrays used to detect if a char is valid or not.
	StartValid();

	/** 1 ** loads the vocabulary of words. zonewords, words vector, nwords */
	loadVocabulary (Index, filename);
	{
		uint totaltmp=0;  //words
		totaltmp += ((((wcsa->nwords+1)* (wcsa->wordsData.elemSize))+W-1) /W) * (sizeof(uint));  //the pointers
		totaltmp += wcsa->wordsData.wordsZoneMem.size * sizeof(byte); //the characters of the words.
		// totaltmp is unsigned: print it with %u (the original used %d).
		fprintf(stderr,"\n\t*Loaded Vocabulary of text: %u words, %u bytes\n", wcsa->nwords, totaltmp);
	}

	/** 2 ** Loads some configuration constants: sourceTextSize, maxNumOccs */
	loadIndexConstants(Index, filename);
	fprintf(stderr,"\t*Loaded  configuration constants: %lu bytes\n", (ulong) (2 * sizeof(uint ) + sizeof(ulong)) );

	#ifdef FREQ_VECTOR_AVAILABLE
	/** 3 ** Loading freq vector */
	{
		uint size; //the size of the vocabulary in #ofwords =>> already init in "loadvocabulary"
		loadFreqVector(&(wcsa->freqs), &size, (char *)filename);
		// The product is a size_t; cast it so the argument matches the conversion.
		fprintf(stderr,"\t*Loaded freq vector: %lu bytes\n", (ulong) (wcsa->nwords * sizeof(uint)) );
	}
	#endif

	/** 4 ** Loading Compressed Structure of posting lists (il) */
	error = load_il((char*) filename,&(wcsa->ils));
	IFERRORIL(error);
	uint sizeil;
	error = size_il(wcsa->ils,&sizeil);
	IFERRORIL(error);
	fprintf(stderr,"\n \t*loaded compressed inverted lists structure: %u bytes\n", sizeil);

	/** 5 ** Loading the Representation of the source text */
	load_representation( &wcsa->ct,filename);
	{
		uint size;
		size_representation(wcsa->ct, &size);
		fprintf(stderr,"\n\t*Loaded (compressed) representation of the source Text: %u bytes\n", size);
	}

	// The caller only receives the index once every component is in place.
	(*index) = Index;
	return 0;
}
Esempio n. 2
0
/**
 * Creates a Writer bound to the file `fileName`.
 *
 * Allocates the Writer, stores a private copy of the file name, creates and
 * opens the underlying buffered writer, and initializes the parsing state.
 *
 * @param fileName  NUL-terminated name of the output file (copied).
 * @return the new Writer, or NULL if either allocation fails.
 *
 * NOTE(review): the results of bufCreateWriter()/openBW() are not checked
 * here because their failure conventions are not visible in this file.
 */
Writer createWriter (char *fileName){

	Writer w = (Writer) malloc (sizeof(struct Wrt));
	if (w == NULL) {
		return NULL;
	}

	w->_fileName = (byte *) malloc( strlen(fileName) +1 );
	if (w->_fileName == NULL) {
		free(w);
		return NULL;
	}
	// _fileName is a byte buffer; strcpy() expects char *.
	strcpy((char *)w->_fileName,fileName);

	w->bw = bufCreateWriter(w->_fileName);
	openBW(w->bw);

	w->_previousAlfanumerical = 0;
	// Same effect as the former strncpy(..., " ", 1): exactly one byte is
	// stored and no terminator is written.
	// NOTE(review): if _SPACE is later read as a C string it needs a '\0'.
	((char *)w->_SPACE)[0] = ' ';
	StartValid();
	return w;
}
Esempio n. 3
0
/** ***********************************************************************************
	 CONSTRUCTION OF THE INDEX from components already stored on disk under the
	 base name "inbasename": the vocabulary, the configuration constants, the
	 representation of the source text and the uncompressed posting lists are
	 loaded, and the posting lists are then compressed through build_il().

	 @param inbasename     base name handed to every loader.
	 @param build_options  option string (" =;" delimited); it is tokenized here
	                       and also forwarded to build_il(). May be NULL.
	 @param index          out: receives the newly allocated twcsa.
	 @return 0 on success; 1 if the index structure cannot be allocated.

	 NOTE(review): 1 is a generic failure value -- map it to the project's
	 out-of-memory error code if one exists.
	 NOTE(review): IFERRORIL is a project macro not visible here; its behaviour
	 on error (return / exit) should be confirmed.
    ***********************************************************************************/
int build_WordIndex_from_postings (char *inbasename, char *build_options, void **index){
	twcsa *wcsa = (twcsa *) malloc (sizeof (*wcsa));
	*index = wcsa;
	if (wcsa == NULL) {
		fprintf(stderr,"\n build_WordIndex_from_postings: unable to allocate the index structure\n");
		return 1;
	}
	void *Index = *index;
	wcsa->text = NULL;
	double t0, t1;
	t0 = getSYSTimeBF();

	/** processing the parameters of the index:: blockSize, and q-gram-len (q) */
	{
		char delimiters[] = " =;";
		int j,num_parameters;
		char ** parameters;

		if (build_options != NULL) {
			parse_parameters_II(build_options,&num_parameters, &parameters, delimiters);
			for (j=0; j<num_parameters;j++) {
				// These options are recognized but their values are currently
				// unused by this index: a known key followed by a value only
				// makes the loop skip that value.
				if ((j < num_parameters-1) &&
				    ((strcmp(parameters[j], "blocksize") == 0) ||
				     (strcmp(parameters[j], "qgram") == 0) ||
				     (strcmp(parameters[j], "path2repaircompressor") == 0))) {
					j++;
				}
			}
			free_parameters_II(num_parameters, &parameters);
		}
	}

	wcsa->freqs=NULL;

	/** 0 ** Initializes the arrays used to detect if a char is valid or not. **/
	StartValid();

	/** 1 ** loads the vocabulary of words. zonewords, words vector, nwords */
	t1 = getSYSTimeBF();

	loadVocabulary (Index, inbasename);
	{
		uint totaltmp=0;  //words
		totaltmp += ((((wcsa->nwords+1)* (wcsa->wordsData.elemSize))+W-1) /W) * (sizeof(uint));  //the pointers
		totaltmp += wcsa->wordsData.wordsZoneMem.size * sizeof(byte); //the characters of the words.
		// totaltmp is unsigned: print it with %u (the original used %d).
		fprintf(stderr,"\n\t*Loaded Vocabulary: %u words, %u bytes", wcsa->nwords, totaltmp);
	}
	fprintf(stderr,"\n\t... Done: %2.2f seconds (sys+usr t)\n", getSYSTimeBF() -t1);

	/** 2 ** Loads some configuration constants: sourceTextSize, maxNumOccs */
	loadIndexConstants(Index, inbasename);
	fprintf(stderr,"\n\t*Loaded  configuration constants: %lu bytes\n", (ulong) (2 * sizeof(uint ) + sizeof(ulong)) );

	#ifdef FREQ_VECTOR_AVAILABLE
	/** 3 ** Loading freq vector */
	{
		uint size; //the size of the vocabulary in #ofwords =>> already init in "loadvocabulary"
		loadFreqVector(&(wcsa->freqs), &size, (char *)inbasename);
		// The product is a size_t; cast it so the argument matches the conversion.
		fprintf(stderr,"\t*Loaded freq vector: %lu bytes\n", (ulong) (wcsa->nwords * sizeof(uint)) );
	}
	#endif

	/** 5 ** Loading the Representation of the source text */
	load_representation( &wcsa->ct,inbasename);
	{
		uint size;
		size_representation(wcsa->ct, &size);
		fprintf(stderr,"\n\t*Loaded (compressed) representation of the source Text: %u bytes\n", size);
	}

	/** 4 ** Loading the uncompressed posting lists previously created by the indexer. */

	//Preparing a "list of occurrences" that will be later indexed through build_il() **
	uint *source_il, sourcelen_il;
	uint maxPost ; //just to check it loads OK ;)
	ulong source_il_ulong;

	t1 = getSYSTimeBF();
	fprintf(stderr,"\n... Loading the posting lists from disk \n"); fflush(stderr);

	// NOTE(review): no status is returned by this loader; source_il is assumed
	// to be valid (and to hold at least two uints) after the call.
	load_posting_lists_from_file(&maxPost, &source_il_ulong, &source_il, inbasename);

	/** FOR CIKM ILISTS_DO NOT STILL SUPPORT an ULONG HERE **/
	sourcelen_il = (uint)source_il_ulong;

	fprintf(stderr,"\n there are %lu uints in all the lists of occurrences: size [uint32] = %lu bytes.\n ",
	               (ulong)sourcelen_il - wcsa->nwords -2, (ulong) sizeof(uint)*(sourcelen_il - wcsa->nwords -2));
	fprintf(stderr,"\n MAXPOST loaded = %u, source_il_len = %u \n\n",maxPost,sourcelen_il);
	// The format previously had a single conversion for two arguments, so
	// source_il[1] was silently dropped from the output.
	fprintf(stderr,"\n NLISTS loaded = %u, MAXPOSTS_sET = %u \n\n",source_il[0],source_il[1]);

	t1 = getSYSTimeBF();
	fprintf(stderr,"\n**... entering BUILD INVERTED LIST REPRESENTATION************!! \n\n"); fflush(stderr);

	//compressing the lists of occurrences and setting wcsa->ils
	int error = build_il(source_il, sourcelen_il, build_options, &(wcsa->ils));  //source_il is freed inside!.
	IFERRORIL(error);

	fprintf(stderr,"\n... Done: %2.2f seconds (sys+usr time)\n", getSYSTimeBF() -t1);

	#ifndef FREQ_VECTOR_AVAILABLE   //<----- not needed in advance, only during construction
		free(wcsa->freqs);
		wcsa->freqs = NULL;
	#endif

	ulong sizeI;
	index_size(*index, &sizeI);
	fflush(stderr); fflush(stdout);
	fprintf(stderr,"\n ---------------------------------------------");
	fprintf(stderr,"\n The index has already been built: %lu bytes!!\n", sizeI);
	fprintf(stderr,"\n... Done: OVERALL TIME = %2.2f seconds (sys+usr time)", getSYSTimeBF() -t0);
	fprintf(stderr,"\n ---------------------------------------------\n\n\n");
	fflush(stderr);
	fflush(stdout);
	return 0;
}
Esempio n. 4
0
/** ***********************************************************************************
	 CONSTRUCTION OF THE INDEX, from a given text file "inbasename".

	 Steps: load the text and the document boundaries, parse the vocabulary
	 (1st pass), create the per-word lists of occurrences (2nd pass), compress
	 them through build_il(), and build the representation of the source text.

	 @param inbasename     base name handed to the text / boundaries loaders.
	 @param build_options  option string (" =;" delimited); it is tokenized here
	                       and also forwarded to build_il() and
	                       build_representation(). May be NULL.
	 @param index          out: receives the newly allocated twcsa.
	 @return 0 on success; 1 if the index structure cannot be allocated.

	 Side effect: the formatted posting lists are dumped to the file
	 "postinglists.posts.<pid>" in the current directory.

	 NOTE(review): 1 is a generic failure value -- map it to the project's
	 out-of-memory error code if one exists.
	 NOTE(review): IFERRORIL is a project macro not visible here; its behaviour
	 on error (return / exit) should be confirmed.
    ***********************************************************************************/
int build_WordIndex (char *inbasename, char *build_options, void **index){
	twcsa *wcsa = (twcsa *) malloc (sizeof (*wcsa));
	*index = wcsa;
	if (wcsa == NULL) {
		fprintf(stderr,"\n build_WordIndex: unable to allocate the index structure\n");
		return 1;
	}
	wcsa->text = NULL;
	double t0, t1;
	t0 = getSYSTimeBF();

	/** processing the parameters of the index:: blockSize, and q-gram-len (q) */
	{
		char delimiters[] = " =;";
		int j,num_parameters;
		char ** parameters;

		if (build_options != NULL) {
			parse_parameters_II(build_options,&num_parameters, &parameters, delimiters);
			for (j=0; j<num_parameters;j++) {
				// These options are recognized but their values are currently
				// unused by this index: a known key followed by a value only
				// makes the loop skip that value.
				if ((j < num_parameters-1) &&
				    ((strcmp(parameters[j], "blocksize") == 0) ||
				     (strcmp(parameters[j], "qgram") == 0) ||
				     (strcmp(parameters[j], "path2repaircompressor") == 0))) {
					j++;
				}
			}
			free_parameters_II(num_parameters, &parameters);
		}
	}

	/** 0 ** Initializes the arrays used to detect if a char is valid or not. **/
	StartValid();

	/** 1 ** Loads the compressed text into memory. */
	t1 = getSYSTimeBF();
	fprintf(stderr,"\n... Entering LoadTextInmem:%s\n",inbasename);
	loadTextInMem(&(wcsa->text), &(wcsa->sourceTextSize),(char *)inbasename);

	fprintf(stderr,"... Loaded Source Sequence: %lu bytes\n", wcsa->sourceTextSize);
	fprintf(stderr,"... Done: %2.2f seconds (sys+usr time)\n\n", getSYSTimeBF() -t1);
	fflush(stderr);

	/** 2 ** loads the array of document boundaries                           */
	uint ndocs;
	ulong *docboundaries;
	loadDocBeginngins(&docboundaries, &ndocs,(char *)inbasename);
	wcsa->ndocs = ndocs; //just for statistics.

	/** 3 ** Parses the sequence and gathers the vocabulary of words (sorted alphanumerically)
		the frequency of such words: obtains "words", "nwords", and "wordsZone"
		Sets also wcsa->freqs (freq of each word)
		Sets also wcsa->maxNumOccs (needed for malloc during extraction) */

	fprintf(stderr,"\n... Entering CreateVocabularyOfWords (1st pass) \n"); fflush(stderr);
	// The timer is armed before the pass and read right after it; it used to
	// be armed just before the report, which always printed ~0 seconds.
	t1 = getSYSTimeBF();
	CreateVocabularyOfWords(*index, docboundaries, ndocs);
	double vocabularySeconds = getSYSTimeBF() - t1;

	//shows the words parsed (only the first and the last few of them)...
	{
		int i;
		fprintf(stderr,"\n\n Despues de sorting ....");	fflush(stderr);
		unsigned char *str;
		uint len;
		for (i = 0; ((uint)i)<wcsa->nwords; i++) {
			if ((i<10) || (((uint)i) >wcsa->nwords-5)) {
				getWord(wcsa,i,&str,&len);
				fprintf(stderr,"\n freq[%6d]=%6u ",i,  wcsa->freqs[i]);
				fprintf(stderr,", words[%6d] = ",i);
				printWord(str,len);
			}
		}
	}

	fprintf(stderr,"\n %u words have been parsed", wcsa->nwords);
	fprintf(stderr,"\n... Done: %2.2f seconds (sys+usr time)\n", vocabularySeconds);

	/** 4 ** creates a temporal list of occurrences of each word (block-oriented).
					gives also the len of each list */
	{
		//decompression of the source text and creation of occList[][] and lenList[]
		uint **occList; uint *lenList;

		t1 = getSYSTimeBF();
		fprintf(stderr,"\n... Entering createListsOfOccurrences (2nd pass) \n"); fflush(stderr);

		createListsOfOccurrences (*index, &occList, &lenList, docboundaries, ndocs);

		fprintf(stderr,"\n %u lists of occurrences were created.", wcsa->nwords);fflush(stderr);
		fprintf(stderr,"\n... Done: %2.2f seconds (sys+usr time)\n", getSYSTimeBF() -t1);

#ifdef CIKM2011_HURRY
		// The plain text is released early in this configuration; a dummy
		// text is handed to build_representation() further below.
		free(wcsa->text);
		wcsa->text = NULL;
#endif
		//Preparing a "list of occurrences" that will be later indexed through build_il() **
		uint *source_il, sourcelen_il;
		uint maxPost = ndocs;
		uint nwords = wcsa->nwords;
		ulong source_il_ulong;

		t1 = getSYSTimeBF();
		fprintf(stderr,"\n... Entering prepareSourceFormatForIListBuilder \n"); fflush(stderr);

		prepareSourceFormatForIListBuilder(nwords,maxPost,lenList, occList, &source_il, &source_il_ulong);

		/** FOR CIKM ILISTS_DO NOT STILL SUPPORT an ULONG HERE **/
		sourcelen_il = (uint)source_il_ulong;

		fprintf(stderr,"\n there are %lu uints in all the lists of occurrences: size [uint32] = %lu bytes.\n ",
		               (ulong)sourcelen_il - nwords -2, (ulong) sizeof(uint)*(sourcelen_il - nwords -2));

		// Dumps the formatted posting lists to "postinglists.posts.<pid>".
		// A failure here is reported but does not abort the construction.
		{
			char fileposts[2048];
			snprintf(fileposts, sizeof(fileposts), "%s.%s.%u","postinglists","posts", (unsigned int) getpid());
			FILE *ff = fopen(fileposts,"w");
			if (ff == NULL) {
				fprintf(stderr,"\n WARNING: could not create %s; the posting lists were not dumped\n", fileposts);
			} else {
				if (fwrite(source_il, sizeof(uint), sourcelen_il,ff) != sourcelen_il) {
					fprintf(stderr,"\n WARNING: short write on %s\n", fileposts);
				}
				fclose(ff);
			}
		}

		fprintf(stderr,"\n the lists of occurrences were formatted for build_il.");fflush(stderr);
		fprintf(stderr,"\n...Done: %2.2f seconds (sys+usr time)\n", getSYSTimeBF() -t1);

		t1 = getSYSTimeBF();
		fprintf(stderr,"\n**... entering BUILD INVERTED LIST REPRESENTATION!! \n"); fflush(stderr);

		//compressing the lists of occurrences and setting wcsa->ils
		int error = build_il(source_il, sourcelen_il, build_options, &(wcsa->ils));  //source_il is freed inside!.
		IFERRORIL(error);

		fprintf(stderr,"\n... Done: %2.2f seconds (sys+usr time)\n", getSYSTimeBF() -t1);

		{
			//frees memory for the posting lists
			uint i;
			for (i=0;i<wcsa->nwords;i++) free(occList[i]);
			free(occList);
			free(lenList);
		}

		/** 5 ** compressed representation of the source text */
		{

#ifdef CIKM2011_HURRY
			// NOTE(review): docbounds_null is a uint array while the regular
			// path passes a ulong array -- confirm against the prototype of
			// build_representation().
			uchar text_null[109] = "NULL-TEXT"; uint text_len_null=1;
			uint docbounds_null[2]= {0,10};
			uint ndoc_null = 1;
			build_representation (text_null, text_len_null, docbounds_null, ndoc_null, build_options, &wcsa->ct);
#else
			build_representation (wcsa->text, wcsa->sourceTextSize, docboundaries, ndocs, build_options, &wcsa->ct);
#endif

			// Sanity check: extracts document 0 and echoes it.
			// NOTE(review): printing with %s assumes the extracted document is
			// NUL-terminated; doclen is available if it is not.
			unsigned char *document;
			uint doclen;
			extract_doc_representation (wcsa->ct, 0, &document, &doclen);
			fprintf(stderr,"\n =================== DOC 0 ======================");
			fprintf(stderr,"\n%s",document);
			fprintf(stderr,"\n =================== ***** ======================\n");
			free(document);
		}

	}

	#ifndef FREQ_VECTOR_AVAILABLE   //<----- not needed in advance, only during construction
		free(wcsa->freqs);
		wcsa->freqs = NULL;
	#endif

	free(docboundaries);

	ulong sizeI;
	index_size(*index, &sizeI);
	fflush(stderr); fflush(stdout);
	fprintf(stderr,"\n The index has already been built: %lu bytes!!\n", sizeI);
	fprintf(stderr,"\n... Done: OVERALL TIME = %2.2f seconds (sys+usr time)\n\n\n", getSYSTimeBF() -t0);
	fflush(stderr);
	fflush(stdout);
	return 0;
}
Esempio n. 5
0
/** ***********************************************************************************
	 CONSTRUCTION OF THE INDEX 
    ***********************************************************************************/
int build_WordIndex (char *inbasename, char *build_options, void **index){
	twcsa *wcsa;
	wcsa = (twcsa *) malloc (sizeof (twcsa) * 1);
	*index = wcsa;
	wcsa->text = NULL;

	//char path2repaircompressor[1000]="./src/repair64bit/repairCompressor";
	//wcsa->blockSize = DEFAULT_BLOCK_SIZE;
	//wcsa->q = DEFAULT_QGRAM_LEN;

	/** processing the parameters of the index:: blockSize, and q-gram-len (q) */
	{
		char delimiters[] = " =;";
		int j,num_parameters;
		char ** parameters;
		
		if (build_options != NULL) {
			parse_parameters_II(build_options,&num_parameters, &parameters, delimiters);
			for (j=0; j<num_parameters;j++) {

			  if ((strcmp(parameters[j], "blocksize") == 0 ) && (j < num_parameters-1) ) {
				//wcsa->blockSize = atoi(parameters[j+1]) * BLOCK_MULTIPLIER;	    
				j++;
			  } 
			  else if ((strcmp(parameters[j], "qgram") == 0 ) && (j < num_parameters-1) ) {
				//wcsa->q =atoi(parameters[j+1]);	    
				j++;
			  }
			  else if ((strcmp(parameters[j], "path2repaircompressor") == 0 ) && (j < num_parameters-1) ) {
				//strcpy(path2repaircompressor,parameters[j+1]);	    
				j++;
			  }
			  
			}
			free_parameters_II(num_parameters, &parameters);
		}
		//fprintf(stderr,"\n Parameters of II-blocks:: *basename = %s, blocksize = %d, q= %d",inbasename,wcsa->blockSize,wcsa->q);
		//fprintf(stderr,"\n \t path2repaircompressor= %s\n",path2repaircompressor);
	}

	/** 0 ** Inicializes the arrays used to detect if a char is valid or not. **/
	StartValid();
	
	
	/** 1 ** Loads the compressed text into memory. */
	loadTextInMem(&(wcsa->text), &(wcsa->textSize),(char *)inbasename);	
	wcsa->sourceTextSize = wcsa->textSize;
	fprintf(stderr,"... Loaded Source Sequence: %u bytes\n", wcsa->textSize); fflush(stderr);
fflush(stderr);
	/** 2 ** loads the array of document boundaries                           */
	uint ndocs;
	uint *docboundaries;
	loadDocBeginngins(&docboundaries, &ndocs,(char *)inbasename);	
	wcsa->ndocs = ndocs; //just for statistics.

	
	/** 3 ** Parses the sequence and gathers the vocabulary of words (sorted alphanumerically) 
		the frecuency of such words: obtains "words", "nwords", and "wordsZone" 
		Sets also wcsa->freqs (freq of each word)
		Sets also wcsa->maxNumOccs (needed for malloc during extraction) */	
	
	fprintf(stderr,"... Entering CreateVocabularyOfWords \n"); fflush(stderr);
	CreateVocabularyOfWords(*index, docboundaries, ndocs);


	//shows the words parsed...
	{
		int i;
		fprintf(stderr,"\n\n Después de sorting ....");	fflush(stderr);
		unsigned char *str;
		uint len;
//		for (i = 0; i<100; i++) {
		for (i = 0; i<wcsa->nwords; i++) {
			if ((i<15) || (i>wcsa->nwords-5)) {
				getWord(wcsa,i,&str,&len);				
				fprintf(stderr,"\n freq[%6d]=%6u ",i,  wcsa->freqs[i]);
				fprintf(stderr,", words[%6d] = ",i);
				printWord(str,len);
			}
		}		
	}

	fprintf(stderr,"\n %u words have been parsed", wcsa->nwords);
					
	
	/** 4 ** creates a temporal list of occurrences of each word (block-oriented).
					gives also the len of each list */
	{	
		//decompression of the source text and creation of occList[][] and lenList[]
		uint **occList; uint *lenList;
		uint *doc_offsets_sids;
		
		createListsOfOccurrences (*index, &occList, &lenList, &doc_offsets_sids, docboundaries, ndocs);
		wcsa->doc_offsets_sids=doc_offsets_sids;
		

		fprintf(stderr,"\n %u lists of occurrences were created.", wcsa->nwords);fflush(stderr);
		//fprintf(stderr,"\n The ranks of the document beginnings are:");
		//{
		//  int i;
		//  for (i=0;i<=ndocs;i++) 		fprintf(stderr,"[%u-th-> %u]",i,wcsa->doc_offsets_sids[i]);
		//}

		//Preparing a "list of occurrences" that will be later indexed through build_il() **
		uint *source_il, sourcelen_il;
		//uint maxPost = ndocs; //!!
		uint maxPost = doc_offsets_sids[ndocs];
		uint nwords = wcsa->nwords;
		prepareSourceFormatForIListBuilder(nwords,maxPost,lenList, occList, &source_il, &sourcelen_il);

/*	
	char fileuintpostings[256] = "postingsSequence.uint32";
	output_posting_lists_concatenated_DEBUGGING_ONLY (nwords, maxPost, lenList, occList,fileuintpostings);	
*/
	
	
	FILE *ff = fopen ("postings.posts","w");
	fwrite (source_il, sizeof(uint), sourcelen_il, ff);
	fclose(ff);
	
#ifdef WRITE_POSTING_LIST		
		FILE *ff = fopen ("postings.posts","w");
		fwrite (source_il, sizeof(uint), sourcelen_il, ff);
		fclose(ff);
#endif		
		
		fprintf(stderr,"\n there are %lu uints in all the lists of occurrences: size [uint32] = %lu bytes.\n ", (ulong)sourcelen_il - nwords -2, (ulong) sizeof(uint)*(sourcelen_il - nwords -2));
		fprintf(stderr,"\n maxPostValue = %u",maxPost);
		  
		/*{ char fileposts[256];
			sprintf(fileposts,"%s.%s.%u","POSTING_LISTS","posts", getpid());
			FILE *ff = fopen(fileposts,"w");
			fwrite(source_il, sizeof(uint), sourcelen_il,ff);
			fclose(ff);
			
		}		
		for (int x=0;x<10;x++) printf("\n%u --> %u",x,source_il[x]);
		exit(0);
		
		*/
		
		fprintf(stderr,"\n the lists of occurrences were formatted for build_il.");fflush(stderr);


		//compressing the lists of occurrences and setting wcsa->ils
		int error = build_il(source_il, sourcelen_il, build_options, &(wcsa->ils));  //source_il is freed inside!.
		IFERRORIL(error);							


		{
			//frees memory for the posting lists
			uint i;
			for (i=0;i<wcsa->nwords;i++) free(occList[i]);
			free(occList);
			free(lenList);
		}
	
		/** 5 ** compressed representation of the source text */	
		{
			//unsigned long *docbegsUL; 
			unsigned int i;
			//docbegsUL = (ulong *) malloc (sizeof(ulong) * (ndocs+1));
			//for (i=0;i<=ndocs;i++) docbegsUL[i] = docboundaries[i];			
			//build_representation (wcsa->text, wcsa->textSize, docbegsUL, ndocs, build_options, &wcsa->ct);

			fprintf(stderr,"\nNow compressing the text: %lu bytes", (ulong)wcsa->textSize);fflush(stderr);
			
			build_representation (wcsa->text, wcsa->textSize, docboundaries, ndocs, build_options, &wcsa->ct);
			unsigned char *document;
			uint doclen;
			extract_doc_representation (wcsa->ct, 0, &document, &doclen);
			fprintf(stderr,"\n =================== DOC 0 ======================");
			fprintf(stderr,"\n%s",document);
			fprintf(stderr,"\n =================== ***** ======================\n");
			free(document);
			//free(docbegsUL);
		}

	}
	
	#ifndef FREQ_VECTOR_AVAILABLE   //<----- not needed in advance, only during construction
		free(wcsa->freqs);
	#endif

	free(docboundaries);	
		
	ulong sizeI;
	index_size(*index, &sizeI);
	fflush(stderr); fflush(stdout);
	fprintf(stderr,"\n The index has already been built: %lu bytes!!\n", sizeI);
	fflush(stderr);
	fflush(stdout);
	return 0;
}