Esempio n. 1
0
/*
 * Open every test file and remember its handle in files[].
 *
 * For each index in [0, num_files) the relative path "test_file.<index>" is
 * opened for reading and writing, and created when it does not exist yet
 * (O_CREAT on POSIX, OPEN_ALWAYS on Windows).  When test_mode is MODE_WRITE
 * any existing file of that name is unlinked first.
 *
 * When HAVE_MMAP is defined, file_mmap_prepare() is called once all files
 * are open.  Finally fsync_mutex is initialised with default attributes.
 *
 * Returns 0 on success, 1 if a file could not be opened or if
 * file_mmap_prepare() returned non-zero.  Handles opened by earlier loop
 * iterations are not closed here on failure.
 */
int file_prepare(void)
{
  unsigned int  i;
  char          file_name[512];

  for (i = 0; i < num_files; i++)
  {
    /* i is unsigned, so it has to be formatted with %u rather than %d */
    snprintf(file_name, sizeof(file_name), "test_file.%u", i);
    /* remove test files for creation test if they exist */
    if (test_mode == MODE_WRITE)
      unlink(file_name);

    log_text(LOG_DEBUG, "Opening file: %s",file_name);
#ifndef _WIN32
    /* New files are created readable and writable by the owner only */
    files[i] = open(file_name, O_CREAT | O_RDWR | file_extra_flags,
                    S_IRUSR | S_IWUSR);
#else
    /* Fall back to FILE_ATTRIBUTE_NORMAL when no extra flags were set */
    files[i] = CreateFile(file_name, GENERIC_READ|GENERIC_WRITE, 0, NULL,
      OPEN_ALWAYS, file_extra_flags? file_extra_flags : FILE_ATTRIBUTE_NORMAL,
      NULL);
#endif
    if (!VALID_FILE(files[i]))
    {
      log_errno(LOG_FATAL, "Cannot open file");
      return 1;
    }
  }

#ifdef HAVE_MMAP
  if (file_mmap_prepare())
    return 1;
#endif

  pthread_mutex_init(&fsync_mutex, NULL);

  return 0;
}
Esempio n. 2
0
/*
 * Initialisation step for the sentsim_lmtrlm procedure.
 *
 * Looks up the spec parameters, opens the document vector file, initialises
 * the language-model weighting and vec_vec sub-procedures, loads the
 * document count and the COLLSTAT_TOTWT list from the collection statistics
 * file, allocates p_w_Q, and initialises the LDA estimator.
 *
 * spec   - specification handed to the lookup and init routines
 * prefix - not referenced by this function
 *
 * Returns 0 on success and UNDEF as soon as any step fails.
 */
int init_sentsim_lmtrlm (SPEC* spec, char* prefix)
{
    DIR_ARRAY stats_entry;

    PRINT_TRACE (2, print_string, "Trace: entering init_sentsim_lmtrlm");

    /* Resolve the values of the relevant parameters */
    if (lookup_spec (spec, &spec_args[0], num_spec_args) == UNDEF)
        return UNDEF;

    doc_fd = open_vector (dvec_file, dvec_file_mode);
    if (doc_fd == UNDEF)
        return UNDEF;

    /* Bring up the LM conversion module */
    if (init_lang_model_wt_lm (spec, NULL) == UNDEF)
        return UNDEF;

    vec_vec_inst = vec_vec_ptab->init_proc (spec, NULL);
    if (vec_vec_inst == UNDEF)
        return UNDEF;

    /* A collection statistics file is mandatory for this procedure */
    if (! VALID_FILE (collstat_file))
        return UNDEF;

    collstats_fd = open_dir_array (collstat_file, collstat_mode);
    if (collstats_fd == UNDEF)
        return UNDEF;

    /* Entry holding the total number of documents */
    stats_entry.id_num = COLLSTAT_NUMDOC;
    if (1 != seek_dir_array (collstats_fd, &stats_entry) ||
        1 != read_dir_array (collstats_fd, &stats_entry))
        return UNDEF;
    memcpy (&collstats_numdocs, stats_entry.list, sizeof(long));

    /* Entry holding the collection frequency list (COLLSTAT_TOTWT) */
    stats_entry.id_num = COLLSTAT_TOTWT;
    if (1 != seek_dir_array (collstats_fd, &stats_entry) ||
        1 != read_dir_array (collstats_fd, &stats_entry)) {
        collstats_freq = NULL;
        collstats_num_freq = 0;
        return UNDEF;
    }

    /* The list is used in place (no copy is made); num_list is a byte
     * count, hence the division to obtain the number of floats. */
    collstats_freq = (float*) stats_entry.list;
    collstats_num_freq = stats_entry.num_list / sizeof (float);

    /* One float of p_w_Q per collection frequency entry */
    p_w_Q = (float*) malloc (collstats_num_freq * sizeof(float));
    if (p_w_Q == NULL)
        return UNDEF;

    totalDocFreq = getTotalDocumentFreq();

    if (init_lda_est (&ldamodel, spec) == UNDEF)
        return UNDEF;

    PRINT_TRACE (2, print_string, "Trace: leaving init_sentsim_lmtrlm");
    return 0;
}
Esempio n. 3
0
// Read, for every concept type, the collection frequency list from that
// type's collstat file into the global array collstats_info, then compute
// totalDocFreq for each concept type.
//
// spec   - specification from which all parameters are looked up
// unused - not referenced by this function
//
// Returns 1 on success and UNDEF when a lookup, open, close or memory
// allocation fails.  A concept type without a valid collstat file, or whose
// COLLSTAT_TOTWT entry cannot be read, is not an error: its freq is set to
// NULL and its num_freq to 0.
int init_lang_model_wt_cf_lm_nsim(SPEC* spec, char* unused)
{
    DIR_ARRAY dir_array;
    long ctype;
    char conceptName[256];
    long i;

    PRINT_TRACE (2, print_string, "Trace: entering init_lang_model_wt_cf_lm_nsim");

    // Initialize buffer to copy invec's term weights into outvec's ones
    num_conwt_buf = 8096;

    if (NULL == (conwt_buf = (CON_WT *)
                 malloc (num_conwt_buf * sizeof (CON_WT))))
        return (UNDEF);

    // Get the number of concepts
    if (UNDEF == lookup_spec_docdesc (spec, &doc_desc))
        return (UNDEF);

    // Initialize the array of fds for opening the collection files
    collstats_fd = (int*) malloc (sizeof(int) * doc_desc.num_ctypes);
    if (collstats_fd == NULL)
        return UNDEF;

    collstats_info = (struct coll_info*) malloc (sizeof(struct coll_info) * doc_desc.num_ctypes);
    if (collstats_info == NULL)
        return UNDEF;

    if (UNDEF == lookup_spec (spec,
                              &spec_args[0],
                              num_spec_args)) {
        return (UNDEF);
    }

    // For each concept, collect the collection frequency
    for (ctype = 0; ctype < doc_desc.num_ctypes; ctype++) {

        // Per-ctype parameters are looked up under the prefix "ctype.<n>."
        // NOTE(review): prefix is not local to this function and is left
        // pointing at the stack buffer conceptName after return -- confirm
        // nothing reads it later.
        snprintf(conceptName, sizeof(conceptName), "ctype.%ld.", ctype);
        prefix = conceptName;
        if (UNDEF == lookup_spec_prefix (spec, spec_prefix_args, num_spec_prefix_args))
            return (UNDEF);

        if (! VALID_FILE (collstat_file)) {
            // No statistics file for this concept type
            collstats_info[ctype].freq = NULL;
            collstats_info[ctype].num_freq = 0;
            continue;
        }

        if (UNDEF == (collstats_fd[ctype] = open_dir_array (collstat_file, collstat_mode)))
            return (UNDEF);

        // Get the frequency mode to use
        dir_array.id_num = COLLSTAT_TOTWT;

        // Get the collection frequency list from the file
        if (1 != seek_dir_array (collstats_fd[ctype], &dir_array) ||
            1 != read_dir_array (collstats_fd[ctype], &dir_array)) {
            collstats_info[ctype].freq = NULL;
            collstats_info[ctype].num_freq = 0;
        }
        else {
            // Read from file successful. Allocate 'freq' array and dump the
            // contents of the file in this list
            if (NULL == (collstats_info[ctype].freq = (float *)
                         malloc ((unsigned) dir_array.num_list))) {
                // Do not leak the handle that was just opened
                (void) close_dir_array (collstats_fd[ctype]);
                return (UNDEF);
            }
            (void) bcopy (dir_array.list,
                          (char *) collstats_info[ctype].freq,
                          dir_array.num_list);
            // num_list is a byte count; convert it to a number of floats
            collstats_info[ctype].num_freq = dir_array.num_list / sizeof (float);
        }

        if (UNDEF == close_dir_array (collstats_fd[ctype]))
            return (UNDEF);
    }

    totalDocFreq = (float*) malloc (sizeof(float) * doc_desc.num_ctypes);
    // A NULL result is only an error when there is something to store;
    // malloc(0) may legitimately return NULL.
    if (totalDocFreq == NULL && doc_desc.num_ctypes > 0)
        return (UNDEF);

    // Compute the total Document Frequency only once since it is a constant.
    for (i = 0; i < doc_desc.num_ctypes; i++) {
        totalDocFreq[i] = getTotalDocFreq(i);
    }

    PRINT_TRACE (2, print_string, "Trace: leaving init_lang_model_wt_cf_lm_nsim");
    return (1);
}