Esempio n. 1
0
bool GzippedFileReader::OkIndex() {
	if (m_pIndex)
		return true;

	// Try to read index from disk
	wxString indexfile = iso2indexname(m_filename);
	if (indexfile.length() == 0)
		return false; // iso2indexname(...) will print errors if it can't apply the template

	if (wxFileName::FileExists(indexfile) && (m_pIndex = ReadIndexFromFile(indexfile))) {
		Console.WriteLn(Color_Green, L"OK: Gzip quick access index read from disk: '%s'", WX_STR(indexfile));
		if (m_pIndex->span != GZFILE_SPAN_DEFAULT) {
			Console.Warning(L"Note: This index has %1.1f MB intervals, while the current default for new indexes is %1.1f MB.",
			                (float)m_pIndex->span / 1024 / 1024, (float)GZFILE_SPAN_DEFAULT / 1024 / 1024);
			Console.Warning(L"It will work fine, but if you want to generate a new index with default intervals, delete this index file.");
			Console.Warning(L"(smaller intervals mean bigger index file and quicker but more frequent decompressions)");
		}
		InitZstates();
		return true;
	}

	// No valid index file. Generate an index
	Console.Warning(L"This may take a while (but only once). Scanning compressed file to generate a quick access index...");

	Access *index;
	FILE* infile = PX_fopen_rb(m_filename);
	int len = build_index(infile, GZFILE_SPAN_DEFAULT, &index);
	printf("\n"); // build_index prints progress without \n's
	fclose(infile);

	if (len >= 0) {
		m_pIndex = index;
		WriteIndexToFile((Access*)m_pIndex, indexfile);
	} else {
		Console.Error(L"ERROR (%d): index could not be generated for file '%s'", len, WX_STR(m_filename));
		free_index(index);
		InitZstates();
		return false;
	}

	InitZstates();
	return true;
}
Esempio n. 2
0
/*
 * Entry point: forward propagation of sentences, one per line.
 *
 * With no command line arguments the usage text is printed and the program
 * exits with 0. Otherwise -layer_size, -vocab_size and -window are read, the
 * network buffers (syn0, syn1, neu1, expTable) are allocated and loaded, and
 * the vocabulary is built from its files.
 *
 * The files "sentences", "synonyms" and "indices" are then walked line by
 * line. For every replacement word on the current "synonyms" line, the word
 * at the target index of the sentence is replaced, ForwardPropagate() is run,
 * and "<word> <probability>" is written to the file "wordprobs".
 *
 * Returns 0 on success (including the usage path), 1 when a buffer cannot be
 * allocated or one of the four files cannot be opened.
 */
int main(int argc, char **argv) {
    int i, k = 0; //counters
    if(argc == 1) { //printing instructions
        printf("\n");
        printf("Forward propagation of sentences in a file delimited by \\n\n\n");
        printf("Parameters:\n");
        printf("\tValue for the vocabulary size that resulted from training (first number in the output file of word2vec):\n");
        printf("\t\t-vocab_size <int>\n");
        printf("\tValue for the layer size used in training (second number in the output file of word2vec):\n");
        printf("\t\t-layer_size <int>\n");
        printf("\tValue for the window size:\n");
        printf("\t\t-window <int>\n\n");
        return 0;
    } //reading command line arguments
    if ((i = ArgPos((char *)"-layer_size", argc, argv)) > 0) layer1_size = atoi(argv[i + 1]);
    if ((i = ArgPos((char *)"-vocab_size", argc, argv)) > 0) vocab_size = atoi(argv[i + 1]);
    if ((i = ArgPos((char *)"-window", argc, argv)) > 0) window = atoi(argv[i + 1]);

    // allocating memory to store the network elements
    syn0 = (real *)calloc(layer1_size*vocab_size,sizeof(real));
    syn1 = (real *)calloc(layer1_size*vocab_size,sizeof(real));
    neu1 = (real *)calloc(layer1_size,sizeof(real));

    index_buff = (char *)calloc(MAX_INDEX_BUFF_SIZE,sizeof(char));
    if (syn0 == NULL || syn1 == NULL || neu1 == NULL || index_buff == NULL) {
        // Fail early instead of letting the readers below write through NULL.
        fprintf(stderr, "ERROR: could not allocate memory for the network\n");
        return 1;
    }
    // reading the network from file
    read_syn0();
    read_syn1();

    expTable = (real *)malloc((EXP_TABLE_SIZE + 1) * sizeof(real)); //allocating memory for expTable
    if (expTable == NULL) {
        fprintf(stderr, "ERROR: could not allocate memory for the exp table\n");
        return 1;
    }
    for (i = 0; i < EXP_TABLE_SIZE; i++) {
        expTable[i] = exp((i / (real)EXP_TABLE_SIZE * 2 - 1) * MAX_EXP); // Precompute the exp() table in the same way as in word2vec
        expTable[i] = expTable[i] / (expTable[i] + 1);                   // Precompute f(x) = x / (x + 1)
    }
    //building the vocabulary and the vocabulary hash from the files it was stored in
    BuildVocabFromFile();
    BuildVocabHashFromFile();

    int length = 0; //word length of sentence variable
    int syno_length = 0; //how many synonyms/replacements
    long long * sen; //sentence variable where words are represented as vocabulary indices
    long long * sen_temp; //temporary sentence variable where words are represented as vocabulary indices
    sen_temp = (long long *)calloc(MAX_SENTENCE_LENGTH,sizeof(long long)); //allocating memory for sen_temp
    if (sen_temp == NULL) {
        fprintf(stderr, "ERROR: could not allocate memory for the sentence buffer\n");
        return 1;
    }
    long long * synonym; //replacement word (in vocabulary index form)
    long double prob = 0; //probability variable
    long long ptr = 0; //offset of the current line in the sentences file
    long long syno_ptr = 0; //offset of the current line in the synonyms/replacements file


    FILE *sentfile = fopen("sentences","r");
    FILE *indices = fopen("indices","r");
    FILE *synfile = fopen("synonyms","r");
    FILE *fo = fopen("wordprobs","w");
    if (sentfile == NULL || indices == NULL || synfile == NULL || fo == NULL) {
        // Every later step dereferences these streams, so bail out with a message.
        fprintf(stderr, "ERROR: could not open \"sentences\", \"indices\" or \"synonyms\" for reading, or \"wordprobs\" for writing\n");
        if (sentfile != NULL) fclose(sentfile);
        if (indices != NULL) fclose(indices);
        if (synfile != NULL) fclose(synfile);
        if (fo != NULL) fclose(fo);
        return 1;
    }
    int lines = 0;
    char line[MAX_SENTENCE_LENGTH]; // buffer to store current sentence
    char synline[MAX_SENTENCE_LENGTH]; // buffer to store synonyms


    lines = Lines(sentfile); // how many lines in the sentences file, which is used as the outer loop delimiter
    //(this can be done) since all the files "sentences", "synonyms" and "indices" have the same number of lines delimited by "\n"
    rewind(sentfile);
    rewind(synfile);

    for(i = 0; i<lines; i++) { //outer loop iterating through "sentences", "synonyms" and "indices" line by line

        // read sentence
        ptr = ftell(sentfile); // store beginning of line
        if (readLine(sentfile,line) < 0) break;
        length = LineWordCount(line);
        //printf("sent words %d\n",length);

        // read word replacements
        syno_ptr = ftell(synfile); // store beginning of line
        if (readLine(synfile,synline) < 0) break;
        syno_length = LineWordCount(synline);
        printf("synline %s\n",synline);

        fseek(sentfile,ptr,SEEK_SET); // move the pointer back to the beginning of the line
        sen = FileToSen(length,sentfile); //sen is an array of longs with the words of the sentence in a vocabulary index format

        fseek(synfile,syno_ptr,SEEK_SET);
        synonym = FileToSen(syno_length,synfile); //synonym is an array of longs with the replacements/synonyms from the "synonyms" file in vocabulary index format
        // NOTE(review): if FileToSen returns freshly allocated memory, sen and synonym
        // are never released in this loop - confirm ownership against FileToSen.

        fseek(sentfile,1,SEEK_CUR); // added to get past newline
        fseek(synfile,1,SEEK_CUR);

        ReadIndexFromFile(indices); //reads the index and puts it in the char array "index_buff"
        target_index = GetIndex(); //returns a numerical value from what is in the char array "index_buff"
        if (target_index < 0 || target_index >= MAX_SENTENCE_LENGTH) {
            // sen_temp holds MAX_SENTENCE_LENGTH entries; writing outside it would corrupt memory.
            fprintf(stderr, "ERROR: target index out of range on line %d, line skipped\n", i + 1);
            continue;
        }
        for(k=0; k<syno_length; k++) { //repeats forward propagation for each synonym in the line
            // NOTE(review): this copies MAX_SENTENCE_LENGTH entries out of sen; that is only
            // safe if FileToSen returns a buffer at least that large - confirm.
            memcpy(sen_temp,sen,MAX_SENTENCE_LENGTH*sizeof(long long)); //copying the sentence into sen_temp where synonyms will be changed
            sen_temp[target_index] = synonym[k]; //replacing the target word with a synonym/replacement
            prob = ForwardPropagate(length,sen_temp); //doing forward propagation to get the probability
            //prob = prob * 100000; // multiplying the probability by 100000 or taking the negative log is done in this line

            fprintf(fo,"%s %Lf\n",vocab[synonym[k]].word,prob); // write the replacement word and its probability
        }
    }

    fclose(fo);
    fclose(sentfile);
    fclose(synfile);
    fclose(indices);

    return 0;
}