Esempio n. 1
0
/**
 * LZW-style compression of inputFile into outputFile.
 *
 * Reads inputFile byte by byte with getc() and emits dictionary codes through
 * writeBinary(). Codes below 256 stand for single bytes; new prefix+byte
 * strings receive codes from 256 upwards until dictionarySize is reached,
 * after which no further entries are added.
 *
 * An empty input produces no output and returns before the dictionary is
 * created.
 *
 * @param inputFile  stream to read the uncompressed bytes from
 * @param outputFile stream that receives the encoded output
 */
void compress(FILE *inputFile, FILE *outputFile) {
    int prefix = getc(inputFile);
    if (prefix == EOF) {
        return; // nothing to encode
    }
    int character;
    int index;
    int nextCode = 256; // first code available for a multi-byte string

    dictionaryInit();

    // getc() returns an int, so compare against EOF directly; no unsigned
    // cast is needed (it only forced a mixed signed/unsigned comparison).
    while ((character = getc(inputFile)) != EOF) {
        index = dictionaryLookup(prefix, character);
        if (index != -1) {
            // prefix+character is already known: keep extending the match
            prefix = index;
        } else {
            // longest known string ends here: emit its code
            writeBinary(outputFile, prefix);

            // remember prefix+character while there is room for new codes
            if (nextCode < dictionarySize) {
                dictionaryAdd(prefix, character, nextCode++);
            }

            // restart matching from the byte that broke the match
            prefix = character;
        }
    }
    // emit the code of the string still being matched when input ended
    writeBinary(outputFile, prefix);

    // NOTE(review): leftover/leftoverBits are defined outside this function;
    // presumably writeBinary() leaves a partial code pending there, which is
    // flushed here as one final byte - confirm against writeBinary().
    if (leftover > 0) {
        fputc(leftoverBits << 4, outputFile);
    }

    dictionaryDestroy();
}
Esempio n. 2
0
/**
 * Loads dictionary records from a binary file and adds them to dict.
 *
 * Layout as read here: one int holding the record count, then for each record
 * an int byte length followed by that many bytes (no terminator is stored).
 * The ints are read with fread() in the host's native representation.
 *
 * When the file announces more records than dict->maxSize, dict->strings and
 * dict->index are grown to hold that many entries before loading starts.
 *
 * @param dict     dictionary that receives the records
 * @param filename path of the dictionary file
 * @return 0 on success; DIC_ERROR if the file cannot be opened, seeked or is
 *         truncated/corrupt; MEMORY_ERROR if an allocation fails; or the
 *         negative value returned by dictionaryAdd() when a record is rejected
 */
int dictionaryLoadFromFile(Dictionary* dict, char* filename)
{
	FILE* inputFile;
	int count, len, index, retn = 0;
	char* s = NULL;
	void* grown;
	char extra;
	checkPointer(dict); checkPointer(filename);
	/* Open the file only after the arguments have been checked. */
	inputFile = fopen(filename, "rb");
	if (!inputFile)
	{
		fprintf(stderr, "Error[DIC]: could not open file \"%s\".\n", filename);
		return DIC_ERROR;
	}
	if (fseek(inputFile, 0, SEEK_SET))
	{
		fprintf(stderr, "Error[DIC]: could not seek file \"%s\".\n", filename);
		retn = DIC_ERROR;
		goto ldexit;
	}
	if (sizeof(int) != fread(&count, 1, sizeof(int), inputFile))
	{
		fprintf(stderr, "Error[DIC]: broken dictionary file \"%s\".\n", filename);
		retn = DIC_ERROR;
		goto ldexit;
	}
	if (count > dict->maxSize)
	{
		/*
		 * Grow through a temporary so a failed realloc() neither leaks nor
		 * loses the existing array; maxSize is raised only once both arrays
		 * really have the new capacity.
		 */
		grown = realloc(dict->strings, (size_t)count * sizeof(Word*));
		if (!grown)
		{
			fprintf(stderr, "Error[DIC]: not enough memory.\n");
			retn = MEMORY_ERROR;
			goto ldexit;
		}
		dict->strings = grown;
		grown = realloc(dict->index, (size_t)count * sizeof(char*));
		if (!grown)
		{
			fprintf(stderr, "Error[DIC]: not enough memory.\n");
			retn = MEMORY_ERROR;
			goto ldexit;
		}
		dict->index = grown;
		dict->maxSize = count;
	}
	for (index = 0; index < count; index++)
	{
		/* A negative length can only come from a damaged file. */
		if ((sizeof(int) != fread(&len, 1, sizeof(int), inputFile)) || (len < 0))
		{
			fprintf(stderr, "Error[DIC]: broken dictionary file \"%s\".\n", filename);
			retn = DIC_ERROR;
			goto ldexit;
		}
		s = malloc((size_t)len + 1);
		if (!s)
		{
			fprintf(stderr, "Error[DIC]: not enough memory.\n");
			retn = MEMORY_ERROR;
			goto ldexit;
		}
		if ((size_t)len != fread(s, 1, (size_t)len, inputFile))
		{
			fprintf(stderr, "Error[DIC]: broken dictionary file \"%s\".\n", filename);
			retn = DIC_ERROR;
			goto ldexit;
		}
		s[len] = 0;
		if ((retn = dictionaryAdd(dict, s)) < 0)
		{
			fprintf(stderr, "Error[DIC]: could not add a record\n");
			goto ldexit;
		}
		/* dictionaryAdd() may return a non-negative value; success is reported as 0. */
		retn = 0;
		free(s);
		s = NULL;
	}
	/* Any byte still readable after the last record is unexpected. */
	if (fread(&extra, 1, 1, inputFile) != 0)
	{
		fprintf(stderr, "Warning[DIC]: some extra data at the end of the dictionary file \"%s\".\n", filename);
	}
ldexit:
	/* Releases the record buffer when an error path left the loop early. */
	free(s);
	if (fclose(inputFile) == EOF)
	{
		fprintf(stderr, "Warning[DIC]: closing dictionary file \"%s\" failed.\n", filename);
	}
	return retn;
}
Esempio n. 3
0
/**
 * indexer main()
 *
 * With no arguments, or with a single "--help" / "-?" argument, prints usage
 * and exits. Otherwise it loads the existing search base (files.db, words.db,
 * pairs.db) when files.db exists, indexes every file named on the command
 * line, and saves the three databases again.
 *
 * A word is a run of non-whitespace characters. Runs that reach
 * MAX_WORD_LENGTH characters are cut at that length and the remainder of the
 * run is skipped. A file that is already present in the files dictionary has
 * its old pairs removed before it is indexed again.
 *
 * @param argc Command line argument count
 * @param argv command line parameters
 * @return EXIT_SUCCESS after printing help, 0 when indexing completes,
 *         BASE_ERROR if a dictionary cannot be initialized, INDEXER_ERROR if
 *         memory runs out or a record cannot be added
 * @todo file date comparison
 */
int main(int argc, char** argv)
{
	FILE* curFile = NULL;
	int curFileNumber;
	int filesIndexed = 0;
	Dictionary files, words;
	PairDictionary pairs;
	char* curWord = NULL;
	State state = SPACE;
	int curWordIndex = 0;
	int ch, da;
	int retn = 0;

	if ((argc == 1) || ((argc == 2) && (strcmp(argv[1], "--help") == 0)) || ((argc == 2) && (strcmp(argv[1], "-?") == 0)))
	{
		printf("Creates a database for the 'search' program.\n\n");
		printf("INDEXER [file1] [file2] [file3] ...\n");
		printf("INDEXER { [--help] | [-?] } \n\n");
		printf("\t--help, -?  Show help (default)\n");
		return EXIT_SUCCESS;
	}

	/* On a failed init, release whatever was initialized before it. */
	if (dictionaryInit(&files))
		return BASE_ERROR;
	if (dictionaryInit(&words))
	{
		dictionaryFinalize(&files);
		return BASE_ERROR;
	}
	if (pairDictionaryInit(&pairs))
	{
		dictionaryFinalize(&words);
		dictionaryFinalize(&files);
		return BASE_ERROR;
	}

	/* One spare byte so a full-length word can still be terminated. */
	curWord = malloc(MAX_WORD_LENGTH + 1);
	if (!curWord)
	{
		fprintf(stderr, "Error: not enough memory.\n");
		retn = INDEXER_ERROR;
		goto erexit;
	}

	/* The load functions return 0 on success. */
	if ((file_exists("files.db")) && !dictionaryLoadFromFile(&files, "files.db"))
	{
		if (!dictionaryLoadFromFile(&words, "words.db"))
		{
			if (pairDictionaryLoadFromFile(&pairs, "pairs.db"))
			{
				fprintf(stderr, "Warning: corrupt pairs table.\n");
				pairDictionaryClear(&pairs);
			}
		}
		else
		{
			fprintf(stderr, "Warning: corrupt words database.\n");
			dictionaryClear(&words);
		}
	}
	else
	{
		printf("Creating new search base.\n");
		dictionaryClear(&files);
	}

	for (curFileNumber = 1; curFileNumber < argc; curFileNumber++)
	{
		int curFileId;

		curFile = fopen(argv[curFileNumber], "r");
		if (!curFile)
		{
			printf("Warning: file '%s' does not exist.\n", argv[curFileNumber]);
			continue;
		}

		curFileId = dictionaryGetByString(&files, argv[curFileNumber]);
		if (curFileId != DIC_NO_RECORD)
		{
			/* Known file: drop its old pairs before re-indexing it. */
			pairDictionaryRemoveByFileId(&pairs, curFileId);
		}
		else
			curFileId = dictionaryAdd(&files, argv[curFileNumber]);
		if (curFileId < 0)
		{
			fprintf(stderr, "Error: could not add to the dictionary.\n");
			retn = INDEXER_ERROR;
			goto erexit;
		}

		curWordIndex = 0;
		state = SPACE;
		/*
		 * End of file is handled like whitespace so that the last word is
		 * flushed by the same code as every other word, including a word
		 * that was cut at MAX_WORD_LENGTH (state SKIP).
		 */
		do
		{
			ch = fgetc(curFile);
			if ((ch != EOF) && !isspace(ch))
			{
				if (state != SKIP)
				{
					state = WORD;
					curWord[curWordIndex++] = (char)ch;
					/* Buffer full: ignore the rest of this run. */
					if (curWordIndex == MAX_WORD_LENGTH)
						state = SKIP;
				}
			}
			else
			{
				if ((state == WORD) || (state == SKIP))
				{
					/* curWordIndex never exceeds MAX_WORD_LENGTH here. */
					curWord[curWordIndex] = 0;
					da = dictionaryAdd(&words, curWord);
					if (da < 0)
					{
						fprintf(stderr, "Error: could not add to the dictionary.\n");
						retn = INDEXER_ERROR;
						goto erexit;
					}
					if (pairDictionaryAdd(&pairs, curFileId, da) < 0)
					{
						fprintf(stderr, "Error: could not add to the pair dictionary.\n");
						retn = INDEXER_ERROR;
						goto erexit;
					}
					curWordIndex = 0;
				}
				state = SPACE;
			}
		} while (ch != EOF);

		filesIndexed++;
		fclose(curFile);
		curFile = NULL;
	}

	dictionarySaveToFile(&files, "files.db");
	dictionarySaveToFile(&words, "words.db");
	pairDictionarySaveToFile(&pairs, "pairs.db");
erexit:
	/* Still open only when an error interrupted the indexing of a file. */
	if (curFile)
		fclose(curFile);
	free(curWord);
	/* Report the counts before the dictionaries are finalized. */
	printf("Current search base: %i files, %i words, %i pairs. %i files indexed.\n", files.count, words.count, pairs.count, filesIndexed);
	dictionaryFinalize(&files);
	dictionaryFinalize(&words);
	pairDictionaryFinalize(&pairs);
	return retn;
}