예제 #1
0
/*
 * Emit a finished hash file on standard output.
 *
 * Prints the hash table statistics to stderr, writes the hash file to
 * stdout (switching stdout to binary mode first on Windows) and then
 * destroys the hash file, so 'hf' must not be used after this call.
 *
 * hf  - hash file to report on, save and destroy.
 * opt - options; a non-zero prepend_mode selects HASHFILE_PREPEND when
 *       saving, otherwise no flags are passed.
 */
void save_hash(HashFile *hf, options_t *opt) {
    int save_flags = 0;

    if (opt->prepend_mode)
        save_flags = HASHFILE_PREPEND;

    HashTableStats(hf->h, stderr);

#ifdef _WIN32
    /* The hash file is binary data; stop stdout translating newlines. */
    _setmode(_fileno(stdout), _O_BINARY);
#endif

    HashFileSave(hf, stdout, save_flags);
    HashFileDestroy(hf);
}
예제 #2
0
/*
 * Hash table test driver.
 *
 * Usage: prog wordfile [tablesize]
 *   wordfile  - file of words to load, or "-" to read from stdin
 *   tablesize - optional positive number of slots (default 7919)
 *
 * Every non-empty word is inserted into the table and immediately looked
 * up again; afterwards table statistics are printed and a handful of
 * sentinel strings are checked to be absent.
 *
 * Returns 0 on success. Exits/returns with status 1 on a bad command line
 * or when the word file cannot be opened.
 */
int main(int argc, char *argv[])
{
    char *fname = NULL; // name of dictionary file
    FILE *wordf;        // handle for dictionary file
    Item word;          // current word from file
    int size = 7919;    // default size of hash table
    HashTable htab;     // the hash table
    int nwords = 0;     // # words read and stored
    int nfound = 0;     // # words found during search tests

    // set up parameters
    switch (argc) {
    case 2:
        fname = argv[1];
        break;
    case 3: {
        char *end;
        long requested = strtol(argv[2], &end, 10);

        fname = argv[1];
        size = (int)requested;
        // Reject non-numeric text, trailing junk, non-positive sizes and
        // values that do not survive the narrowing to int.
        if (end == argv[2] || *end != '\0' ||
            requested <= 0 || (long)size != requested) {
            fprintf(stderr, "Invalid hash table size: %s\n", argv[2]);
            usage(argv[0]);
            return 1;
        }
        break;
    }
    default:
        usage(argv[0]);
        // Only reached if usage() returns; never continue without a file name.
        return 1;
    }

    // access the word file
    if (eq(fname, "-")) {
        wordf = stdin;
        printf("Reading words from stdin\n");
    }
    else {
        wordf = fopen(fname, "r");
        if (wordf == NULL) {
            printf("Can't open %s\n", fname);
            exit(1);
        }
        printf("Reading words from %s\n", fname);
    }

    // build hash table, containing all words from file
    htab = newHashTable(size);
    while ((word = ItemGet(wordf)) != NULL) {
        if (eq(word, "")) {
            // skip empty items
            dropItem(word);
            continue;
        }
        HashTableInsert(htab, word);
        nwords++;
        // every word just inserted must be findable again
        if (HashTableSearch(htab, word) != NULL)
            nfound++;
        dropItem(word);
    }

    // examine hash table
    HashTableStats(htab);

    // tests
    // warning: we are assuming that "!aaaaaa!" etc.
    // do not occur in the input; this is not guaranteed
    assert(nfound == nwords);
    assert(HashTableSearch(htab, "!aaaaaa!") == NULL);
    assert(HashTableSearch(htab, "!xxxxxx!") == NULL);
    assert(HashTableSearch(htab, "!yyyyyy!") == NULL);
    assert(HashTableSearch(htab, "!zzzzzz!") == NULL);
    printf("Testing completed OK\n");

    // clean up
    fclose(wordf);
    dropHashTable(htab);
    return 0;
}
예제 #3
0
int main(int argc, char **argv) {
    HashFile *hf;
    sff_common_header *ch;
    sff_read_header *rh;
    int i, dot, arg;
    char *sff;
    char hdr[31];
    uint64_t index_offset = 0;
    uint32_t index_size, index_skipped;
    FILE *fp, *fpout = NULL;
    int copy_archive = 1;
    

    /* process command line arguments of the form -arg */
    for (argc--, argv++; argc > 0; argc--, argv++) {
	if (**argv != '-' || strcmp(*argv, "--") == 0)
	    break;

	if (strcmp(*argv, "-o") == 0 && argc > 1) {
	    if (NULL == (fpout = fopen(argv[1], "wb+"))) {
		perror(argv[1]);
		return 1;
	    }
	    argv++;
	    argc--;

	} else if (strcmp(*argv, "-t") == 0) {
	    copy_archive = 0;

	} else if (**argv == '-') {
	    usage();
	}

    }

    if (argc < 1)
	usage();

    if (copy_archive == 0 && argc != 1) {
	fprintf(stderr, "-t option only supported with a single sff argument\n");
	return 1;
    }

    /* Create the hash table */
    hf = HashFileCreate(0, HASH_DYNAMIC_SIZE);
    hf->nheaders = 0;
    hf->headers = NULL;

    for (arg = 0; arg < argc; arg++) {
	/* open (and read) the entire sff file */
	sff = argv[arg];

	printf("Indexing %s:\n", sff);
	if (fpout) {
	    if (NULL == (fp = fopen(sff, "rb"))) {
		perror(sff);
		return 1;
	    }
	} else { 
	    if (NULL == (fp = fopen(sff, "rb+"))) {
		perror(sff);
		return 1;
	    }
	}

	/* Read the common header */
	ch = fread_sff_common_header(fp);

	if (ch->index_len && !fpout) {
	    fprintf(stderr, "Archive already contains index.\nReplacing the"
		    " index requires the \"-o outfile\" option.\n");
	    return 1;
	}

	/* Add the SFF common header as a hash file-header */
	hf->nheaders++;
	hf->headers = (HashFileSection *)realloc(hf->headers, hf->nheaders *
						 sizeof(*hf->headers));
	hf->headers[hf->nheaders-1].pos = 0;
	hf->headers[hf->nheaders-1].size = ch->header_len;
	hf->headers[hf->nheaders-1].cached_data = NULL;

	/* Read the index items, adding to the hash */
	index_skipped = 0;
	dot = 0;
	printf("                                                                       |\r|");
	for (i = 0; i < ch->nreads; i++) {
	    int dlen;
	    uint32_t offset;
	    HashData hd;
	    HashFileItem *hfi;
	    
	    if (i >= dot * (ch->nreads/69)) {
		putchar('.');
		fflush(stdout);
		dot++;
	    }

	    /* Skip old index if present */
	    offset = ftell(fp);
	    if (offset == ch->index_offset) {
		fseek(fp, ch->index_len, SEEK_CUR);
		index_skipped = ch->index_len;
		continue;
	    }

	    hfi = (HashFileItem *)calloc(1, sizeof(*hfi));
	    rh = fread_sff_read_header(fp);
	    dlen = (2*ch->flow_len + 3*rh->nbases + 7) & ~7;
	    fseek(fp, dlen, SEEK_CUR);
	
	    hfi->header = hf->nheaders;
	    hfi->footer = 0;
	    hfi->pos = offset - index_skipped;
	    hfi->size = (ftell(fp) - index_skipped) - hfi->pos;
	    hd.p = hfi;

	    HashTableAdd(hf->h, rh->name, rh->name_len, hd, NULL);
	}
	printf("\n");
	HashTableStats(hf->h, stdout);

	index_offset = ftell(fp) - index_skipped;

	/* Copy the archive if needed, minus the old index */
	if (fpout && copy_archive) {
	    char block[8192];
	    size_t len;
	    uint64_t pos = 0;

	    printf("\nCopying archive\n");

	    fseek(fp, 0, SEEK_SET);
	    while (len = fread(block, 1, 8192, fp)) {
		/* Skip previous index */
		if (pos < ch->index_offset && pos+len > ch->index_offset) {
		    len = ch->index_offset - pos;
		    fseek(fp, ch->index_offset + ch->index_len, SEEK_SET);
		}
		if (len && len != fwrite(block, 1, len, fpout)) {
		    fprintf(stderr, "Failed to output new archive\n");
		    return 1;
		}
		pos += len;
	    }
	}
	
	if (!fpout) {
	    /* Save the hash */
	    printf("Saving index\n");
	    fseek(fp, 0, SEEK_END);
	    index_size = HashFileSave(hf, fp, 0);
	    HashFileDestroy(hf);

	    /* Update the common header */
	    fseek(fp, 0, SEEK_SET);
	    fread(hdr, 1, 31, fp);
	    *(uint64_t *)(hdr+8)  = be_int8(index_offset);
	    *(uint32_t *)(hdr+16) = be_int4(index_size);
	    fseek(fp, 0, SEEK_SET);
	    fwrite(hdr, 1, 31, fp);
	}

	fclose(fp);
    }

    if (fpout) {
	/* Save the hash */
	printf("Saving index\n");

	if (!copy_archive) {
	    hf->archive = strdup(argv[0]);
	    index_offset = 0;
	}

	fseek(fpout, 0, SEEK_END);
	index_size = HashFileSave(hf, fpout, 0);
	HashFileDestroy(hf);

	/* Update the common header to indicate index location */
	if (copy_archive) {
	    fseek(fpout, 0, SEEK_SET);
	    fread(hdr, 1, 31, fpout);
	    *(uint64_t *)(hdr+8)  = be_int8(index_offset);
	    *(uint32_t *)(hdr+16) = be_int4(index_size);
	    fseek(fpout, 0, SEEK_SET);
	    fwrite(hdr, 1, 31, fpout);
	}
	fclose(fpout);
    }
    
    return 0;
}