void findDuplicate(std::string infile, std::string outfile, bool debug=false) { std::vector<record> records; readRecords(infile, records, debug); sort(records, debug); duplicate(outfile, records, debug); }
bool TFastaFile::ReadFastaFile(char *fasta_filename, vector<Engine::SequenceManager::Sequence> &seq_list) { if( fasta_filename == NULL ) { seq_list.clear() ; std::cerr<<"Invalid file name" ; return false; } string fn = string(fasta_filename); readRecords(fn, seq_list, true); return true ; }
int main(int argc, char** argv) { if(argc < 4) { fprintf(stderr, "Usage: ./sHash search_2011 hash pagesize\n"); return 1; } // program arguments char* search_file = argv[1]; char* hash_file = argv[2]; int pagesize = atoi(argv[3]); if(pagesize <= 0) { fprintf(stderr, "Pagesize must be greater than 0\n"); return 1; } // open search key / hash files FILE *sf, *hf; if(!openFile(search_file, &sf, "r")) { fprintf(stderr, "Unable to open %s for reading\n", search_file); return 1; } if(!openFile(hash_file, &hf, "rb")) { fprintf(stderr, "Unable to open %s for reading\n", hash_file); return 1; } // read in header data unsigned char temp[sizeof(int)]; int pages; int* record_count; fread(temp, sizeof(int), 1, hf); // number of pages pages = hexToInt(temp, sizeof(int)); // number of records in each page record_count = malloc(pages * sizeof(int)); if(record_count == NULL) { fprintf(stderr, "memory allocation failed\n"); exit(1); } for(int i = 0; i < pages; i++) { fread(temp, sizeof(int), 1, hf); record_count[i] = hexToInt(temp, sizeof(int)); } int hs = sizeof(int) * (pages+1); unsigned int find_id, found, hash_id; char line[MAX_LINE + 1]; Record** records = malloc(pagesize * sizeof(Record*)); if(records == NULL) { fprintf(stderr, "memory allocation failed\n"); exit(1); } for(int i = 0; i < pagesize; i++) { records[i] = calloc(1, sizeof(Record)); if(records[i] == NULL) { fprintf(stderr, "memory allocation failed\n"); exit(1); } } hrtime_t start, end; start = gethrtime(); while(fgets(line, MAX_LINE + 1, sf) != NULL) { find_id = atoi(line); hash_id = find_id % pages; found = 0; // seek to the page given by the hash id fseek(hf, hs + (RECORD_LEN * hash_id * pagesize), SEEK_SET); // read in records from page readRecords(hf, record_count[hash_id], records); // check for an id match. if found, print record. for(int i = 0; i < record_count[hash_id]; i++) { if(records[i] && find_id == records[i]->id) { printf("%s,%d,%d,%d,%s\n", records[i]->name, records[i]->race, records[i]->class, records[i]->id, records[i]->guild); found = 1; break; } } if(!found) fprintf(stderr, "%d: No match\n", find_id); }