//reads a subset of database determined by memory capacity into a DoublyLinkedList //subsequently sorts the list, iterates through it to merge entries with duplicate primary fields separated by delim //and concatenate secondary fields using conn as the separator //and exports it into a temp file //returns the count of temporary files int splitDatabase(string &database, const char delim, const char conn) { ifstream fin(database); ofstream fout; int tempFileCount = 0; for (char buffer[LINE_BUFFER_SIZE]; fin.getline(buffer,LINE_BUFFER_SIZE,'\n');) { static int charsRead; static string tempFileName = string("temp") + to_string(tempFileCount++) + ".db"; static string lineRead = ""; static DoublyLinkedList<string> lines; lineRead += buffer; charsRead += fin.gcount(); //if \n was not found by getline if(fin.fail()) { fin.clear(); } else { lines.add(lineRead); lineRead = ""; if(charsRead > CHARS_ALLOWED_IN_MEMORY || fin.eof()) { fout.open(tempFileName); lines.sort(); for (DoublyLinkedList<string>::iterator iter = lines.begin(), jter = iter.next(); !iter.isNull(); iter = jter, jter = iter.next()) { //if jter is not a null node //and the first field of iter (iter[0] -> iter.find(delim)) is equal to the first field of jter //append the second field of jter to the second field of iter separated by conn //repeat until the first fields differ or jter goes out of the bounds of the list while(!jter.isNull() && (*iter).substr(0,(*iter).find(delim)) == (*jter).substr(0,(*jter).find(delim))) { *iter += conn + (*jter).substr((*jter).find(delim)+1); ++jter; } //store iter in the current temporary database file fout << *iter << endl; } fout.close(); tempFileName = string("temp") + to_string(tempFileCount++) + ".db"; lines.clearList(); charsRead = 0; } } } fin.close(); return tempFileCount-1; }
int main(int argc, char const *argv[]) { DoublyLinkedList<FileInfo> d; FileInfo test; test.fileName[0] = '.'; test.fileName[1] = '_'; test.fileName[2] = '.'; test.fileName[3] = 'T'; test.fileName[4] = 'r'; test.fileName[5] = 'a'; test.fileName[6] = 's'; test.fileName[7] = 'h'; test.fileName[8] = 'e'; test.fileName[9] = 's'; test.fileName[10] = '\0'; test.index = 96; d.add(test); FileInfo test2; test2.fileName[0] = '.'; test2.fileName[1] = 'T'; test2.fileName[2] = 'r'; test2.fileName[3] = 'a'; test2.fileName[4] = 's'; test2.fileName[5] = 'h'; test2.fileName[6] = 'e'; test2.fileName[7] = 's'; test2.fileName[8] = '\0'; test2.index = 192; d.add(test2); FileInfo test3; test3.fileName[0] = '.'; test3.fileName[1] = 'S'; test3.fileName[2] = 'p'; test3.fileName[3] = 'o'; test3.fileName[4] = 't'; test3.fileName[5] = 'l'; test3.fileName[6] = 'i'; test3.fileName[7] = 'g'; test3.fileName[8] = 'h'; test3.fileName[9] = 't'; test3.fileName[10] = '-'; test3.fileName[11] = 'V'; test3.fileName[12] = '1'; test3.fileName[13] = '0'; test3.fileName[14] = '0'; test3.fileName[15] = '\0'; test3.index = 288; d.add(test3); FileInfo test4; test4.fileName[0] = '.'; test4.fileName[1] = 'f'; test4.fileName[2] = 's'; test4.fileName[3] = 'e'; test4.fileName[4] = 'v'; test4.fileName[5] = 'e'; test4.fileName[6] = 'n'; test4.fileName[7] = 't'; test4.fileName[8] = 's'; test4.fileName[9] = 'd'; test4.fileName[10] = '\0'; test4.index = 352; d.add(test4); FileInfo test5; test5.fileName[0] = 'L'; test5.fileName[1] = 'O'; test5.fileName[2] = 'S'; test5.fileName[3] = 'T'; test5.fileName[4] = '.'; test5.fileName[5] = 'D'; test5.fileName[6] = 'I'; test5.fileName[7] = 'R'; test5.fileName[8] = '\0'; test5.index = 384; d.add(test5); FileInfo test6; test6.fileName[0] = '.'; test6.fileName[1] = 'a'; test6.fileName[2] = 'n'; test6.fileName[3] = 'd'; test6.fileName[4] = 'r'; test6.fileName[5] = 'o'; test6.fileName[6] = 'i'; test6.fileName[7] = 'd'; test6.fileName[8] = '_'; test6.fileName[9] = 's'; test6.fileName[10] = 'e'; test6.fileName[11] = 'c'; test6.fileName[12] = 'u'; test6.fileName[13] = 'r'; test6.fileName[14] = 'e'; test6.fileName[15] = '\0'; test6.index = 480; d.add(test6); FileInfo test7; test7.fileName[0] = 'A'; test7.fileName[1] = 'n'; test7.fileName[2] = 'd'; test7.fileName[3] = 'r'; test7.fileName[4] = 'o'; test7.fileName[5] = 'i'; test7.fileName[6] = 'd'; test7.fileName[7] = '\0'; test7.index = 544; d.add(test7); FileInfo test8; test8.fileName[0] = 'S'; test8.fileName[1] = 'i'; test8.fileName[2] = 'z'; test8.fileName[3] = 'e'; test8.fileName[4] = 'T'; test8.fileName[5] = 'e'; test8.fileName[6] = 's'; test8.fileName[7] = 't'; test8.fileName[8] = '.'; test8.fileName[9] = 't'; test8.fileName[10] = 'x'; test8.fileName[11] = 't'; test8.fileName[12] = '\0'; test8.index = 608; d.add(test8); FileInfo test9; test9.fileName[0] = 'L'; test9.fileName[1] = 'G'; test9.fileName[2] = 'B'; test9.fileName[3] = 'a'; test9.fileName[4] = 'c'; test9.fileName[5] = 'k'; test9.fileName[6] = 'u'; test9.fileName[7] = 'p'; test9.fileName[8] = '\0'; test9.index = 672; d.add(test9); FileInfo test10; test10.fileName[0] = 'M'; test10.fileName[1] = 'u'; test10.fileName[2] = 's'; test10.fileName[3] = 'i'; test10.fileName[4] = 'c'; test10.fileName[5] = '\0'; test10.index = 736; d.add(test10); // ._.Trashes - 96 // .Trashes - 192 // .Spotlight-V100 - 288 // .fseventsd - 352 // LOST.DIR - 384 // .android_secure - 480 // Android - 544 // SizeTest.txt - 608 // LGBackup - 672 // Music - 736 // d.printList(); for(int i=0; i<d.getSize(); i++) { std::cout << i << " - "; std::cout << (d.getAt(i)->fileName); std::cout << (" - "); std::cout << (d.getAt(i)->index) << std::endl; } std::cout << std::endl; d.sort(); d.printList(); for(int i=0; i<d.getSize(); i++) { std::cout << i << " - "; std::cout << (d.getAt(i)->fileName); std::cout << (" - "); std::cout << (d.getAt(i)->index) << std::endl; } return 0; }