void FindInProjectDlg::SearchThread::WriteResult(const MMapBuffer& buf, const wxFileName& filepath, vector<FileMatch>& matches) { if (matches.empty()) return; // Header const wxString path = filepath.GetFullPath(); const wxString format = (matches.size() == 1) ? _("<b>%s - %d match</b>") : _("<b>%s - %d matches</b>"); wxString output = wxString::Format(format, path.c_str(), matches.size()); output += wxT("<br><table cellspacing=0>"); unsigned int linecount = 1; const char* subject = buf.data(); const char* end = subject + buf.Length(); const char* linestart = subject; vector<FileMatch>::iterator m = matches.begin(); const char* matchstart = subject + m->start; // Count lines while (subject < end) { if (subject == matchstart) { // Write linenumber with link wxString line = wxString::Format(wxT("<tr><td bgcolor=#f6f6ef align=\"right\"><a href=\"txmt://open&url=file://%s&line=%d\">%d</a></td><td> "), path.c_str(), linecount, linecount); // Start of line line += wxString(linestart, wxConvUTF8, matchstart-linestart); // Match line += wxT("<i style=\"background-color: yellow\">"); const size_t match_len = m->end - m->start; line += wxString(matchstart, wxConvUTF8, match_len); line += wxT("</i>"); // End of line const char* const matchend = subject + match_len; const char* lineend = matchend; while (lineend < end && *lineend != '\n') ++lineend; line += wxString(matchend, wxConvUTF8, lineend - matchend); line += wxT("</td></tr>"); output += line; ++m; if (m == matches.end()) break; matchstart = buf.data() + m->start; } if (*subject == '\n') { ++linecount; linestart = subject+1; } ++subject; } output += wxT("</table><p>"); m_outputCrit.Enter(); m_output += output; m_outputCrit.Leave(); }
void FindInProjectDlg::SearchThread::DoSearch(const MMapBuffer& buf, const SearchInfo& si, vector<FileMatch>& matches) const { // Ignore binary files (we just check for zero bytes in the first // 100 bytes of the file) const wxFileOffset len = buf.Length(); const char* subject = buf.data(); const char* end_pos = buf.data() + wxMin(100,len); for (; subject < end_pos; ++subject) { if (*subject == '\0') return; } // Prepare vars to avoid lookups in loop const size_t last_char_pos = si.byte_len-1; const char lastChar = si.UTF8buffer[last_char_pos]; const char lastCharUpper = si.matchCase ? '\0' : si.UTF8bufferUpper[last_char_pos]; subject = buf.data() + last_char_pos; end_pos = buf.data() + len; while (subject < end_pos) { const char c = *subject; // Get candidate for last char if (c == lastChar || (!si.matchCase && c == lastCharUpper)) { // Match indiviual chars const char* byte_ptr = subject-1; const char* const first_byte_pos = subject - last_char_pos; unsigned int char_pos = last_char_pos-1; while (byte_ptr >= first_byte_pos) { const char c2 = *byte_ptr; if (c2 != si.UTF8buffer[char_pos]) { if (si.matchCase || c2 != si.UTF8bufferUpper[char_pos]) break; } --byte_ptr; --char_pos; } if (byte_ptr < first_byte_pos) { // We got a match const wxFileOffset matchStart = first_byte_pos - buf.data(); const FileMatch m = {0, 0, matchStart, matchStart + si.byte_len}; matches.push_back(m); subject += si.byte_len; continue; } } // If we don't have a match, see how far we can move char_pos subject += si.charmap[(unsigned char)c]; } }
void SearchThread::SearchDir(const wxString& path, const SearchInfo& si, ProjectInfoHandler& infoHandler) { MMapBuffer buf; wxFileName filepath; vector<FileMatch> matches; wxArrayString dirs; wxArrayString filenames; infoHandler.GetDirAndFileLists(path, dirs, filenames); for (size_t f = 0; f < filenames.size(); ++f) { if (!m_isSearching) return; m_outputCrit.Enter(); m_currentPath = path + filenames[f]; m_outputCrit.Leave(); filepath = m_currentPath; // Map the file to memory buf.Open(filepath); if (!buf.IsMapped()) { wxLogDebug(wxT(" Mapping failed!")); continue; } // Search the file DoSearch(buf, si, matches); if (matches.empty()) continue; // Show matches WriteResult(buf, filepath, matches); matches.clear(); } for (size_t d = 0; d < dirs.size(); ++d) { const wxString dirpath = path + dirs[d] + wxFILE_SEP_PATH; SearchDir(dirpath, si, infoHandler); } }
void BitmapBuffer::save() { //loadfromtemp(); static bool first = true; string filename = dir + "/BitmapBuffer"; MMapBuffer * buffer; //new MMapBuffer(filename.c_str(), 0); string predicateFile(filename); predicateFile.append("_predicate"); MMapBuffer * predicateBuffer =new MMapBuffer(predicateFile.c_str(), predicate_managers[0].size() * (sizeof(ID) + sizeof(size_t)) * 2); char* predicateWriter = predicateBuffer->get_address(); char* bufferWriter = NULL; map<ID, ChunkManager*>::const_iterator iter = predicate_managers[0].begin(); char* startPtr; size_t offset = 0; startPtr = iter->second->ptrs[0]; if(first == true) { buffer = new MMapBuffer(filename.c_str(), iter->second->meta->length[0]); //offset = buffer->get_offset(); //first = false; } else { //buffer = new MMapBuffer(filename.c_str(), iter->second->meta->length[0], true); //offset = buffer->get_offset(); } predicateWriter = predicateBuffer->get_address(); bufferWriter = buffer->get_address(); vector<size_t>::iterator pageNoIter = iter->second->usedPage[0].begin(), limit = iter->second->usedPage[0].end(); for(; pageNoIter != limit; pageNoIter++ ) { size_t pageNo = *pageNoIter; memcpy(bufferWriter, temp1->get_address() + pageNo * MemoryBuffer::pagesize, MemoryBuffer::pagesize); bufferWriter = bufferWriter + MemoryBuffer::pagesize; } *((ID*)predicateWriter) = iter->first; predicateWriter = predicateWriter + sizeof(ID); *((size_t*)predicateWriter) = offset; predicateWriter = predicateWriter + sizeof(size_t); offset = offset + iter->second->meta->length[0]; bufferWriter = buffer->resize(iter->second->meta->length[1]); char* startPos = bufferWriter + offset; pageNoIter = iter->second->usedPage[1].begin(); limit = iter->second->usedPage[1].end(); for(; pageNoIter != limit; pageNoIter++ ) { size_t pageNo = *pageNoIter; memcpy(startPos, temp2->get_address() + pageNo * MemoryBuffer::pagesize, MemoryBuffer::pagesize); startPos = startPos + MemoryBuffer::pagesize; } assert(iter->second->meta->length[1] == 
iter->second->usedPage[1].size() * MemoryBuffer::pagesize); offset = offset + iter->second->meta->length[1]; iter++; for(; iter != predicate_managers[0].end(); iter++) { bufferWriter = buffer->resize(iter->second->meta->length[0]); startPos = bufferWriter + offset; pageNoIter = iter->second->usedPage[0].begin(); limit = iter->second->usedPage[0].end(); for(; pageNoIter != limit; pageNoIter++) { size_t pageNo = *pageNoIter; memcpy(startPos, temp1->get_address() + pageNo * MemoryBuffer::pagesize, MemoryBuffer::pagesize); startPos = startPos + MemoryBuffer::pagesize; } //cout<<"used page count: "<<iter->second->usedPage[0].size()<<endl; //iter->second->meta->endPtr[0] = startPos + iter->second->meta->usedSpace[0]; //used to build index; *((ID*)predicateWriter) = iter->first; predicateWriter = predicateWriter + sizeof(ID); *((size_t*)predicateWriter) = offset; predicateWriter = predicateWriter + sizeof(size_t); offset += iter->second->meta->length[0]; assert(iter->second->usedPage[0].size() * MemoryBuffer::pagesize == iter->second->meta->length[0]); bufferWriter = buffer->resize(iter->second->meta->length[1]); startPos = bufferWriter + offset; //iter->second->meta->startPtr[1] = startPos; //used to build index; //iter->second->meta->endPtr[1] = startPos + iter->second->meta->usedSpace[1]; pageNoIter = iter->second->usedPage[1].begin(); limit = iter->second->usedPage[1].end(); for(; pageNoIter != limit; pageNoIter++) { size_t pageNo = *pageNoIter; memcpy(startPos, temp2->get_address() + pageNo * MemoryBuffer::pagesize, MemoryBuffer::pagesize); startPos = startPos + MemoryBuffer::pagesize; } offset += iter->second->meta->length[1]; assert(iter->second->usedPage[1].size() * MemoryBuffer::pagesize == iter->second->meta->length[1]); } buffer->flush(); temp1->discard(); temp2->discard(); iter = predicate_managers[1].begin(); for(; iter != predicate_managers[1].end(); iter++) { bufferWriter = buffer->resize(iter->second->meta->length[0]); startPos = bufferWriter + offset; 
pageNoIter = iter->second->usedPage[0].begin(); limit = iter->second->usedPage[0].end(); for(; pageNoIter != limit; pageNoIter++) { size_t pageNo = *pageNoIter; memcpy(startPos, temp3->get_address() + pageNo * MemoryBuffer::pagesize, MemoryBuffer::pagesize); startPos = startPos + MemoryBuffer::pagesize; } *((ID*)predicateWriter) = iter->first; predicateWriter = predicateWriter + sizeof(ID); *((size_t*)predicateWriter) = offset; predicateWriter = predicateWriter + sizeof(size_t); offset += iter->second->meta->length[0]; assert(iter->second->usedPage[0].size() * MemoryBuffer::pagesize == iter->second->meta->length[0]); bufferWriter = buffer->resize(iter->second->usedPage[1].size() * MemoryBuffer::pagesize); startPos = bufferWriter + offset; pageNoIter = iter->second->usedPage[1].begin(); limit = iter->second->usedPage[1].end(); for(; pageNoIter != limit; pageNoIter++) { size_t pageNo = *pageNoIter; memcpy(startPos, temp4->get_address() + pageNo * MemoryBuffer::pagesize, MemoryBuffer::pagesize); startPos = startPos + MemoryBuffer::pagesize; } offset += iter->second->meta->length[1]; assert(iter->second->usedPage[1].size() * MemoryBuffer::pagesize == iter->second->meta->length[1]); } buffer->flush(); predicateBuffer->flush(); predicateWriter = predicateBuffer->get_address(); int i = 0; ID id; for(iter = predicate_managers[0].begin(); iter != predicate_managers[0].end(); iter++, i++) { id = *((ID*)predicateWriter); assert(iter->first == id); predicateWriter = predicateWriter + sizeof(ID); offset = *((size_t*)predicateWriter); predicateWriter = predicateWriter + sizeof(size_t); char* base = buffer->get_address() + offset; iter->second->meta = (ChunkManagerMeta*)base; iter->second->meta->startPtr[0] = base + sizeof(ChunkManagerMeta); iter->second->meta->endPtr[0] = iter->second->meta->startPtr[0] + iter->second->meta->usedSpace[0]; iter->second->meta->startPtr[1] = base + iter->second->meta->length[0]; iter->second->meta->endPtr[1] = iter->second->meta->startPtr[1] + 
iter->second->meta->usedSpace[1]; //::printMeta(*(iter->second->meta)); } for(iter = predicate_managers[1].begin(); iter != predicate_managers[1].end(); iter++, i++) { id = *((ID*)predicateWriter); assert(iter->first == id); predicateWriter = predicateWriter + sizeof(ID); offset = *((size_t*)predicateWriter); predicateWriter = predicateWriter + sizeof(size_t); char* base = buffer->get_address() + offset; iter->second->meta = (ChunkManagerMeta*)base; iter->second->meta->startPtr[0] = base + sizeof(ChunkManagerMeta); iter->second->meta->endPtr[0] = iter->second->meta->startPtr[0] + iter->second->meta->usedSpace[0]; iter->second->meta->startPtr[1] = base + iter->second->meta->length[0]; iter->second->meta->endPtr[1] = iter->second->meta->startPtr[1] + iter->second->meta->usedSpace[1]; //::printMeta(*(iter->second->meta)); } temp3->discard(); temp4->discard(); //build index; MMapBuffer* bitmapIndex = NULL; #ifdef DEBUG cout<<"build hash index for subject"<<endl; #endif for ( map<ID,ChunkManager*>::iterator iter = predicate_managers[0].begin(); iter != predicate_managers[0].end(); iter++ ) { if ( iter->second != NULL ) { #ifdef DEBUG cout<<iter->first<<endl; #endif iter->second->buildChunkIndex(); iter->second->getChunkIndex(1)->save(bitmapIndex); iter->second->getChunkIndex(2)->save(bitmapIndex); } } #ifdef DEBUG cout<<"build hash index for object"<<endl; #endif for ( map<ID, ChunkManager*>::iterator iter = predicate_managers[1].begin(); iter != predicate_managers[1].end(); iter++ ) { if ( iter->second != NULL ) { #ifdef DEBUF cout<<iter->first<<endl; #endif iter->second->buildChunkIndex(); iter->second->getChunkIndex(1)->save(bitmapIndex); iter->second->getChunkIndex(2)->save(bitmapIndex); } } delete bitmapIndex; delete buffer; delete predicateBuffer; }
void SearchThread::DoSearch(const MMapBuffer& buf, const SearchInfo& si, vector<FileMatch>& matches) const { // Ignore binary files (we just check for zero bytes in the first // 100 bytes of the file) const wxFileOffset len = buf.Length(); const char* subject = buf.data(); const char* end_pos = buf.data() + wxMin(100,len); for (; subject < end_pos; ++subject) { if (*subject == '\0') return; } if (si.regex) { const int OVECCOUNT = 30; int ovector[OVECCOUNT]; int pos = 0; int rc = 0; while(1) { rc = pcre_exec( si.regex, // the compiled pattern NULL, // extra data - if we study the pattern buf.data(), // the subject string len, // the length of the subject pos, // start at offset in the subject PCRE_NOTEMPTY, // options ovector, // output vector for substring information OVECCOUNT); // number of elements in the output vector if (rc <= 0) break; const FileMatch m = {0, 0, ovector[0], ovector[1]}; matches.push_back(m); pos = ovector[1]; } } else { // Prepare vars to avoid lookups in loop const size_t last_char_pos = si.byte_len-1; const char lastChar = si.UTF8buffer[last_char_pos]; const char lastCharUpper = si.matchCase ? 
'\0' : si.UTF8bufferUpper[last_char_pos]; subject = buf.data() + last_char_pos; end_pos = buf.data() + len; while (subject < end_pos) { const char c = *subject; // Get candidate for last char if (c == lastChar || (!si.matchCase && c == lastCharUpper)) { // Match indiviual chars const char* byte_ptr = subject-1; const char* const first_byte_pos = subject - last_char_pos; unsigned int char_pos = last_char_pos-1; while (byte_ptr >= first_byte_pos) { const char c2 = *byte_ptr; if (c2 != si.UTF8buffer[char_pos]) { if (si.matchCase || c2 != si.UTF8bufferUpper[char_pos]) break; } --byte_ptr; --char_pos; } if (byte_ptr < first_byte_pos) { // We got a match const wxFileOffset matchStart = first_byte_pos - buf.data(); const FileMatch m = {0, 0, matchStart, matchStart + si.byte_len}; matches.push_back(m); subject += si.byte_len; continue; } } // If we don't have a match, see how far we can move char_pos subject += si.charmap[(unsigned char)c]; } } }
/// Calls free() on the head buffer, then on each of the first num_buffers
/// entries of elements (bounds-checked through at()).
~ParMMapBuffer() {
	head->free();

	size_t idx = 0;
	while (idx < num_buffers) {
		elements.at(idx)->free();
		++idx;
	}
}
/**
 * Load a repository from the image files stored under a directory.
 *
 * Maps "<path>/BitmapBuffer" together with its "_predicate" and "_index"
 * companions, loads the URI and predicate tables, loads the four statistics
 * buffers that share "<path>/statIndex", and wires up the query objects.
 *
 * @param path Directory containing the database files.
 * @return A newly allocated repository, or NULL when "<path>/BitmapBuffer"
 *         does not exist. The caller receives the raw pointer.
 *
 * NOTE(review): the MMapBuffer returned by MMapBuffer::create() for statIndex
 * is never stored or released here; its address is handed to the statistics
 * buffers, so it presumably has to stay mapped - confirm who owns it.
 */
TripleBitRepository* TripleBitRepository::create(const string path) {
	string filename = path + "/BitmapBuffer";

	// Check for the image before allocating anything, so that the early
	// return cannot leak a half-constructed repository.
	if (OSFile::fileExists(filename) == false) {
		// file does not exist, repository has not been built
		fprintf(stderr, "database files dose not exist!");
		return NULL;
	}

	TripleBitRepository* repo = new TripleBitRepository();

	// load the repository from image files;
	// load bitmap
#ifdef DEBUG
	cout<<filename.c_str()<<endl;
#endif
	repo->bitmapImage = new MMapBuffer(filename.c_str(), 0);

	string predicateFile(filename);
	predicateFile.append("_predicate");
	string indexFile(filename);
	indexFile.append("_index");
	repo->bitmapPredicateImage = new MMapBuffer(predicateFile.c_str(), 0);
	repo->bitmapIndexImage = new MMapBuffer(indexFile.c_str(), 0);
	repo->bitmapBuffer = BitmapBuffer::load(repo->bitmapImage, repo->bitmapIndexImage, repo->bitmapPredicateImage);

	repo->UriTable = URITable::load(path);
	repo->preTable = PredicateTable::load(path);

#ifdef DEBUG
	cout<<"total triple count: "<<repo->bitmapBuffer->getTripleCount()<<endl;
	cout<<"URITableSize: "<<repo->UriTable->getSize()<<endl;
	cout<<"predicateTableSize: "<<repo->preTable->getSize()<<endl;
#endif

	// load statistics buffers; all four are given the same statIndex mapping
	filename = path + "/statIndex";
	MMapBuffer* indexBufferFile = MMapBuffer::create(filename.c_str(), 0);
	char* indexBuffer = indexBufferFile->get_address();

	string statFilename = path + "/subject_statis";
	repo->subjectStat = OneConstantStatisticsBuffer::load(StatisticsBuffer::SUBJECT_STATIS, statFilename, indexBuffer);

	statFilename = path + "/object_statis";
	repo->objectStat = OneConstantStatisticsBuffer::load(StatisticsBuffer::OBJECT_STATIS, statFilename, indexBuffer);

	statFilename = path + "/subjectpredicate_statis";
	repo->subPredicateStat = TwoConstantStatisticsBuffer::load(StatisticsBuffer::SUBJECTPREDICATE_STATIS, statFilename, indexBuffer);

	statFilename = path + "/objectpredicate_statis";
	repo->objPredicateStat = TwoConstantStatisticsBuffer::load(StatisticsBuffer::OBJECTPREDICATE_STATIS, statFilename, indexBuffer);

#ifdef DEBUG
	cout<<"subject count: "<<((OneConstantStatisticsBuffer*)repo->subjectStat)->getEntityCount()<<endl;
	cout<<"object count: "<<((OneConstantStatisticsBuffer*)repo->objectStat)->getEntityCount()<<endl;
#endif

	repo->buffer = new EntityIDBuffer();
	repo->columnFinder = new FindEntityID(repo);

	cerr<<"load complete!"<<endl;

	repo->bitmapQuery = new TripleBitQuery(*repo);
	repo->query = new RDFQuery(repo->bitmapQuery, repo);

	return repo;
}