Exemple #1
0
void FindInProjectDlg::SearchThread::WriteResult(const MMapBuffer& buf, const wxFileName& filepath, vector<FileMatch>& matches) {
    if (matches.empty()) return;

    // Header
    const wxString path = filepath.GetFullPath();
    const wxString format = (matches.size() == 1) ? _("<b>%s - %d match</b>") : _("<b>%s - %d matches</b>");
    wxString output = wxString::Format(format, path.c_str(), matches.size());
    output += wxT("<br><table cellspacing=0>");

    unsigned int linecount = 1;
    const char* subject = buf.data();
    const char* end = subject + buf.Length();
    const char* linestart = subject;
    vector<FileMatch>::iterator m = matches.begin();
    const char* matchstart = subject + m->start;

    // Count lines
    while (subject < end) {
        if (subject == matchstart) {
            // Write linenumber with link
            wxString line = wxString::Format(wxT("<tr><td bgcolor=#f6f6ef align=\"right\"><a href=\"txmt://open&url=file://%s&line=%d\">%d</a></td><td> "), path.c_str(), linecount, linecount);

            // Start of line
            line += wxString(linestart, wxConvUTF8, matchstart-linestart);

            // Match
            line += wxT("<i style=\"background-color: yellow\">");
            const size_t match_len = m->end - m->start;
            line += wxString(matchstart, wxConvUTF8, match_len);
            line += wxT("</i>");

            // End of line
            const char* const matchend = subject + match_len;
            const char* lineend = matchend;
            while (lineend < end && *lineend != '\n') ++lineend;
            line += wxString(matchend, wxConvUTF8, lineend - matchend);
            line += wxT("</td></tr>");
            output += line;

            ++m;
            if (m == matches.end()) break;
            matchstart = buf.data() + m->start;
        }

        if (*subject == '\n') {
            ++linecount;
            linestart = subject+1;
        }

        ++subject;
    }

    output += wxT("</table><p>");

    m_outputCrit.Enter();
    m_output += output;
    m_outputCrit.Leave();
}
Exemple #2
0
// Searches the mapped file for the literal pattern described by si and
// appends one FileMatch (byte range [start, start + si.byte_len)) per hit.
//
// buf     - mapped file contents
// si      - pattern bytes (UTF8buffer), the alternate-case bytes used when
//           matchCase is false (UTF8bufferUpper), the pattern length in
//           bytes (byte_len) and the per-byte skip table (charmap)
// matches - receives the hits; existing entries are left in place
//
// Files with a zero byte among their first 100 bytes are treated as binary
// and skipped. Matches do not overlap: after a hit the scan resumes past it.
void FindInProjectDlg::SearchThread::DoSearch(const MMapBuffer& buf, const SearchInfo& si, vector<FileMatch>& matches) const {
    // Ignore binary files (we just check for zero bytes in the first
    // 100 bytes of the file)
    const wxFileOffset len = buf.Length();
    const char* subject = buf.data();
    const char* end_pos = buf.data() + wxMin(100,len);
    for (; subject < end_pos; ++subject) {
        if (*subject == '\0') return;
    }

    // An empty pattern would make byte_len-1 wrap around and index far
    // outside the pattern buffers; there is nothing to find in that case.
    if (si.byte_len == 0) return;

    // A file shorter than the pattern cannot contain it (and the start
    // position computed below would already lie past the end of the buffer).
    if (len < static_cast<wxFileOffset>(si.byte_len)) return;

    // Prepare vars to avoid lookups in loop
    const size_t last_char_pos = si.byte_len-1;
    const char lastChar = si.UTF8buffer[last_char_pos];
    const char lastCharUpper = si.matchCase ? '\0' : si.UTF8bufferUpper[last_char_pos];

    // subject always points at the byte that would be the LAST byte of a match
    subject = buf.data() + last_char_pos;
    end_pos = buf.data() + len;

    while (subject < end_pos) {
        const char c = *subject; // Get candidate for last char

        if (c == lastChar || (!si.matchCase && c == lastCharUpper)) {
            // Verify the remaining bytes back to front. Counting down an
            // index avoids stepping a pointer to before the buffer when the
            // candidate starts at offset 0.
            const char* const first_byte_pos = subject - last_char_pos;
            size_t remaining = last_char_pos; // leading bytes still unverified
            while (remaining > 0) {
                const size_t idx = remaining - 1;
                const char c2 = first_byte_pos[idx];
                if (c2 != si.UTF8buffer[idx]) {
                    if (si.matchCase || c2 != si.UTF8bufferUpper[idx]) break;
                }
                --remaining;
            }

            if (remaining == 0) {
                // We got a match
                const wxFileOffset matchStart = first_byte_pos - buf.data();
                const FileMatch m = {0, 0, matchStart, matchStart + si.byte_len};
                matches.push_back(m);

                // Skip the whole match so hits never overlap
                subject += si.byte_len;
                continue;
            }
        }

        // If we don't have a match, see how far we can move char_pos
        subject += si.charmap[(unsigned char)c];
    }
}
Exemple #3
0
void SearchThread::SearchDir(const wxString& path, const SearchInfo& si, ProjectInfoHandler& infoHandler) {
    MMapBuffer buf;
    wxFileName filepath;
    vector<FileMatch> matches;

    wxArrayString dirs;
    wxArrayString filenames;
    infoHandler.GetDirAndFileLists(path, dirs, filenames);

    for (size_t f = 0; f < filenames.size(); ++f) {
        if (!m_isSearching) return;
        m_outputCrit.Enter();
        m_currentPath = path + filenames[f];
        m_outputCrit.Leave();
        filepath = m_currentPath;

        // Map the file to memory
        buf.Open(filepath);
        if (!buf.IsMapped()) {
            wxLogDebug(wxT(" Mapping failed!"));
            continue;
        }

        // Search the file
        DoSearch(buf, si, matches);
        if (matches.empty()) continue;

        // Show matches
        WriteResult(buf, filepath, matches);
        matches.clear();
    }

    for (size_t d = 0; d < dirs.size(); ++d) {
        const wxString dirpath = path + dirs[d] + wxFILE_SEP_PATH;
        SearchDir(dirpath, si, infoHandler);
    }
}
void BitmapBuffer::save()
{
	//loadfromtemp();
	static bool first = true;
	string filename = dir + "/BitmapBuffer";
	MMapBuffer * buffer; //new MMapBuffer(filename.c_str(), 0);
	string predicateFile(filename);
	predicateFile.append("_predicate");

	MMapBuffer * predicateBuffer =new MMapBuffer(predicateFile.c_str(), predicate_managers[0].size() * (sizeof(ID) + sizeof(size_t)) * 2);

	char* predicateWriter = predicateBuffer->get_address();
	char* bufferWriter = NULL;

	map<ID, ChunkManager*>::const_iterator iter = predicate_managers[0].begin();
	char* startPtr;
	size_t offset = 0;
	startPtr = iter->second->ptrs[0];

	if(first == true) {
		buffer = new MMapBuffer(filename.c_str(), iter->second->meta->length[0]);
		//offset = buffer->get_offset();
		//first = false;
	} else {
		//buffer = new MMapBuffer(filename.c_str(), iter->second->meta->length[0], true);
		//offset = buffer->get_offset();
	}

	predicateWriter = predicateBuffer->get_address();
	bufferWriter = buffer->get_address();
	vector<size_t>::iterator pageNoIter = iter->second->usedPage[0].begin(),
			limit = iter->second->usedPage[0].end();

	for(; pageNoIter != limit; pageNoIter++ ) {
		size_t pageNo = *pageNoIter;
		memcpy(bufferWriter, temp1->get_address() + pageNo * MemoryBuffer::pagesize, MemoryBuffer::pagesize);
		bufferWriter = bufferWriter + MemoryBuffer::pagesize;
	}

	*((ID*)predicateWriter) = iter->first; predicateWriter = predicateWriter + sizeof(ID);
	*((size_t*)predicateWriter) = offset; predicateWriter = predicateWriter + sizeof(size_t);
	offset = offset + iter->second->meta->length[0];

	bufferWriter = buffer->resize(iter->second->meta->length[1]);
	char* startPos = bufferWriter + offset;

	pageNoIter = iter->second->usedPage[1].begin(); limit = iter->second->usedPage[1].end();
	for(; pageNoIter != limit; pageNoIter++ ) {
		size_t pageNo = *pageNoIter;
		memcpy(startPos, temp2->get_address() + pageNo * MemoryBuffer::pagesize, MemoryBuffer::pagesize);
		startPos = startPos + MemoryBuffer::pagesize;
	}

	assert(iter->second->meta->length[1] == iter->second->usedPage[1].size() * MemoryBuffer::pagesize);
	offset = offset + iter->second->meta->length[1];

	iter++;
	for(; iter != predicate_managers[0].end(); iter++) {
		bufferWriter = buffer->resize(iter->second->meta->length[0]);
		startPos = bufferWriter + offset;

		pageNoIter = iter->second->usedPage[0].begin(); limit = iter->second->usedPage[0].end();

		for(; pageNoIter != limit; pageNoIter++) {
			size_t pageNo = *pageNoIter;
			memcpy(startPos, temp1->get_address() + pageNo * MemoryBuffer::pagesize, MemoryBuffer::pagesize);
			startPos = startPos + MemoryBuffer::pagesize;
		}
		//cout<<"used page count: "<<iter->second->usedPage[0].size()<<endl;

		//iter->second->meta->endPtr[0] = startPos + iter->second->meta->usedSpace[0];  //used to build index;

		*((ID*)predicateWriter) = iter->first; predicateWriter = predicateWriter + sizeof(ID);
		*((size_t*)predicateWriter) = offset; predicateWriter = predicateWriter + sizeof(size_t);
		offset += iter->second->meta->length[0];

		assert(iter->second->usedPage[0].size() * MemoryBuffer::pagesize == iter->second->meta->length[0]);

		bufferWriter = buffer->resize(iter->second->meta->length[1]);
		startPos = bufferWriter + offset;
		//iter->second->meta->startPtr[1] = startPos; //used to build index;
		//iter->second->meta->endPtr[1] = startPos + iter->second->meta->usedSpace[1];
		pageNoIter = iter->second->usedPage[1].begin(); limit = iter->second->usedPage[1].end();
		for(; pageNoIter != limit; pageNoIter++) {
			size_t pageNo = *pageNoIter;
			memcpy(startPos, temp2->get_address() + pageNo * MemoryBuffer::pagesize, MemoryBuffer::pagesize);
			startPos = startPos + MemoryBuffer::pagesize;
		}

		offset += iter->second->meta->length[1];
		assert(iter->second->usedPage[1].size() * MemoryBuffer::pagesize == iter->second->meta->length[1]);
	}

	buffer->flush();
	temp1->discard();
	temp2->discard();

	iter = predicate_managers[1].begin();
	for(; iter != predicate_managers[1].end(); iter++) {
		bufferWriter = buffer->resize(iter->second->meta->length[0]);
		startPos = bufferWriter + offset;

		pageNoIter = iter->second->usedPage[0].begin(); limit = iter->second->usedPage[0].end();
		for(; pageNoIter != limit; pageNoIter++) {
			size_t pageNo = *pageNoIter;
			memcpy(startPos, temp3->get_address() + pageNo * MemoryBuffer::pagesize, MemoryBuffer::pagesize);
			startPos = startPos + MemoryBuffer::pagesize;
		}

		*((ID*)predicateWriter) = iter->first; predicateWriter = predicateWriter + sizeof(ID);
		*((size_t*)predicateWriter) = offset; predicateWriter = predicateWriter + sizeof(size_t);
		offset += iter->second->meta->length[0];

		assert(iter->second->usedPage[0].size() * MemoryBuffer::pagesize == iter->second->meta->length[0]);

		bufferWriter = buffer->resize(iter->second->usedPage[1].size() * MemoryBuffer::pagesize);
		startPos = bufferWriter + offset;

		pageNoIter = iter->second->usedPage[1].begin(); limit = iter->second->usedPage[1].end();
		for(; pageNoIter != limit; pageNoIter++) {
			size_t pageNo = *pageNoIter;
			memcpy(startPos, temp4->get_address() + pageNo * MemoryBuffer::pagesize, MemoryBuffer::pagesize);
			startPos = startPos + MemoryBuffer::pagesize;
		}

		offset += iter->second->meta->length[1];
		assert(iter->second->usedPage[1].size() * MemoryBuffer::pagesize == iter->second->meta->length[1]);
	}
	buffer->flush();
	predicateBuffer->flush();

	predicateWriter = predicateBuffer->get_address();
	int i = 0;

	ID id;
	for(iter = predicate_managers[0].begin(); iter != predicate_managers[0].end(); iter++, i++) {
		id = *((ID*)predicateWriter);
		assert(iter->first == id);
		predicateWriter = predicateWriter + sizeof(ID);
		offset = *((size_t*)predicateWriter);
		predicateWriter = predicateWriter + sizeof(size_t);

		char* base = buffer->get_address() + offset;
		iter->second->meta = (ChunkManagerMeta*)base;
		iter->second->meta->startPtr[0] = base + sizeof(ChunkManagerMeta);
		iter->second->meta->endPtr[0] = iter->second->meta->startPtr[0] + iter->second->meta->usedSpace[0];
		iter->second->meta->startPtr[1] = base + iter->second->meta->length[0];
		iter->second->meta->endPtr[1] = iter->second->meta->startPtr[1] + iter->second->meta->usedSpace[1];
		//::printMeta(*(iter->second->meta));
	}

	for(iter = predicate_managers[1].begin(); iter != predicate_managers[1].end(); iter++, i++) {
		id = *((ID*)predicateWriter);
		assert(iter->first == id);
		predicateWriter = predicateWriter + sizeof(ID);
		offset = *((size_t*)predicateWriter);
		predicateWriter = predicateWriter + sizeof(size_t);

		char* base = buffer->get_address() + offset;
		iter->second->meta = (ChunkManagerMeta*)base;
		iter->second->meta->startPtr[0] = base + sizeof(ChunkManagerMeta);
		iter->second->meta->endPtr[0] = iter->second->meta->startPtr[0] + iter->second->meta->usedSpace[0];
		iter->second->meta->startPtr[1] = base + iter->second->meta->length[0];
		iter->second->meta->endPtr[1] = iter->second->meta->startPtr[1] + iter->second->meta->usedSpace[1];
		//::printMeta(*(iter->second->meta));
	}

	temp3->discard();
	temp4->discard();

	//build index;
	MMapBuffer* bitmapIndex = NULL;
#ifdef DEBUG
	cout<<"build hash index for subject"<<endl;
#endif
	for ( map<ID,ChunkManager*>::iterator iter = predicate_managers[0].begin(); iter != predicate_managers[0].end(); iter++ ) {
		if ( iter->second != NULL ) {
#ifdef DEBUG
			cout<<iter->first<<endl;
#endif
			iter->second->buildChunkIndex();
			iter->second->getChunkIndex(1)->save(bitmapIndex);
			iter->second->getChunkIndex(2)->save(bitmapIndex);
		}
	}

#ifdef DEBUG
	cout<<"build hash index for object"<<endl;
#endif
	for ( map<ID, ChunkManager*>::iterator iter = predicate_managers[1].begin(); iter != predicate_managers[1].end(); iter++ ) {
		if ( iter->second != NULL ) {
#ifdef DEBUF
			cout<<iter->first<<endl;
#endif
			iter->second->buildChunkIndex();
			iter->second->getChunkIndex(1)->save(bitmapIndex);
			iter->second->getChunkIndex(2)->save(bitmapIndex);
		}
	}

	delete bitmapIndex;
	delete buffer;
	delete predicateBuffer;

}
Exemple #5
0
// Searches the mapped file and appends one FileMatch per hit to `matches`.
//
// buf     - mapped file contents
// si      - search parameters: when si.regex is set the compiled PCRE
//           pattern is used; otherwise a literal search is done with the
//           pattern bytes (UTF8buffer), the alternate-case bytes used when
//           matchCase is false (UTF8bufferUpper), the pattern length in
//           bytes (byte_len) and the per-byte skip table (charmap)
// matches - receives the hits as byte ranges; existing entries are kept
//
// Files with a zero byte among their first 100 bytes are treated as binary
// and skipped. Matches never overlap and are never empty.
void SearchThread::DoSearch(const MMapBuffer& buf, const SearchInfo& si, vector<FileMatch>& matches) const {
    // Ignore binary files (we just check for zero bytes in the first
    // 100 bytes of the file)
    const wxFileOffset len = buf.Length();
    const char* subject = buf.data();
    const char* end_pos = buf.data() + wxMin(100,len);
    for (; subject < end_pos; ++subject) {
        if (*subject == '\0') return;
    }

    if (si.regex) {
        const int OVECCOUNT = 30;
        int ovector[OVECCOUNT];
        int pos = 0;

        // NOTE(review): pcre_exec takes the subject length and offsets as
        // int, so files larger than INT_MAX bytes are not handled correctly.
        while(1) {
            const int rc = pcre_exec(
                     si.regex,             // the compiled pattern
                     NULL,                 // extra data - if we study the pattern
                     buf.data(),           // the subject string
                     len,                  // the length of the subject
                     pos,                  // start at offset in the subject
                     PCRE_NOTEMPTY,        // options
                     ovector,              // output vector for substring information
                     OVECCOUNT);           // number of elements in the output vector

            // Negative values are errors or "no match". Zero is still a
            // successful match: it only signals that the pattern has more
            // capture groups than ovector can hold. The overall match in
            // ovector[0..1] is valid in that case, so it must not end the search.
            if (rc < 0) break;

            const FileMatch m = {0, 0, ovector[0], ovector[1]};
            matches.push_back(m);
            pos = ovector[1]; // PCRE_NOTEMPTY guarantees this advances
        }
    }
    else {
        // An empty pattern would make byte_len-1 wrap around and index far
        // outside the pattern buffers; there is nothing to find in that case.
        if (si.byte_len == 0) return;

        // A file shorter than the pattern cannot contain it (and the start
        // position computed below would already lie past the end of the buffer).
        if (len < static_cast<wxFileOffset>(si.byte_len)) return;

        // Prepare vars to avoid lookups in loop
        const size_t last_char_pos = si.byte_len-1;
        const char lastChar = si.UTF8buffer[last_char_pos];
        const char lastCharUpper = si.matchCase ? '\0' : si.UTF8bufferUpper[last_char_pos];

        // subject always points at the byte that would be the LAST byte of a match
        subject = buf.data() + last_char_pos;
        end_pos = buf.data() + len;

        while (subject < end_pos) {
            const char c = *subject; // Get candidate for last char

            if (c == lastChar || (!si.matchCase && c == lastCharUpper)) {
                // Verify the remaining bytes back to front. Counting down an
                // index avoids stepping a pointer to before the buffer when
                // the candidate starts at offset 0.
                const char* const first_byte_pos = subject - last_char_pos;
                size_t remaining = last_char_pos; // leading bytes still unverified
                while (remaining > 0) {
                    const size_t idx = remaining - 1;
                    const char c2 = first_byte_pos[idx];
                    if (c2 != si.UTF8buffer[idx]) {
                        if (si.matchCase || c2 != si.UTF8bufferUpper[idx]) break;
                    }
                    --remaining;
                }

                if (remaining == 0) {
                    // We got a match
                    const wxFileOffset matchStart = first_byte_pos - buf.data();
                    const FileMatch m = {0, 0, matchStart, matchStart + si.byte_len};
                    matches.push_back(m);

                    // Skip the whole match so hits never overlap
                    subject += si.byte_len;
                    continue;
                }
            }

            // If we don't have a match, see how far we can move char_pos
            subject += si.charmap[(unsigned char)c];
        }
    }
}
 // Calls free() on the head buffer first, then on elements[0] through
 // elements[num_buffers - 1] in ascending index order. elements.at() is
 // bounds-checked, so an index outside the container raises instead of
 // reading out of range.
 ~ParMMapBuffer(){
   head->free();

   size_t idx = 0;
   while(idx < num_buffers){
     elements.at(idx)->free();
     ++idx;
   }
 };
/**
 * Loads a repository from the image files found under `path`.
 *
 * Reads "BitmapBuffer", "BitmapBuffer_predicate" and "BitmapBuffer_index",
 * the URI and predicate tables, the "statIndex" file and the four statistics
 * buffers, then creates the query helper objects.
 *
 * @param path directory that contains the database files
 * @return the loaded repository, or NULL (after printing a message to
 *         stderr) when "<path>/BitmapBuffer" does not exist. Only that one
 *         file is checked up front.
 */
TripleBitRepository* TripleBitRepository::create(const string path)
{
	string filename = path + "/BitmapBuffer";

	// Check for the database before allocating anything, so the failure path
	// has nothing to release (the repository object used to be leaked here).
	if(OSFile::fileExists(filename) == false) {
		//file does not exist, repository has not been built;
		fprintf(stderr, "database files dose not exist!");
		return NULL;
	}

	TripleBitRepository* repo = new TripleBitRepository();

	// load the repository from image files;
	//load bitmap
#ifdef DEBUG
	cout<<filename.c_str()<<endl;
#endif
	repo->bitmapImage = new MMapBuffer(filename.c_str(), 0);
	string predicateFile(filename);
	predicateFile.append("_predicate");
	string indexFile(filename);
	indexFile.append("_index");
	repo->bitmapPredicateImage = new MMapBuffer(predicateFile.c_str(), 0);
	repo->bitmapIndexImage = new MMapBuffer(indexFile.c_str(), 0);
	repo->bitmapBuffer = BitmapBuffer::load(repo->bitmapImage, repo->bitmapIndexImage, repo->bitmapPredicateImage);

	repo->UriTable = URITable::load(path);
	repo->preTable = PredicateTable::load(path);

#ifdef DEBUG
	cout<<"total triple count: "<<repo->bitmapBuffer->getTripleCount()<<endl;
	cout<<"URITableSize: "<<repo->UriTable->getSize()<<endl;
	cout<<"predicateTableSize: "<<repo->preTable->getSize()<<endl;
#endif
	
//	cout<<"begin load statistics buffer"<<endl;
	filename = path + "/statIndex";
	// NOTE(review): indexBufferFile is never released in this function;
	// presumably the statistics buffers keep using its mapping - confirm.
	MMapBuffer* indexBufferFile = MMapBuffer::create(filename.c_str(), 0);
	char* indexBuffer = indexBufferFile->get_address();

	// indexBuffer is passed by name to each load() call in turn.
	string statFilename = path + "/subject_statis";
	repo->subjectStat = OneConstantStatisticsBuffer::load(StatisticsBuffer::SUBJECT_STATIS, statFilename, indexBuffer);
	statFilename = path + "/object_statis";
	repo->objectStat = OneConstantStatisticsBuffer::load(StatisticsBuffer::OBJECT_STATIS, statFilename, indexBuffer);
	statFilename = path + "/subjectpredicate_statis";
	repo->subPredicateStat = TwoConstantStatisticsBuffer::load(StatisticsBuffer::SUBJECTPREDICATE_STATIS, statFilename, indexBuffer);
	statFilename = path + "/objectpredicate_statis";
	repo->objPredicateStat = TwoConstantStatisticsBuffer::load(StatisticsBuffer::OBJECTPREDICATE_STATIS, statFilename, indexBuffer);

#ifdef DEBUG
	cout<<"subject count: "<<((OneConstantStatisticsBuffer*)repo->subjectStat)->getEntityCount()<<endl;
	cout<<"object count: "<<((OneConstantStatisticsBuffer*)repo->objectStat)->getEntityCount()<<endl;
#endif

	repo->buffer = new EntityIDBuffer();
	repo->columnFinder = new FindEntityID(repo);

	cerr<<"load complete!"<<endl;
	repo->bitmapQuery = new TripleBitQuery(*repo);
	repo->query = new RDFQuery(repo->bitmapQuery, repo);

	return repo;
}