/* A helper function for mergeBlock(), below. Merge the position lists * pointed to by m->in and pBlockReader. * If the merge matches, write [iDocid] to m->pOut; if m->pOut * has positions then write all matching positions as well. */ static void mergePosList(DocListMerge *m, sqlite_int64 iDocid, DocListReader *pBlockReader){ int block_pos = readPosition(pBlockReader); int in_pos = readPosition(&m->in); int match = 0; while( block_pos!=-1 || in_pos!=-1 ){ if( block_pos-m->iOffset==in_pos ){ if( !match ){ docListAddDocid(m->pOut, iDocid); match = 1; } if( m->pOut->iType >= DL_POSITIONS ){ docListAddPos(m->pOut, in_pos); } block_pos = readPosition(pBlockReader); in_pos = readPosition(&m->in); } else if( in_pos==-1 || (block_pos!=-1 && block_pos-m->iOffset<in_pos) ){ block_pos = readPosition(pBlockReader); } else { in_pos = readPosition(&m->in); } } if( m->pOut->iType >= DL_POSITIONS && match ){ docListAddEndPos(m->pOut); } }
/* Build a hash table containing all terms in zText. */ static int build_terms(Hash *terms, sqlite3_tokenizer *pTokenizer, const char *zText, sqlite_int64 iDocid){ sqlite3_tokenizer_cursor *pCursor; const char *pToken; int nTokenBytes; int iStartOffset, iEndOffset, iPosition; int rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor); if( rc!=SQLITE_OK ) return rc; pCursor->pTokenizer = pTokenizer; HashInit(terms, HASH_STRING, 1); while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor, &pToken, &nTokenBytes, &iStartOffset, &iEndOffset, &iPosition) ){ DocList *p; /* Positions can't be negative; we use -1 as a terminator internally. */ if( iPosition<0 ) { rc = SQLITE_ERROR; goto err; } p = HashFind(terms, pToken, nTokenBytes); if( p==NULL ){ p = docListNew(DL_POSITIONS_OFFSETS); docListAddDocid(p, iDocid); HashInsert(terms, pToken, nTokenBytes, p); } docListAddPosOffset(p, iPosition, iStartOffset, iEndOffset); } err: /* TODO(shess) Check return? Should this be able to cause errors at ** this point? Actually, same question about sqlite3_finalize(), ** though one could argue that failure there means that the data is ** not durable. *ponder* */ pTokenizer->pModule->xClose(pCursor); return rc; }
static int build_terms(Hash *terms, sqlite3_tokenizer *pTokenizer, const char *zText, sqlite_int64 iDocid){ sqlite3_tokenizer_cursor *pCursor; const char *pToken; int nTokenBytes; int iStartOffset, iEndOffset, iPosition; int rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor); if( rc!=SQLITE_OK ) return rc; pCursor->pTokenizer = pTokenizer; HashInit(terms, HASH_STRING, 1); while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor, &pToken, &nTokenBytes, &iStartOffset, &iEndOffset, &iPosition) ){ DocList *p; if( iPosition<0 ) { rc = SQLITE_ERROR; goto err; } p = HashFind(terms, pToken, nTokenBytes); if( p==NULL ){ p = docListNew(DL_POSITIONS_OFFSETS); docListAddDocid(p, iDocid); HashInsert(terms, pToken, nTokenBytes, p); } docListAddPosOffset(p, iPosition, iStartOffset, iEndOffset); } err: pTokenizer->pModule->xClose(pCursor); return rc; }
/* Merge one block of an on-disk doclist into a DocListMerge. */ static void mergeBlock(DocListMerge *m, DocList *pBlock){ DocListReader blockReader; assert( pBlock->iType >= DL_POSITIONS ); readerInit(&blockReader, pBlock); while( !readerAtEnd(&blockReader) ){ sqlite_int64 iDocid = readDocid(&blockReader); if( m->in.pDoclist!=NULL ){ while( 1 ){ if( readerAtEnd(&m->in) ) return; /* nothing more to merge */ if( peekDocid(&m->in)>=iDocid ) break; skipDocument(&m->in); } if( peekDocid(&m->in)>iDocid ){ /* [pIn] has no match with iDocid */ skipPositionList(&blockReader); /* skip this docid in the block */ continue; } readDocid(&m->in); } /* We have a document match. */ if( m->in.pDoclist==NULL || m->in.pDoclist->iType < DL_POSITIONS ){ /* We don't need to do a poslist merge. */ docListAddDocid(m->pOut, iDocid); if( m->pOut->iType >= DL_POSITIONS ){ /* Copy all positions to the output doclist. */ while( 1 ){ int pos = readPosition(&blockReader); if( pos==-1 ) break; docListAddPos(m->pOut, pos); } docListAddEndPos(m->pOut); } else skipPositionList(&blockReader); continue; } mergePosList(m, iDocid, &blockReader); } }
static void mergeBlock(DocListMerge *m, DocList *pBlock){ DocListReader blockReader; assert( pBlock->iType >= DL_POSITIONS ); readerInit(&blockReader, pBlock); while( !readerAtEnd(&blockReader) ){ sqlite_int64 iDocid = readDocid(&blockReader); if( m->in.pDoclist!=NULL ){ while( 1 ){ if( readerAtEnd(&m->in) ) return; if( peekDocid(&m->in)>=iDocid ) break; skipDocument(&m->in); } if( peekDocid(&m->in)>iDocid ){ skipPositionList(&blockReader); continue; } readDocid(&m->in); } if( m->in.pDoclist==NULL || m->in.pDoclist->iType < DL_POSITIONS ){ docListAddDocid(m->pOut, iDocid); if( m->pOut->iType >= DL_POSITIONS ){ while( 1 ){ int pos = readPosition(&blockReader); if( pos==-1 ) break; docListAddPos(m->pOut, pos); } docListAddEndPos(m->pOut); } else skipPositionList(&blockReader); continue; } mergePosList(m, iDocid, &blockReader); } }