// Verify that the index is set up correctly // by comparing it to the on-disk version. // This is SLOW void FMIndex::verify(const std::string& filename) { SGABWTReader* p_reader = new SGABWTReader(filename); // Discard header for now size_t n1, n2; BWFlag flag; p_reader->readHeader(n1, n2, flag); AlphaCount64 running_count; // Read one symbol from the bwt at a time size_t i = 0; char b; while((b = p_reader->readChar()) != '\n') { // Verify that the symbol at position i matches the symbol // read from the disk char s = getChar(i); assert(s == b); // Verifiy that the counts interpolated from the markers // are correct running_count.increment(b); // single symbol count size_t occ = getOcc(s, i); assert(occ == running_count.get(b)); // full count AlphaCount64 full_occ = getFullOcc(i); assert(full_occ == running_count); i++; } printf("Verified all %zu match expected\n", i); delete p_reader; }
char *dumpRecord1(RECSTRU *crecp, RECSTRU *recp, char *areap, char *area2p) { if ((crecp != NULL) && (recp != NULL) && (areap != NULL) && (area2p != NULL) && (MFRstatus == ACTIVE)) { int xdir = 0; int loop = 0; int xocc = 0; int xtag = 0; FFI xlen = 0; char stag = 0; int socc = 0; int spos = 0; FFI slen = 0; char *p = areap; char *q = area2p; char *cur = NULL; char *f = NULL; char *xxconvent[256]; RECSTRU *arecp = recp; OccLst *ftop = NULL; OccLst *stop = NULL; *p = '\0'; /* chars to entities */ memset(xxconvent, 0x00, sizeof(xxconvent)); xxconvent['&']="&"; xxconvent['<']="<"; xxconvent['>']=">"; xxconvent['"']="""; xxconvent['\'']="'"; xxconvent['|']="%7C"; // registro de controle recp = crecp; sprintf(p, "<masterfile name=\"%s\" nxtmfn=\"%"_LD_"\" mftype=\"%d\" ewlock=\"%"_LD_"\">\n", RDBname, MF0nxtmfn, MF0mftype, MF0mfcxx3); p += strlen(p); recp = arecp; sprintf(p, "<record mfn=\"%"_LD_"\" nvf=\"%d\" base=\"%"_LD_"\" len=\"%"_LD_"\" status=\"%s\" rclock=\"%d\">\n", MFRmfn, MFRnvf, (LONGX)MFRbase, (LONGX)MFRmfrl, (MFRstatus == ACTIVE) ? "active" : "deleted", REClock); p += strlen(p); for (xdir = 0, loop = MFRnvf; loop--; xdir++) { xtag = DIRtag(xdir); xlen = DIRlen(xdir); xocc = getOcc(&ftop, xtag); stag = 0; spos = 0; stop = NULL; sprintf(p, "<field iocc=\"%d\" tag=\"%u\" occ=\"%d\" len=\"%"_LD_"\">", xdir+1, xtag, xocc, (LONGX)xlen); p += strlen(p); cur = f = FIELDP(xdir); for (; xlen--; cur++) { if (*cur == '^') { if (stag > 0) { socc = getOcc(&stop, stag); slen = (int)((cur - f) - spos - 1); sprintf(p, "<subfield tag=\"%c\" occ=\"%d\" pos=\"%d\" len=\"%"_LD_"\">", stag, socc, spos, slen); p += strlen(p); area2p[slen] = 0; strcpy(p, area2p); p += strlen(p); strcpy(p, "</subfield>"); p += strlen(p); } stag = *++cur; xlen--; spos = (int)(cur - f); q = area2p; } else { if (stag > 0) { if (xxconvent[*cur]) { strcpy(q, xxconvent[*cur]); q += strlen(q); } else { *q++ = (*cur); } } else { if (xxconvent[*cur]) { strcpy(p,xxconvent[*cur]); p += strlen(p); } else { *p++ = (*cur); } } } } if (stag > 0) { socc = getOcc(&stop, stag); slen = (int)((cur - f) - spos - 1); sprintf(p, "<subfield tag=\"%c\" occ=\"%d\" pos=\"%d\" len=\"%"_LD_"\">", stag, socc, spos, slen); p += strlen(p); area2p[slen] = 0; strcpy(p, area2p); p += strlen(p); strcpy(p, "</subfield>"); p += strlen(p); } sprintf(p, "</field>\n"); p += strlen(p); } strcpy(p,"</record>\n"); p += strlen(p); strcpy(p, "</masterfile>"); deleteList(&stop); deleteList(&ftop); } return areap; }