/**
 * Worker routine handed to the thread pool: compares every sketch of the
 * query against every sketch of the reference.
 *
 * If the query sketch holds no references when this is called, it is built
 * here from data->file using data->parameters.
 *
 * @param data  Job description: reference sketch, query sketch, query file
 *              name, sketching parameters and the distance / p-value
 *              cut-offs forwarded to compareSketches().
 * @return      A CompareOutput allocated with new (not released here). Its
 *              pairs vector is laid out query-major: the result for query i
 *              and reference j lives at pairs[i * referenceCount + j].
 */
CommandDistance::CompareOutput * compare(CommandDistance::CompareInput * data)
{
    const Sketch & sketchRef = data->sketchRef;
    Sketch * sketchQuery = data->sketchQuery;

    CommandDistance::CompareOutput * output = new CommandDistance::CompareOutput(data->sketchRef, data->sketchQuery);

    if ( sketchQuery->getReferenceCount() == 0 )
    {
        // input was sequence file; sketch now
        //
        vector<string> fileVector;
        fileVector.push_back(data->file);

        sketchQuery->initFromSequence(fileVector, data->parameters);
    }

    // Compare using the smaller of the two sketch sizes.
    //
    int sketchSize = sketchQuery->getMinHashesPerWindow() < sketchRef.getMinHashesPerWindow() ?
        sketchQuery->getMinHashesPerWindow() :
        sketchRef.getMinHashesPerWindow();

    // Do the size / index arithmetic in size_t: the product of the two
    // reference counts can exceed INT_MAX for large all-vs-all runs, and
    // signed overflow is undefined behaviour.
    //
    const size_t refCount = sketchRef.getReferenceCount();
    const size_t queryCount = sketchQuery->getReferenceCount();

    output->pairs.resize(refCount * queryCount);

    for ( int i = 0; i < sketchQuery->getReferenceCount(); i++ )
    {
        for ( int j = 0; j < sketchRef.getReferenceCount(); j++ )
        {
            const size_t pairIndex = (size_t)i * refCount + (size_t)j;

            compareSketches(output->pairs[pairIndex], sketchRef.getReference(j), sketchQuery->getReference(i), sketchSize, sketchRef.getKmerSize(), sketchRef.getKmerSpace(), data->maxDistance, data->maxPValue);
        }
    }

    return output;
}
int CommandInfo::run() const { if ( arguments.size() != 1 || options.at("help").active ) { print(); return 0; } bool header = options.at("header").active; const string & file = arguments[0]; if ( ! hasSuffix(file, suffixSketch) ) { cerr << "ERROR: The file \"" << file << "\" does not look like a sketch." << endl; return 1; } Sketch sketch; sketch.initFromCapnp(file.c_str(), header); cout << "Header:" << endl; cout << " Kmer: " << sketch.getKmerSize() << endl; cout << " Target min-hashes per sketch: " << sketch.getMinHashesPerWindow() << endl; cout << " Canonical kmers: " << (sketch.getNoncanonical() ? "no" : "yes") << endl; if ( ! header ) { cout << endl; cout << "Sketches (" << sketch.getReferenceCount() << "):" << endl; vector<vector<string>> columns(4); columns[0].push_back("Hashes"); columns[1].push_back("Length"); columns[2].push_back("ID"); columns[3].push_back("Comment"); for ( int i = 0; i < sketch.getReferenceCount(); i++ ) { const Sketch::Reference & ref = sketch.getReference(i); columns[0].push_back(to_string(ref.hashesSorted.size())); columns[1].push_back(to_string(ref.length)); columns[2].push_back(ref.name); columns[3].push_back(ref.comment); } printColumns(columns, 2, 2, "-", 0); } return 0; }
/**
 * Worker routine handed to the thread pool: scores every sketch of the
 * query against every sketch of the reference with containSketches().
 *
 * If the query sketch holds no references when this is called, it is built
 * here from data->file using data->parameters. The query sketch is deleted
 * before returning.
 *
 * @param data  Job description: reference sketch, query sketch, query file
 *              name and sketching parameters.
 * @return      A ContainOutput allocated with new (not released here). Its
 *              pairs vector is laid out query-major: the result for query i
 *              and reference j lives at pairs[i * referenceCount + j].
 */
CommandContain::ContainOutput * contain(CommandContain::ContainInput * data)
{
    const Sketch & sketchRef = data->sketchRef;
    Sketch * sketchQuery = data->sketchQuery;

    CommandContain::ContainOutput * output = new CommandContain::ContainOutput();

    if ( sketchQuery->getReferenceCount() == 0 )
    {
        // input was sequence file; sketch now
        //
        vector<string> fileVector;
        fileVector.push_back(data->file);

        sketchQuery->initFromSequence(fileVector, data->parameters);
    }

    // Do the size / index arithmetic in size_t: the product of the two
    // reference counts can exceed INT_MAX, and signed overflow is undefined
    // behaviour.
    //
    const size_t refCount = sketchRef.getReferenceCount();
    const size_t queryCount = sketchQuery->getReferenceCount();

    output->pairs.resize(refCount * queryCount);

    for ( int i = 0; i < sketchQuery->getReferenceCount(); i++ )
    {
        for ( int j = 0; j < sketchRef.getReferenceCount(); j++ )
        {
            const size_t pairIndex = (size_t)i * refCount + (size_t)j;

            output->pairs[pairIndex].score = containSketches(sketchRef.getReference(j).hashesSorted, sketchQuery->getReference(i).hashesSorted, output->pairs[pairIndex].error);
            output->pairs[pairIndex].nameRef = sketchRef.getReference(j).name;
            output->pairs[pairIndex].nameQuery = sketchQuery->getReference(i).name;
        }
    }

    // The names were copied into the output above, so the query sketch is
    // no longer needed.
    //
    delete data->sketchQuery;

    return output;
}
void Slvs_Solve(Slvs_System *ssys, Slvs_hGroup shg) { if(!IsInit) { InitHeaps(); IsInit = 1; } int i; for(i = 0; i < ssys->params; i++) { Slvs_Param *sp = &(ssys->param[i]); Param p; ZERO(&p); p.h.v = sp->h; p.val = sp->val; SK.param.Add(&p); if(sp->group == shg) { SYS.param.Add(&p); } } for(i = 0; i < ssys->entities; i++) { Slvs_Entity *se = &(ssys->entity[i]); EntityBase e; ZERO(&e); switch(se->type) { case SLVS_E_POINT_IN_3D: e.type = Entity::POINT_IN_3D; break; case SLVS_E_POINT_IN_2D: e.type = Entity::POINT_IN_2D; break; case SLVS_E_NORMAL_IN_3D: e.type = Entity::NORMAL_IN_3D; break; case SLVS_E_NORMAL_IN_2D: e.type = Entity::NORMAL_IN_2D; break; case SLVS_E_DISTANCE: e.type = Entity::DISTANCE; break; case SLVS_E_WORKPLANE: e.type = Entity::WORKPLANE; break; case SLVS_E_LINE_SEGMENT: e.type = Entity::LINE_SEGMENT; break; case SLVS_E_CUBIC: e.type = Entity::CUBIC; break; case SLVS_E_CIRCLE: e.type = Entity::CIRCLE; break; case SLVS_E_ARC_OF_CIRCLE: e.type = Entity::ARC_OF_CIRCLE; break; default: dbp("bad entity type %d", se->type); return; } e.h.v = se->h; e.group.v = se->group; e.workplane.v = se->wrkpl; e.point[0].v = se->point[0]; e.point[1].v = se->point[1]; e.point[2].v = se->point[2]; e.point[3].v = se->point[3]; e.normal.v = se->normal; e.distance.v = se->distance; e.param[0].v = se->param[0]; e.param[1].v = se->param[1]; e.param[2].v = se->param[2]; e.param[3].v = se->param[3]; SK.entity.Add(&e); } for(i = 0; i < ssys->constraints; i++) { Slvs_Constraint *sc = &(ssys->constraint[i]); ConstraintBase c; ZERO(&c); int t; switch(sc->type) { case SLVS_C_POINTS_COINCIDENT: t = Constraint::POINTS_COINCIDENT; break; case SLVS_C_PT_PT_DISTANCE: t = Constraint::PT_PT_DISTANCE; break; case SLVS_C_PT_PLANE_DISTANCE: t = Constraint::PT_PLANE_DISTANCE; break; case SLVS_C_PT_LINE_DISTANCE: t = Constraint::PT_LINE_DISTANCE; break; case SLVS_C_PT_FACE_DISTANCE: t = Constraint::PT_FACE_DISTANCE; break; case SLVS_C_PT_IN_PLANE: t = Constraint::PT_IN_PLANE; break; case 
SLVS_C_PT_ON_LINE: t = Constraint::PT_ON_LINE; break; case SLVS_C_PT_ON_FACE: t = Constraint::PT_ON_FACE; break; case SLVS_C_EQUAL_LENGTH_LINES: t = Constraint::EQUAL_LENGTH_LINES; break; case SLVS_C_LENGTH_RATIO: t = Constraint::LENGTH_RATIO; break; case SLVS_C_EQ_LEN_PT_LINE_D: t = Constraint::EQ_LEN_PT_LINE_D; break; case SLVS_C_EQ_PT_LN_DISTANCES: t = Constraint::EQ_PT_LN_DISTANCES; break; case SLVS_C_EQUAL_ANGLE: t = Constraint::EQUAL_ANGLE; break; case SLVS_C_EQUAL_LINE_ARC_LEN: t = Constraint::EQUAL_LINE_ARC_LEN; break; case SLVS_C_SYMMETRIC: t = Constraint::SYMMETRIC; break; case SLVS_C_SYMMETRIC_HORIZ: t = Constraint::SYMMETRIC_HORIZ; break; case SLVS_C_SYMMETRIC_VERT: t = Constraint::SYMMETRIC_VERT; break; case SLVS_C_SYMMETRIC_LINE: t = Constraint::SYMMETRIC_LINE; break; case SLVS_C_AT_MIDPOINT: t = Constraint::AT_MIDPOINT; break; case SLVS_C_HORIZONTAL: t = Constraint::HORIZONTAL; break; case SLVS_C_VERTICAL: t = Constraint::VERTICAL; break; case SLVS_C_DIAMETER: t = Constraint::DIAMETER; break; case SLVS_C_PT_ON_CIRCLE: t = Constraint::PT_ON_CIRCLE; break; case SLVS_C_SAME_ORIENTATION: t = Constraint::SAME_ORIENTATION; break; case SLVS_C_ANGLE: t = Constraint::ANGLE; break; case SLVS_C_PARALLEL: t = Constraint::PARALLEL; break; case SLVS_C_PERPENDICULAR: t = Constraint::PERPENDICULAR; break; case SLVS_C_ARC_LINE_TANGENT: t = Constraint::ARC_LINE_TANGENT; break; case SLVS_C_CUBIC_LINE_TANGENT: t = Constraint::CUBIC_LINE_TANGENT; break; case SLVS_C_EQUAL_RADIUS: t = Constraint::EQUAL_RADIUS; break; case SLVS_C_PROJ_PT_DISTANCE: t = Constraint::PROJ_PT_DISTANCE; break; case SLVS_C_WHERE_DRAGGED: t = Constraint::WHERE_DRAGGED; break; case SLVS_C_CURVE_CURVE_TANGENT:t = Constraint::CURVE_CURVE_TANGENT; break; default: dbp("bad constraint type %d", sc->type); return; } c.type = t; c.h.v = sc->h; c.group.v = sc->group; c.workplane.v = sc->wrkpl; c.valA = sc->valA; c.ptA.v = sc->ptA; c.ptB.v = sc->ptB; c.entityA.v = sc->entityA; c.entityB.v = sc->entityB; 
c.entityC.v = sc->entityC; c.entityD.v = sc->entityD; c.other = (sc->other) ? true : false; c.other2 = (sc->other2) ? true : false; SK.constraint.Add(&c); } for(i = 0; i < (int)arraylen(ssys->dragged); i++) { if(ssys->dragged[i]) { hParam hp = { ssys->dragged[i] }; SYS.dragged.Add(&hp); } } Group g; ZERO(&g); g.h.v = shg; List<hConstraint> bad; ZERO(&bad); // Now we're finally ready to solve! bool andFindBad = ssys->calculateFaileds ? true : false; int how = SYS.Solve(&g, &(ssys->dof), &bad, andFindBad, false); switch(how) { case System::SOLVED_OKAY: ssys->result = SLVS_RESULT_OKAY; break; case System::DIDNT_CONVERGE: ssys->result = SLVS_RESULT_DIDNT_CONVERGE; break; case System::SINGULAR_JACOBIAN: ssys->result = SLVS_RESULT_INCONSISTENT; break; case System::TOO_MANY_UNKNOWNS: ssys->result = SLVS_RESULT_TOO_MANY_UNKNOWNS; break; default: oops(); } // Write the new parameter values back to our caller. for(i = 0; i < ssys->params; i++) { Slvs_Param *sp = &(ssys->param[i]); hParam hp = { sp->h }; sp->val = SK.GetParam(hp)->val; } if(ssys->failed) { // Copy over any the list of problematic constraints. for(i = 0; i < ssys->faileds && i < bad.n; i++) { ssys->failed[i] = bad.elem[i].v; } ssys->faileds = bad.n; } bad.Clear(); SYS.param.Clear(); SYS.entity.Clear(); SYS.eq.Clear(); SYS.dragged.Clear(); SK.param.Clear(); SK.entity.Clear(); SK.constraint.Clear(); FreeAllTemporary(); }
int test_sketch(char* sketch_type, unsigned buckets, unsigned rows, char* random_generator, char* hash_function, char* pcap_file){ unsigned int pkt_counter=0; // packet counter clock_t t1, t2, t3; //temporary packet buffers struct pcap_pkthdr header; // The header that pcap gives us const u_char *packet; // The actual packet // Create the sketch as the type passed as parameter Sketch<KeyType>* sketch = get_sketch<KeyType>(sketch_type, buckets, rows, random_generator, hash_function); if (sketch == NULL) { return -1; } //----------------- //open the pcap file pcap_t *handle; char errbuf[PCAP_ERRBUF_SIZE]; handle = pcap_open_offline(pcap_file, errbuf); //call pcap library function if (handle == NULL) { fprintf(stderr,"Couldn't open pcap file %s: %s\n", pcap_file, errbuf); return -1; } //----------------- //Process one packet at a time while (packet = pcap_next(handle,&header)) { t1 = clock(); // header contains information about the packet (e.g. timestamp) u_char *pkt_ptr = (u_char *)packet; //cast a pointer to the packet data //parse the first (ethernet) header, grabbing the type field int ether_type = ((int)(pkt_ptr[12]) << 8) | (int)pkt_ptr[13]; int ether_offset = 0; if (ether_type == ETHER_TYPE_IP or ether_type == ETHER_TYPE_IPv6) //most common ether_offset = 14; else { fprintf(stderr, "Unknown ethernet type, %04X, skipping...\n", ether_type); continue; } // Only from IP header: pkt_ptr += ether_offset; //skip past the Ethernet II header int packet_length = header.len-ether_offset; // Compute MD5 unsigned char * tmp_hash; tmp_hash = MD5(pkt_ptr, packet_length, NULL); // Strip to the size of the sketch: uint64_t low_hash = low_md5(tmp_hash); // Update sketch t2 = clock(); sketch->update(low_hash,1); t3 = clock(); printf("%s,%u,%u,%u,%s,%s,%f,%f,%f,%f\n", sketch_type, sizeof(KeyType), buckets, rows, random_generator, hash_function, ((float)t1)/CLOCKS_PER_SEC, ((float)t2)/CLOCKS_PER_SEC, ((float)t3)/CLOCKS_PER_SEC, ((float)t3-t1)/CLOCKS_PER_SEC); pkt_counter++; 
//increment number of packets seen if (pkt_counter >= 1000) break; } //end internal loop for reading packets (all in one file) pcap_close(handle); //close the pcap file return 0; //done }
void SolveSpace::MenuAnalyze(int id) { SS.GW.GroupSelection(); #define gs (SS.GW.gs) switch(id) { case GraphicsWindow::MNU_STEP_DIM: if(gs.constraints == 1 && gs.n == 0) { Constraint *c = SK.GetConstraint(gs.constraint[0]); if(c->HasLabel() && !c->reference) { SS.TW.shown.dimFinish = c->valA; SS.TW.shown.dimSteps = 10; SS.TW.shown.dimIsDistance = (c->type != Constraint::ANGLE) && (c->type != Constraint::LENGTH_RATIO); SS.TW.shown.constraint = c->h; SS.TW.shown.screen = TextWindow::SCREEN_STEP_DIMENSION; // The step params are specified in the text window, // so force that to be shown. SS.GW.ForceTextWindowShown(); SS.later.showTW = true; SS.GW.ClearSelection(); } else { Error("Constraint must have a label, and must not be " "a reference dimension."); } } else { Error("Bad selection for step dimension; select a constraint."); } break; case GraphicsWindow::MNU_NAKED_EDGES: { SS.nakedEdges.Clear(); Group *g = SK.GetGroup(SS.GW.activeGroup); SMesh *m = &(g->displayMesh); SKdNode *root = SKdNode::From(m); bool inters, leaks; root->MakeCertainEdgesInto(&(SS.nakedEdges), SKdNode::NAKED_OR_SELF_INTER_EDGES, true, &inters, &leaks); InvalidateGraphics(); const char *intersMsg = inters ? "The mesh is self-intersecting (NOT okay, invalid)." : "The mesh is not self-intersecting (okay, valid)."; const char *leaksMsg = leaks ? "The mesh has naked edges (NOT okay, invalid)." 
: "The mesh is watertight (okay, valid)."; char cntMsg[1024]; sprintf(cntMsg, "\n\nThe model contains %d triangles, from " "%d surfaces.", g->displayMesh.l.n, g->runningShell.surface.n); if(SS.nakedEdges.l.n == 0) { Message("%s\n\n%s\n\nZero problematic edges, good.%s", intersMsg, leaksMsg, cntMsg); } else { Error("%s\n\n%s\n\n%d problematic edges, bad.%s", intersMsg, leaksMsg, SS.nakedEdges.l.n, cntMsg); } break; } case GraphicsWindow::MNU_INTERFERENCE: { SS.nakedEdges.Clear(); SMesh *m = &(SK.GetGroup(SS.GW.activeGroup)->displayMesh); SKdNode *root = SKdNode::From(m); bool inters, leaks; root->MakeCertainEdgesInto(&(SS.nakedEdges), SKdNode::SELF_INTER_EDGES, false, &inters, &leaks); InvalidateGraphics(); if(inters) { Error("%d edges interfere with other triangles, bad.", SS.nakedEdges.l.n); } else { Message("The assembly does not interfere, good."); } break; } case GraphicsWindow::MNU_VOLUME: { SMesh *m = &(SK.GetGroup(SS.GW.activeGroup)->displayMesh); double vol = 0; int i; for(i = 0; i < m->l.n; i++) { STriangle tr = m->l.elem[i]; // Translate to place vertex A at (x, y, 0) Vector trans = Vector::From(tr.a.x, tr.a.y, 0); tr.a = (tr.a).Minus(trans); tr.b = (tr.b).Minus(trans); tr.c = (tr.c).Minus(trans); // Rotate to place vertex B on the y-axis. Depending on // whether the triangle is CW or CCW, C is either to the // right or to the left of the y-axis. This handles the // sign of our normal. 
Vector u = Vector::From(-tr.b.y, tr.b.x, 0); u = u.WithMagnitude(1); Vector v = Vector::From(tr.b.x, tr.b.y, 0); v = v.WithMagnitude(1); Vector n = Vector::From(0, 0, 1); tr.a = (tr.a).DotInToCsys(u, v, n); tr.b = (tr.b).DotInToCsys(u, v, n); tr.c = (tr.c).DotInToCsys(u, v, n); n = tr.Normal().WithMagnitude(1); // Triangles on edge don't contribute if(fabs(n.z) < LENGTH_EPS) continue; // The plane has equation p dot n = a dot n double d = (tr.a).Dot(n); // nx*x + ny*y + nz*z = d // nz*z = d - nx*x - ny*y double A = -n.x/n.z, B = -n.y/n.z, C = d/n.z; double mac = tr.c.y/tr.c.x, mbc = (tr.c.y - tr.b.y)/tr.c.x; double xc = tr.c.x, yb = tr.b.y; // I asked Maple for // int(int(A*x + B*y +C, y=mac*x..(mbc*x + yb)), x=0..xc); double integral = (1.0/3)*( A*(mbc-mac)+ (1.0/2)*B*(mbc*mbc-mac*mac) )*(xc*xc*xc)+ (1.0/2)*(A*yb+B*yb*mbc+C*(mbc-mac))*xc*xc+ C*yb*xc+ (1.0/2)*B*yb*yb*xc; vol += integral; } char msg[1024]; sprintf(msg, "The volume of the solid model is:\n\n" " %.3f %s^3", vol / pow(SS.MmPerUnit(), 3), SS.UnitName()); if(SS.viewUnits == SolveSpace::UNIT_MM) { sprintf(msg+strlen(msg), "\n %.2f mL", vol/(10*10*10)); } strcpy(msg+strlen(msg), "\n\nCurved surfaces have been approximated as triangles.\n" "This introduces error, typically of around 1%."); Message("%s", msg); break; } case GraphicsWindow::MNU_AREA: { Group *g = SK.GetGroup(SS.GW.activeGroup); if(g->polyError.how != Group::POLY_GOOD) { Error("This group does not contain a correctly-formed " "2d closed area. 
It is open, not coplanar, or self-" "intersecting."); break; } SEdgeList sel; ZERO(&sel); g->polyLoops.MakeEdgesInto(&sel); SPolygon sp; ZERO(&sp); sel.AssemblePolygon(&sp, NULL, true); sp.normal = sp.ComputeNormal(); sp.FixContourDirections(); double area = sp.SignedArea(); double scale = SS.MmPerUnit(); Message("The area of the region sketched in this group is:\n\n" " %.3f %s^2\n\n" "Curves have been approximated as piecewise linear.\n" "This introduces error, typically of around 1%%.", area / (scale*scale), SS.UnitName()); sel.Clear(); sp.Clear(); break; } case GraphicsWindow::MNU_SHOW_DOF: // This works like a normal solve, except that it calculates // which variables are free/bound at the same time. SS.GenerateAll(0, INT_MAX, true); break; case GraphicsWindow::MNU_TRACE_PT: if(gs.points == 1 && gs.n == 1) { SS.traced.point = gs.point[0]; SS.GW.ClearSelection(); } else { Error("Bad selection for trace; select a single point."); } break; case GraphicsWindow::MNU_STOP_TRACING: { char exportFile[MAX_PATH] = ""; if(GetSaveFile(exportFile, CSV_EXT, CSV_PATTERN)) { FILE *f = fopen(exportFile, "wb"); if(f) { int i; SContour *sc = &(SS.traced.path); for(i = 0; i < sc->l.n; i++) { Vector p = sc->l.elem[i].p; double s = SS.exportScale; fprintf(f, "%.10f, %.10f, %.10f\r\n", p.x/s, p.y/s, p.z/s); } fclose(f); } else { Error("Couldn't write to '%s'", exportFile); } } // Clear the trace, and stop tracing SS.traced.point = Entity::NO_ENTITY; SS.traced.path.l.Clear(); InvalidateGraphics(); break; } default: oops(); } }
int CommandContain::run() const { if ( arguments.size() < 2 || options.at("help").active ) { print(); return 0; } int threads = options.at("threads").getArgumentAsNumber(); bool list = options.at("list").active; Sketch::Parameters parameters; parameters.kmerSize = options.at("kmer").getArgumentAsNumber(); parameters.minHashesPerWindow = options.at("sketchSize").getArgumentAsNumber(); parameters.concatenated = ! options.at("individual").active; parameters.noncanonical = options.at("noncanonical").active; parameters.error = options.at("errorThreshold").getArgumentAsNumber(); parameters.bloomFilter = options.at("unique").active; parameters.genomeSize = options.at("genome").getArgumentAsNumber(); parameters.memoryMax = options.at("memory").getArgumentAsNumber(); parameters.bloomError = options.at("bloomError").getArgumentAsNumber(); if ( options.at("genome").active || options.at("memory").active ) { parameters.bloomFilter = true; } if ( parameters.bloomFilter ) { parameters.concatenated = true; } Sketch sketch; const string & fileReference = arguments[0]; if ( hasSuffix(fileReference, suffixSketch) ) { if ( options.at("kmer").active ) { cerr << "ERROR: The option " << options.at("kmer").identifier << " cannot be used when a sketch is provided; it is inherited from the sketch.\n"; return 1; } if ( options.at("noncanonical").active ) { cerr << "ERROR: The option " << options.at("noncanonical").identifier << " cannot be used when a sketch is provided; it is inherited from the sketch.\n"; return 1; } sketch.initFromCapnp(fileReference.c_str()); parameters.kmerSize = sketch.getKmerSize(); parameters.noncanonical = sketch.getNoncanonical(); } else { bool sketchFileExists = sketch.initHeaderFromBaseIfValid(fileReference, false); if ( (options.at("kmer").active && parameters.kmerSize != sketch.getKmerSize()) ) { sketchFileExists = false; } if ( false && sketchFileExists ) { sketch.initFromBase(fileReference, false); parameters.kmerSize = sketch.getKmerSize(); 
parameters.noncanonical = sketch.getNoncanonical(); } else { vector<string> refArgVector; refArgVector.push_back(fileReference); //cerr << "Sketch for " << fileReference << " not found or out of date; creating..." << endl; cerr << "Sketching " << fileReference << " (provide sketch file made with \"mash sketch\" to skip)...\n"; sketch.initFromSequence(refArgVector, parameters); /* if ( sketch.writeToFile() ) { cerr << "Sketch saved for subsequent runs." << endl; } else { cerr << "The sketch for " << fileReference << " could not be saved; it will be sketched again next time." << endl; }*/ } } ThreadPool<ContainInput, ContainOutput> threadPool(contain, threads); vector<string> queryFiles; for ( int i = 1; i < arguments.size(); i++ ) { if ( list ) { splitFile(arguments[i], queryFiles); } else { queryFiles.push_back(arguments[i]); } } for ( int i = 0; i < queryFiles.size(); i++ ) { // If the input is a sketch file, load in the main thread; otherwise, // leave it to the child. Either way, the child will delete. // Sketch * sketchQuery = new Sketch(); if ( hasSuffix(queryFiles[i], suffixSketch) ) { // init header to check params // sketchQuery->initFromCapnp(queryFiles[i].c_str(), true); if ( sketchQuery->getKmerSize() != sketch.getKmerSize() ) { cerr << "\nWARNING: The query sketch " << queryFiles[i] << " has a kmer size (" << sketchQuery->getKmerSize() << ") that does not match the reference sketch (" << sketch.getKmerSize() << "). This query will be skipped.\n\n"; delete sketchQuery; continue; } if ( sketchQuery->getNoncanonical() != sketch.getNoncanonical() ) { cerr << "\nWARNING: The query sketch " << queryFiles[i] << " is " << (sketchQuery->getNoncanonical() ? "noncanonical" : "canonical") << " but the reference sketch is not. 
This query will be skipped.\n\n"; delete sketchQuery; continue; } // init fully // sketchQuery->initFromCapnp(queryFiles[i].c_str()); } threadPool.runWhenThreadAvailable(new ContainInput(sketch, sketchQuery, queryFiles[i], parameters)); while ( threadPool.outputAvailable() ) { writeOutput(threadPool.popOutputWhenAvailable(), parameters.error); } } while ( threadPool.running() ) { writeOutput(threadPool.popOutputWhenAvailable(), parameters.error); } return 0; }
int CommandDistance::run() const { if ( arguments.size() < 2 || options.at("help").active ) { /*char tab = '\t'; for ( int kmerSize = 4; kmerSize <= 32; kmerSize++ ) { double kmerSpace = pow(4, kmerSize); for ( uint64_t refSize = 10000; refSize <= 1000000000000; refSize *= 10 ) { for ( uint64_t qrySize = 10000; qrySize <= refSize; qrySize *= 10 ) { for ( int sketchSize = 100; sketchSize <= 1000; sketchSize += 100 ) { for ( int common = 1; common <= sketchSize + 1; common += 10 ) { if ( common > sketchSize ) { common = sketchSize; } if ( common > kmerSpace ) { continue; } double pX = 1. / (1. + (double)kmerSpace / refSize); double pY = 1. / (1. + (double)kmerSpace / qrySize); double r = pX * pY / (pX + pY - pX * pY); uint64_t M = (double)kmerSpace * (pX + pY) / (1. + r); //cout << "k: " << kmerSize << tab << "L1: " << refSize << tab << "L2: " << qrySize << tab << "s: " << sketchSize << tab << "x: " << common << tab << " | " << "Ek: " << kmerSpace << tab << "pX: " << pX << tab << "pY: " << pY << tab << "r: " << r << tab << "M: " << M << tab; //cout << (M < sketchSize ? M : sketchSize) << tab << r * M << tab << M - r * M << endl; //double p = cdf(complement(hypergeometric_distribution(r * M, M < sketchSize ? M : sketchSize, M), common - 1 )); //double p = cdf(complement(binomial(M < sketchSize ? M : sketchSize, r), common - 1 )); //double p = gsl_cdf_hypergeometric_Q(common - 1, r * M, M - uint64_t(r * M), M < sketchSize ? M : sketchSize); double p = gsl_cdf_binomial_Q(common - 1, r, M < sketchSize ? 
M : sketchSize); cout << p << endl; } } } } }*/ print(); return 0; } int threads = options.at("threads").getArgumentAsNumber(); bool list = options.at("list").active; bool table = options.at("table").active; //bool log = options.at("log").active; double pValueMax = options.at("pvalue").getArgumentAsNumber(); double distanceMax = options.at("distance").getArgumentAsNumber(); Sketch::Parameters parameters; parameters.kmerSize = options.at("kmer").getArgumentAsNumber(); parameters.minHashesPerWindow = options.at("sketchSize").getArgumentAsNumber(); parameters.concatenated = ! options.at("individual").active; parameters.noncanonical = options.at("noncanonical").active; parameters.bloomFilter = options.at("unique").active; parameters.genomeSize = options.at("genome").getArgumentAsNumber(); parameters.memoryMax = options.at("memory").getArgumentAsNumber(); parameters.bloomError = options.at("bloomError").getArgumentAsNumber(); parameters.warning = options.at("warning").getArgumentAsNumber(); if ( options.at("genome").active || options.at("memory").active || options.at("bloomError").active ) { parameters.bloomFilter = true; } if ( parameters.bloomFilter && ! parameters.concatenated ) { cerr << "ERROR: The option " << options.at("individual").identifier << " cannot be used with " << options.at("unique").identifier << "." << endl; return 1; } Sketch sketch; uint64_t lengthThreshold = (parameters.warning * pow(parameters.protein ? 20 : 4, parameters.kmerSize)) / (1. - parameters.warning); uint64_t lengthMax; double randomChance; int kMin; string lengthMaxName; int warningCount = 0; const string & fileReference = arguments[0]; if ( hasSuffix(fileReference, suffixSketch) ) { if ( options.at("kmer").active ) { cerr << "ERROR: The option " << options.at("kmer").identifier << " cannot be used when a sketch is provided; it is inherited from the sketch." 
<< endl; return 1; } if ( options.at("noncanonical").active ) { cerr << "ERROR: The option " << options.at("noncanonical").identifier << " cannot be used when a sketch is provided; it is inherited from the sketch." << endl; return 1; } sketch.initFromCapnp(fileReference.c_str()); if ( options.at("sketchSize").active ) { if ( parameters.bloomFilter && parameters.minHashesPerWindow != sketch.getMinHashesPerWindow() ) { cerr << "ERROR: The sketch size must match the reference when using a bloom filter (leave this option out to inherit from the reference sketch)." << endl; return 1; } } else { parameters.minHashesPerWindow = sketch.getMinHashesPerWindow(); } parameters.kmerSize = sketch.getKmerSize(); parameters.noncanonical = sketch.getNoncanonical(); } else { bool sketchFileExists = false;//sketch.initHeaderFromBaseIfValid(fileReference, false); /* if ( (options.at("kmer").active && parameters.kmerSize != sketch.getKmerSize()) ) { sketchFileExists = false; } */ if ( sketchFileExists ) { sketch.initFromBase(fileReference, false); parameters.kmerSize = sketch.getKmerSize(); parameters.noncanonical = sketch.getNoncanonical(); } else { vector<string> refArgVector; refArgVector.push_back(fileReference); //cerr << "Sketch for " << fileReference << " not found or out of date; creating..." << endl; cerr << "Sketching " << fileReference << " (provide sketch file made with \"mash sketch\" to skip)..."; sketch.initFromSequence(refArgVector, parameters); for ( int i = 0; i < sketch.getReferenceCount(); i++ ) { int length = sketch.getReference(i).length; if ( length > lengthThreshold ) { if ( warningCount == 0 || length > lengthMax ) { lengthMax = length; lengthMaxName = sketch.getReference(i).name; randomChance = sketch.getRandomKmerChance(i); kMin = sketch.getMinKmerSize(i); } warningCount++; } } cerr << "done.\n"; /* if ( sketch.writeToFile() ) { cerr << "Sketch saved for subsequent runs." 
<< endl; } else { cerr << "The sketch for " << fileReference << " could not be saved; it will be sketched again next time." << endl; }*/ } } if ( table ) { cout << "#query"; for ( int i = 0; i < sketch.getReferenceCount(); i++ ) { cout << '\t' << sketch.getReference(i).name; } cout << endl; } ThreadPool<CompareInput, CompareOutput> threadPool(compare, threads); vector<string> queryFiles; for ( int i = 1; i < arguments.size(); i++ ) { if ( list ) { splitFile(arguments[i], queryFiles); } else { queryFiles.push_back(arguments[i]); } } for ( int i = 0; i < queryFiles.size(); i++ ) { // If the input is a sketch file, load in the main thread; otherwise, // leave it to the child. Either way, the child will delete. // Sketch * sketchQuery = new Sketch(); bool isSketch = hasSuffix(queryFiles[i], suffixSketch); if ( isSketch ) { // init header to check params // sketchQuery->initFromCapnp(queryFiles[i].c_str(), true); if ( sketchQuery->getKmerSize() != sketch.getKmerSize() ) { cerr << "\nWARNING: The query sketch " << queryFiles[i] << " has a kmer size (" << sketchQuery->getKmerSize() << ") that does not match the reference sketch (" << sketch.getKmerSize() << "). This query will be skipped.\n\n"; delete sketchQuery; continue; } if ( sketchQuery->getNoncanonical() != sketch.getNoncanonical() ) { cerr << "\nWARNING: The query sketch " << queryFiles[i] << " is " << (sketchQuery->getNoncanonical() ? "noncanonical" : "canonical") << " but the reference sketch is not. This query will be skipped.\n\n"; delete sketchQuery; continue; } // init fully // sketchQuery->initFromCapnp(queryFiles[i].c_str()); } threadPool.runWhenThreadAvailable(new CompareInput(sketch, sketchQuery, queryFiles[i], parameters, distanceMax, pValueMax)); /* if ( ! 
isSketch ) { for ( int j = 0; j < sketchQuery->getReferenceCount(); j++ ) { int length = sketchQuery->getReference(j).length; if ( length > lengthThreshold ) { if ( warningCount == 0 || length > lengthMax ) { lengthMax = length; lengthMaxName = sketchQuery->getReference(j).name; randomChance = sketchQuery->getRandomKmerChance(j); kMin = sketchQuery->getMinKmerSize(j); } warningCount++; } } } */ while ( threadPool.outputAvailable() ) { writeOutput(threadPool.popOutputWhenAvailable(), table); } } while ( threadPool.running() ) { writeOutput(threadPool.popOutputWhenAvailable(), table); } if ( warningCount > 0 && ! parameters.bloomFilter ) { sketch.warnKmerSize(lengthMax, lengthMaxName, randomChance, kMin, warningCount); } return 0; }