Beispiel #1
0
/**
 * Worker routine: compares every reference of the query sketch against every
 * reference of the reference sketch.
 *
 * If the query sketch holds no references yet, the input is treated as a
 * sequence file and is sketched here from data->file with data->parameters.
 *
 * @param data  Input bundle (reference sketch, query sketch, file name,
 *              sketching parameters, distance / p-value cut-offs).
 * @return      Newly allocated output; pairs are laid out query-major, i.e.
 *              the slot for (query i, reference j) is i * referenceCount + j.
 */
CommandDistance::CompareOutput * compare(CommandDistance::CompareInput * data)
{
    Sketch * query = data->sketchQuery;
    const Sketch & reference = data->sketchRef;
    
    CommandDistance::CompareOutput * result = new CommandDistance::CompareOutput(data->sketchRef, data->sketchQuery);
    
    // An empty query sketch means the input was a sequence file; sketch it now.
    if ( query->getReferenceCount() == 0 )
    {
        vector<string> sequenceFiles;
        sequenceFiles.push_back(data->file);
        query->initFromSequence(sequenceFiles, data->parameters);
    }
    
    // Compare using the smaller of the two sketch sizes.
    int sketchSize = reference.getMinHashesPerWindow();
    
    if ( query->getMinHashesPerWindow() < reference.getMinHashesPerWindow() )
    {
        sketchSize = query->getMinHashesPerWindow();
    }
    
    result->pairs.resize(reference.getReferenceCount() * query->getReferenceCount());
    
    for ( int queryIndex = 0; queryIndex < query->getReferenceCount(); queryIndex++ )
    {
        for ( int refIndex = 0; refIndex < reference.getReferenceCount(); refIndex++ )
        {
            const int slot = queryIndex * reference.getReferenceCount() + refIndex;
            
            compareSketches(
                result->pairs[slot],
                reference.getReference(refIndex),
                query->getReference(queryIndex),
                sketchSize,
                reference.getKmerSize(),
                reference.getKmerSpace(),
                data->maxDistance,
                data->maxPValue);
        }
    }
    
    return result;
}
Beispiel #2
0
int CommandInfo::run() const
{
    if ( arguments.size() != 1 || options.at("help").active )
    {
        print();
        return 0;
    }
    
    bool header = options.at("header").active;
    
    const string & file = arguments[0];
    
    if ( ! hasSuffix(file, suffixSketch) )
    {
        cerr << "ERROR: The file \"" << file << "\" does not look like a sketch." << endl;
        return 1;
    }
    
    Sketch sketch;
    
    sketch.initFromCapnp(file.c_str(), header);
    
    cout << "Header:" << endl;
    cout << "  Kmer:                          " << sketch.getKmerSize() << endl;
    cout << "  Target min-hashes per sketch:  " << sketch.getMinHashesPerWindow() << endl;
    cout << "  Canonical kmers:               " << (sketch.getNoncanonical() ? "no" : "yes") << endl;
    
    if ( ! header )
    {
        cout << endl;
        cout << "Sketches (" << sketch.getReferenceCount() << "):" << endl;
        
        vector<vector<string>> columns(4);
        
        columns[0].push_back("Hashes");
        columns[1].push_back("Length");
        columns[2].push_back("ID");
        columns[3].push_back("Comment");
        
        for ( int i = 0; i < sketch.getReferenceCount(); i++ )
        {
            const Sketch::Reference & ref = sketch.getReference(i);
            
            columns[0].push_back(to_string(ref.hashesSorted.size()));
            columns[1].push_back(to_string(ref.length));
            columns[2].push_back(ref.name);
            columns[3].push_back(ref.comment);
        }
        
        printColumns(columns, 2, 2, "-", 0);
    }
    
    return 0;
}
Beispiel #3
0
/**
 * Worker routine: scores every reference of the query sketch against every
 * reference of the reference sketch with containSketches().
 *
 * If the query sketch holds no references yet, the input is treated as a
 * sequence file and is sketched here from data->file with data->parameters.
 * The query sketch is deleted before returning.
 *
 * @param data  Input bundle (reference sketch, query sketch, file name,
 *              sketching parameters).
 * @return      Newly allocated output; pairs are laid out query-major, i.e.
 *              the slot for (query i, reference j) is i * referenceCount + j.
 */
CommandContain::ContainOutput * contain(CommandContain::ContainInput * data)
{
    Sketch * query = data->sketchQuery;
    const Sketch & reference = data->sketchRef;
    
    CommandContain::ContainOutput * result = new CommandContain::ContainOutput();
    
    // An empty query sketch means the input was a sequence file; sketch it now.
    if ( query->getReferenceCount() == 0 )
    {
        vector<string> sequenceFiles;
        sequenceFiles.push_back(data->file);
        query->initFromSequence(sequenceFiles, data->parameters);
    }
    
    result->pairs.resize(reference.getReferenceCount() * query->getReferenceCount());
    
    for ( int queryIndex = 0; queryIndex < query->getReferenceCount(); queryIndex++ )
    {
        for ( int refIndex = 0; refIndex < reference.getReferenceCount(); refIndex++ )
        {
            const int slot = queryIndex * reference.getReferenceCount() + refIndex;
            auto & pair = result->pairs[slot];
            
            // containSketches() returns the score and fills in the error estimate.
            pair.score = containSketches(
                reference.getReference(refIndex).hashesSorted,
                query->getReference(queryIndex).hashesSorted,
                pair.error);
            pair.nameRef = reference.getReference(refIndex).name;
            pair.nameQuery = query->getReference(queryIndex).name;
        }
    }
    
    // The query sketch is no longer needed once all pairs are recorded.
    delete query;
    
    return result;
}
Beispiel #4
0
void Slvs_Solve(Slvs_System *ssys, Slvs_hGroup shg)
{
    if(!IsInit) {
        InitHeaps();
        IsInit = 1;
    }

    int i;
    for(i = 0; i < ssys->params; i++) {
        Slvs_Param *sp = &(ssys->param[i]);
        Param p;
        ZERO(&p);
        
        p.h.v = sp->h;
        p.val = sp->val;
        SK.param.Add(&p);
        if(sp->group == shg) {
            SYS.param.Add(&p);
        }
    }

    for(i = 0; i < ssys->entities; i++) {
        Slvs_Entity *se = &(ssys->entity[i]);
        EntityBase e;
        ZERO(&e);

        switch(se->type) {
case SLVS_E_POINT_IN_3D:        e.type = Entity::POINT_IN_3D; break;
case SLVS_E_POINT_IN_2D:        e.type = Entity::POINT_IN_2D; break;
case SLVS_E_NORMAL_IN_3D:       e.type = Entity::NORMAL_IN_3D; break;
case SLVS_E_NORMAL_IN_2D:       e.type = Entity::NORMAL_IN_2D; break;
case SLVS_E_DISTANCE:           e.type = Entity::DISTANCE; break;
case SLVS_E_WORKPLANE:          e.type = Entity::WORKPLANE; break;
case SLVS_E_LINE_SEGMENT:       e.type = Entity::LINE_SEGMENT; break;
case SLVS_E_CUBIC:              e.type = Entity::CUBIC; break;
case SLVS_E_CIRCLE:             e.type = Entity::CIRCLE; break;
case SLVS_E_ARC_OF_CIRCLE:      e.type = Entity::ARC_OF_CIRCLE; break;

default: dbp("bad entity type %d", se->type); return;
        }
        e.h.v           = se->h;
        e.group.v       = se->group;
        e.workplane.v   = se->wrkpl;
        e.point[0].v    = se->point[0];
        e.point[1].v    = se->point[1];
        e.point[2].v    = se->point[2];
        e.point[3].v    = se->point[3];
        e.normal.v      = se->normal;
        e.distance.v    = se->distance;
        e.param[0].v    = se->param[0];
        e.param[1].v    = se->param[1];
        e.param[2].v    = se->param[2];
        e.param[3].v    = se->param[3];

        SK.entity.Add(&e);
    }

    for(i = 0; i < ssys->constraints; i++) {
        Slvs_Constraint *sc = &(ssys->constraint[i]);
        ConstraintBase c;
        ZERO(&c);

        int t;
        switch(sc->type) {
case SLVS_C_POINTS_COINCIDENT:  t = Constraint::POINTS_COINCIDENT; break;
case SLVS_C_PT_PT_DISTANCE:     t = Constraint::PT_PT_DISTANCE; break;
case SLVS_C_PT_PLANE_DISTANCE:  t = Constraint::PT_PLANE_DISTANCE; break;
case SLVS_C_PT_LINE_DISTANCE:   t = Constraint::PT_LINE_DISTANCE; break;
case SLVS_C_PT_FACE_DISTANCE:   t = Constraint::PT_FACE_DISTANCE; break;
case SLVS_C_PT_IN_PLANE:        t = Constraint::PT_IN_PLANE; break;
case SLVS_C_PT_ON_LINE:         t = Constraint::PT_ON_LINE; break;
case SLVS_C_PT_ON_FACE:         t = Constraint::PT_ON_FACE; break;
case SLVS_C_EQUAL_LENGTH_LINES: t = Constraint::EQUAL_LENGTH_LINES; break;
case SLVS_C_LENGTH_RATIO:       t = Constraint::LENGTH_RATIO; break;
case SLVS_C_EQ_LEN_PT_LINE_D:   t = Constraint::EQ_LEN_PT_LINE_D; break;
case SLVS_C_EQ_PT_LN_DISTANCES: t = Constraint::EQ_PT_LN_DISTANCES; break;
case SLVS_C_EQUAL_ANGLE:        t = Constraint::EQUAL_ANGLE; break;
case SLVS_C_EQUAL_LINE_ARC_LEN: t = Constraint::EQUAL_LINE_ARC_LEN; break;
case SLVS_C_SYMMETRIC:          t = Constraint::SYMMETRIC; break;
case SLVS_C_SYMMETRIC_HORIZ:    t = Constraint::SYMMETRIC_HORIZ; break;
case SLVS_C_SYMMETRIC_VERT:     t = Constraint::SYMMETRIC_VERT; break;
case SLVS_C_SYMMETRIC_LINE:     t = Constraint::SYMMETRIC_LINE; break;
case SLVS_C_AT_MIDPOINT:        t = Constraint::AT_MIDPOINT; break;
case SLVS_C_HORIZONTAL:         t = Constraint::HORIZONTAL; break;
case SLVS_C_VERTICAL:           t = Constraint::VERTICAL; break;
case SLVS_C_DIAMETER:           t = Constraint::DIAMETER; break;
case SLVS_C_PT_ON_CIRCLE:       t = Constraint::PT_ON_CIRCLE; break;
case SLVS_C_SAME_ORIENTATION:   t = Constraint::SAME_ORIENTATION; break;
case SLVS_C_ANGLE:              t = Constraint::ANGLE; break;
case SLVS_C_PARALLEL:           t = Constraint::PARALLEL; break;
case SLVS_C_PERPENDICULAR:      t = Constraint::PERPENDICULAR; break;
case SLVS_C_ARC_LINE_TANGENT:   t = Constraint::ARC_LINE_TANGENT; break;
case SLVS_C_CUBIC_LINE_TANGENT: t = Constraint::CUBIC_LINE_TANGENT; break;
case SLVS_C_EQUAL_RADIUS:       t = Constraint::EQUAL_RADIUS; break;
case SLVS_C_PROJ_PT_DISTANCE:   t = Constraint::PROJ_PT_DISTANCE; break;
case SLVS_C_WHERE_DRAGGED:      t = Constraint::WHERE_DRAGGED; break;
case SLVS_C_CURVE_CURVE_TANGENT:t = Constraint::CURVE_CURVE_TANGENT; break;

default: dbp("bad constraint type %d", sc->type); return;
        }

        c.type = t;

        c.h.v           = sc->h;
        c.group.v       = sc->group;
        c.workplane.v   = sc->wrkpl;
        c.valA          = sc->valA;
        c.ptA.v         = sc->ptA;
        c.ptB.v         = sc->ptB;
        c.entityA.v     = sc->entityA;
        c.entityB.v     = sc->entityB;
        c.entityC.v     = sc->entityC;
        c.entityD.v     = sc->entityD;
        c.other         = (sc->other) ? true : false;
        c.other2        = (sc->other2) ? true : false;

        SK.constraint.Add(&c);
    }

    for(i = 0; i < (int)arraylen(ssys->dragged); i++) {
        if(ssys->dragged[i]) {
            hParam hp = { ssys->dragged[i] };
            SYS.dragged.Add(&hp);
        }
    }

    Group g;
    ZERO(&g);
    g.h.v = shg;

    List<hConstraint> bad;
    ZERO(&bad);

    // Now we're finally ready to solve!
    bool andFindBad = ssys->calculateFaileds ? true : false;
    int how = SYS.Solve(&g, &(ssys->dof), &bad, andFindBad, false);

    switch(how) {
        case System::SOLVED_OKAY:
            ssys->result = SLVS_RESULT_OKAY;
            break;

        case System::DIDNT_CONVERGE:
            ssys->result = SLVS_RESULT_DIDNT_CONVERGE;
            break;

        case System::SINGULAR_JACOBIAN:
            ssys->result = SLVS_RESULT_INCONSISTENT;
            break;

        case System::TOO_MANY_UNKNOWNS:
            ssys->result = SLVS_RESULT_TOO_MANY_UNKNOWNS;
            break;

        default: oops();
    }

    // Write the new parameter values back to our caller.
    for(i = 0; i < ssys->params; i++) {
        Slvs_Param *sp = &(ssys->param[i]);
        hParam hp = { sp->h };
        sp->val = SK.GetParam(hp)->val;
    }

    if(ssys->failed) {
        // Copy over any the list of problematic constraints.
        for(i = 0; i < ssys->faileds && i < bad.n; i++) {
            ssys->failed[i] = bad.elem[i].v;
        }
        ssys->faileds = bad.n;
    }

    bad.Clear();
    SYS.param.Clear();
    SYS.entity.Clear();
    SYS.eq.Clear();
    SYS.dragged.Clear();

    SK.param.Clear();
    SK.entity.Clear();
    SK.constraint.Clear();

    FreeAllTemporary();
}
/**
 * Benchmarks a sketch by replaying packets from a pcap file.
 *
 * For each Ethernet frame carrying IP or IPv6, the captured bytes following
 * the 14-byte Ethernet header are hashed with MD5, the hash is reduced with
 * low_md5() and fed to sketch->update(key, 1). One CSV line with the
 * configuration and clock() timings is printed to stdout per processed
 * packet. Processing stops after 1000 processed packets or at end of file.
 *
 * @param sketch_type       Sketch type name, passed to get_sketch().
 * @param buckets           Number of buckets, passed to get_sketch().
 * @param rows              Number of rows, passed to get_sketch().
 * @param random_generator  Random generator name, passed to get_sketch().
 * @param hash_function     Hash function name, passed to get_sketch().
 * @param pcap_file         Path of the capture file to replay.
 * @return 0 on success, -1 if the sketch could not be created or the pcap
 *         file could not be opened.
 */
int test_sketch(char* sketch_type, unsigned buckets, unsigned rows, 
                    char* random_generator, char* hash_function,
                    char* pcap_file){
    // Bytes 12-13 of an Ethernet II frame hold the type; the payload starts at 14.
    const unsigned int ETHERNET_HEADER_LEN = 14;
    // Upper bound on the number of packets fed to the sketch.
    const unsigned int MAX_PACKETS = 1000;

    unsigned int pkt_counter = 0;   // packets processed so far
    clock_t t1, t2, t3;
    struct pcap_pkthdr header;      // per-packet header filled in by pcap
    const u_char *packet;           // captured packet bytes

    // Create the sketch as the type passed as parameter
    // NOTE(review): the sketch is never released here; whether this function
    // owns it (and whether Sketch can be deleted through this pointer) is not
    // visible from this code - confirm before adding a delete.
    Sketch<KeyType>* sketch = get_sketch<KeyType>(sketch_type, buckets, rows, 
        random_generator, hash_function);
    if (sketch == NULL) {
        return -1;
    }

    // Open the pcap file
    char errbuf[PCAP_ERRBUF_SIZE];
    pcap_t *handle = pcap_open_offline(pcap_file, errbuf);
    if (handle == NULL) {
        fprintf(stderr,"Couldn't open pcap file %s: %s\n", pcap_file, errbuf);
        return -1;
    }

    // Process one packet at a time
    while ((packet = pcap_next(handle, &header)) != NULL) {
        t1 = clock();

        // Only header.caplen bytes are available in the buffer; make sure the
        // Ethernet header is fully present before reading its type field.
        if (header.caplen < ETHERNET_HEADER_LEN) {
            fprintf(stderr, "Truncated packet (%u bytes captured), skipping...\n",
                    (unsigned)header.caplen);
            continue;
        }

        // Parse the Ethernet header, grabbing the (big-endian) type field
        const u_char *pkt_ptr = packet;
        int ether_type = ((int)(pkt_ptr[12]) << 8) | (int)pkt_ptr[13];

        if (ether_type != ETHER_TYPE_IP && ether_type != ETHER_TYPE_IPv6) {
            fprintf(stderr, "Unknown ethernet type, %04X, skipping...\n", ether_type); 
            continue;
        }

        // Skip past the Ethernet II header; hash only the bytes that were
        // actually captured (caplen), not the on-the-wire length (len), which
        // can be larger than the buffer for truncated captures.
        pkt_ptr += ETHERNET_HEADER_LEN;
        size_t packet_length = header.caplen - ETHERNET_HEADER_LEN;

        // Compute MD5 (with a NULL output buffer MD5 returns its own buffer)
        unsigned char *tmp_hash = MD5(pkt_ptr, packet_length, NULL);
        // Strip to the size of the sketch:
        uint64_t low_hash = low_md5(tmp_hash);

        // Update sketch, timing just the update call
        t2 = clock();
        sketch->update(low_hash,1);
        t3 = clock();

        printf("%s,%u,%u,%u,%s,%s,%f,%f,%f,%f\n",
                sketch_type,
                (unsigned)sizeof(KeyType),   // %u expects unsigned, sizeof yields size_t
                buckets,
                rows,
                random_generator,
                hash_function,
                ((float)t1)/CLOCKS_PER_SEC, 
                ((float)t2)/CLOCKS_PER_SEC, 
                ((float)t3)/CLOCKS_PER_SEC,
                ((float)(t3 - t1))/CLOCKS_PER_SEC);  // subtract before converting to float

        pkt_counter++;
        if (pkt_counter >= MAX_PACKETS)
            break;
    }

    pcap_close(handle);
    return 0;
}
// Handler for the entries of the Analyze menu. `id` is one of the
// GraphicsWindow::MNU_* values handled in the switch below; any other value
// hits oops(). The current selection is grouped first so that the cases can
// inspect it through the `gs` shorthand.
void SolveSpace::MenuAnalyze(int id) {
    SS.GW.GroupSelection();
#define gs (SS.GW.gs)

    switch(id) {
        // Step a dimension: requires exactly one selected constraint (and
        // nothing else) that has a label and is not a reference dimension.
        case GraphicsWindow::MNU_STEP_DIM:
            if(gs.constraints == 1 && gs.n == 0) {
                Constraint *c = SK.GetConstraint(gs.constraint[0]);
                if(c->HasLabel() && !c->reference) {
                    // Seed the step-dimension screen with the constraint's
                    // current value and a default of 10 steps.
                    SS.TW.shown.dimFinish = c->valA;
                    SS.TW.shown.dimSteps = 10;
                    // Angles and length ratios are not treated as distances.
                    SS.TW.shown.dimIsDistance =
                        (c->type != Constraint::ANGLE) &&
                        (c->type != Constraint::LENGTH_RATIO);
                    SS.TW.shown.constraint = c->h;
                    SS.TW.shown.screen = TextWindow::SCREEN_STEP_DIMENSION;

                    // The step params are specified in the text window,
                    // so force that to be shown.
                    SS.GW.ForceTextWindowShown();

                    SS.later.showTW = true;
                    SS.GW.ClearSelection();
                } else {
                    Error("Constraint must have a label, and must not be "
                          "a reference dimension.");
                }
            } else {
                Error("Bad selection for step dimension; select a constraint.");
            }
            break;

        // Collect naked or self-intersecting edges of the active group's
        // display mesh into SS.nakedEdges and report the result.
        case GraphicsWindow::MNU_NAKED_EDGES: {
            SS.nakedEdges.Clear();

            Group *g = SK.GetGroup(SS.GW.activeGroup);
            SMesh *m = &(g->displayMesh);
            SKdNode *root = SKdNode::From(m);
            // Both flags are outputs of MakeCertainEdgesInto().
            bool inters, leaks;
            root->MakeCertainEdgesInto(&(SS.nakedEdges), 
                SKdNode::NAKED_OR_SELF_INTER_EDGES, true, &inters, &leaks);

            InvalidateGraphics();

            const char *intersMsg = inters ?
                "The mesh is self-intersecting (NOT okay, invalid)." :
                "The mesh is not self-intersecting (okay, valid).";
            const char *leaksMsg = leaks ?
                "The mesh has naked edges (NOT okay, invalid)." :
                "The mesh is watertight (okay, valid).";

            // Triangle and surface counts, appended to either message below.
            char cntMsg[1024];
            sprintf(cntMsg, "\n\nThe model contains %d triangles, from "
                            "%d surfaces.",
                g->displayMesh.l.n, g->runningShell.surface.n);

            // Informational message when no edges were collected, error
            // otherwise.
            if(SS.nakedEdges.l.n == 0) {
                Message("%s\n\n%s\n\nZero problematic edges, good.%s",
                    intersMsg, leaksMsg, cntMsg);
            } else {
                Error("%s\n\n%s\n\n%d problematic edges, bad.%s",
                    intersMsg, leaksMsg, SS.nakedEdges.l.n, cntMsg);
            }
            break;
        }

        // Interference check: collect only self-intersection edges of the
        // active group's display mesh and report whether any were found.
        case GraphicsWindow::MNU_INTERFERENCE: {
            SS.nakedEdges.Clear();

            SMesh *m = &(SK.GetGroup(SS.GW.activeGroup)->displayMesh);
            SKdNode *root = SKdNode::From(m);
            bool inters, leaks;
            root->MakeCertainEdgesInto(&(SS.nakedEdges),
                SKdNode::SELF_INTER_EDGES, false, &inters, &leaks);

            InvalidateGraphics();

            if(inters) {
                Error("%d edges interfere with other triangles, bad.",
                    SS.nakedEdges.l.n);
            } else {
                Message("The assembly does not interfere, good.");
            }
            break;
        }

        // Volume of the active group's display mesh: sum, over all triangles,
        // the integral of the triangle's plane height z over its projection
        // onto the xy plane.
        case GraphicsWindow::MNU_VOLUME: {
            SMesh *m = &(SK.GetGroup(SS.GW.activeGroup)->displayMesh);
           
            double vol = 0;
            int i;
            for(i = 0; i < m->l.n; i++) {
                // Work on a copy; the triangle is transformed below.
                STriangle tr = m->l.elem[i];

                // Translate to place vertex A at (x, y, 0)
                Vector trans = Vector::From(tr.a.x, tr.a.y, 0);
                tr.a = (tr.a).Minus(trans);
                tr.b = (tr.b).Minus(trans);
                tr.c = (tr.c).Minus(trans);

                // Rotate to place vertex B on the y-axis. Depending on
                // whether the triangle is CW or CCW, C is either to the
                // right or to the left of the y-axis. This handles the
                // sign of our normal.
                Vector u = Vector::From(-tr.b.y, tr.b.x, 0);
                u = u.WithMagnitude(1);
                Vector v = Vector::From(tr.b.x, tr.b.y, 0);
                v = v.WithMagnitude(1);
                Vector n = Vector::From(0, 0, 1);

                tr.a = (tr.a).DotInToCsys(u, v, n);
                tr.b = (tr.b).DotInToCsys(u, v, n);
                tr.c = (tr.c).DotInToCsys(u, v, n);

                // From here on n is the unit normal of the transformed
                // triangle.
                n = tr.Normal().WithMagnitude(1);

                // Triangles on edge don't contribute
                if(fabs(n.z) < LENGTH_EPS) continue;
               
                // The plane has equation p dot n = a dot n
                double d = (tr.a).Dot(n);
                // nx*x + ny*y + nz*z = d
                // nz*z = d - nx*x - ny*y
                // so z = A*x + B*y + C with the coefficients below.
                double A = -n.x/n.z, B = -n.y/n.z, C = d/n.z;

                // Slopes used as the y-limits of the inner integral (see the
                // Maple expression below): y = mac*x and y = mbc*x + yb.
                double mac = tr.c.y/tr.c.x, mbc = (tr.c.y - tr.b.y)/tr.c.x;
                double xc = tr.c.x, yb = tr.b.y;
               
                // I asked Maple for
                //    int(int(A*x + B*y +C, y=mac*x..(mbc*x + yb)), x=0..xc);
                double integral = 
                    (1.0/3)*(
                        A*(mbc-mac)+
                        (1.0/2)*B*(mbc*mbc-mac*mac)
                    )*(xc*xc*xc)+
                    (1.0/2)*(A*yb+B*yb*mbc+C*(mbc-mac))*xc*xc+
                    C*yb*xc+
                    (1.0/2)*B*yb*yb*xc;

                vol += integral;
            }

            // Build the message: the volume is converted to the display unit
            // by dividing by MmPerUnit() cubed.
            char msg[1024];
            sprintf(msg, "The volume of the solid model is:\n\n"
                         "    %.3f %s^3",
                vol / pow(SS.MmPerUnit(), 3),
                SS.UnitName());

            // When viewing in millimetres, also show the value divided by
            // 1000 as mL.
            if(SS.viewUnits == SolveSpace::UNIT_MM) {
                sprintf(msg+strlen(msg), "\n    %.2f mL", vol/(10*10*10));
            }
            // This text is appended with strcpy and later passed as a "%s"
            // argument, so its '%' is a literal character, not a format
            // specifier.
            strcpy(msg+strlen(msg),
                "\n\nCurved surfaces have been approximated as triangles.\n"
                "This introduces error, typically of around 1%.");
            Message("%s", msg);
            break;
        }

        // Area of the closed region sketched in the active group; only
        // available when the group's polygon was assembled without error.
        case GraphicsWindow::MNU_AREA: {
            Group *g = SK.GetGroup(SS.GW.activeGroup);
            if(g->polyError.how != Group::POLY_GOOD) {
                Error("This group does not contain a correctly-formed "
                      "2d closed area. It is open, not coplanar, or self-"
                      "intersecting.");
                break;
            }
            // Rebuild a polygon from the group's loops and measure its
            // signed area.
            SEdgeList sel;
            ZERO(&sel);
            g->polyLoops.MakeEdgesInto(&sel);
            SPolygon sp;
            ZERO(&sp);
            sel.AssemblePolygon(&sp, NULL, true);
            sp.normal = sp.ComputeNormal();
            sp.FixContourDirections();
            double area = sp.SignedArea();
            double scale = SS.MmPerUnit();
            // Here the text is itself the format string, hence the "%%".
            Message("The area of the region sketched in this group is:\n\n"
                    "    %.3f %s^2\n\n"
                    "Curves have been approximated as piecewise linear.\n"
                    "This introduces error, typically of around 1%%.",
                area / (scale*scale),
                SS.UnitName());
            sel.Clear();
            sp.Clear();
            break;
        }

        case GraphicsWindow::MNU_SHOW_DOF:
            // This works like a normal solve, except that it calculates
            // which variables are free/bound at the same time.
            SS.GenerateAll(0, INT_MAX, true);
            break;

        // Start tracing: requires exactly one selected point and nothing
        // else.
        case GraphicsWindow::MNU_TRACE_PT:
            if(gs.points == 1 && gs.n == 1) {
                SS.traced.point = gs.point[0];
                SS.GW.ClearSelection();
            } else {
                Error("Bad selection for trace; select a single point.");
            }
            break;
            
        // Stop tracing: offer to export the traced path as CSV (one
        // "x, y, z" line per path element, each coordinate divided by
        // SS.exportScale), then discard the trace whether or not a file was
        // written.
        case GraphicsWindow::MNU_STOP_TRACING: {
            char exportFile[MAX_PATH] = "";
            if(GetSaveFile(exportFile, CSV_EXT, CSV_PATTERN)) {
                // Binary mode, with explicit "\r\n" line endings written
                // below.
                FILE *f = fopen(exportFile, "wb");
                if(f) {
                    int i;
                    SContour *sc = &(SS.traced.path);
                    for(i = 0; i < sc->l.n; i++) {
                        Vector p = sc->l.elem[i].p;
                        double s = SS.exportScale;
                        fprintf(f, "%.10f, %.10f, %.10f\r\n",
                            p.x/s, p.y/s, p.z/s);
                    }
                    fclose(f);
                } else {
                    Error("Couldn't write to '%s'", exportFile);
                }
            }
            // Clear the trace, and stop tracing
            SS.traced.point = Entity::NO_ENTITY;
            SS.traced.path.l.Clear();
            InvalidateGraphics();
            break;
        }

        default: oops();
    }
}
Beispiel #7
0
int CommandContain::run() const
{
    if ( arguments.size() < 2 || options.at("help").active )
    {
        print();
        return 0;
    }
    
    int threads = options.at("threads").getArgumentAsNumber();
    bool list = options.at("list").active;
    
    Sketch::Parameters parameters;
    
    parameters.kmerSize = options.at("kmer").getArgumentAsNumber();
    parameters.minHashesPerWindow = options.at("sketchSize").getArgumentAsNumber();
    parameters.concatenated = ! options.at("individual").active;
    parameters.noncanonical = options.at("noncanonical").active;
    parameters.error = options.at("errorThreshold").getArgumentAsNumber();
    parameters.bloomFilter = options.at("unique").active;
    parameters.genomeSize = options.at("genome").getArgumentAsNumber();
    parameters.memoryMax = options.at("memory").getArgumentAsNumber();
    parameters.bloomError = options.at("bloomError").getArgumentAsNumber();
    
    if ( options.at("genome").active || options.at("memory").active )
    {
        parameters.bloomFilter = true;
    }
    
    if ( parameters.bloomFilter )
    {
        parameters.concatenated = true;
    }
    
    Sketch sketch;
    
    const string & fileReference = arguments[0];
    
    if ( hasSuffix(fileReference, suffixSketch) )
    {
        if ( options.at("kmer").active )
        {
            cerr << "ERROR: The option " << options.at("kmer").identifier << " cannot be used when a sketch is provided; it is inherited from the sketch.\n";
            return 1;
        }
        
        if ( options.at("noncanonical").active )
        {
            cerr << "ERROR: The option " << options.at("noncanonical").identifier << " cannot be used when a sketch is provided; it is inherited from the sketch.\n";
            return 1;
        }
        
        sketch.initFromCapnp(fileReference.c_str());
        
        parameters.kmerSize = sketch.getKmerSize();
        parameters.noncanonical = sketch.getNoncanonical();
    }
    else
    {
        bool sketchFileExists = sketch.initHeaderFromBaseIfValid(fileReference, false);
        
        if
        (
            (options.at("kmer").active && parameters.kmerSize != sketch.getKmerSize())
        )
        {
            sketchFileExists = false;
        }
        
        if ( false && sketchFileExists )
        {
            sketch.initFromBase(fileReference, false);
            parameters.kmerSize = sketch.getKmerSize();
            parameters.noncanonical = sketch.getNoncanonical();
        }
        else
        {
            vector<string> refArgVector;
            refArgVector.push_back(fileReference);
            
            //cerr << "Sketch for " << fileReference << " not found or out of date; creating..." << endl;
            cerr << "Sketching " << fileReference << " (provide sketch file made with \"mash sketch\" to skip)...\n";
            
            sketch.initFromSequence(refArgVector, parameters);
            /*
            if ( sketch.writeToFile() )
            {
                cerr << "Sketch saved for subsequent runs." << endl;
            }
            else
            {
                cerr << "The sketch for " << fileReference << " could not be saved; it will be sketched again next time." << endl;
            }*/
        }
    }
    
    ThreadPool<ContainInput, ContainOutput> threadPool(contain, threads);
    
    vector<string> queryFiles;
    
    for ( int i = 1; i < arguments.size(); i++ )
    {
        if ( list )
        {
            splitFile(arguments[i], queryFiles);
        }
        else
        {
            queryFiles.push_back(arguments[i]);
        }
    }
    
    for ( int i = 0; i < queryFiles.size(); i++ )
    {
        // If the input is a sketch file, load in the main thread; otherwise,
        // leave it to the child. Either way, the child will delete.
        //
        Sketch * sketchQuery = new Sketch();
        
        if ( hasSuffix(queryFiles[i], suffixSketch) )
        {
            // init header to check params
            //
            sketchQuery->initFromCapnp(queryFiles[i].c_str(), true);
            
            if ( sketchQuery->getKmerSize() != sketch.getKmerSize() )
            {
                cerr << "\nWARNING: The query sketch " << queryFiles[i] << " has a kmer size (" << sketchQuery->getKmerSize() << ") that does not match the reference sketch (" << sketch.getKmerSize() << "). This query will be skipped.\n\n";
                delete sketchQuery;
                continue;
            }
            
            if ( sketchQuery->getNoncanonical() != sketch.getNoncanonical() )
            {
                cerr << "\nWARNING: The query sketch " << queryFiles[i] << " is " << (sketchQuery->getNoncanonical() ? "noncanonical" : "canonical") << " but the reference sketch is not. This query will be skipped.\n\n";
                delete sketchQuery;
                continue;
            }
            
            // init fully
            //
            sketchQuery->initFromCapnp(queryFiles[i].c_str());
        }
        
        threadPool.runWhenThreadAvailable(new ContainInput(sketch, sketchQuery, queryFiles[i], parameters));
        
        while ( threadPool.outputAvailable() )
        {
            writeOutput(threadPool.popOutputWhenAvailable(), parameters.error);
        }
    }
    
    while ( threadPool.running() )
    {
        writeOutput(threadPool.popOutputWhenAvailable(), parameters.error);
    }
    
    return 0;
}
Beispiel #8
0
/**
 * Entry point of the distance command.
 *
 * arguments[0] is the reference (a sketch file, recognised by suffixSketch,
 * or a sequence file that is sketched here); every further argument is a
 * query, or a list of queries when the "list" option is active. Each query
 * is handed to the thread pool together with the reference sketch, and
 * results are written with writeOutput() as they become available.
 *
 * @return 0 after a normal run or after calling print() because too few
 *         arguments were given or "help" was requested; 1 when the supplied
 *         options conflict with each other or with the reference sketch.
 */
int CommandDistance::run() const
{
    if ( arguments.size() < 2 || options.at("help").active )
    {
        print();
        return 0;
    }
    
    int threads = options.at("threads").getArgumentAsNumber();
    bool list = options.at("list").active;
    bool table = options.at("table").active;
    double pValueMax = options.at("pvalue").getArgumentAsNumber();
    double distanceMax = options.at("distance").getArgumentAsNumber();
    
    Sketch::Parameters parameters;
    
    parameters.kmerSize = options.at("kmer").getArgumentAsNumber();
    parameters.minHashesPerWindow = options.at("sketchSize").getArgumentAsNumber();
    parameters.concatenated = ! options.at("individual").active;
    parameters.noncanonical = options.at("noncanonical").active;
    parameters.bloomFilter = options.at("unique").active;
    parameters.genomeSize = options.at("genome").getArgumentAsNumber();
    parameters.memoryMax = options.at("memory").getArgumentAsNumber();
    parameters.bloomError = options.at("bloomError").getArgumentAsNumber();
    parameters.warning = options.at("warning").getArgumentAsNumber();
    
    // Any bloom-filter tuning option implies that the bloom filter is wanted.
    //
    if ( options.at("genome").active || options.at("memory").active || options.at("bloomError").active )
    {
        parameters.bloomFilter = true;
    }
    
    if ( parameters.bloomFilter && ! parameters.concatenated )
    {
        cerr << "ERROR: The option " << options.at("individual").identifier << " cannot be used with " << options.at("unique").identifier << "." << endl;
        return 1;
    }
    
    Sketch sketch;
    
    // References longer than this are counted towards the kmer size warning
    // issued at the end. Computed from the kmer size given on the command
    // line; it is only consulted when the reference is sketched below.
    //
    uint64_t lengthThreshold = (parameters.warning * pow(parameters.protein ? 20 : 4, parameters.kmerSize)) / (1. - parameters.warning);
    
    // Details of the longest reference exceeding the threshold. Only
    // meaningful when warningCount > 0, but initialised so that no
    // indeterminate value can ever reach warnKmerSize().
    //
    uint64_t lengthMax = 0;
    double randomChance = 0.;
    int kMin = 0;
    string lengthMaxName;
    int warningCount = 0;
    
    const string & fileReference = arguments[0];
    
    if ( hasSuffix(fileReference, suffixSketch) )
    {
        // Kmer size and strandedness are fixed by the reference sketch, so
        // giving them explicitly is rejected rather than silently ignored.
        //
        if ( options.at("kmer").active )
        {
            cerr << "ERROR: The option " << options.at("kmer").identifier << " cannot be used when a sketch is provided; it is inherited from the sketch." << endl;
            return 1;
        }
        
        if ( options.at("noncanonical").active )
        {
            cerr << "ERROR: The option " << options.at("noncanonical").identifier << " cannot be used when a sketch is provided; it is inherited from the sketch." << endl;
            return 1;
        }
        
        sketch.initFromCapnp(fileReference.c_str());
        
        if ( options.at("sketchSize").active )
        {
            if ( parameters.bloomFilter && parameters.minHashesPerWindow != sketch.getMinHashesPerWindow() )
            {
                cerr << "ERROR: The sketch size must match the reference when using a bloom filter (leave this option out to inherit from the reference sketch)." << endl;
                return 1;
            }
        }
        else
        {
            parameters.minHashesPerWindow = sketch.getMinHashesPerWindow();
        }
        
        parameters.kmerSize = sketch.getKmerSize();
        parameters.noncanonical = sketch.getNoncanonical();
    }
    else
    {
        // The reference is a sequence file: sketch it now. (An earlier lookup
        // of a previously saved sketch was hard-wired off, so that branch
        // could never run and has been dropped.)
        //
        vector<string> refArgVector;
        refArgVector.push_back(fileReference);
        
        cerr << "Sketching " << fileReference << " (provide sketch file made with \"mash sketch\" to skip)...";
        
        sketch.initFromSequence(refArgVector, parameters);
        
        for ( int i = 0; i < sketch.getReferenceCount(); i++ )
        {
            // Keep the length in 64 bits: narrowing it to int would corrupt
            // the comparison with lengthThreshold for very long sequences.
            //
            const uint64_t length = sketch.getReference(i).length;
            
            if ( length > lengthThreshold )
            {
                if ( warningCount == 0 || length > lengthMax )
                {
                    lengthMax = length;
                    lengthMaxName = sketch.getReference(i).name;
                    randomChance = sketch.getRandomKmerChance(i);
                    kMin = sketch.getMinKmerSize(i);
                }
                
                warningCount++;
            }
        }
        
        cerr << "done.\n";
    }
    
    if ( table )
    {
        // Header row: one column per reference in the sketch.
        //
        cout << "#query";
        
        for ( int i = 0; i < sketch.getReferenceCount(); i++ )
        {
            cout << '\t' << sketch.getReference(i).name;
        }
        
        cout << endl;
    }
    
    ThreadPool<CompareInput, CompareOutput> threadPool(compare, threads);
    
    // Every argument after the reference is a query, or a file listing
    // queries when the list option is active.
    //
    vector<string> queryFiles;
    
    for ( size_t i = 1; i < arguments.size(); i++ )
    {
        if ( list )
        {
            splitFile(arguments[i], queryFiles);
        }
        else
        {
            queryFiles.push_back(arguments[i]);
        }
    }
    
    for ( const string & queryFile : queryFiles )
    {
        // If the input is a sketch file, load in the main thread; otherwise,
        // leave it to the child. Either way, the child will delete.
        //
        Sketch * sketchQuery = new Sketch();
        
        if ( hasSuffix(queryFile, suffixSketch) )
        {
            // init header to check params
            //
            sketchQuery->initFromCapnp(queryFile.c_str(), true);
            
            if ( sketchQuery->getKmerSize() != sketch.getKmerSize() )
            {
                cerr << "\nWARNING: The query sketch " << queryFile << " has a kmer size (" << sketchQuery->getKmerSize() << ") that does not match the reference sketch (" << sketch.getKmerSize() << "). This query will be skipped.\n\n";
                delete sketchQuery; // never handed to a worker, so free it here
                continue;
            }
            
            if ( sketchQuery->getNoncanonical() != sketch.getNoncanonical() )
            {
                cerr << "\nWARNING: The query sketch " << queryFile << " is " << (sketchQuery->getNoncanonical() ? "noncanonical" : "canonical") << " but the reference sketch is not. This query will be skipped.\n\n";
                delete sketchQuery; // never handed to a worker, so free it here
                continue;
            }
            
            // init fully
            //
            sketchQuery->initFromCapnp(queryFile.c_str());
        }
        
        threadPool.runWhenThreadAvailable(new CompareInput(sketch, sketchQuery, queryFile, parameters, distanceMax, pValueMax));
        
        // Write whatever has finished so far before queueing the next query.
        //
        while ( threadPool.outputAvailable() )
        {
            writeOutput(threadPool.popOutputWhenAvailable(), table);
        }
    }
    
    // Drain the results of the jobs that are still running.
    //
    while ( threadPool.running() )
    {
        writeOutput(threadPool.popOutputWhenAvailable(), table);
    }
    
    if ( warningCount > 0 && ! parameters.bloomFilter )
    {
        sketch.warnKmerSize(lengthMax, lengthMaxName, randomChance, kMin, warningCount);
    }
    
    return 0;
}