/////////////////////////////////////////////////////////// // rev2bSnpJoin test // case that first came up in lrc_kir where sanity check // fails on a seemingly straightforward 2base reverse snp // /////////////////////////////////////////////////////////// void rev2bSnpJoinTest(CuTest *testCase) { CuAssertTrue(testCase, true); SideGraph sg; SGPosition p1a(0, 5); SGPosition p2a(1, 0); sg.addSequence(new SGSequence(0, 20, "seq1")); sg.addSequence(new SGSequence(1, 2, "seq2")); sg.addJoin(new SGJoin(SGSide(p1a, true), SGSide(p2a, false))); vector<string> bases(2); bases[0] = "ACCTGACCATAGGCATGGGC"; bases[1] = "TA"; vector<SGNamedPath> paths; Side2Seq converter; converter.init(&sg, &bases, &paths); try { converter.convert(); } catch(exception& e) { cerr << "Exception caught " << e.what() << endl; CuAssertTrue(testCase, false); } const SideGraph* outGraph = converter.getOutGraph(); CuAssertTrue(testCase, outGraph->getNumSequences() == 4); CuAssertTrue(testCase, outGraph->getSequence(0)->getLength() == 5); CuAssertTrue(testCase, outGraph->getSequence(1)->getLength() == 15); CuAssertTrue(testCase, outGraph->getSequence(2)->getLength() == 1); CuAssertTrue(testCase, outGraph->getSequence(3)->getLength() == 1); // original join SGJoin j1(SGSide(SGPosition(1, 0), true), SGSide(SGPosition(2, 0), false)); CuAssertTrue(testCase, outGraph->getJoin(&j1) != NULL); // 1 new join added by fragmentation of sequence 0 SGJoin j2(SGSide(SGPosition(0, 4), false), SGSide(SGPosition(1, 0), true)); CuAssertTrue(testCase, outGraph->getJoin(&j2) != NULL); // 1 new join added by fragmentation of sequence 1 SGJoin j3(SGSide(SGPosition(2, 0), false), SGSide(SGPosition(3, 0), true)); CuAssertTrue(testCase, outGraph->getJoin(&j3) != NULL); CuAssertTrue(testCase, outGraph->getJoinSet()->size() == 3); }
/////////////////////////////////////////////////////////// // doubleCut test // case that first came up in camel-brca1 where segment cut // failed because of equivalent join sides being processed // when fragmenting segment // /////////////////////////////////////////////////////////// void doubleCutTest(CuTest *testCase) { CuAssertTrue(testCase, true); SideGraph sg; SGPosition p1a(0, 5); SGPosition p1b(0, 6); SGPosition p1c(0, 10); sg.addSequence(new SGSequence(0, 20, "seq1")); sg.addJoin(new SGJoin(SGSide(p1a, false), SGSide(p1c, true))); sg.addJoin(new SGJoin(SGSide(p1b, true), SGSide(p1c, false))); vector<string> bases(1); bases[0] = "ACCTGACCATAGGCATGGGC"; vector<SGNamedPath> paths; Side2Seq converter; converter.init(&sg, &bases, &paths); try { converter.convert(); } catch(exception& e) { cerr << "Exception caught " << e.what() << endl; CuAssertTrue(testCase, false); } const SideGraph* outGraph = converter.getOutGraph(); CuAssertTrue(testCase, outGraph->getNumSequences() == 4); CuAssertTrue(testCase, outGraph->getSequence(0)->getLength() == 6); CuAssertTrue(testCase, outGraph->getSequence(1)->getLength() == 4); CuAssertTrue(testCase, outGraph->getSequence(2)->getLength() == 1); CuAssertTrue(testCase, outGraph->getSequence(3)->getLength() == 9); }
int main(int argc, char** argv) { if (argc < 2) { help(argv); return 1; } int pageSize = SGClient::DefaultPageSize; bool upperCase = false; bool seqPaths = false; bool skipPaths = false; optind = 1; while (true) { static struct option long_options[] = { {"help", no_argument, 0, 'h'}, {"page", required_argument, 0, 'p'}, {"upper", no_argument, 0, 'u'}, {"paths", no_argument, 0, 'a'}, {"no-paths", no_argument, 0, 'n'} }; int option_index = 0; int c = getopt_long(argc, argv, "hp:uan", long_options, &option_index); if (c == -1) { break; } switch(c) { case 'h': case '?': help(argv); exit(1); case 'p': pageSize = atoi(optarg); break; case 'u': upperCase = true; break; case 'a': seqPaths = true; break; case 'n': skipPaths = true; break; default: abort(); } } Download::init(); string url = argv[optind]; SGClient sgClient; sgClient.setURL(url); sgClient.setOS(&cerr); sgClient.setPageSize(pageSize); sgClient.setSkipPaths(skipPaths); // ith element is bases for sequence with id i in side graph vector<string> bases; // ith element is <name, segment vector> for allele i vector<SGClient::NamedPath> paths; const SideGraph* sg = sgClient.downloadGraph(bases, paths); // convert side graph into sequence graph (which is stored cerr << "Converting Side Graph to VG Sequence Graph" << endl; Side2Seq converter; converter.init(sg, &bases, &paths, upperCase, seqPaths, "&SG_"); converter.convert(); const SideGraph* outGraph = converter.getOutGraph(); const vector<string>& outBases = converter.getOutBases(); const vector<SGClient::NamedPath>& outPaths = converter.getOutPaths(); // write to vg json cerr << "Writing VG JSON to stdout" << endl; SG2VGJSON jsonWriter; jsonWriter.init(&cout); jsonWriter.writeGraph(outGraph, outBases, outPaths); /* cerr << "INPUT " << endl; cerr << *sgClient.getSideGraph() << endl; cerr << "OUTPUT " << endl; cerr << *outGraph << endl; for (int i = 0; i < outPaths.size(); ++i) { cerr << "path " << i << ":\n"; cerr << "input " << "name=" << outPaths[i].first; for 
(int j = 0; j < outPaths[i].second.size(); ++j) { cerr << outPaths[i].second[j] << ", "; } cerr << endl; cerr << "output " << "name=" << outPaths[i].first; for (int j = 0; j < outPaths[i].second.size(); ++j) { cerr << outPaths[i].second[j] << ", "; } cerr << endl; } */ Download::cleanup(); }
///////////////////////////////////////////////////////////
//  chop test
//  Same one-snp graph as simpleTest1, but converted with a trailing
//  init argument of 2 (presumably the maximum output sequence length
//  -- confirm against Side2Seq::init), so the output is cut into
//  pieces of at most two bases.
//
///////////////////////////////////////////////////////////
void chopTest(CuTest *testCase)
{
  CuAssertTrue(testCase, true);

  // build easy side graph with one snp.
  SideGraph sg;
  sg.addSequence(new SGSequence(0, 10, "seq1"));
  sg.addSequence(new SGSequence(1, 1, "snp"));
  sg.addJoin(new SGJoin(SGSide(SGPosition(0, 3), false),
                        SGSide(SGPosition(1, 0), true)));
  sg.addJoin(new SGJoin(SGSide(SGPosition(1, 0), false),
                        SGSide(SGPosition(0, 5), true)));

  vector<string> bases(2);
  bases[0] = string(10, 'A');
  bases[1] = string(1, 'G');

  // path1 walks straight through seq1; path2 detours through the snp
  vector<SGNamedPath> paths(2);
  paths[0].first = "path1";
  paths[0].second.push_back(SGSegment(SGSide(SGPosition(0, 0), true), 10));
  paths[1].first = "path2";
  paths[1].second.push_back(SGSegment(SGSide(SGPosition(0, 0), true), 4));
  paths[1].second.push_back(SGSegment(SGSide(SGPosition(1, 0), true), 1));
  paths[1].second.push_back(SGSegment(SGSide(SGPosition(0, 5), true), 5));

  Side2Seq converter;
  converter.init(&sg, &bases, &paths, false, false, "", 2);
  converter.convert();

  const SideGraph* outGraph = converter.getOutGraph();
  // bind by reference: the converter outlives every use below
  const vector<SGNamedPath>& outPaths = converter.getOutPaths();

  // expect sequences of length 2, 2, 1, 1, 2, 2, 1
  CuAssertTrue(testCase, outGraph->getNumSequences() == 7);
  CuAssertTrue(testCase, outGraph->getSequence(0)->getLength() == 2);
  CuAssertTrue(testCase, outGraph->getSequence(1)->getLength() == 2);
  CuAssertTrue(testCase, outGraph->getSequence(2)->getLength() == 1);
  CuAssertTrue(testCase, outGraph->getSequence(3)->getLength() == 1);
  CuAssertTrue(testCase, outGraph->getSequence(4)->getLength() == 2);
  CuAssertTrue(testCase, outGraph->getSequence(5)->getLength() == 2);
  CuAssertTrue(testCase, outGraph->getSequence(6)->getLength() == 1);

  // expect 5 extra joins
  CuAssertTrue(testCase, outGraph->getJoinSet()->size() == 7);

  SGJoin j1(SGSide(SGPosition(0, 1), false), SGSide(SGPosition(1, 0), true));
  CuAssertTrue(testCase, outGraph->getJoin(&j1) != NULL);
  SGJoin j2(SGSide(SGPosition(1, 1), false), SGSide(SGPosition(2, 0), true));
  CuAssertTrue(testCase, outGraph->getJoin(&j2) != NULL);
  SGJoin j3(SGSide(SGPosition(1, 1), false), SGSide(SGPosition(6, 0), true));
  CuAssertTrue(testCase, outGraph->getJoin(&j3) != NULL);
  SGJoin j4(SGSide(SGPosition(2, 0), false), SGSide(SGPosition(3, 0), true));
  CuAssertTrue(testCase, outGraph->getJoin(&j4) != NULL);
  SGJoin j5(SGSide(SGPosition(6, 0), false), SGSide(SGPosition(3, 0), true));
  CuAssertTrue(testCase, outGraph->getJoin(&j5) != NULL);
  SGJoin j6(SGSide(SGPosition(3, 0), false), SGSide(SGPosition(4, 0), true));
  CuAssertTrue(testCase, outGraph->getJoin(&j6) != NULL);
  SGJoin j7(SGSide(SGPosition(4, 1), false), SGSide(SGPosition(5, 0), true));
  CuAssertTrue(testCase, outGraph->getJoin(&j7) != NULL);

  // both paths should be broken up over six segments
  // (path1: outputs 0,1,2,3,4,5; path2: outputs 0,1,6,3,4,5)
  CuAssertTrue(testCase, outPaths.size() == 2);
  CuAssertTrue(testCase, outPaths[0].second.size() == 6);
  // previously re-checked outPaths[0], leaving path2 unverified
  CuAssertTrue(testCase, outPaths[1].second.size() == 6);
}
/////////////////////////////////////////////////////////// // Make sure trivial graph runs through ok // /////////////////////////////////////////////////////////// void simpleTest1(CuTest *testCase) { CuAssertTrue(testCase, true); // build easy side graph with one snp. SideGraph sg; sg.addSequence(new SGSequence(0, 10, "seq1")); sg.addSequence(new SGSequence(1, 1, "snp")); sg.addJoin(new SGJoin(SGSide(SGPosition(0, 3), false), SGSide(SGPosition(1, 0), true))); sg.addJoin(new SGJoin(SGSide(SGPosition(1, 0), false), SGSide(SGPosition(0, 5), true))); vector<string> bases(2); bases[0] = string(10, 'A'); bases[1] = string(1, 'G'); vector<SGNamedPath> paths(2); paths[0].first = "path1"; paths[0].second.push_back(SGSegment(SGSide(SGPosition(0, 0), true), 10)); paths[1].first = "path2"; paths[1].second.push_back(SGSegment(SGSide(SGPosition(0, 0), true), 4)); paths[1].second.push_back(SGSegment(SGSide(SGPosition(1, 0), true), 1)); paths[1].second.push_back(SGSegment(SGSide(SGPosition(0, 5), true), 5)); Side2Seq converter; converter.init(&sg, &bases, &paths); converter.convert(); const SideGraph* outGraph = converter.getOutGraph(); const vector<string> outBases = converter.getOutBases(); const vector<SGNamedPath> outPaths = converter.getOutPaths(); // expect sequences of length 4, 1, 5, 1 CuAssertTrue(testCase, outGraph->getNumSequences() == 4); CuAssertTrue(testCase, outGraph->getSequence(0)->getLength() == 4); CuAssertTrue(testCase, outGraph->getSequence(1)->getLength() == 1); CuAssertTrue(testCase, outGraph->getSequence(2)->getLength() == 5); CuAssertTrue(testCase, outGraph->getSequence(3)->getLength() == 1); // expect 2 extra joins CuAssertTrue(testCase, outGraph->getJoinSet()->size() == 4); SGJoin j1(SGSide(SGPosition(0, 3), false), SGSide(SGPosition(1, 0), true)); CuAssertTrue(testCase, outGraph->getJoin(&j1) != NULL); SGJoin j2(SGSide(SGPosition(0, 3), false), SGSide(SGPosition(3, 0), true)); CuAssertTrue(testCase, outGraph->getJoin(&j2) != NULL); SGJoin 
j3(SGSide(SGPosition(1, 0), false), SGSide(SGPosition(2, 0), true)); CuAssertTrue(testCase, outGraph->getJoin(&j3) != NULL); SGJoin j4(SGSide(SGPosition(3, 0), false), SGSide(SGPosition(2, 0), true)); CuAssertTrue(testCase, outGraph->getJoin(&j4) != NULL); for (int i = 0; i < outBases.size(); ++i) { CuAssertTrue(testCase, outBases[i].length() == outGraph->getSequence(i)->getLength()); if (i == 3) { CuAssertTrue(testCase, outBases[i] == bases[1]); } else { CuAssertTrue(testCase, outBases[i] == string(outBases[i].length(), 'A')); } } // both paths should be broken up over the three segments CuAssertTrue(testCase, outPaths.size() == 2); CuAssertTrue(testCase, outPaths[0].second.size() == 3); CuAssertTrue(testCase, outPaths[0].second.size() == 3); CuAssertTrue(testCase, outPaths[0].second[0] == SGSegment(SGSide(SGPosition(0, 0), true), 4)); CuAssertTrue(testCase, outPaths[0].second[1] == SGSegment(SGSide(SGPosition(1, 0), true), 1)); CuAssertTrue(testCase, outPaths[0].second[2] == SGSegment(SGSide(SGPosition(2, 0), true), 5)); CuAssertTrue(testCase, outPaths[1].second[0] == SGSegment(SGSide(SGPosition(0, 0), true), 4)); CuAssertTrue(testCase, outPaths[1].second[1] == SGSegment(SGSide(SGPosition(3, 0), true), 1)); CuAssertTrue(testCase, outPaths[1].second[2] == SGSegment(SGSide(SGPosition(2, 0), true), 5)); CuAssertTrue(testCase, outPaths[0].first == paths[0].first); CuAssertTrue(testCase, outPaths[1].first == paths[1].first); }
/////////////////////////////////////////////////////////// // inversion test // Make sure all 4 types of joins are converted // /////////////////////////////////////////////////////////// void inversionTest(CuTest *testCase) { CuAssertTrue(testCase, true); SideGraph sg; sg.addSequence(new SGSequence(0, 20, "seq1")); sg.addSequence(new SGSequence(1, 10, "seq2")); SGPosition p1a(0, 5); SGPosition p1b(0, 9); SGPosition p1c(0, 10); SGPosition p2a(1, 2); SGPosition p2b(1, 4); SGPosition p2c(1, 6); // R->F sg.addJoin(new SGJoin(SGSide(p1a, false), SGSide(p2a, true))); // R->R sg.addJoin(new SGJoin(SGSide(p2a, false), SGSide(p2b, false))); // F->R sg.addJoin(new SGJoin(SGSide(p2b, true), SGSide(p1c, false))); // F->F sg.addJoin(new SGJoin(SGSide(p1b, true), SGSide(p2c, true))); vector<string> bases(2); bases[0] = "ACCTGACCATAGGCATGGGC"; bases[1] = "TCCGCCTAAA"; vector<SGNamedPath> paths(2); // follow joins "left to right" paths[0].first = "path1"; paths[0].second.push_back(SGSegment(SGSide(SGPosition(0, 0), true), 6)); paths[0].second.push_back(SGSegment(SGSide(p2a, true), 1)); paths[0].second.push_back(SGSegment(SGSide(p2b, false), 1)); paths[0].second.push_back(SGSegment(SGSide(p1b, false), 2)); paths[0].second.push_back(SGSegment(SGSide(p2c, true), 2)); // same path but in other direction paths[1].first = "path2"; paths[1].second.push_back(SGSegment(SGSide(SGPosition(1, 9), false), 2)); paths[1].second.push_back(SGSegment(SGSide(p1b, true), 2)); paths[1].second.push_back(SGSegment(SGSide(p2b, true), 1)); paths[1].second.push_back(SGSegment(SGSide(p2a, false), 1)); paths[1].second.push_back(SGSegment(SGSide(p1a, false), 6)); Side2Seq converter; converter.init(&sg, &bases, &paths); try { converter.convert(); } catch(exception& e) { // inside convert, are some sanity checks making sure the paths // get converted properly. we'll be lazy and rely on this code // to make sure the paths got converted. 
cerr << "Exception caught " << e.what() << endl; CuAssertTrue(testCase, false); } const SideGraph* outGraph = converter.getOutGraph(); const vector<string> outBases = converter.getOutBases(); const vector<SGNamedPath> outPaths = converter.getOutPaths(); // first sequence should be cut up into 4, and 2nd into 7 CuAssertTrue(testCase, outGraph->getNumSequences() == 11); CuAssertTrue(testCase, outGraph->getSequence(0)->getLength() == 6); CuAssertTrue(testCase, outGraph->getSequence(1)->getLength() == 3); CuAssertTrue(testCase, outGraph->getSequence(2)->getLength() == 2); CuAssertTrue(testCase, outGraph->getSequence(3)->getLength() == 9); CuAssertTrue(testCase, outGraph->getSequence(4)->getLength() == 2); CuAssertTrue(testCase, outGraph->getSequence(5)->getLength() == 1); CuAssertTrue(testCase, outGraph->getSequence(6)->getLength() == 1); CuAssertTrue(testCase, outGraph->getSequence(7)->getLength() == 1); CuAssertTrue(testCase, outGraph->getSequence(8)->getLength() == 1); CuAssertTrue(testCase, outGraph->getSequence(9)->getLength() == 2); CuAssertTrue(testCase, outGraph->getSequence(10)->getLength() == 2); // joins // 4 original joins as mapped to new graph SGJoin j1(SGSide(SGPosition(0, 5), false), SGSide(SGPosition(5, 0), true)); CuAssertTrue(testCase, outGraph->getJoin(&j1) != NULL); SGJoin j2(SGSide(SGPosition(5, 0), false), SGSide(SGPosition(7, 0), false)); CuAssertTrue(testCase, outGraph->getJoin(&j2) != NULL); SGJoin j3(SGSide(SGPosition(7, 0), true), SGSide(SGPosition(2, 1), false)); CuAssertTrue(testCase, outGraph->getJoin(&j3) != NULL); SGJoin j4(SGSide(SGPosition(2, 0), true), SGSide(SGPosition(9, 0), true)); CuAssertTrue(testCase, outGraph->getJoin(&j3) != NULL); // 3 new joins added by fragmentation to sequence 0 for (int i = 0; i < 3; ++i) { SGJoin j5(SGSide(SGPosition(i, outGraph->getSequence(i)->getLength() - 1), false), SGSide(SGPosition(i + 1, 0), true)); CuAssertTrue(testCase, outGraph->getJoin(&j5) != NULL); } // 5 new joins added by fragmentation 
to sequence 1 for (int i = 5; i < 9; ++i) { SGJoin j5(SGSide(SGPosition(i, outGraph->getSequence(i)->getLength() - 1), false), SGSide(SGPosition(i + 1, 0), true)); CuAssertTrue(testCase, outGraph->getJoin(&j5) != NULL); } CuAssertTrue(testCase, outGraph->getJoinSet()->size() == 13); }