コード例 #1
0
ファイル: graph.hpp プロジェクト: markuspf/ferret
// Takes a graph with multi-coloured (and possibly multiple occurrences)
// of edges, and replaces it with one where there is at most one edge between
// each pair of vertices.
inline vec1<vec1<ColEdge> > compressGraph(const vec1<vec1<ColEdge> >& graph)
{
  std::map<std::multiset<int>, int> seen_maps;
  
  vec1<vec1<ColEdge> > output_graph(graph.size());
  for(int i = 1; i <= graph.size(); i++) {
    std::map<int, std::multiset<int> > edges;
    for(int j = 1; j <= graph[i].size(); ++j) {
      edges[graph[i][j].target()].insert(graph[i][j].colour());
    }

    for(auto & edge : edges)
    {
      if(seen_maps.count(edge.second) == 0) {
        int val = seen_maps.size() + 1;
        seen_maps[edge.second] = val;
      }
      output_graph[i].push_back(ColEdge(edge.first, seen_maps[edge.second]));
    }
  }
  return output_graph;
}
コード例 #2
0
ファイル: repeat_sin_len.c プロジェクト: COL-IU/RepGraph
main(int argc, char **argv)
{
	int	i, j, k, l, m, n;
	int	dist[20];
	int	reads;
	int	num_vertex, num_class, num_edge;
	int	*len_seq, num_seq, num_remain;
	int	**num_pa;
	char	**src_seq, **src_name;
	char	temp[100];
	ALIGN	**eq_class, *align;
	EDGE	**edge, *edge1, *edge2, *bal_edge1, *bal_edge2;
	PATH	*path;
	int	num_path;
	NODES	**vertex, *begin, *node, *node_next, **start_node;
	LIST	**list;
	READINTERVAL	*readinterval;
	POSITION	*position;
	FILE	*fp, *fp1;

	readpar();
	random1(&idum);
	initenv(argc, argv);
	printf("%d %d %d\n", sizeof(POSITION), sizeof(NODES), sizeof(LIST));

/*	Input the length of the genome (required) */

	len_seq = (int *) ckalloc(2 * MAX_NUM * sizeof(int));
	src_name = alloc_name(MAX_NUM, 100);
	fp = ckopen(lenfile, "r");
	num_seq = readlen(fp, len_seq, src_name);
	fclose(fp);

	src_seq = (char **) ckalloc(2 * num_seq * sizeof(char *));
	l = 0;
	printf("Genome length: ");
	for(i = 0; i < num_seq; i ++)	{
		l += len_seq[i];
		printf("%d ", len_seq[i]);
	}
	printf("\n");
	printf("Total length: %d\n", l);

/*	Make reverse complements of input sequences rev(i) --> i + num_seq	*/

	for(i = 0; i < num_seq; i ++)	{
		len_seq[i + num_seq] = len_seq[i];
		src_seq[i] = (char *) ckalloc(len_seq[i] * sizeof(char));
		src_seq[i + num_seq] = (char *) ckalloc(len_seq[i] * sizeof(char));
		for(j = 0; j < len_seq[i]; j ++)	{
			src_seq[num_seq + i][j] = rev(src_seq[i][len_seq[i] - j - 1]);
		}
	}

/*	Input equivalent readintervales between reads --
	see the format of the equivalent readinterval files	*/

	printf("Read equivalent readintervales...\n");
	eq_class = (ALIGN **) ckalloc(2 * num_seq * sizeof(ALIGN *));
	fp = ckopen(inpfile, "r");
	num_class = readclass(eq_class, num_seq, fp);
	fclose(fp);
	printf("# equivalent readintervales input: %d\n", num_class);

/*
	for(i = 0; i < 2 * num_seq; i ++)	{
		align = eq_class[i];
		while(align)	{
			printf("See: \n");
			output_align(align, src_name, src_seq, len_seq, num_seq);
			getchar();
			align = align -> next;
		}
	}
*/

/*	Initialize the nodes: each position in each read is assigned
	as a new node. An array of "list" is set up for each read	*/

	list = (LIST **) ckalloc(2 * num_seq * sizeof(LIST *));
	for(i = 0; i < 2 * num_seq; i ++)	{
		list[i] = (LIST *) ckalloc(len_seq[i] * sizeof(LIST));
	}
	printf("intitialize nodes...\n");
	initialize(list, len_seq, num_seq);
	printf("done.\n");
	n = countnode(list, len_seq, 2 * num_seq);
	printf("# of nodes before merge: %d\n", n);

/*	Glue together two nodes if their corresponding positions are defined
	as equivalent in a pairwise alignment		*/

	printf("Merge...\n");
	merge(num_seq, len_seq, eq_class, num_class, list);
	printf("done.\n");
	for(i = 0; i < num_seq; i ++)	{
		while(eq_class[i])	{
			eq_class[i] = free_align(eq_class[i]);
		}
	}
	free((void **) eq_class);

/*      Compute the width of each node  */

        for(i = 0; i < 2 * num_seq; i ++)       {
                for(j = 0; j < len_seq[i]; j ++)        {
                        if(!list[i][j].node -> visit)   {
                                list[i][j].node -> num_path = countthickness(list[i][j].node);
                                list[i][j].node -> visit = 1;
                        }
                }
        }
	cleannode(list, len_seq, 2 * num_seq);
	n = countnode(list, len_seq, 2 * num_seq);
	printf("# of nodes after merge: %d\n", n);

/*	Add edges to the graph		*/
	edge = (EDGE **) ckalloc(n * sizeof(EDGE *));
	num_edge = graph(num_seq, len_seq, list, edge);
	printf("# edges: %d\n", num_edge);
	start_node = (NODES **) ckalloc(num_seq * sizeof(NODES *));
	for(i = 0; i < num_seq; i ++)	{
		if(len_seq[i] > 0)	{
			start_node[i] = list[i][0].node;
		} else	{
			start_node[i] = (NODES *) NULL;
		}
	}
	for(i = 0; i < 2 * num_seq; i ++)	{
		free((void *) list[i]);
	}
	free((void **) list);

	vertex = (NODES **) ckalloc(2 * num_edge * sizeof(NODES *));
	num_vertex = count_vertex(edge, num_edge, vertex);
	free((void **) edge);

	num_pa = (int **) ckalloc(MAX_BRA * sizeof(int *));
	for(i = 0; i < MAX_BRA; i ++)	{
		num_pa[i] = (int *) ckalloc(MAX_BRA * sizeof(int));
	}
	num_edge = count_edge_simp(vertex, num_vertex, num_pa);
	printf("%d vertices %d edges (%d source %d sinks) remained.\n", num_vertex, num_edge,
		num_pa[0][1], num_pa[1][0]);

/*	Assign the complementary edges of each edge	*/
	for(i = 0; i < num_vertex; i ++)	{
		for(j = 0; j < vertex[i] -> num_nextedge; j ++)	{
			edge1 = vertex[i] -> nextedge[j];
			edge1 -> bal_edge = find_bal_edge(edge1, len_seq, num_seq, i);
		}
	}

/*	Remove bulges in the graph	*/
	printf("Shave...\n");
	num_vertex = shave_graph(vertex, num_vertex);
	printf("done.\n");

/*      Remove cycles shorter than some threshold in the graph  */
/*
        printf("Shaving graph...\n");
        num_vertex = rem_cycle(vertex, num_vertex);
        printf("done.\n%d vertices remained.\n", num_vertex);
*/

/*	remove short edges	*/
/*
	printf("Remove shortedges...\n");
	num_vertex = rem_short_edge(vertex, num_vertex, len_seq);
	printf("done.\n%d vertices remained.\n", num_vertex);
	fflush(stdout);
*/

	num_edge = count_edge_simp(vertex, num_vertex, num_pa);
	printf("%d vertices %d edges (%d source %d sinks) remained.\n", num_vertex, num_edge,
		num_pa[0][1], num_pa[1][0]);
	fflush(stdout);

/*	Allocate the spaces for paths	*/
	printf("Allocating paths...\n");
	for(i = 0; i < num_vertex; i ++)	{
		vertex[i] -> num_path = 0;
	}

/*	Build sequence paths	*/
	printf("Define paths...\n");
	m = 0;
	for(i = 0; i < num_vertex; i ++)	{
		for(j = 0; j < vertex[i] -> num_nextedge; j ++)	{
			m += vertex[i] -> nextedge[j] -> multip;
		}
	}
	path = (PATH *) ckalloc(2 * num_seq * sizeof(PATH));
	for(i = 0; i < 2 * num_seq; i ++)	{
		path[i].edge = (EDGE **) ckalloc(m * sizeof(EDGE *));
	}
	num_path = readpath(start_node, path, num_seq);
	free((void **) start_node);
	num_edge = count_edge_simp(vertex, num_vertex, num_pa);
	m = l = 0;
	for(i = 0; i < num_vertex; i ++)	{
		for(j = 0; j < vertex[i] -> num_nextedge; j ++)	{
			l += vertex[i] -> nextedge[j] -> length;
			if(vertex[i] -> nextedge[j] -> length > m)	{
				m = vertex[i] -> nextedge[j] -> length;
			}
		}
	}
	printf("%d vertics %d edges (%d source %d sinks) remained: total length %d (maximal %d).\n", num_vertex, num_edge,
	 	num_pa[0][1], num_pa[1][0], l, m);
	fflush(stdout);

/*	Make consensus of edges	*/
	initial_edge(vertex, num_vertex, src_seq, num_seq);
	printf("edge initialed\n");

/*	Output sequence path	*/

	n = 0;
	for(i = 0; i < num_vertex; i ++)	{
		vertex[i] -> visit = i;
		for(j = 0; j < vertex[i] -> num_nextedge; j ++)	{
			vertex[i] -> nextedge[j] -> start_cover = n;
			n ++;
		}
	}
	for(m = 0; m < num_seq; m ++)	{
		printf("len_path %d\n", path[m].len_path);
		printf("Sequence%d: ", m + 1);
		for(i = 0; i < path[m].len_path; i ++)	{
			printf("%d -- %d(%d,%d) --> ", path[m].edge[i] -> begin -> visit,
				path[m].edge[i] -> start_cover, path[m].edge[i] -> multip,
				path[m].edge[i] -> length);
			if(i % 5 == 4)	{
				printf("\n");
			}
		}
		if(path[m].len_path > 0)	{
			printf("%d\n", path[m].edge[i - 1] -> end -> visit);
		} else	{
			printf("\n");
		}
		fflush(stdout);
	}

/*	Output graph & contigs	*/
	sprintf(temp, "%s.edge", seqfile);
	fp = ckopen(temp, "w");
	sprintf(temp, "%s.graph", seqfile);
	fp1 = ckopen(temp, "w");
	write_graph(vertex, num_vertex, fp, fp1);
	fclose(fp);
	fclose(fp1);

/*	Output read intervals in each edge	*/
	sprintf(temp, "%s.intv", seqfile);
	fp = ckopen(temp, "w");
	write_interval(vertex, num_vertex, fp);
	fclose(fp);

/*	Output graphviz format graph	*/

	sprintf(temp, "%s", outfile);
	fp = ckopen(temp, "w");
	output_graph(vertex, num_vertex, fp);
	fclose(fp);

	for(i = 0; i < MAX_BRA; i ++)	{
		free((void *) num_pa[i]);
	}
	free((void **) num_pa);
	for(i = 0; i < 2 * num_seq; i ++)	{
		if(path[i].len_path > 0)	{
			free((void **) path[i].edge);
		}
	}
	free((void *) path);
	free_graph(vertex, num_vertex);
	for(i = 0; i < 2 * num_seq; i ++)	{
		free((void *) src_seq[i]);
	}
	free((void **) src_seq);
	free_name(src_name, MAX_NUM);
	free((void *) len_seq);
}