void readkgml_sign_int(const char* filename,
								vector<string> &vertices,
								vector<int> &edges,
								vector< vector<string> > &attr,
								vector< vector<string> > &pathway_attr,
								bool expand_complexes, bool verbose)
{
	xmlDocPtr doc;
	xmlXPathContextPtr xpathCtx = NULL;
	xmlXPathObjectPtr nodes;

	if(verbose)	Rprintf("Processing KGML file: %s",filename);

	/* Load XML document */
	doc = xmlParseFile(filename);
	if (doc == NULL) {
		Rf_warningcall(mkChar(filename), "Unable to parse file.");
		if(verbose)	Rprintf(": Error.\n");
		return;
	}

	//Check if the xml file has a KEGG DTD System.
	/* Check it is a kegg pathway file */
	if(doc->intSubset == NULL ||
	   strcmp( (char *) (doc->intSubset->name), "pathway") != 0 )
	   //strncmp( (char *) (doc->intSubset->SystemID), "http://www.kegg.jp/kegg/", 24) !=0)
	{
		Rf_warningcall(mkChar(filename), "File is not KEGG pathway file.");
		xmlFreeDoc(doc);
		if(verbose)	Rprintf(": Error.\n");
		return;
	}

	/* Get pathway information :*/
	xmlNodePtr pathway =  xmlDocGetRootElement(doc);
	if(!pathway || strcmp( (char *) (pathway->name), "pathway") != 0){
		Rf_warningcall(mkChar(filename), "No pathways in file.");
		xmlXPathFreeContext(xpathCtx);
		xmlFreeDoc(doc);
		if(verbose)	Rprintf(": Error.\n");
		return;
	}

	vector<string> pathway_info;
	const char* pathwayId = get_attr(pathway, "name");
	if(!pathwayId){
		Rf_warningcall(mkChar(filename), "Pathway ID not found in file. Using file name instead.");
		pathwayId = filename;
	}else{
		pathwayId +=5; //Remove "path:" leading characters//
	}

	const char* pathwayTitle = get_attr(pathway, "title");
	if(!pathwayTitle){
		Rf_warningcall(mkChar(pathwayId), "Pathway title not found in file.");
		pathwayTitle = "";
	}
	if(verbose)	Rprintf(" \"%s\"",pathwayTitle);

	/* Create xpath evaluation context */
	xpathCtx = xmlXPathNewContext(doc);
	if(xpathCtx == NULL) {
		Rf_warningcall(mkChar(filename), "Unable to create new XPath context.");
		xmlFreeDoc(doc);
		if(verbose)	Rprintf(": Error.\n");
		return;
	}

	/* Evaluate xpath expression */
	nodes = xmlXPathEvalExpression((xmlChar *) "//relation", xpathCtx);
	if(nodes == NULL || nodes->nodesetval == NULL || nodes->nodesetval->nodeNr == 0) {
		Rf_warningcall(mkChar(pathwayId), "Pathway contains no Protein-protein relationships.");
		xmlXPathFreeContext(xpathCtx);
		xmlFreeDoc(doc);
		if(verbose)	Rprintf(": Error.\n");
		return;
	}

	/* Parse XML Reactions*/
	xmlNodePtr curRelation;
    int size = (nodes->nodesetval) ? nodes->nodesetval->nodeNr : 0;

    if(verbose)	Rprintf(": %d gene relations found.\n",size);

    /* Looping over "relations" */
    for(int i = 0; i < size; ++i) {
		curRelation = nodes->nodesetval->nodeTab[i];
		char* type = get_attr(curRelation, "type");
		if(!type || strcmp(type, "maplink") == 0 )
			continue;

		// Get gene names for entry1 and entry2
		vector<string> p1,p2; //Holder objects for all gene names in this "relation"

		char* entry1 = get_attr(curRelation, "entry1");
		char* p1_name = entry1 ? attr_by_id(entry1, "name", xpathCtx) : NULL;
		if(p1_name && strcmp(p1_name, "undefined") == 0 ){
			p1_name = get_group_components(entry1, xpathCtx);
		}
		if(!p1_name) continue;

		char* entry2 = get_attr(curRelation, "entry2");
		char* p2_name = entry2 ? attr_by_id(entry2, "name", xpathCtx) : NULL;
		if(p2_name && strcmp(p2_name, "undefined") == 0 ){
			p2_name = get_group_components(entry2, xpathCtx);
		}
		if(!p2_name) continue;


		/* If complexes are expanded, each gene is a separate vertex.
		 * Otherwise, all genes participating in the relation are kept in a single vertex.
		 */
		if(expand_complexes){
			p1 = split(p1_name, ' ');
			p2 = split(p2_name, ' ');
		}else{
			p1.push_back(p1_name);
			p2.push_back(p2_name);
		}

		// Check if p1, p2 are already in our stack.
		vector<size_t> p1_pos, p2_pos;
		for(size_t j = 0; j < p1.size(); j++){
			p1_pos.push_back( elem_pos( vertices, p1[j] ) );

			if(p1_pos[j] == vertices.size())
				vertices.push_back(p1[j]);
		}

		for(size_t k = 0; k < p2.size(); k++){
			p2_pos.push_back( elem_pos( vertices, p2[k] ) );

			if(p2_pos[k] == vertices.size())
				vertices.push_back(p2[k]);
		}

		/* Setting pathway attributes for all added vertices */
		//making sure pathway and vertices vectors are of the same size//
		for(size_t p_attr = pathway_attr.size(); p_attr < vertices.size(); p_attr++)
			{ pathway_attr.push_back(vector<string>()); }

		//Adding this pathway as attribute, if it's not already added//
		string pid = pathwayId;
		for(size_t j=0; j<p1_pos.size(); j++){
			if( !elem_in_vector(pathway_attr[ p1_pos[j] ], pid ) ){
				pathway_attr[ p1_pos[j] ].push_back(pathwayId);
				pathway_attr[ p1_pos[j] ].push_back(pathwayTitle);
			}
		}

		for(size_t k=0; k<p2_pos.size(); k++){
			if( !elem_in_vector(pathway_attr[ p2_pos[k] ], pid ) ){
				pathway_attr[ p2_pos[k] ].push_back(pathwayId);
				pathway_attr[ p2_pos[k] ].push_back(pathwayTitle);
			}
		}

		/* Edges to connect the added vertices, and their attributes */
		// Relation parsing depennds on its type //
		if( strcmp(type, "PPrel") == 0 || strcmp(type, "GErel") == 0 || strcmp(type, "PCrel") == 0 ){
			// Add all combinatiosn from p1-> p2 as edges.
			for(size_t j=0; j<p1_pos.size(); j++){
				for(size_t k=0; k<p2_pos.size(); k++){
					edges.push_back(p1_pos[j]);	edges.push_back(p2_pos[k]);
				}
			}

			vector<string> e_attr;

			xpathCtx->node = curRelation;
			xmlNodeSetPtr subtype = xmlXPathEvalExpression( (const xmlChar *) "./subtype", xpathCtx ) ->nodesetval;
			int numOfattr = (subtype) ? subtype->nodeNr : 0;

			for (int a = 0;a < numOfattr;a++){
				xmlNodePtr sub_node = subtype->nodeTab[a];
				char* subtype_name = get_attr(sub_node, "name");

				if(!subtype_name)
					continue;

				if(strcmp(subtype_name, "compound") == 0){
					char* cpd_name = attr_by_id(get_attr(sub_node, "value"), "name", xpathCtx);
					e_attr.push_back(cpd_name);
				}else{
					e_attr.push_back(subtype_name);
				}
			}

			// Add the same attribute for all added edges.
			for(size_t l=0; l<p1_pos.size()*p2_pos.size(); l++)
				attr.push_back(e_attr);
		}
		else if(strcmp(type, "ECrel") == 0 ){
			/* ECrel indicated participation in 2 succesive reactions
			 * For ECrel, KGML deson't respect the direction of the relation
			 * Here, I will try to find whether it's entry1->entry2, or the reverse.
			 * Below, p1 particpates in r1, and p2 in r2, and the shared compound is cpd.
			 */
			char* cpd_id = get_attr(curRelation->children->next, "value");
			char* cpd = attr_by_id(cpd_id, "name", xpathCtx); //Cpd name
			if(!cpd) continue;
			char* r1_name = attr_by_id(entry1, "reaction",xpathCtx);
			xmlNodePtr r1node = r1_name ? node_by_attr_val("name", r1_name, "reaction",xpathCtx) : NULL;
			if(!r1node)continue;

			bool r1_rev = strcmp(get_attr(r1node, "type"), "reversible") == 0;
			bool r1_cpd = false; //If R1->Cpd (compound is a product of R1).

			if(!r1_rev){
				xpathCtx->node = r1node;
				string childXPath = ((string)"./*[@name='")+((string)cpd)+((string)"']");
				xmlNodeSetPtr children = xmlXPathEvalExpression(
							(const xmlChar *) childXPath.c_str(), xpathCtx ) ->nodesetval;
				char* role = children && children->nodeNr >0 ? (char*)children->nodeTab[0]-> name : NULL;
				if(!role) continue;

				if(strcmp( role , "product") == 0)
						r1_cpd = true;
				else{ r1_cpd = false;}
			}// !r1_rev

			char* r2_name = attr_by_id(entry2, "reaction",xpathCtx);
			xmlNodePtr r2node = r2_name ? node_by_attr_val("name", r2_name, "reaction",xpathCtx) : NULL;
			if(!r2node)continue;

			bool r2_rev = strcmp(get_attr(r2node, "type"), "reversible") == 0;
			bool r2_cpd = false;
			if(!r2_rev){
				xpathCtx->node = r2node;
				string childXPath = ((string)"./*[@name='")+((string)cpd)+((string)"']");
				xmlNodeSetPtr children = xmlXPathEvalExpression(
							(const xmlChar *) childXPath.c_str(), xpathCtx ) ->nodesetval;

				char* role = children && children->nodeNr >0 ? (char*)children->nodeTab[0]-> name : NULL;
				if(!role) continue;

				if(strcmp( role , "product") == 0)
						r2_cpd = true;
				else{ r2_cpd = false;}
			}// !r2_rev

			/* the order of r1, r2 is:
			 * r1 -> r2 if cpd is a product of r1 and substrate of 2
			 * meaning r1_cpd=true, r2_cpd=false.
			 * The opposite is also true.
			 * The value of r1_cpd doesn't matter if r1 reversible.
			 */
			if((r1_rev || r1_cpd) && (r2_rev || !r2_cpd)){
				for(size_t j=0; j<p1_pos.size(); j++){
					for(size_t k=0; k<p2_pos.size(); k++){
						edges.push_back(p1_pos[j]);	edges.push_back(p2_pos[k]);
						vector<string> e_attr;	e_attr.push_back(cpd);
						attr.push_back(e_attr);
					}
				}
			}// R1->R2

			if((r1_rev || !r1_cpd) && (r2_rev || r2_cpd)){
				for(size_t j=0; j<p1_pos.size(); j++){
					for(size_t k=0; k<p2_pos.size(); k++){
						edges.push_back(p2_pos[k]);	edges.push_back(p1_pos[j]);
						vector<string> e_attr;	e_attr.push_back(cpd);
						attr.push_back(e_attr);
					}
				}
			}// R2->R1 (order is reversed)
		}// End ECrel
    }// End for(relations)
}//kgml_sig_int
bool elem_in_vector(vector<T> v, T &e){
	return( elem_pos(v,e) < v.size() );
}
// Example no. 3
// 0
/*
 * Returns maximal element of m1 * m2.
 *
 * The product is computed into a temporary buffer and then scanned for the
 * largest value strictly greater than MIN_ELEM; its row, column and value are
 * handed to elem_pos() and that result is returned.
 *
 * m1, m2: the factors; m1->n_cols must equal m2->n_rows (asserted).
 * Neither input is modified or freed: only the transposed copy of m2 made
 * here is released.
 *
 * Returns NULL when the transpose or the result buffer cannot be allocated.
 */
struct elem_pos_t* max_res_elem(struct matrix_t* m1, struct matrix_t* m2) {
    msize_t row, col;
    elem_t* restrict res;

    /*
     * Initialised in every build, not only when assertions are enabled, so
     * that a product with no element above MIN_ELEM never reports
     * indeterminate coordinates.
     */
    msize_t max_row = -1;
    msize_t max_col = -1;
    elem_t max_res = MIN_ELEM;

#ifndef NDEBUG
    print_matrix(m1);
    print_matrix(m2);
#endif

    assert (m1->n_cols == m2->n_rows);

    /* Optimize for cache misses: work on rows of the transposed m2 */
    if ((m2 = transpose(m2)) == NULL) {
        return NULL;
    }

    if ((res = (elem_t*) _mm_malloc(sizeof(elem_t) * m1->n_rows * m2->n_rows, ELEM_ALIGN)) == NULL) {
        /* m2 now refers to the transposed copy; release it before bailing out */
        free_matrix(m2);
        return NULL;
    }

    {
        msize_t m1_nrows, m2_nrows, m1_ncols;
        m1_nrows = m1->n_rows;
        m2_nrows = m2->n_rows;
        m1_ncols = m1->n_cols;

        /* no dependencies between iterations */
        #pragma parallel
        for (row = 0; row < m1_nrows; row++) {
            #pragma parallel
            for (col = 0; col < m2_nrows; col++) {
                res[row * m2_nrows + col] =
                            mult_vect(m1->data[row], m2->data[col], m1_ncols);
            }
        }

        /* Sequential scan for the maximum; the first occurrence wins on ties */
        for (row = 0; row < m1_nrows; row++) {
            for (col = 0; col < m2_nrows; col++) {
                if (res[row * m2_nrows + col] > max_res) {
                    max_res = res[row * m2_nrows + col];
                    max_row = row;
                    max_col = col;
                }
            }
        }
    }
    free_matrix(m2);
    _mm_free(res);

    assert ((max_row >= 0) && (max_col >= 0));
    return elem_pos(max_row, max_col, max_res);
}