void readkgml_sign_int(const char* filename, vector<string> &vertices, vector<int> &edges, vector< vector<string> > &attr, vector< vector<string> > &pathway_attr, bool expand_complexes, bool verbose) { xmlDocPtr doc; xmlXPathContextPtr xpathCtx = NULL; xmlXPathObjectPtr nodes; if(verbose) Rprintf("Processing KGML file: %s",filename); /* Load XML document */ doc = xmlParseFile(filename); if (doc == NULL) { Rf_warningcall(mkChar(filename), "Unable to parse file."); if(verbose) Rprintf(": Error.\n"); return; } //Check if the xml file has a KEGG DTD System. /* Check it is a kegg pathway file */ if(doc->intSubset == NULL || strcmp( (char *) (doc->intSubset->name), "pathway") != 0 ) //strncmp( (char *) (doc->intSubset->SystemID), "http://www.kegg.jp/kegg/", 24) !=0) { Rf_warningcall(mkChar(filename), "File is not KEGG pathway file."); xmlFreeDoc(doc); if(verbose) Rprintf(": Error.\n"); return; } /* Get pathway information :*/ xmlNodePtr pathway = xmlDocGetRootElement(doc); if(!pathway || strcmp( (char *) (pathway->name), "pathway") != 0){ Rf_warningcall(mkChar(filename), "No pathways in file."); xmlXPathFreeContext(xpathCtx); xmlFreeDoc(doc); if(verbose) Rprintf(": Error.\n"); return; } vector<string> pathway_info; const char* pathwayId = get_attr(pathway, "name"); if(!pathwayId){ Rf_warningcall(mkChar(filename), "Pathway ID not found in file. Using file name instead."); pathwayId = filename; }else{ pathwayId +=5; //Remove "path:" leading characters// } const char* pathwayTitle = get_attr(pathway, "title"); if(!pathwayTitle){ Rf_warningcall(mkChar(pathwayId), "Pathway title not found in file."); pathwayTitle = ""; } if(verbose) Rprintf(" \"%s\"",pathwayTitle); /* Create xpath evaluation context */ xpathCtx = xmlXPathNewContext(doc); if(xpathCtx == NULL) { Rf_warningcall(mkChar(filename), "Unable to create new XPath context."); xmlFreeDoc(doc); if(verbose) Rprintf(": Error.\n"); return; } /* Evaluate xpath expression */ nodes = xmlXPathEvalExpression((xmlChar *) "//relation", xpathCtx); if(nodes == NULL || nodes->nodesetval == NULL || nodes->nodesetval->nodeNr == 0) { Rf_warningcall(mkChar(pathwayId), "Pathway contains no Protein-protein relationships."); xmlXPathFreeContext(xpathCtx); xmlFreeDoc(doc); if(verbose) Rprintf(": Error.\n"); return; } /* Parse XML Reactions*/ xmlNodePtr curRelation; int size = (nodes->nodesetval) ? nodes->nodesetval->nodeNr : 0; if(verbose) Rprintf(": %d gene relations found.\n",size); /* Looping over "relations" */ for(int i = 0; i < size; ++i) { curRelation = nodes->nodesetval->nodeTab[i]; char* type = get_attr(curRelation, "type"); if(!type || strcmp(type, "maplink") == 0 ) continue; // Get gene names for entry1 and entry2 vector<string> p1,p2; //Holder objects for all gene names in this "relation" char* entry1 = get_attr(curRelation, "entry1"); char* p1_name = entry1 ? attr_by_id(entry1, "name", xpathCtx) : NULL; if(p1_name && strcmp(p1_name, "undefined") == 0 ){ p1_name = get_group_components(entry1, xpathCtx); } if(!p1_name) continue; char* entry2 = get_attr(curRelation, "entry2"); char* p2_name = entry2 ? attr_by_id(entry2, "name", xpathCtx) : NULL; if(p2_name && strcmp(p2_name, "undefined") == 0 ){ p2_name = get_group_components(entry2, xpathCtx); } if(!p2_name) continue; /* If complexes are expanded, each gene is a separate vertex. * Otherwise, all genes participating in the relation are kept in a single vertex. */ if(expand_complexes){ p1 = split(p1_name, ' '); p2 = split(p2_name, ' '); }else{ p1.push_back(p1_name); p2.push_back(p2_name); } // Check if p1, p2 are already in our stack. vector<size_t> p1_pos, p2_pos; for(size_t j = 0; j < p1.size(); j++){ p1_pos.push_back( elem_pos( vertices, p1[j] ) ); if(p1_pos[j] == vertices.size()) vertices.push_back(p1[j]); } for(size_t k = 0; k < p2.size(); k++){ p2_pos.push_back( elem_pos( vertices, p2[k] ) ); if(p2_pos[k] == vertices.size()) vertices.push_back(p2[k]); } /* Setting pathway attributes for all added vertices */ //making sure pathway and vertices vectors are of the same size// for(size_t p_attr = pathway_attr.size(); p_attr < vertices.size(); p_attr++) { pathway_attr.push_back(vector<string>()); } //Adding this pathway as attribute, if it's not already added// string pid = pathwayId; for(size_t j=0; j<p1_pos.size(); j++){ if( !elem_in_vector(pathway_attr[ p1_pos[j] ], pid ) ){ pathway_attr[ p1_pos[j] ].push_back(pathwayId); pathway_attr[ p1_pos[j] ].push_back(pathwayTitle); } } for(size_t k=0; k<p2_pos.size(); k++){ if( !elem_in_vector(pathway_attr[ p2_pos[k] ], pid ) ){ pathway_attr[ p2_pos[k] ].push_back(pathwayId); pathway_attr[ p2_pos[k] ].push_back(pathwayTitle); } } /* Edges to connect the added vertices, and their attributes */ // Relation parsing depennds on its type // if( strcmp(type, "PPrel") == 0 || strcmp(type, "GErel") == 0 || strcmp(type, "PCrel") == 0 ){ // Add all combinatiosn from p1-> p2 as edges. for(size_t j=0; j<p1_pos.size(); j++){ for(size_t k=0; k<p2_pos.size(); k++){ edges.push_back(p1_pos[j]); edges.push_back(p2_pos[k]); } } vector<string> e_attr; xpathCtx->node = curRelation; xmlNodeSetPtr subtype = xmlXPathEvalExpression( (const xmlChar *) "./subtype", xpathCtx ) ->nodesetval; int numOfattr = (subtype) ? subtype->nodeNr : 0; for (int a = 0;a < numOfattr;a++){ xmlNodePtr sub_node = subtype->nodeTab[a]; char* subtype_name = get_attr(sub_node, "name"); if(!subtype_name) continue; if(strcmp(subtype_name, "compound") == 0){ char* cpd_name = attr_by_id(get_attr(sub_node, "value"), "name", xpathCtx); e_attr.push_back(cpd_name); }else{ e_attr.push_back(subtype_name); } } // Add the same attribute for all added edges. for(size_t l=0; l<p1_pos.size()*p2_pos.size(); l++) attr.push_back(e_attr); } else if(strcmp(type, "ECrel") == 0 ){ /* ECrel indicated participation in 2 succesive reactions * For ECrel, KGML deson't respect the direction of the relation * Here, I will try to find whether it's entry1->entry2, or the reverse. * Below, p1 particpates in r1, and p2 in r2, and the shared compound is cpd. */ char* cpd_id = get_attr(curRelation->children->next, "value"); char* cpd = attr_by_id(cpd_id, "name", xpathCtx); //Cpd name if(!cpd) continue; char* r1_name = attr_by_id(entry1, "reaction",xpathCtx); xmlNodePtr r1node = r1_name ? node_by_attr_val("name", r1_name, "reaction",xpathCtx) : NULL; if(!r1node)continue; bool r1_rev = strcmp(get_attr(r1node, "type"), "reversible") == 0; bool r1_cpd = false; //If R1->Cpd (compound is a product of R1). if(!r1_rev){ xpathCtx->node = r1node; string childXPath = ((string)"./*[@name='")+((string)cpd)+((string)"']"); xmlNodeSetPtr children = xmlXPathEvalExpression( (const xmlChar *) childXPath.c_str(), xpathCtx ) ->nodesetval; char* role = children && children->nodeNr >0 ? (char*)children->nodeTab[0]-> name : NULL; if(!role) continue; if(strcmp( role , "product") == 0) r1_cpd = true; else{ r1_cpd = false;} }// !r1_rev char* r2_name = attr_by_id(entry2, "reaction",xpathCtx); xmlNodePtr r2node = r2_name ? node_by_attr_val("name", r2_name, "reaction",xpathCtx) : NULL; if(!r2node)continue; bool r2_rev = strcmp(get_attr(r2node, "type"), "reversible") == 0; bool r2_cpd = false; if(!r2_rev){ xpathCtx->node = r2node; string childXPath = ((string)"./*[@name='")+((string)cpd)+((string)"']"); xmlNodeSetPtr children = xmlXPathEvalExpression( (const xmlChar *) childXPath.c_str(), xpathCtx ) ->nodesetval; char* role = children && children->nodeNr >0 ? (char*)children->nodeTab[0]-> name : NULL; if(!role) continue; if(strcmp( role , "product") == 0) r2_cpd = true; else{ r2_cpd = false;} }// !r2_rev /* the order of r1, r2 is: * r1 -> r2 if cpd is a product of r1 and substrate of 2 * meaning r1_cpd=true, r2_cpd=false. * The opposite is also true. * The value of r1_cpd doesn't matter if r1 reversible. */ if((r1_rev || r1_cpd) && (r2_rev || !r2_cpd)){ for(size_t j=0; j<p1_pos.size(); j++){ for(size_t k=0; k<p2_pos.size(); k++){ edges.push_back(p1_pos[j]); edges.push_back(p2_pos[k]); vector<string> e_attr; e_attr.push_back(cpd); attr.push_back(e_attr); } } }// R1->R2 if((r1_rev || !r1_cpd) && (r2_rev || r2_cpd)){ for(size_t j=0; j<p1_pos.size(); j++){ for(size_t k=0; k<p2_pos.size(); k++){ edges.push_back(p2_pos[k]); edges.push_back(p1_pos[j]); vector<string> e_attr; e_attr.push_back(cpd); attr.push_back(e_attr); } } }// R2->R1 (order is reversed) }// End ECrel }// End for(relations) }//kgml_sig_int
bool elem_in_vector(vector<T> v, T &e){ return( elem_pos(v,e) < v.size() ); }
/* * Returns maximal element of m1 * m2. */ struct elem_pos_t* max_res_elem(struct matrix_t* m1, struct matrix_t* m2) { msize_t row, col; elem_t* restrict res; msize_t max_row, max_col; elem_t max_res; #ifndef NDEBUG max_row = max_col = -1; #endif max_res = MIN_ELEM; #ifndef NDEBUG print_matrix(m1); print_matrix(m2); #endif assert (m1->n_cols == m2->n_rows); /* Optimize for cache misses */ if ((m2 = transpose(m2)) == NULL) { return NULL; } if ((res = (elem_t*) _mm_malloc(sizeof(elem_t) * m1->n_rows * m2->n_rows, ELEM_ALIGN)) == NULL) { return NULL; } { msize_t m1_nrows, m2_nrows, m1_ncols; m1_nrows = m1->n_rows; m2_nrows = m2->n_rows; m1_ncols = m1->n_cols; /* no dependencies between iterations */ #pragma parallel for (row = 0; row < m1_nrows; row++) { #pragma parallel for (col = 0; col < m2_nrows; col++) { res[row * m2_nrows + col] = mult_vect(m1->data[row], m2->data[col], m1_ncols); } } for (row = 0; row < m1_nrows; row++) { for (col = 0; col < m2_nrows; col++) { if (res[row * m2_nrows + col] > max_res) { max_res = res[row * m2_nrows + col]; max_row = row; max_col = col; } } } } free_matrix(m2); _mm_free(res); assert ((max_row >= 0) && (max_col >= 0)); return elem_pos(max_row, max_col, max_res); }