mclx* mclxIOstreamIn ( mcxIO* xf , mcxbits bits , mclpAR* transform , void (*ivpmerge)(void* ivp1, const void* ivp2) , mclxIOstreamer* streamer , mcxOnFail ON_FAIL ) { mcxstatus status = STATUS_FAIL ; const char* me = module ; mcxbool symmetric = bits & MCLXIO_STREAM_SYMMETRIC ; mcxbool mirror = bits & MCLXIO_STREAM_MIRROR ; mcxbool abc = bits & MCLXIO_STREAM_ABC ? TRUE : FALSE ; mcxbool one23 = bits & MCLXIO_STREAM_123 ? TRUE : FALSE ; mcxbool etc = bits & (MCLXIO_STREAM_ETC | MCLXIO_STREAM_ETC_AI) ? TRUE : FALSE ; mcxbool longlist = bits & (MCLXIO_STREAM_ETCANY | MCLXIO_STREAM_235ANY) ? TRUE : FALSE ; mcxTing* linebuf = mcxTingEmpty(NULL, 100) ; map_state map_c = { NULL, NULL, -1 , 0} ; map_state map_r = { NULL, NULL, -1 , 0} ; stream_state iface ; etc_state etcstate ; unsigned long n_ite = 0 ; mclx* mx = NULL ; if (!ivpmerge) ivpmerge = mclpMergeMax ; if (symmetric) iface.map_c = &map_c /* this bit of hidgery-pokery */ , iface.map_r = &map_c /* is a crucial interfacummathingy */ ; else iface.map_c = &map_c , iface.map_r = &map_r ;if(DEBUG2)fprintf(stderr, "%s abc\n", abc ? "yes" : "no") ; etcstate.etcbuf = NULL ; etcstate.etcbuf_ofs = 0 ; etcstate.etcbuf_check = 0 ; etcstate.x_prev = ULONG_MAX /* note we depend on ULONG_MAX + 1 == 0 */ ; etcstate.n_y = 0 /* fixme incomplete and distributed initialization of iface */ ; iface.pars = NULL ; iface.pars_n_alloc = 0 ; iface.pars_n_used = 0 ;if(DEBUG3)fprintf(stderr, "1 + max c %lu\n", (ulong) (iface.map_c->max_seen+1)) /* fixme: put the block below in a subroutine */ ; while (1) { if (abc + one23 + longlist > TRUE) /* OUCH */ { mcxErr(module, "multiple stream formats specified") ; break ; } if (!symmetric && streamer->tab_sym_in) { mcxErr(module, "for now disallowed, single tab, different domains") ; break ; } if ((!one23 && !abc && !longlist)) { mcxErr(module, "not enough to get going") ; break ; } /* These have maps associated with them. 
* Note that bitsp may be changed (by filling in * somewhat underspecified settings). * todo hierverder: etc case supported below ? */ if (abc || etc) stream_state_set_map(symmetric, &iface, streamer, &bits) ; if (xf->fp == NULL && (mcxIOopen(xf, ON_FAIL) != STATUS_OK)) { mcxErr(me, "cannot open stream <%s>", xf->fn->str) ; break ; } status = STATUS_OK ; break ; } iface.bits = bits ; if (!status) while (1) { unsigned long x = 876543210, y = 876543210 ; double value = 0 ; n_ite++ ; iface.x = 0 ; iface.y = 0 ; if (n_ite % 20000 == 0) fputc('.', stderr) /* fixme conditional to sth */ ; if (n_ite % 1000000 == 0) fprintf(stderr, " %ldM\n", (long) (n_ite / 1000000)) /* * - the read routines largely manage iface, including * map_c->max_seen and map_r->max_seen. It would be * nice to encapsulate that management in a single * place. Note the read_abc requirement that sometimes * a label may need to be deleted from a hash. The fact * that handle_label (called by read_etc and read_abc) * also manages max_seen complicate encapsulation though. * * - read_etc manages its line buffer. */ ; status = one23 ? read_123(xf, linebuf, &iface, streamer, &value, bits) : abc ? read_abc(xf, linebuf, &iface, &value) : longlist ? read_etc(xf, &iface, &etcstate, &value) : STATUS_FAIL ; x = iface.x ; y = iface.y /* considerme: etc status ignore could still expand column range. * do we change the status and deal with not incorporating the row, * or do we keep status, and change realloc/ignore logic below? 
*/ ;if(0)fprintf(stderr, "#x now %lu status %s\n", (ulong) (iface.map_c->max_seen+1), MCXSTATUS(status)) /* etc/235 are special in that with NEW x and IGNORE y * we respect x * fixme: should not do that for auto-increment */ ; if (status == STATUS_IGNORE) /* maybe restrict mode */ { if ( longlist && iface.statusx == STATUS_NEW && iface.map_c->max_seen+1 > iface.pars_n_used /* note mixed-sign comparison */ ) { if ((status = pars_realloc(&iface, iface.map_c->max_seen+1))) break ; } continue ; } else if (status) /* FAIL or DONE */ break ; if ( iface.map_c->max_seen >= iface.pars_n_used /* note mixed-sign comparison */ && (status = pars_realloc(&iface, iface.map_c->max_seen+1)) ) break ; status = STATUS_FAIL /* fixme restructure logic, mid-re-initialization is ugly */ ; if ( bits & (MCLXIO_STREAM_LOGTRANSFORM | MCLXIO_STREAM_NEGLOGTRANSFORM) ) { if (bits & MCLXIO_STREAM_LOGTRANSFORM) value = value > 0 ? log(value) : -PVAL_MAX ; else if (bits & MCLXIO_STREAM_NEGLOGTRANSFORM) value = value > 0 ? -log(value) : PVAL_MAX ; if (bits & MCLXIO_STREAM_LOG10) value /= log(10) ; } if (transform) { mclp bufivp ; bufivp.idx = 0 ; bufivp.val = value ; value = mclpUnary(&bufivp, transform) ; } /* fixme: below we have canonical dependence, index as offset */ if (value) { if(DEBUG3)fprintf(stderr, "attempt to extend %d\n", (int) x) ; if (mclpARextend(iface.pars+x, y, value)) { mcxErr(me, "x-extend fails") ; break ; } if (mirror && mclpARextend(iface.pars+y, x, value)) { mcxErr(me, "y-extend fails") ; break ; } } status = STATUS_OK ; } if (n_ite >= 1000000 && n_ite % 5000000) fputc('\n', stderr) ; mcxTingFree(&(etcstate.etcbuf)) ; if (status == STATUS_FAIL || ferror(xf->fp)) mcxErr(me, "error occurred (status %d lc %d)", (int) status, (int) xf->lc) ; else { mx = make_mx_from_pars(streamer, &iface, ivpmerge, bits) ; status = mx ? 
STATUS_OK : STATUS_FAIL ; } mcxTingFree(&linebuf) ; free_pars(&iface) ; if (status == STATUS_FAIL) { if (ON_FAIL == EXIT_ON_FAIL) mcxDie(1, me, "fini") ; } /* with 123, etcai there is simply no column tab * todo: perhaps create a dummy one (integers). */ if ( !status && (abc || (bits & (MCLXIO_STREAM_ETC | MCLXIO_STREAM_ETC_AI))) ) { if (symmetric) streamer->tab_sym_out = make_tab(iface.map_c) ; else { if (!(bits & MCLXIO_STREAM_ETC_AI)) streamer->tab_col_out = make_tab(iface.map_c) ;if(0)fprintf(stderr, "%p x %p\n", (void*) iface.map_c->map, (void*) iface.map_c->tab) ;if(0)mcxHashStats(stdout, iface.map_c->map) ; streamer->tab_row_out = make_tab(iface.map_r) ; } } mcxHashFree(&(iface.map_c->map), mcxTingRelease, NULL) ; if (!symmetric) mcxHashFree(&(iface.map_r->map), mcxTingRelease, NULL) ; return mx ; }
mclgTF* mclgTFparse ( mcxLink* encoding_link , mcxTing* thestring ) { mclgTF* gtf = mcxAlloc(sizeof gtf[0], EXIT_ON_FAIL) ; const char* me = "mclgTFparse" ; const char* a = thestring->str ; const char* z = thestring->str + thestring->len ; mcxTing* func = mcxTingEmpty(NULL, thestring->len) ; mcxTing* arg = mcxTingEmpty(NULL, thestring->len) ; int n = 0 ; if (!(gtf->par_edge = mclpARensure(NULL, 10))) return NULL /* +memleak gtf */ ; if (!(gtf->par_graph = mclpARensure(NULL, 10))) return NULL /* +memleak gtf, gtf->par_edge */ ; if ( thestring && !mcxStrChrAint(thestring->str, isspace, thestring->len) ) return gtf ; while (a < z) { const char* val, *key ; char* onw = NULL ; int tfe = -1, tfg = -1 ; mcxbool nought = FALSE ; unsigned char k0 ; double d ; int t ; mcxTingEmpty(arg, z-a) ; mcxTingEmpty(func, z-a) ; n = 0 ; if ((t = sscanf(a, " %[a-z_#-] ( )%n", func->str, &n)) >= 1 && n > 0) NOTHING ; else if ((t = sscanf(a, " %[a-z_#-] ( %[^)_ ] )%n", func->str, arg->str, &n)) >= 2 && n > 0) NOTHING ; else break ; a += n ; key= func->str ; val= arg->str ; k0 = key[0] ; d = strtod(val, &onw) ; if (!val || !strlen(val)) nought = TRUE ; else if (val == onw) { mcxErr(me, "failed to parse number <%s>", val) ; break ; } if (k0 == '#') { if (!strcmp(key, "#ceilnb")) tfg = MCLG_TF_CEILNB ; else if (!strcmp(key, "#knn")) tfg = MCLG_TF_KNN ; else if (!strcmp(key, "#n")) tfg = MCLG_TF_TOPN ; else if (!strcmp(key, "#ils")) tfg = MCLG_TF_ILS ; else if (!strcmp(key, "#mcl")) tfg = MCLG_TF_MCL ; else if (!strcmp(key, "#arcmcl")) tfg = MCLG_TF_ARC_MCL ; else if (!strcmp(key, "#arcsub")) tfg = MCLG_TF_ARCSUB ; else if (!strcmp(key, "#arcmax")) tfg = MCLG_TF_ARCMAX ; else if (!strcmp(key, "#arcmingq")) tfg = MCLG_TF_ARCMINGQ ; else if (!strcmp(key, "#arcmingt")) tfg = MCLG_TF_ARCMINGT ; else if (!strcmp(key, "#arcmimlq")) tfg = MCLG_TF_ARCMINLQ ; else if (!strcmp(key, "#arcminlt")) tfg = MCLG_TF_ARCMINLT ; else if (!strcmp(key, "#arcdiffgq")) tfg = MCLG_TF_ARCDIFFGQ ; else if 
(!strcmp(key, "#arcdiffgt")) tfg = MCLG_TF_ARCDIFFGT ; else if (!strcmp(key, "#arcdifflq")) tfg = MCLG_TF_ARCDIFFLQ ; else if (!strcmp(key, "#arcdifflt")) tfg = MCLG_TF_ARCDIFFLT ; else if (!strcmp(key, "#arcmaxgq")) tfg = MCLG_TF_ARCMAXGQ ; else if (!strcmp(key, "#arcmaxgt")) tfg = MCLG_TF_ARCMAXGT ; else if (!strcmp(key, "#arcmaxlq")) tfg = MCLG_TF_ARCMAXLQ ; else if (!strcmp(key, "#arcmaxlt")) tfg = MCLG_TF_ARCMAXLT ; else if (!strcmp(key, "#selfrm")) tfg = MCLG_TF_SELFRM ; else if (!strcmp(key, "#selfmax")) tfg = MCLG_TF_SELFMAX ; else if (!strcmp(key, "#normself")) tfg = MCLG_TF_NORMSELF ; else if (!strcmp(key, "#add")) tfg = MCLG_TF_ADD ; else if (!strcmp(key, "#max")) tfg = MCLG_TF_MAX ; else if (!strcmp(key, "#min")) tfg = MCLG_TF_MIN ; else if (!strcmp(key, "#mul")) tfg = MCLG_TF_MUL ; else if (!strcmp(key, "#tug")) tfg = MCLG_TF_TUG ; else if (!strcmp(key, "#ssq")) tfg = MCLG_TF_SSQ ; else if (!strcmp(key, "#qt")) tfg = MCLG_TF_QT ; else if (!strcmp(key, "#tp") || !strcmp(key, "#rev")) tfg = MCLG_TF_TRANSPOSE ; else if (!strcmp(key, "#step")) tfg = MCLG_TF_STEP ; else if (!strcmp(key, "#thread")) tfg = MCLG_TF_THREAD ; else if (!strcmp(key, "#shrug")) tfg = MCLG_TF_SHRUG ; else if (!strcmp(key, "#shuffle")) tfg = MCLG_TF_SHUFFLE ; } else { if (!strcmp(key, "gq")) tfe = MCLX_UNARY_GQ ; else if (!strcmp(key, "gt")) tfe = MCLX_UNARY_GT ; else if (!strcmp(key, "lt")) tfe = MCLX_UNARY_LT ; else if (!strcmp(key, "lq")) tfe = MCLX_UNARY_LQ ; else if (!strcmp(key, "rand")) tfe = MCLX_UNARY_RAND ; else if (!strcmp(key, "mul")) tfe = MCLX_UNARY_MUL ; else if (!strcmp(key, "scale")) tfe = MCLX_UNARY_SCALE ; else if (!strcmp(key, "add")) tfe = MCLX_UNARY_ADD ; else if (!strcmp(key, "abs")) tfe = MCLX_UNARY_ABS ; else if (!strcmp(key, "ceil")) tfe = MCLX_UNARY_CEIL ; else if (!strcmp(key, "floor")) tfe = MCLX_UNARY_FLOOR ; else if (!strcmp(key, "pow")) tfe = MCLX_UNARY_POW ; else if (!strcmp(key, "exp")) tfe = MCLX_UNARY_EXP ; else if (!strcmp(key, "log")) tfe = 
MCLX_UNARY_LOG ; else if (!strcmp(key, "neglog")) tfe = MCLX_UNARY_NEGLOG ; } if (tfe < 0 && tfg < 0) { mcxErr(me, "unknown value transform <%s>", key) ; break ; } if (tfe >= 0) { if (nought) { if ( tfe == MCLX_UNARY_LOG || tfe == MCLX_UNARY_ABS || tfe == MCLX_UNARY_EXP || tfe == MCLX_UNARY_NEGLOG ) d = 0.0 ; else { mcxErr(me, "transform <%s> needs value", key) ; break ; } ; } mclpARextend(gtf->par_edge, tfe, d) ; } else if (tfg >= 0) { if (nought) { if ( tfg >= MCLG_TF_DUMMY_NOVALUE_START && tfg <= MCLG_TF_DUMMY_NOVALUE_END ) d = 0.0 ; else if (tfg == MCLG_TF_TUG || tfg == MCLG_TF_SHRUG) d = 1000.0 ; else if (tfg == MCLG_TF_STEP) d = 2.0 ; else { mcxErr(me, "transform <%s> needs value", key) ; break ; } ; } mclpARextend(gtf->par_edge, MCLX_UNARY_UNUSED, 0.0) ; mclpARextend(gtf->par_graph, tfg, d) ; } a = mcxStrChrAint(a, isspace, z-a) ; if (!a || a[0] != ',') break ; a++ ; } if (a) { mcxErr(me, "trailing part <%s> not matched", a) ; mclpARfree(&(gtf->par_edge)) ; mcxFree(gtf) ; gtf = NULL ; } return gtf ; }
/* This aids in finding heuristically likely starting points
 * for long shortest paths, by looking at dead ends
 * in the lattice.
 * Experimental, woefully underdocumented.
 *
 * Starting from vec->vid, nodes are expanded in rounds through the
 * columns of mx (node indices are used directly as column offsets).
 * The return value is the number of rounds that discovered new nodes.
 *
 * rough_scratch holds one marker byte per column and is cleared on entry:
 *    0  not seen yet
 *    1  seen in an earlier round
 *    2  discovered during the current round
 *
 * rough_priority is set to -1 for the start node.  A node in the current
 * wave that has no neighbour marked 0 or 2 is a dead end; if its priority
 * is still non-negative it is raised by (rounds so far + 1).
 */
static dim diameter_rough
(  mclv*  vec
,  mclx*  mx
,  u8*    rough_scratch
,  long*  rough_priority
)
{
    mclv*   wave     = mclvInsertIdx(NULL, vec->vid, 1.0);
    mclpAR* frontier = mclpARensure(NULL, 1024);
    dim     depth = 0, n_dead_ends = 0, n_dead_ends_res = 0;

    memset(rough_scratch, 0, N_COLS(mx));

    rough_scratch[vec->vid]  = 1;       /* seen */
    rough_priority[vec->vid] = -1;      /* remove from priority list */

    for (;;) {
        mclp* node;
        mclp* wave_end = wave->ivps + wave->n_ivps;
        dim   i;

        mclpARreset(frontier);

        for (node = wave->ivps; node < wave_end; node++) {
            mclv* nbs    = mx->cols + node->idx;
            mclp* nb_end = nbs->ivps + nbs->n_ivps;
            mclp* nb;
            int   hit    = 0;

            for (nb = nbs->ivps; nb < nb_end; nb++) {
                u8* mark = rough_scratch + nb->idx;

                if (*mark == 0) {
                    /* first time this neighbour is reached: queue it */
                    mclpARextend(frontier, nb->idx, 1.0);
                    *mark = 2;
                    hit = 1;
                }
                else if (*mark & 2) {
                    /* already queued in this round; still counts as a hit */
                    *mark = 2;
                    hit = 1;
                }
            }

            if (!hit) {
                if (rough_priority[node->idx] >= 0) {
                    rough_priority[node->idx] += depth+1;
                    n_dead_ends_res++;
                }
                else
                    n_dead_ends++;
            }
        }

        if (!frontier->n_ivps)
            break;

        depth++;

        /* the nodes found in this round form the next wave */
        mclvFromIvps(wave, frontier->ivps, frontier->n_ivps);

        for (i = 0; i < wave->n_ivps; i++)
            rough_scratch[wave->ivps[i].idx] = 1;
    }

    mclvFree(&wave);
    mclpARfree(&frontier);

    if(0)fprintf(stdout, "deadends %d / %d\n", (int) n_dead_ends, (int) n_dead_ends_res);

    return depth;
}