static char *number(char *arg, struct switch_s *s) { long ln; word n; char *next; n = ln = parse_narg(arg, &next); if (arg == next) syntax("Argument expected for switch /%s", s->sw); if (ln > 0xFFFFu || s->min > n || s->max < n) syntax("Argument /%s:%lu is out of range (%u-%u)", s->sw, ln, s->min, s->max); defined_format |= s->ID; forced_format |= s->ID; if (s->loc) *(int *)s->loc = (int)n; switch(s->ID) { case SECTOR_SIZE: if (!ispow2(n)) syntax("Invalid sector size - must be a power of two"); break; case CLUSTER_SIZE: if (!ispow2(n)) syntax("Invalid cluster size - must be a power of two"); break; } return next; }
static TIFF *tifopenr(const char *tif, // TIFF file name bool *c, // return is complex? int *l, // return log2 tile size int *n, // return log2 height int *m, // return log2 width int *p) // return pixel size { TIFF *T; if ((T = TIFFOpen(tif, "r"))) { uint32 W = 0; uint32 L = 0; uint32 S = 0; uint16 P = 0; uint16 B = 0; uint16 G = 0; uint16 F = 0; TIFFGetField(T, TIFFTAG_IMAGEWIDTH, &W); TIFFGetField(T, TIFFTAG_IMAGELENGTH, &L); TIFFGetField(T, TIFFTAG_TILEWIDTH, &S); TIFFGetField(T, TIFFTAG_SAMPLESPERPIXEL, &P); TIFFGetField(T, TIFFTAG_BITSPERSAMPLE, &B); TIFFGetField(T, TIFFTAG_PLANARCONFIG, &G); TIFFGetField(T, TIFFTAG_SAMPLEFORMAT, &F); if (G == PLANARCONFIG_CONTIG) { if (F == SAMPLEFORMAT_IEEEFP) { if (B == 32) { if (ispow2(W) && ispow2(L)) { *c = (P % 2) ? false : true; *p = (P % 2) ? P : P / 2; *l = log2i(S); *n = log2i(L); *m = log2i(W); return T; } else apperr("TIFF image size must be power of 2"); } else apperr("TIFF must have 32 bits per sample"); } else apperr("TIFF must have floating point sample format"); } else apperr("TIFF must have contiguous planar configuration"); TIFFClose(T); } return 0; }
void RasterNode::setArgs(const ArgList& args) { AreaNode::setArgs(args); if ((!ispow2(m_MaxTileSize.x) && m_MaxTileSize.x != -1) || (!ispow2(m_MaxTileSize.y) && m_MaxTileSize.y != -1)) { throw Exception(AVG_ERR_OUT_OF_RANGE, "maxtilewidth and maxtileheight must be powers of two."); } bool bMipmap = args.getArgVal<bool>("mipmap"); m_Material = MaterialInfo(GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE, bMipmap); m_pSurface = new OGLSurface(); }
void test_int_helpers () { std::cout << "\ntest_int_helpers\n"; // ispow2 for (int i = 1; i < (1<<30); i *= 2) { OIIO_CHECK_ASSERT (ispow2(i)); if (i > 1) OIIO_CHECK_ASSERT (! ispow2(i+1)); } OIIO_CHECK_ASSERT (ispow2(int(0))); OIIO_CHECK_ASSERT (! ispow2(-1)); OIIO_CHECK_ASSERT (! ispow2(-2)); // ispow2, try size_t, which is unsigned for (size_t i = 1; i < (1<<30); i *= 2) { OIIO_CHECK_ASSERT (ispow2(i)); if (i > 1) OIIO_CHECK_ASSERT (! ispow2(i+1)); } OIIO_CHECK_ASSERT (ispow2((unsigned int)0)); // pow2roundup OIIO_CHECK_EQUAL (pow2roundup(4), 4); OIIO_CHECK_EQUAL (pow2roundup(5), 8); OIIO_CHECK_EQUAL (pow2roundup(6), 8); OIIO_CHECK_EQUAL (pow2roundup(7), 8); OIIO_CHECK_EQUAL (pow2roundup(8), 8); // pow2rounddown OIIO_CHECK_EQUAL (pow2rounddown(4), 4); OIIO_CHECK_EQUAL (pow2rounddown(5), 4); OIIO_CHECK_EQUAL (pow2rounddown(6), 4); OIIO_CHECK_EQUAL (pow2rounddown(7), 4); OIIO_CHECK_EQUAL (pow2rounddown(8), 8); // round_to_multiple OIIO_CHECK_EQUAL (round_to_multiple(1, 5), 5); OIIO_CHECK_EQUAL (round_to_multiple(2, 5), 5); OIIO_CHECK_EQUAL (round_to_multiple(3, 5), 5); OIIO_CHECK_EQUAL (round_to_multiple(4, 5), 5); OIIO_CHECK_EQUAL (round_to_multiple(5, 5), 5); OIIO_CHECK_EQUAL (round_to_multiple(6, 5), 10); // round_to_multiple_of_pow2 OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(int(1), 4), 4); OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(int(2), 4), 4); OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(int(3), 4), 4); OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(int(4), 4), 4); OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(int(5), 4), 8); // round_to_multiple_of_pow2 OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(size_t(1), size_t(4)), 4); OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(size_t(2), size_t(4)), 4); OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(size_t(3), size_t(4)), 4); OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(size_t(4), size_t(4)), 4); OIIO_CHECK_EQUAL (round_to_multiple_of_pow2(size_t(5), size_t(4)), 8); }
/* * cause the alignment to become a multiple of n */ void defalign(int n) { n = ispow2(n / SZCHAR); if (n == -1) cerror("defalign: n != 2^i"); printf("\t.p2align %d\n", n); }
template <typename real> void ana(const char *in, const char *out, int bb, int fo, int fn) { float *src = 0; float *dst = 0; int w; int h; int b; int c; // Load the image and cast it to floating point. if ((src = image_read_float(in, &w, &h, &c, &b))) { if (ispow2(w) && ispow2(h)) { int n = w / 2; // Allocate a destination buffer and do the work. if ((dst = (float *) calloc(n * n * c, sizeof (float)))) { sht<real> T(n, c); T.S.set(src, h); T.ana(); switch (fo) { case 'h': T.F.hanning(fn ? fn : n); break; case 'l': T.F.lanczos(fn ? fn : n); break; case 'g': T.F.gauss (fn ? fn : n); break; case 'd': T.F.diffuse(); break; } T.F.get(dst, n); image_write_float(out, n, n, c, bb, dst); } } else fprintf(stderr, "Input must be power-of-two\n"); } else fprintf(stderr, "Failed to load %s\n", in); free(dst); free(src); }
int main(void) { int i; for (i = 0; i < 256; ++i) printf("%3d: %d\n", i, ispow2(i)); return 0; }
status_t evlog_init_etc(evlog_t *e, uint len, uint unitsize, uintptr_t *items) { if (len < 2 || !ispow2(len)) { return ERR_INVALID_ARGS; } if (unitsize < 1 || !ispow2(unitsize)) { return ERR_INVALID_ARGS; } if (unitsize > len) { return ERR_INVALID_ARGS; } e->head = 0; e->unitsize = unitsize; e->len_pow2 = log2(len); e->items = items; return NO_ERROR; }
void cbuf_initialize(cbuf_t *cbuf, size_t len) { DEBUG_ASSERT(cbuf); DEBUG_ASSERT(len > 0); DEBUG_ASSERT(ispow2(len)); cbuf->head = 0; cbuf->tail = 0; cbuf->len_pow2 = log2(len); cbuf->buf = malloc(len); event_init(&cbuf->event, false, 0); LTRACEF("len %zd, len_pow2 %u\n", len, cbuf->len_pow2); }
/* ** Returns the real size of the 'array' array */ LUAI_FUNC unsigned int luaH_realasize (const Table *t) { if (limitequalsasize(t)) return t->alimit; /* this is the size */ else { unsigned int size = t->alimit; /* compute the smallest power of 2 not smaller than 'n' */ size |= (size >> 1); size |= (size >> 2); size |= (size >> 4); size |= (size >> 8); size |= (size >> 16); #if (INT_MAX >> 30 >> 1) > 0 size |= (size >> 32); /* int has more than 32 bits */ #endif size++; lua_assert(ispow2(size) && size/2 < t->alimit && t->alimit < size); return size; } }
/* Funtion to setup a simple fifo structure. * Note: Addr should be 8 byte aligned. * bam : BAM instance for the descriptors to be queued. * pipe_num : pipe number for the descriptors to be queued. */ int bam_pipe_fifo_init(struct bam_instance *bam, uint8_t pipe_num) { if (bam->pipe[pipe_num].fifo.size > 0x7FFF) { dprintf(CRITICAL, "Size exceeds max size for a descriptor(0x7FFF)\n"); return BAM_RESULT_FAILURE; } /* Check if fifo start is 8-byte alligned */ ASSERT(!((uint32_t)bam->pipe[pipe_num].fifo.head & 0x7)); /* Check if fifo size is a power of 2. * The circular fifo logic in lk expects this. */ ASSERT(ispow2(bam->pipe[pipe_num].fifo.size)); bam->pipe[pipe_num].fifo.current = bam->pipe[pipe_num].fifo.head; /* Set the descriptor buffer size. Must be a multiple of 8 */ writel(bam->pipe[pipe_num].fifo.size * BAM_DESC_SIZE, BAM_P_FIFO_SIZESn(bam->pipe[pipe_num].pipe_num, bam->base)); /* Write descriptors FIFO base addr must be 8-byte aligned */ writel((uint32_t)bam->pipe[pipe_num].fifo.head, BAM_P_DESC_FIFO_ADDRn(bam->pipe[pipe_num].pipe_num, bam->base)); /* Initialize FIFO offset for the first read */ bam->pipe[pipe_num].fifo.offset = BAM_DESC_SIZE; /* Everything is set. * Flag pipe init done. */ bam->pipe[pipe_num].initialized = 1; return BAM_RESULT_SUCCESS; }
/* * local optimizations, most of which are probably * machine independent */ NODE * optim(NODE *p) { int o, ty; NODE *sp, *q; OFFSZ sz; int i; if (odebug) return(p); ty = coptype(p->n_op); if( ty == LTYPE ) return(p); if( ty == BITYPE ) p->n_right = optim(p->n_right); p->n_left = optim(p->n_left); /* collect constants */ again: o = p->n_op; switch(o){ case SCONV: if (concast(p->n_left, p->n_type)) { q = p->n_left; nfree(p); p = q; break; } /* FALLTHROUGH */ case PCONV: if (p->n_type != VOID) p = clocal(p); break; case FORTCALL: p->n_right = fortarg( p->n_right ); break; case ADDROF: if (LO(p) == TEMP) break; if( LO(p) != NAME ) cerror( "& error" ); if( !andable(p->n_left) && !statinit) break; LO(p) = ICON; setuleft: /* paint over the type of the left hand side with the type of the top */ p->n_left->n_type = p->n_type; p->n_left->n_df = p->n_df; p->n_left->n_ap = p->n_ap; q = p->n_left; nfree(p); p = q; break; case NOT: case UMINUS: case COMPL: if (LCON(p) && conval(p->n_left, o, p->n_left)) p = nfree(p); break; case UMUL: /* Do not discard ADDROF TEMP's */ if (LO(p) == ADDROF && LO(p->n_left) != TEMP) { q = p->n_left->n_left; nfree(p->n_left); nfree(p); p = q; break; } if( LO(p) != ICON ) break; LO(p) = NAME; goto setuleft; case RS: if (LCON(p) && RCON(p) && conval(p->n_left, o, p->n_right)) goto zapright; sz = tsize(p->n_type, p->n_df, p->n_ap); if (LO(p) == RS && RCON(p->n_left) && RCON(p) && (RV(p) + RV(p->n_left)) < sz) { /* two right-shift by constants */ RV(p) += RV(p->n_left); p->n_left = zapleft(p->n_left); } #if 0 else if (LO(p) == LS && RCON(p->n_left) && RCON(p)) { RV(p) -= RV(p->n_left); if (RV(p) < 0) o = p->n_op = LS, RV(p) = -RV(p); p->n_left = zapleft(p->n_left); } #endif if (RO(p) == ICON) { if (RV(p) < 0) { RV(p) = -RV(p); p->n_op = LS; goto again; } #ifdef notyet /* must check for side effects, --a >> 32; */ if (RV(p) >= tsize(p->n_type, p->n_df, p->n_sue) && ISUNSIGNED(p->n_type)) { /* ignore signed shifts */ /* too many shifts */ tfree(p->n_left); nfree(p->n_right); p->n_op = ICON; p->n_lval = 0; p->n_sp = NULL; } else #endif /* avoid larger shifts than type size */ if (RV(p) >= sz) { RV(p) = RV(p) % sz; werror("shift larger than type"); } if (RV(p) == 0) p = zapleft(p); } break; case LS: if (LCON(p) && RCON(p) && conval(p->n_left, o, p->n_right)) goto zapright; sz = tsize(p->n_type, p->n_df, p->n_ap); if (LO(p) == LS && RCON(p->n_left) && RCON(p)) { /* two left-shift by constants */ RV(p) += RV(p->n_left); p->n_left = zapleft(p->n_left); } #if 0 else if (LO(p) == RS && RCON(p->n_left) && RCON(p)) { RV(p) -= RV(p->n_left); p->n_left = zapleft(p->n_left); } #endif if (RO(p) == ICON) { if (RV(p) < 0) { RV(p) = -RV(p); p->n_op = RS; goto again; } #ifdef notyet /* must check for side effects */ if (RV(p) >= tsize(p->n_type, p->n_df, p->n_sue)) { /* too many shifts */ tfree(p->n_left); nfree(p->n_right); p->n_op = ICON; p->n_lval = 0; p->n_sp = NULL; } else #endif /* avoid larger shifts than type size */ if (RV(p) >= sz) { RV(p) = RV(p) % sz; werror("shift larger than type"); } if (RV(p) == 0) p = zapleft(p); } break; case MINUS: if (LCON(p) && RCON(p) && p->n_left->n_sp == p->n_right->n_sp) { /* link-time constants, but both are the same */ /* solve it now by forgetting the symbols */ p->n_left->n_sp = p->n_right->n_sp = NULL; } if( !nncon(p->n_right) ) break; RV(p) = -RV(p); o = p->n_op = PLUS; case MUL: /* * Check for u=(x-y)+z; where all vars are pointers to * the same struct. This has two advantages: * 1: avoid a mul+div * 2: even if not allowed, people may get surprised if this * calculation do not give correct result if using * unaligned structs. */ if (p->n_type == INTPTR && RCON(p) && LO(p) == DIV && RCON(p->n_left) && RV(p) == RV(p->n_left) && LO(p->n_left) == MINUS) { q = p->n_left->n_left; if (q->n_left->n_type == PTR+STRTY && q->n_right->n_type == PTR+STRTY && strmemb(q->n_left->n_ap) == strmemb(q->n_right->n_ap)) { p = zapleft(p); p = zapleft(p); } } /* FALLTHROUGH */ case PLUS: case AND: case OR: case ER: /* commutative ops; for now, just collect constants */ /* someday, do it right */ if( nncon(p->n_left) || ( LCON(p) && !RCON(p) ) ) SWAP( p->n_left, p->n_right ); /* make ops tower to the left, not the right */ if( RO(p) == o ){ NODE *t1, *t2, *t3; t1 = p->n_left; sp = p->n_right; t2 = sp->n_left; t3 = sp->n_right; /* now, put together again */ p->n_left = sp; sp->n_left = t1; sp->n_right = t2; sp->n_type = p->n_type; p->n_right = t3; } if(o == PLUS && LO(p) == MINUS && RCON(p) && RCON(p->n_left) && conval(p->n_right, MINUS, p->n_left->n_right)){ zapleft: q = p->n_left->n_left; nfree(p->n_left->n_right); nfree(p->n_left); p->n_left = q; } if( RCON(p) && LO(p)==o && RCON(p->n_left) && conval( p->n_right, o, p->n_left->n_right ) ){ goto zapleft; } else if( LCON(p) && RCON(p) && conval( p->n_left, o, p->n_right ) ){ zapright: nfree(p->n_right); q = makety(p->n_left, p->n_type, p->n_qual, p->n_df, p->n_ap); nfree(p); p = clocal(q); break; } /* change muls to shifts */ if( o == MUL && nncon(p->n_right) && (i=ispow2(RV(p)))>=0){ if( i == 0 ) { /* multiplication by 1 */ goto zapright; } o = p->n_op = LS; p->n_right->n_type = INT; p->n_right->n_df = NULL; RV(p) = i; } /* change +'s of negative consts back to - */ if( o==PLUS && nncon(p->n_right) && RV(p)<0 ){ RV(p) = -RV(p); o = p->n_op = MINUS; } /* remove ops with RHS 0 */ if ((o == PLUS || o == MINUS || o == OR || o == ER) && nncon(p->n_right) && RV(p) == 0) { goto zapright; } break; case DIV: if( nncon( p->n_right ) && p->n_right->n_lval == 1 ) goto zapright; if (LCON(p) && RCON(p) && conval(p->n_left, DIV, p->n_right)) goto zapright; if (RCON(p) && ISUNSIGNED(p->n_type) && (i=ispow2(RV(p))) > 0) { p->n_op = RS; RV(p) = i; q = p->n_right; if(tsize(q->n_type, q->n_df, q->n_ap) > SZINT) p->n_right = makety(q, INT, 0, 0, 0); break; } break; case MOD: if (RCON(p) && ISUNSIGNED(p->n_type) && ispow2(RV(p)) > 0) { p->n_op = AND; RV(p) = RV(p) -1; break; } break; case EQ: case NE: case LT: case LE: case GT: case GE: case ULT: case ULE: case UGT: case UGE: if( !LCON(p) ) break; /* exchange operands */ sp = p->n_left; p->n_left = p->n_right; p->n_right = sp; p->n_op = revrel[p->n_op - EQ ]; break; #ifdef notyet case ASSIGN: /* Simple test to avoid two branches */ if (RO(p) != NE) break; q = p->n_right; if (RCON(q) && RV(q) == 0 && LO(q) == AND && RCON(q->n_left) && (i = ispow2(RV(q->n_left))) && q->n_left->n_type == INT) { q->n_op = RS; RV(q) = i; } break; #endif } return(p); }
/*********************************************************************************** * This function is the entry point of the parallel ordering algorithm. * This function assumes that the graph is already nice partitioned among the * processors and then proceeds to perform recursive bisection. ************************************************************************************/ void ParMETIS_V3_NodeND(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, idxtype *order, idxtype *sizes, MPI_Comm *comm) { int i, j; int ltvwgts[MAXNCON]; int nparts, npes, mype, wgtflag = 0, seed = GLOBAL_SEED; CtrlType ctrl; WorkSpaceType wspace; GraphType *graph, *mgraph; idxtype *morder; int minnvtxs; MPI_Comm_size(*comm, &npes); MPI_Comm_rank(*comm, &mype); nparts = npes; if (!ispow2(npes)) { if (mype == 0) printf("Error: The number of processors must be a power of 2!\n"); return; } if (vtxdist[npes] < (int)((float)(npes*npes)*1.2)) { if (mype == 0) printf("Error: Too many processors for this many vertices.\n"); return; } minnvtxs = vtxdist[1]-vtxdist[0]; for (i=0; i<npes; i++) minnvtxs = (minnvtxs < vtxdist[i+1]-vtxdist[i]) ? minnvtxs : vtxdist[i+1]-vtxdist[i]; if (minnvtxs < (int)((float)npes*1.1)) { if (mype == 0) printf("Error: vertices are not distributed equally.\n"); return; } if (*numflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 1); SetUpCtrl(&ctrl, nparts, options[PMV3_OPTION_DBGLVL], *comm); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*npes); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*amax(npes, nparts)); ctrl.seed = mype; ctrl.sync = seed; ctrl.partType = STATIC_PARTITION; ctrl.ps_relation = -1; ctrl.tpwgts = fsmalloc(nparts, 1.0/(float)(nparts), "tpwgts"); ctrl.ubvec[0] = 1.03; graph = Moc_SetUpGraph(&ctrl, 1, vtxdist, xadj, NULL, adjncy, NULL, &wgtflag); PreAllocateMemory(&ctrl, graph, &wspace); /*======================================================= * Compute the initial k-way partitioning =======================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); Moc_Global_Partition(&ctrl, graph, &wspace); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); /*======================================================= * Move the graph according to the partitioning =======================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.MoveTmr)); MALLOC_CHECK(NULL); graph->ncon = 1; mgraph = Moc_MoveGraph(&ctrl, graph, &wspace); MALLOC_CHECK(NULL); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.MoveTmr)); /*======================================================= * Now compute an ordering of the moved graph =======================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); FreeWSpace(&wspace); PreAllocateMemory(&ctrl, mgraph, &wspace); ctrl.ipart = ISEP_NODE; ctrl.CoarsenTo = amin(vtxdist[npes]+1, amax(20*npes, 1000)); /* compute tvwgts */ for (j=0; j<mgraph->ncon; j++) ltvwgts[j] = 0; for (i=0; i<mgraph->nvtxs; i++) for (j=0; j<mgraph->ncon; j++) ltvwgts[j] += mgraph->vwgt[i*mgraph->ncon+j]; for (j=0; j<mgraph->ncon; j++) ctrl.tvwgts[j] = GlobalSESum(&ctrl, ltvwgts[j]); mgraph->nvwgt = fmalloc(mgraph->nvtxs*mgraph->ncon, "mgraph->nvwgt"); for (i=0; i<mgraph->nvtxs; i++) for (j=0; j<mgraph->ncon; j++) mgraph->nvwgt[i*mgraph->ncon+j] = (float)(mgraph->vwgt[i*mgraph->ncon+j]) / (float)(ctrl.tvwgts[j]); morder = idxmalloc(mgraph->nvtxs, "PAROMETIS: morder"); MultilevelOrder(&ctrl, mgraph, morder, sizes, &wspace); MALLOC_CHECK(NULL); /* Invert the ordering back to the original graph */ ProjectInfoBack(&ctrl, graph, order, morder, &wspace); MALLOC_CHECK(NULL); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); free(ctrl.tpwgts); free(morder); FreeGraph(mgraph); FreeInitialGraphAndRemap(graph, 0); FreeWSpace(&wspace); FreeCtrl(&ctrl); if (*numflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 0); MALLOC_CHECK(NULL); }
/*********************************************************************************** * This function is the entry point of the serial ordering algorithm. ************************************************************************************/ int ParMETIS_SerialNodeND(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *numflag, idx_t *options, idx_t *order, idx_t *sizes, MPI_Comm *comm) { idx_t i, npes, mype; ctrl_t *ctrl=NULL; graph_t *agraph=NULL; idx_t *perm=NULL, *iperm=NULL; idx_t *sendcount, *displs; /* Setup the ctrl */ ctrl = SetupCtrl(PARMETIS_OP_OMETIS, options, 1, 1, NULL, NULL, *comm); npes = ctrl->npes; mype = ctrl->mype; if (!ispow2(npes)) { if (mype == 0) printf("Error: The number of processors must be a power of 2!\n"); FreeCtrl(&ctrl); return METIS_ERROR; } if (*numflag > 0) ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 1); STARTTIMER(ctrl, ctrl->TotalTmr); STARTTIMER(ctrl, ctrl->MoveTmr); agraph = AssembleEntireGraph(ctrl, vtxdist, xadj, adjncy); STOPTIMER(ctrl, ctrl->MoveTmr); if (mype == 0) { perm = imalloc(agraph->nvtxs, "PAROMETISS: perm"); iperm = imalloc(agraph->nvtxs, "PAROMETISS: iperm"); METIS_NodeNDP(agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt, npes, NULL, perm, iperm, sizes); } STARTTIMER(ctrl, ctrl->MoveTmr); /* Broadcast the sizes array */ gkMPI_Bcast((void *)sizes, 2*npes, IDX_T, 0, ctrl->gcomm); /* Scatter the iperm */ sendcount = imalloc(npes, "PAROMETISS: sendcount"); displs = imalloc(npes, "PAROMETISS: displs"); for (i=0; i<npes; i++) { sendcount[i] = vtxdist[i+1]-vtxdist[i]; displs[i] = vtxdist[i]; } gkMPI_Scatterv((void *)iperm, sendcount, displs, IDX_T, (void *)order, vtxdist[mype+1]-vtxdist[mype], IDX_T, 0, ctrl->gcomm); STOPTIMER(ctrl, ctrl->MoveTmr); STOPTIMER(ctrl, ctrl->TotalTmr); IFSET(ctrl->dbglvl, DBG_TIME, PrintTimingInfo(ctrl)); IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->gcomm)); gk_free((void **)&agraph->xadj, &agraph->adjncy, &perm, &iperm, &sendcount, &displs, &agraph, LTERM); if (*numflag > 0) ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 0); goto DONE; DONE: FreeCtrl(&ctrl); return METIS_OK; }
int server_main(char *home, char *dev, char *port, int udp, int ipv4, int log) { int lfd = -1, kdpfd, nfds, nfd, curfds, efd[2], refd[2], tunfd, i; unsigned int cpus = 0, threads, udp_cpu = 0; ssize_t ret; struct epoll_event *events; struct addrinfo hints, *ahead, *ai; auth_log = !!log; openlog("curvetun", LOG_PID | LOG_CONS | LOG_NDELAY, LOG_DAEMON); syslog(LOG_INFO, "curvetun server booting!\n"); syslog_maybe(!auth_log, LOG_INFO, "curvetun user logging disabled!\n"); parse_userfile_and_generate_user_store_or_die(home); memset(&hints, 0, sizeof(hints)); hints.ai_family = PF_UNSPEC; hints.ai_socktype = udp ? SOCK_DGRAM : SOCK_STREAM; hints.ai_protocol = udp ? IPPROTO_UDP : IPPROTO_TCP; hints.ai_flags = AI_PASSIVE; ret = getaddrinfo(NULL, port, &hints, &ahead); if (ret < 0) syslog_panic("Cannot get address info!\n"); for (ai = ahead; ai != NULL && lfd < 0; ai = ai->ai_next) { lfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); if (lfd < 0) continue; if (ai->ai_family == AF_INET6) { #ifdef IPV6_V6ONLY ret = set_ipv6_only(lfd); if (ret < 0) { close(lfd); lfd = -1; continue; } #else close(lfd); lfd = -1; continue; #endif /* IPV6_V6ONLY */ } set_reuseaddr(lfd); set_mtu_disc_dont(lfd); ret = bind(lfd, ai->ai_addr, ai->ai_addrlen); if (ret < 0) { close(lfd); lfd = -1; continue; } if (!udp) { ret = listen(lfd, 5); if (ret < 0) { close(lfd); lfd = -1; continue; } } if (ipv4 == -1) { ipv4 = (ai->ai_family == AF_INET6 ? 0 : (ai->ai_family == AF_INET ? 1 : -1)); } syslog_maybe(auth_log, LOG_INFO, "curvetun on IPv%d via %s " "on port %s!\n", ai->ai_family == AF_INET ? 4 : 6, udp ? "UDP" : "TCP", port); syslog_maybe(auth_log, LOG_INFO, "Allowed overlay proto is " "IPv%d!\n", ipv4 ? 4 : 6); } freeaddrinfo(ahead); if (lfd < 0 || ipv4 < 0) syslog_panic("Cannot create socket!\n"); tunfd = tun_open_or_die(dev ? dev : DEVNAME_SERVER, IFF_TUN | IFF_NO_PI); pipe_or_die(efd, O_NONBLOCK); pipe_or_die(refd, O_NONBLOCK); set_nonblocking(lfd); events = xzmalloc(MAX_EPOLL_SIZE * sizeof(*events)); for (i = 0; i < MAX_EPOLL_SIZE; ++i) events[i].data.fd = -1; kdpfd = epoll_create(MAX_EPOLL_SIZE); if (kdpfd < 0) syslog_panic("Cannot create socket!\n"); set_epoll_descriptor(kdpfd, EPOLL_CTL_ADD, lfd, udp ? EPOLLIN | EPOLLET | EPOLLONESHOT : EPOLLIN); set_epoll_descriptor(kdpfd, EPOLL_CTL_ADD, efd[0], EPOLLIN); set_epoll_descriptor(kdpfd, EPOLL_CTL_ADD, refd[0], EPOLLIN); set_epoll_descriptor(kdpfd, EPOLL_CTL_ADD, tunfd, EPOLLIN | EPOLLET | EPOLLONESHOT); curfds = 4; trie_init(); cpus = get_number_cpus_online(); threads = cpus * THREADS_PER_CPU; if (!ispow2(threads)) syslog_panic("Thread number not power of two!\n"); threadpool = xzmalloc(sizeof(*threadpool) * threads); thread_spawn_or_panic(cpus, efd[1], refd[1], tunfd, ipv4, udp); init_cpusched(threads); register_socket(tunfd); register_socket(lfd); syslog(LOG_INFO, "curvetun up and running!\n"); while (likely(!sigint)) { nfds = epoll_wait(kdpfd, events, curfds, -1); if (nfds < 0) { syslog(LOG_ERR, "epoll_wait error: %s\n", strerror(errno)); break; } for (i = 0; i < nfds; ++i) { if (unlikely(events[i].data.fd < 0)) continue; if (events[i].data.fd == lfd && !udp) { int ncpu; char hbuff[256], sbuff[256]; struct sockaddr_storage taddr; socklen_t tlen; tlen = sizeof(taddr); nfd = accept(lfd, (struct sockaddr *) &taddr, &tlen); if (nfd < 0) { syslog(LOG_ERR, "accept error: %s\n", strerror(errno)); continue; } if (curfds + 1 > MAX_EPOLL_SIZE) { close(nfd); continue; } curfds++; ncpu = register_socket(nfd); memset(hbuff, 0, sizeof(hbuff)); memset(sbuff, 0, sizeof(sbuff)); getnameinfo((struct sockaddr *) &taddr, tlen, hbuff, sizeof(hbuff), sbuff, sizeof(sbuff), NI_NUMERICHOST | NI_NUMERICSERV); syslog_maybe(auth_log, LOG_INFO, "New connection " "from %s:%s (%d active client connections) - id %d on CPU%d", hbuff, sbuff, curfds-4, nfd, ncpu); set_nonblocking(nfd); set_socket_keepalive(nfd); set_tcp_nodelay(nfd); ret = set_epoll_descriptor2(kdpfd, EPOLL_CTL_ADD, nfd, EPOLLIN | EPOLLET | EPOLLONESHOT); if (ret < 0) { close(nfd); curfds--; continue; } } else if (events[i].data.fd == refd[0]) { int fd_one; ret = read_exact(refd[0], &fd_one, sizeof(fd_one), 1); if (ret != sizeof(fd_one) || fd_one <= 0) continue; ret = set_epoll_descriptor2(kdpfd, EPOLL_CTL_MOD, fd_one, EPOLLIN | EPOLLET | EPOLLONESHOT); if (ret < 0) { close(fd_one); continue; } } else if (events[i].data.fd == efd[0]) { int fd_del, test; ret = read_exact(efd[0], &fd_del, sizeof(fd_del), 1); if (ret != sizeof(fd_del) || fd_del <= 0) continue; ret = read(fd_del, &test, sizeof(test)); if (ret < 0 && errno == EBADF) continue; ret = set_epoll_descriptor2(kdpfd, EPOLL_CTL_DEL, fd_del, 0); if (ret < 0) { close(fd_del); continue; } close(fd_del); curfds--; unregister_socket(fd_del); syslog_maybe(auth_log, LOG_INFO, "Closed connection " "with id %d (%d active client connections remain)\n", fd_del, curfds-4); } else { int cpu, fd_work = events[i].data.fd; if (!udp) cpu = socket_to_cpu(fd_work); else udp_cpu = (udp_cpu + 1) & (threads - 1); write_exact(threadpool[udp ? udp_cpu : cpu].efd[1], &fd_work, sizeof(fd_work), 1); } } } syslog(LOG_INFO, "curvetun prepare shut down!\n"); close(lfd); close(efd[0]); close(efd[1]); close(refd[0]); close(refd[1]); close(tunfd); thread_finish(cpus); xfree(threadpool); xfree(events); unregister_socket(lfd); unregister_socket(tunfd); destroy_cpusched(); trie_cleanup(); destroy_user_store(); syslog(LOG_INFO, "curvetun shut down!\n"); closelog(); return 0; }
/* ** Check whether real size of the array is a power of 2. ** (If it is not, 'alimit' cannot be changed to any other value ** without changing the real size.) */ static int ispow2realasize (const Table *t) { return (!isrealasize(t) || ispow2(t->alimit)); }
status_t virtio_alloc_ring(struct virtio_device *dev, uint index, uint16_t len) { LTRACEF("dev %p, index %u, len %u\n", dev, index, len); DEBUG_ASSERT(dev); DEBUG_ASSERT(len > 0 && ispow2(len)); DEBUG_ASSERT(index < MAX_VIRTIO_RINGS); if (len == 0 || !ispow2(len)) return ERR_INVALID_ARGS; struct vring *ring = &dev->ring[index]; /* allocate a ring */ size_t size = vring_size(len, PAGE_SIZE); LTRACEF("need %zu bytes\n", size); #if WITH_KERNEL_VM void *vptr; status_t err = vmm_alloc_contiguous(vmm_get_kernel_aspace(), "virtio_ring", size, &vptr, 0, 0, ARCH_MMU_FLAG_UNCACHED_DEVICE); if (err < 0) return ERR_NO_MEMORY; LTRACEF("allocated virtio_ring at va %p\n", vptr); /* compute the physical address */ paddr_t pa; err = arch_mmu_query((vaddr_t)vptr, &pa, NULL); if (err < 0) { return ERR_NO_MEMORY; } LTRACEF("virtio_ring at pa 0x%lx\n", pa); #else void *vptr = memalign(PAGE_SIZE, size); if (!vptr) return ERR_NO_MEMORY; LTRACEF("ptr %p\n", vptr); memset(vptr, 0, size); /* compute the physical address */ paddr_t pa = (paddr_t)vptr; #endif /* initialize the ring */ vring_init(ring, len, vptr, PAGE_SIZE); dev->ring[index].free_list = 0xffff; dev->ring[index].free_count = 0; /* add all the descriptors to the free list */ for (uint i = 0; i < len; i++) { virtio_free_desc(dev, index, i); } /* register the ring with the device */ DEBUG_ASSERT(dev->mmio_config); dev->mmio_config->guest_page_size = PAGE_SIZE; dev->mmio_config->queue_sel = index; dev->mmio_config->queue_num = len; dev->mmio_config->queue_align = PAGE_SIZE; dev->mmio_config->queue_pfn = pa / PAGE_SIZE; /* mark the ring active */ dev->active_rings_bitmap |= (1 << index); return NO_ERROR; }