Example #1
0
int
loadbloom(Bloom *b)
{
	int i, n;
	uint ones;
	uchar *data;
	u32int *a;
	
	data = vtmallocz(b->size);
	if(readpart(b->part, 0, data, b->size) < 0){
		vtfree(b);
		vtfree(data);
		return -1;
	}
	b->data = data;

	a = (u32int*)b->data;
	n = b->size/4;
	ones = 0;
	for(i=0; i<n; i++)
		ones += countbits(a[i]); 
	addstat(StatBloomOnes, ones);

	if(b->size == MaxBloomSize)	/* 2^32 overflows ulong */
		addstat(StatBloomBits, b->size*8-1);
	else
		addstat(StatBloomBits, b->size*8);
		
	return 0;
}
Example #2
0
int bitvec_count(const bitvec_t *v)
{
  int c;
  int i;

  c = 0;
  for (i = 0; i < v->length; i++)
    c += countbits(v->vec[i]);

  return c;
}
Example #3
0
int countbits ( slice & v )
{
  int c = 0;

  for(size_t i = 0; i < v.size(); i++)
  {
    int d = countbits(v[i]);

    c += d;
  }

  return c;
}
Example #4
0
int countxor ( slice & a, slice & b )
{
  assert(a.size() == b.size());

  int c = 0;

  for(size_t i = 0; i < a.size(); i++)
  {
    int d = countbits(a[i] ^ b[i]);

    c += d;
  }

  return c;
}
Example #5
0
File: main.c Project: ecashin/low
/* This wrapper for the assembly subroutine handles alignment. */
static inline int cntbits(uint16_t *a, size_t n)
{
	int b = 0;
	unsigned char *p = (unsigned char *) a;
	unsigned char *e;
	const int blksiz = 8; /* call countbits with 64-bit "blocks" */

	for (; n > 0 && ((unsigned long) p) % blksiz; ++p, --n) /* until aligned */
		b += cbchar(*p);
	e = &p[n];
	n = ifloor(e - p, blksiz);
	b += countbits((uint16_t *) p, n); /* this is the fast part */
	for (p += n; p < e; ++p)
		b += cbchar(*p); /* the remaining odd bytes */
	return b;
}
Example #6
0
// Callback function that ispc compiler emits calls to when --instrument
// command-line flag is given while compiling.
void
ISPCInstrument(const char *fn, const char *note, int line, uint64_t mask) {
    char sline[16];
    sprintf(sline, "%04d", line);
    std::string s = std::string(fn) + std::string("(") + std::string(sline) +
        std::string(") - ") + std::string(note);

    // Find or create a CallInfo instance for this callsite.
    CallInfo &ci = callInfo[s];

    // And update its statistics... 
    ++ci.count;
    if (mask == 0)
        ++ci.allOff;
    ci.laneCount += countbits(mask);
}
Example #7
0
int main()
{
	int i, j;

	printf("static unsigned bitcount[] = {\n");
	for (i = 0, j = 0; i < 256; i++) {
		int n = countbits((unsigned char)i);
		if (i > 0)
			putc(',', stdout);
		if (++j > 30) {
			putc('\n', stdout);
			j = 0;
		}
		printf("%d", n);
			
	}
	printf("};\n");
	return 0;
}
Example #8
0
  void RayStreamLogger::logRay16Intersect(const void* valid_i, void* scene, RTCRay16& start, RTCRay16& end)
  {
    mutex.lock();

    LogRay16 logRay16;

    logRay16.type    = RAY_INTERSECT;
#if defined(__MIC__)
    logRay16.m_valid = *(mic_i*)valid_i != mic_i(0);
    logRay16.numRays = countbits(logRay16.m_valid);
#endif
    /* ray16 before intersect */
    logRay16.ray16 = start;
    ray16->write((char*)&logRay16 ,sizeof(logRay16));

    /* ray16 after intersect */
    logRay16.ray16   = end;
    ray16_verify->write((char*)&logRay16 ,sizeof(logRay16));

    mutex.unlock();
  }
Example #9
0
void FactorSlices ( slice_vec & slices )
{
  std::vector<int> counts(slices.size(),0);

  for(size_t i = 0; i < slices.size(); i++)
  {
    counts[i] = countbits(slices[i]);
  }

  bool changed = true;

  while(changed)
  {
    int bestA = -1;
    int bestB = -1;

    for(int j = 0; j < (int)slices.size()-1; j++)
    {
      for(int i = j+1; i < (int)slices.size(); i++)
      {
        int d = countxor(slices[i],slices[j]);

        if((d < counts[i]) && (d < counts[j]))
        {
          if(counts[i] < counts[j])
          {
            bestA = j;
            bestB = i;
          }
        }
        else if(d < counts[i])
        {
          //bestA = 
        }
      }
    }
  }
}
static void
eliminate_phantom_keys(struct vrkiu_chip *chip, unsigned short *scandata)
{
	unsigned char inkey[KIU_NSCANLINE], *prevkey, *reskey;
	int i, j;
#ifdef VRKIUDEBUG
	int modified = 0;
	static int prevmod = 0;
#endif

	reskey = (unsigned char *)scandata;
	for (i = 0; i < KIU_NSCANLINE; i++)
		inkey[i] = reskey[i];
	prevkey = (unsigned char *)chip->kc_scandata;

	for (i = 0; i < KIU_NSCANLINE; i++) {
		if (countbits(inkey[i]) > 1) {
			for (j = 0; j < KIU_NSCANLINE; j++) {
				if (i != j && (inkey[i] & inkey[j])) {
#ifdef VRKIUDEBUG
					modified = 1;
					if (!prevmod) {
						DPRINTF(("vrkiu_scan: %x:%02x->%02x",
						    i, inkey[i], prevkey[i]));
						DPRINTF(("  %x:%02x->%02x\n",
						    j, inkey[j], prevkey[j]));
					}
#endif
					reskey[i] = prevkey[i];
					reskey[j] = prevkey[j];
				}
			}
		}
	}
#ifdef VRKIUDEBUG
	prevmod = modified;
#endif
}
    void BVH4mbIntersector16Single::occluded(mic_i* valid_i, BVH4mb* bvh, Ray16& ray16)
    {
      /* near and node stack */
      __align(64) NodeRef stack_node[3*BVH4i::maxDepth+1];

      /* setup */
      const mic_m m_valid     = *(mic_i*)valid_i != mic_i(0);
      const mic3f rdir16      = rcp_safe(ray16.dir);
      unsigned int terminated = toInt(!m_valid);
      const mic_f inf         = mic_f(pos_inf);
      const mic_f zero        = mic_f::zero();

      const Node               * __restrict__ nodes = (Node     *)bvh->nodePtr();
      const BVH4mb::Triangle01 * __restrict__ accel = (BVH4mb::Triangle01 *)bvh->triPtr();

      stack_node[0] = BVH4i::invalidNode;

      long rayIndex = -1;
      while((rayIndex = bitscan64(rayIndex,toInt(m_valid))) != BITSCAN_NO_BIT_SET_64)	    
        {
	  stack_node[1] = bvh->root;
	  size_t sindex = 2;

	  const mic_f org_xyz      = loadAOS4to16f(rayIndex,ray16.org.x,ray16.org.y,ray16.org.z);
	  const mic_f dir_xyz      = loadAOS4to16f(rayIndex,ray16.dir.x,ray16.dir.y,ray16.dir.z);
	  const mic_f rdir_xyz     = loadAOS4to16f(rayIndex,rdir16.x,rdir16.y,rdir16.z);
	  const mic_f org_rdir_xyz = org_xyz * rdir_xyz;
	  const mic_f min_dist_xyz = broadcast1to16f(&ray16.tnear[rayIndex]);
	  const mic_f max_dist_xyz = broadcast1to16f(&ray16.tfar[rayIndex]);
	  const mic_f time         = broadcast1to16f(&ray16.time[rayIndex]);

	  const unsigned int leaf_mask = BVH4I_LEAF_MASK;

	  while (1)
	    {
	      NodeRef curNode = stack_node[sindex-1];
	      sindex--;

	      const mic_f one_time = (mic_f::one() - time);
            
	      while (1) 
		{
		  /* test if this is a leaf node */
		  if (unlikely(curNode.isLeaf(leaf_mask))) break;
        
		  const Node* __restrict__ const node = curNode.node(nodes);
		  const float* __restrict const plower = (float*)node->lower;
		  const float* __restrict const pupper = (float*)node->upper;

		  prefetch<PFHINT_L1>((char*)node + 0*64);
		  prefetch<PFHINT_L1>((char*)node + 1*64);
		  prefetch<PFHINT_L1>((char*)node + 2*64);
		  prefetch<PFHINT_L1>((char*)node + 3*64);

		  const BVH4mb::Node* __restrict__ const nodeMB = (BVH4mb::Node*)node;
		  const mic_f lower = one_time  * load16f((float*)nodeMB->lower) + time * load16f((float*)nodeMB->lower_t1);
		  const mic_f upper = one_time  * load16f((float*)nodeMB->upper) + time * load16f((float*)nodeMB->upper_t1);
        
		  /* intersect single ray with 4 bounding boxes */
		  const mic_f tLowerXYZ = lower * rdir_xyz - org_rdir_xyz;
		  const mic_f tUpperXYZ = upper * rdir_xyz - org_rdir_xyz;

		  const mic_f tLower = mask_min(0x7777,min_dist_xyz,tLowerXYZ,tUpperXYZ);
		  const mic_f tUpper = mask_max(0x7777,max_dist_xyz,tLowerXYZ,tUpperXYZ);

		  const Node* __restrict__ const next = curNode.node(nodes);
		  prefetch<PFHINT_L2>((char*)next + 0);
		  prefetch<PFHINT_L2>((char*)next + 64);

		  sindex--;
		  const mic_f tNear = vreduce_max4(tLower);
		  const mic_f tFar  = vreduce_min4(tUpper);  
		  const mic_m hitm = le(0x8888,tNear,tFar);
		  const mic_f tNear_pos = select(hitm,tNear,inf);

		  curNode = stack_node[sindex]; // early pop of next node

		  /* if no child is hit, continue with early popped child */
		  if (unlikely(none(hitm))) continue;
		  sindex++;
        
		  const unsigned long hiti = toInt(hitm);
		  const unsigned long pos_first = bitscan64(hiti);
		  const unsigned long num_hitm = countbits(hiti); 
        
		  /* if a single child is hit, continue with that child */
		  curNode = ((unsigned int *)plower)[pos_first];
		  if (likely(num_hitm == 1)) continue;
        
		  /* if two children are hit, push in correct order */
		  const unsigned long pos_second = bitscan64(pos_first,hiti);
		  if (likely(num_hitm == 2))
		    {
		      const unsigned int dist_first  = ((unsigned int*)&tNear)[pos_first];
		      const unsigned int dist_second = ((unsigned int*)&tNear)[pos_second];
		      const unsigned int node_first  = curNode;
		      const unsigned int node_second = ((unsigned int*)plower)[pos_second];
          
		      if (dist_first <= dist_second)
			{
			  stack_node[sindex] = node_second;
			  sindex++;
			  assert(sindex < 3*BVH4i::maxDepth+1);
			  continue;
			}
		      else
			{
			  stack_node[sindex] = curNode;
			  curNode = node_second;
			  sindex++;
			  assert(sindex < 3*BVH4i::maxDepth+1);
			  continue;
			}
		    }
        
		  /* continue with closest child and push all others */
		  const mic_f min_dist = set_min_lanes(tNear_pos);
		  const unsigned int old_sindex = sindex;
		  sindex += countbits(hiti) - 1;
		  assert(sindex < 3*BVH4i::maxDepth+1);
        
		  const mic_m closest_child = eq(hitm,min_dist,tNear);
		  const unsigned long closest_child_pos = bitscan64(closest_child);
		  const mic_m m_pos = andn(hitm,andn(closest_child,(mic_m)((unsigned int)closest_child - 1)));
		  const mic_i plower_node = load16i((int*)plower);
		  curNode = ((unsigned int*)plower)[closest_child_pos];
		  compactustore16i(m_pos,&stack_node[old_sindex],plower_node);
		}
	  
	    

	      /* return if stack is empty */
	      if (unlikely(curNode == BVH4i::invalidNode)) break;


	      /* intersect one ray against four triangles */

	      //////////////////////////////////////////////////////////////////////////////////////////////////

	      const BVH4mb::Triangle01* tptr  = (BVH4mb::Triangle01*) curNode.leaf(accel);

	      prefetch<PFHINT_L1>((mic_f*)tptr +  0); 
	      prefetch<PFHINT_L1>((mic_f*)tptr +  1); 
	      prefetch<PFHINT_L1>((mic_f*)tptr +  2); 
	      prefetch<PFHINT_L1>((mic_f*)tptr +  3); 

	      const mic_i and_mask = broadcast4to16i(zlc4);
	      
	      const mic_f v0_t0 = gather_4f_zlc(and_mask,
						(float*)&tptr[0].t0.v0,
						(float*)&tptr[1].t0.v0,
						(float*)&tptr[2].t0.v0,
						(float*)&tptr[3].t0.v0);
	      
	      const mic_f v1_t0 = gather_4f_zlc(and_mask,
						(float*)&tptr[0].t0.v1,
						(float*)&tptr[1].t0.v1,
						(float*)&tptr[2].t0.v1,
						(float*)&tptr[3].t0.v1);
	      
	      const mic_f v2_t0 = gather_4f_zlc(and_mask,
						(float*)&tptr[0].t0.v2,
						(float*)&tptr[1].t0.v2,
						(float*)&tptr[2].t0.v2,
						(float*)&tptr[3].t0.v2);


	      prefetch<PFHINT_L2>((mic_f*)tptr +  4); 
	      prefetch<PFHINT_L2>((mic_f*)tptr +  5); 
	      prefetch<PFHINT_L2>((mic_f*)tptr +  6); 
	      prefetch<PFHINT_L2>((mic_f*)tptr +  7); 

	      const mic_f v0_t1 = gather_4f_zlc(and_mask,
						(float*)&tptr[0].t1.v0,
						(float*)&tptr[1].t1.v0,
						(float*)&tptr[2].t1.v0,
						(float*)&tptr[3].t1.v0);
	      
	      const mic_f v1_t1 = gather_4f_zlc(and_mask,
						(float*)&tptr[0].t1.v1,
						(float*)&tptr[1].t1.v1,
						(float*)&tptr[2].t1.v1,
						(float*)&tptr[3].t1.v1);
	      
	      const mic_f v2_t1 = gather_4f_zlc(and_mask,
						(float*)&tptr[0].t1.v2,
						(float*)&tptr[1].t1.v2,
						(float*)&tptr[2].t1.v2,
						(float*)&tptr[3].t1.v2);

	      const mic_f v0 = v0_t0 * one_time + time * v0_t1;
	      const mic_f v1 = v1_t0 * one_time + time * v1_t1;
	      const mic_f v2 = v2_t0 * one_time + time * v2_t1;

	      const mic_f e1 = v1 - v0;
	      const mic_f e2 = v0 - v2;	     
	      const mic_f normal = lcross_zxy(e1,e2);
	      const mic_f org = v0 - org_xyz;
	      const mic_f odzxy = msubr231(org * swizzle(dir_xyz,_MM_SWIZ_REG_DACB), dir_xyz, swizzle(org,_MM_SWIZ_REG_DACB));
	      const mic_f den = ldot3_zxy(dir_xyz,normal);	      
	      const mic_f rcp_den = rcp(den);
	      const mic_f uu = ldot3_zxy(e2,odzxy); 
	      const mic_f vv = ldot3_zxy(e1,odzxy); 
	      const mic_f u = uu * rcp_den;
	      const mic_f v = vv * rcp_den;

#if defined(__BACKFACE_CULLING__)
	      const mic_m m_init = (mic_m)0x1111 & (den > zero);
#else
	      const mic_m m_init = 0x1111;
#endif

	      const mic_m valid_u = ge((mic_m)m_init,u,zero);
	      const mic_m valid_v = ge(valid_u,v,zero);
	      const mic_m m_aperture = le(valid_v,u+v,mic_f::one()); 

	      const mic_f nom = ldot3_zxy(org,normal);
	      const mic_f t = rcp_den*nom;
	      if (unlikely(none(m_aperture))) continue;

	      mic_m m_final  = lt(lt(m_aperture,min_dist_xyz,t),t,max_dist_xyz);

#if defined(__USE_RAY_MASK__)
	      const mic_i rayMask(ray16.mask[rayIndex]);
	      const mic_i triMask = swDDDD(gather16i_4i_align(&tptr[0].t0.v2,&tptr[1].t0.v2,&tptr[2].t0.v2,&tptr[3].t0.v2));
	      const mic_m m_ray_mask = (rayMask & triMask) != mic_i::zero();
	      m_final &= m_ray_mask;	      
#endif

	      if (unlikely(any(m_final)))
		{
		  terminated |= mic_m::shift1[rayIndex];
		  break;
		}
	      //////////////////////////////////////////////////////////////////////////////////////////////////

	    }


	  if (unlikely(all(toMask(terminated)))) break;
	}


      store16i(m_valid & toMask(terminated),&ray16.geomID,0);
    }
    void BVH4mbIntersector16Hybrid<LeafIntersector>::intersect(int16* valid_i, BVH4mb* bvh, Ray16& ray16)
    {
      /* near and node stack */
      __aligned(64) float16   stack_dist[3*BVH4i::maxDepth+1];
      __aligned(64) NodeRef stack_node[3*BVH4i::maxDepth+1];
      __aligned(64) NodeRef stack_node_single[3*BVH4i::maxDepth+1]; 

      /* load ray */
      const bool16 valid0     = *(int16*)valid_i != int16(0);
      const Vec3f16 rdir16     = rcp_safe(ray16.dir);
      const Vec3f16 org_rdir16 = ray16.org * rdir16;
      float16 ray_tnear        = select(valid0,ray16.tnear,pos_inf);
      float16 ray_tfar         = select(valid0,ray16.tfar ,neg_inf);
      const float16 inf        = float16(pos_inf);
      
      /* allocate stack and push root node */
      stack_node[0] = BVH4i::invalidNode;
      stack_dist[0] = inf;
      stack_node[1] = bvh->root;
      stack_dist[1] = ray_tnear; 
      NodeRef* __restrict__ sptr_node = stack_node + 2;
      float16*   __restrict__ sptr_dist = stack_dist + 2;
      
      const Node      * __restrict__ nodes = (Node     *)bvh->nodePtr();
      const BVH4mb::Triangle01 * __restrict__ accel = (BVH4mb::Triangle01 *)bvh->triPtr();

      while (1) pop:
      {
        /* pop next node from stack */
        NodeRef curNode = *(sptr_node-1);
        float16 curDist   = *(sptr_dist-1);
        sptr_node--;
        sptr_dist--;
	const bool16 m_stackDist = ray_tfar > curDist;

	/* stack emppty ? */
        if (unlikely(curNode == BVH4i::invalidNode))  break;
        
        /* cull node if behind closest hit point */
        if (unlikely(none(m_stackDist))) continue;
        
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////

	/* switch to single ray mode */
        if (unlikely(countbits(m_stackDist) <= BVH4i::hybridSIMDUtilSwitchThreshold)) 
	  {
	    float   *__restrict__ stack_dist_single = (float*)sptr_dist;
	    store16f(stack_dist_single,inf);

	    /* traverse single ray */	  	  
	    long rayIndex = -1;
	    while((rayIndex = bitscan64(rayIndex,m_stackDist)) != BITSCAN_NO_BIT_SET_64) 
	      {	    
		stack_node_single[0] = BVH4i::invalidNode;
		stack_node_single[1] = curNode;
		size_t sindex = 2;

		const float16 org_xyz      = loadAOS4to16f(rayIndex,ray16.org.x,ray16.org.y,ray16.org.z);
		const float16 dir_xyz      = loadAOS4to16f(rayIndex,ray16.dir.x,ray16.dir.y,ray16.dir.z);
		const float16 rdir_xyz     = loadAOS4to16f(rayIndex,rdir16.x,rdir16.y,rdir16.z);
		const float16 org_rdir_xyz = org_xyz * rdir_xyz;
		const float16 min_dist_xyz = broadcast1to16f(&ray16.tnear[rayIndex]);
		float16       max_dist_xyz = broadcast1to16f(&ray16.tfar[rayIndex]);
		const float16 time         = broadcast1to16f(&ray16.time[rayIndex]);

		const unsigned int leaf_mask = BVH4I_LEAF_MASK;

		while (1) 
		  {
		    NodeRef curNode = stack_node_single[sindex-1];
		    sindex--;
            
		    traverse_single_intersect(curNode,
					      sindex,
					      rdir_xyz,
					      org_rdir_xyz,
					      min_dist_xyz,
					      max_dist_xyz,
					      time,
					      stack_node_single,
					      stack_dist_single,
					      nodes,
					      leaf_mask);	    

		    /* return if stack is empty */
		    if (unlikely(curNode == BVH4i::invalidNode)) break;


		    /* intersect one ray against four triangles */
		    const bool hit = LeafIntersector::intersect(curNode,
								rayIndex,
								dir_xyz,
								org_xyz,
								min_dist_xyz,
								max_dist_xyz,
								ray16,
								accel,
								(Scene*)bvh->geometry);
		    
		    if (hit)
		      compactStack(stack_node_single,stack_dist_single,sindex,max_dist_xyz);

		  }	  
	      }
	    ray_tfar = select(valid0,ray16.tfar ,neg_inf);
	    continue;
	  }

	///////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////

	const unsigned int leaf_mask = BVH4I_LEAF_MASK;

	const float16 time     = ray16.time;
	const float16 one_time = (float16::one() - time);

        while (1)
        {
          /* test if this is a leaf node */
          if (unlikely(curNode.isLeaf(leaf_mask))) break;
          
          STAT3(normal.trav_nodes,1,popcnt(ray_tfar > curDist),16);
          const Node* __restrict__ const node = curNode.node(nodes);
          
          const BVH4mb::Node* __restrict__ const nodeMB = (BVH4mb::Node*)node;

          /* pop of next node */
          sptr_node--;
          sptr_dist--;
          curNode = *sptr_node; 
          curDist = *sptr_dist;
          
	  prefetch<PFHINT_L1>((char*)node + 0*64); 
	  prefetch<PFHINT_L1>((char*)node + 1*64); 
	  prefetch<PFHINT_L1>((char*)node + 2*64); 
	  prefetch<PFHINT_L1>((char*)node + 3*64); 

#pragma unroll(4)
          for (unsigned int i=0; i<4; i++)
          {
	    const NodeRef child = node->lower[i].child;

	    const float16 lower_x =  one_time * nodeMB->lower[i].x + time * nodeMB->lower_t1[i].x;
	    const float16 lower_y =  one_time * nodeMB->lower[i].y + time * nodeMB->lower_t1[i].y;
	    const float16 lower_z =  one_time * nodeMB->lower[i].z + time * nodeMB->lower_t1[i].z;
	    const float16 upper_x =  one_time * nodeMB->upper[i].x + time * nodeMB->upper_t1[i].x;
	    const float16 upper_y =  one_time * nodeMB->upper[i].y + time * nodeMB->upper_t1[i].y;
	    const float16 upper_z =  one_time * nodeMB->upper[i].z + time * nodeMB->upper_t1[i].z;

	    if (unlikely(i >=2 && child == BVH4i::invalidNode)) break;

            const float16 lclipMinX = msub(lower_x,rdir16.x,org_rdir16.x);
            const float16 lclipMinY = msub(lower_y,rdir16.y,org_rdir16.y);
            const float16 lclipMinZ = msub(lower_z,rdir16.z,org_rdir16.z);
            const float16 lclipMaxX = msub(upper_x,rdir16.x,org_rdir16.x);
            const float16 lclipMaxY = msub(upper_y,rdir16.y,org_rdir16.y);
            const float16 lclipMaxZ = msub(upper_z,rdir16.z,org_rdir16.z);
	    
            const float16 lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
            const float16 lfarP  = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
            const bool16 lhit   = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);   
	    const float16 childDist = select(lhit,lnearP,inf);
            const bool16 m_child_dist = childDist < curDist;
            /* if we hit the child we choose to continue with that child if it 
               is closer than the current next child, or we push it onto the stack */
            if (likely(any(lhit)))
            {
              sptr_node++;
              sptr_dist++;
              
              /* push cur node onto stack and continue with hit child */
              if (any(m_child_dist))
              {
                *(sptr_node-1) = curNode;
                *(sptr_dist-1) = curDist; 
                curDist = childDist;
                curNode = child;
              }              
              /* push hit child onto stack*/
              else 
		{
		  *(sptr_node-1) = child;
		  *(sptr_dist-1) = childDist; 
		}
              assert(sptr_node - stack_node < BVH4i::maxDepth);
            }	      
          }
#if SWITCH_ON_DOWN_TRAVERSAL == 1
	  const bool16 curUtil = ray_tfar > curDist;
	  if (unlikely(countbits(curUtil) <= BVH4i::hybridSIMDUtilSwitchThreshold))
	    {
	      *sptr_node++ = curNode;
	      *sptr_dist++ = curDist; 
	      goto pop;
	    }
#endif

        }
        
        /* return if stack is empty */
        if (unlikely(curNode == BVH4i::invalidNode)) break;
        

        /* intersect leaf */
        const bool16 m_valid_leaf = ray_tfar > curDist;
        STAT3(normal.trav_leaves,1,popcnt(m_valid_leaf),16);

	LeafIntersector::intersect16(curNode,
				     m_valid_leaf,
				     ray16.dir,
				     ray16.org,
				     ray16,
				     accel,
				     (Scene*)bvh->geometry);


        ray_tfar = select(m_valid_leaf,ray16.tfar,ray_tfar);
      }
Example #13
0
void BVH4iIntersector1::occluded(BVH4i* bvh, Ray& ray)
{
    /* near and node stack */
    __aligned(64) NodeRef stack_node[3*BVH4i::maxDepth+1];

    /* setup */
    const mic3f rdir16      = rcp_safe(mic3f(ray.dir.x,ray.dir.y,ray.dir.z));
    const mic_f inf         = mic_f(pos_inf);
    const mic_f zero        = mic_f::zero();

    const Node      * __restrict__ nodes = (Node     *)bvh->nodePtr();
    const Triangle1 * __restrict__ accel = (Triangle1*)bvh->triPtr();

    stack_node[0] = BVH4i::invalidNode;
    stack_node[1] = bvh->root;
    size_t sindex = 2;

    const mic_f org_xyz      = loadAOS4to16f(ray.org.x,ray.org.y,ray.org.z);
    const mic_f dir_xyz      = loadAOS4to16f(ray.dir.x,ray.dir.y,ray.dir.z);
    const mic_f rdir_xyz     = loadAOS4to16f(rdir16.x[0],rdir16.y[0],rdir16.z[0]);
    const mic_f org_rdir_xyz = org_xyz * rdir_xyz;
    const mic_f min_dist_xyz = broadcast1to16f(&ray.tnear);
    const mic_f max_dist_xyz = broadcast1to16f(&ray.tfar);

    const unsigned int leaf_mask = BVH4I_LEAF_MASK;

    while (1)
    {
        NodeRef curNode = stack_node[sindex-1];
        sindex--;

        while (1)
        {
            /* test if this is a leaf node */
            if (unlikely(curNode.isLeaf(leaf_mask))) break;

            const Node* __restrict__ const node = curNode.node(nodes);
            const float* __restrict const plower = (float*)node->lower;
            const float* __restrict const pupper = (float*)node->upper;

            prefetch<PFHINT_L1>((char*)node + 0);
            prefetch<PFHINT_L1>((char*)node + 64);

            /* intersect single ray with 4 bounding boxes */
            const mic_f tLowerXYZ = load16f(plower) * rdir_xyz - org_rdir_xyz;
            const mic_f tUpperXYZ = load16f(pupper) * rdir_xyz - org_rdir_xyz;
            const mic_f tLower = mask_min(0x7777,min_dist_xyz,tLowerXYZ,tUpperXYZ);
            const mic_f tUpper = mask_max(0x7777,max_dist_xyz,tLowerXYZ,tUpperXYZ);

            sindex--;
            curNode = stack_node[sindex];

            const Node* __restrict__ const next = curNode.node(nodes);
            prefetch<PFHINT_L2>((char*)next + 0);
            prefetch<PFHINT_L2>((char*)next + 64);

            const mic_f tNear = vreduce_max4(tLower);
            const mic_f tFar  = vreduce_min4(tUpper);
            const mic_m hitm = le(0x8888,tNear,tFar);
            const mic_f tNear_pos = select(hitm,tNear,inf);


            /* if no child is hit, continue with early popped child */
            if (unlikely(none(hitm))) continue;
            sindex++;

            const unsigned long hiti = toInt(hitm);
            const unsigned long pos_first = bitscan64(hiti);
            const unsigned long num_hitm = countbits(hiti);

            /* if a single child is hit, continue with that child */
            curNode = ((unsigned int *)plower)[pos_first];
            if (likely(num_hitm == 1)) continue;

            /* if two children are hit, push in correct order */
            const unsigned long pos_second = bitscan64(pos_first,hiti);
            if (likely(num_hitm == 2))
            {
                const unsigned int dist_first  = ((unsigned int*)&tNear)[pos_first];
                const unsigned int dist_second = ((unsigned int*)&tNear)[pos_second];
                const unsigned int node_first  = curNode;
                const unsigned int node_second = ((unsigned int*)plower)[pos_second];

                if (dist_first <= dist_second)
                {
                    stack_node[sindex] = node_second;
                    sindex++;
                    assert(sindex < 3*BVH4i::maxDepth+1);
                    continue;
                }
                else
                {
                    stack_node[sindex] = curNode;
                    curNode = node_second;
                    sindex++;
                    assert(sindex < 3*BVH4i::maxDepth+1);
                    continue;
                }
            }

            /* continue with closest child and push all others */
            const mic_f min_dist = set_min_lanes(tNear_pos);
            const unsigned old_sindex = sindex;
            sindex += countbits(hiti) - 1;
            assert(sindex < 3*BVH4i::maxDepth+1);

            const mic_m closest_child = eq(hitm,min_dist,tNear);
            const unsigned long closest_child_pos = bitscan64(closest_child);
            const mic_m m_pos = andn(hitm,andn(closest_child,(mic_m)((unsigned int)closest_child - 1)));
            const mic_i plower_node = load16i((int*)plower);
            curNode = ((unsigned int*)plower)[closest_child_pos];
            compactustore16i(m_pos,&stack_node[old_sindex],plower_node);
        }



        /* return if stack is empty */
        if (unlikely(curNode == BVH4i::invalidNode)) break;


        /* intersect one ray against four triangles */

        //////////////////////////////////////////////////////////////////////////////////////////////////

        const Triangle1* tptr  = (Triangle1*) curNode.leaf(accel);
        prefetch<PFHINT_L1>(tptr + 3);
        prefetch<PFHINT_L1>(tptr + 2);
        prefetch<PFHINT_L1>(tptr + 1);
        prefetch<PFHINT_L1>(tptr + 0);

        const mic_i and_mask = broadcast4to16i(zlc4);

        const mic_f v0 = gather_4f_zlc(and_mask,
                                       (float*)&tptr[0].v0,
                                       (float*)&tptr[1].v0,
                                       (float*)&tptr[2].v0,
                                       (float*)&tptr[3].v0);

        const mic_f v1 = gather_4f_zlc(and_mask,
                                       (float*)&tptr[0].v1,
                                       (float*)&tptr[1].v1,
                                       (float*)&tptr[2].v1,
                                       (float*)&tptr[3].v1);

        const mic_f v2 = gather_4f_zlc(and_mask,
                                       (float*)&tptr[0].v2,
                                       (float*)&tptr[1].v2,
                                       (float*)&tptr[2].v2,
                                       (float*)&tptr[3].v2);

        const mic_f e1 = v1 - v0;
        const mic_f e2 = v0 - v2;
        const mic_f normal = lcross_zxy(e1,e2);
        const mic_f org = v0 - org_xyz;
        const mic_f odzxy = msubr231(org * swizzle(dir_xyz,_MM_SWIZ_REG_DACB), dir_xyz, swizzle(org,_MM_SWIZ_REG_DACB));
        const mic_f den = ldot3_zxy(dir_xyz,normal);
        const mic_f rcp_den = rcp(den);
        const mic_f uu = ldot3_zxy(e2,odzxy);
        const mic_f vv = ldot3_zxy(e1,odzxy);
        const mic_f u = uu * rcp_den;
        const mic_f v = vv * rcp_den;

#if defined(__BACKFACE_CULLING__)
        const mic_m m_init = (mic_m)0x1111 & (den > zero);
#else
        const mic_m m_init = 0x1111;
#endif
        const mic_m valid_u = ge(m_init,u,zero);
        const mic_m valid_v = ge(valid_u,v,zero);
        const mic_m m_aperture = le(valid_v,u+v,mic_f::one());

        const mic_f nom = ldot3_zxy(org,normal);
        const mic_f t = rcp_den*nom;

        if (unlikely(none(m_aperture))) continue;

        mic_m m_final  = lt(lt(m_aperture,min_dist_xyz,t),t,max_dist_xyz);

#if defined(__USE_RAY_MASK__)
        const mic_i rayMask(ray.mask);
        const mic_i triMask = swDDDD(gather16i_4i_align(&tptr[0].v2,&tptr[1].v2,&tptr[2].v2,&tptr[3].v2));
        const mic_m m_ray_mask = (rayMask & triMask) != mic_i::zero();
        m_final &= m_ray_mask;
#endif

#if defined(__INTERSECTION_FILTER__)

        /* did the ray hit one of the four triangles? */
        while (any(m_final))
        {
            const mic_f temp_t  = select(m_final,t,max_dist_xyz);
            const mic_f min_dist = vreduce_min(temp_t);
            const mic_m m_dist = eq(min_dist,temp_t);
            const size_t vecIndex = bitscan(toInt(m_dist));
            const size_t triIndex = vecIndex >> 2;
            const Triangle1  *__restrict__ tri_ptr = tptr + triIndex;
            const mic_m m_tri = m_dist^(m_dist & (mic_m)((unsigned int)m_dist - 1));
            const mic_f gnormalx = mic_f(tri_ptr->Ng.x);
            const mic_f gnormaly = mic_f(tri_ptr->Ng.y);
            const mic_f gnormalz = mic_f(tri_ptr->Ng.z);
            const int geomID = tri_ptr->geomID();
            const int primID = tri_ptr->primID();
            Geometry* geom = ((Scene*)bvh->geometry)->get(geomID);

            if (likely(!geom->hasOcclusionFilter1())) break;

            if (runOcclusionFilter1(geom,ray,u,v,min_dist,gnormalx,gnormaly,gnormalz,m_tri,geomID,primID))
                break;

            m_final ^= m_tri; /* clear bit */
        }
#endif

        if (unlikely(any(m_final)))
        {
            ray.geomID = 0;
            return;
        }
        //////////////////////////////////////////////////////////////////////////////////////////////////

    }
}
Example #14
0
int main(int argc, char *argv[]) {
    int c, index;
    double dist = 0.0;
    int finish = -1, pace = -1;
    double finishd = -1.0, paced = -1.0;
    int mode = MODE_NONE;

    while ((c = getopt_long(argc, argv, "hd:f:p:", options, &index)) != -1) {
        switch (c) {
            case 'h':
                print_help();
                return 0;

            case 'd':
                if (strcmp("h", optarg) == 0)
                    dist = DIST_HALF;
                else if (strcmp("f", optarg) == 0)
                    dist = DIST_FULL;
                else
                    dist = atof(optarg);
                break;

            case 'f':
                finish = str_to_time(optarg);
                mode = mode | MODE_FTOP;
                break;

            case 'p':
                pace = str_to_time(optarg);
                mode = mode | MODE_PTOF;
                break;

            default:
                break;
        }
    }

    /* main proc */
    /* more than 1 mode are on */
    if (1 < countbits(mode))
        exit_with_error("Selected more than 1 mode.");

    /* calc and prepare result */
    char finish_str[128], pace_str[128];
    if (mode == MODE_FTOP) {
        paced = (double)finish / dist;
        dtime_to_str(paced, pace_str);
        time_to_str(finish, finish_str);
    } else if (mode == MODE_PTOF) {
        finishd = (double)pace * dist;
        time_to_str(pace, pace_str);
        dtime_to_str(finishd, finish_str);
    }

    /* disp results */
    cout << "Results:" << endl;
    cout << "  mode        " << mode_str[mode] << endl;
    cout << "  dist        " << dist << " km" << endl;
    cout << "  finish-time " << finish_str << endl;
    cout << "  pace-time   " << pace_str << " /km" << endl;

    return 0;
}
    void BVH4iIntersector16Hybrid<LeafIntersector,ENABLE_COMPRESSED_BVH4I_NODES>::intersect(mic_i* valid_i, BVH4i* bvh, Ray16& ray16)
    {
      /* near and node stack */
      __aligned(64) mic_f   stack_dist[3*BVH4i::maxDepth+1];
      __aligned(64) NodeRef stack_node[3*BVH4i::maxDepth+1];
      __aligned(64) NodeRef stack_node_single[3*BVH4i::maxDepth+1]; 

      /* load ray */
      const mic_m valid0     = *(mic_i*)valid_i != mic_i(0);
      const mic3f rdir16     = rcp_safe(ray16.dir);
      const mic3f org_rdir16 = ray16.org * rdir16;
      mic_f ray_tnear        = select(valid0,ray16.tnear,pos_inf);
      mic_f ray_tfar         = select(valid0,ray16.tfar ,neg_inf);
      const mic_f inf        = mic_f(pos_inf);
      
      /* allocate stack and push root node */
      stack_node[0] = BVH4i::invalidNode;
      stack_dist[0] = inf;
      stack_node[1] = bvh->root;
      stack_dist[1] = ray_tnear; 
      NodeRef* __restrict__ sptr_node = stack_node + 2;
      mic_f*   __restrict__ sptr_dist = stack_dist + 2;
      
      const Node      * __restrict__ nodes = (Node     *)bvh->nodePtr();
      const Triangle1 * __restrict__ accel = (Triangle1*)bvh->triPtr();

      while (1) pop:
      {
        /* pop next node from stack */
        NodeRef curNode = *(sptr_node-1);
        mic_f curDist   = *(sptr_dist-1);
        sptr_node--;
        sptr_dist--;
	const mic_m m_stackDist = ray_tfar > curDist;

	/* stack emppty ? */
        if (unlikely(curNode == BVH4i::invalidNode))  break;
        
        /* cull node if behind closest hit point */
        if (unlikely(none(m_stackDist))) continue;
        
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////

	/* switch to single ray mode */
        if (unlikely(countbits(m_stackDist) <= BVH4i::hybridSIMDUtilSwitchThreshold)) 
	  {
	    float   *__restrict__ stack_dist_single = (float*)sptr_dist;
	    store16f(stack_dist_single,inf);

	    /* traverse single ray */	  	  
	    long rayIndex = -1;
	    while((rayIndex = bitscan64(rayIndex,m_stackDist)) != BITSCAN_NO_BIT_SET_64) 
	      {	    
		stack_node_single[0] = BVH4i::invalidNode;
		stack_node_single[1] = curNode;
		size_t sindex = 2;

		const mic_f org_xyz      = loadAOS4to16f(rayIndex,ray16.org.x,ray16.org.y,ray16.org.z);
		const mic_f dir_xyz      = loadAOS4to16f(rayIndex,ray16.dir.x,ray16.dir.y,ray16.dir.z);
		const mic_f rdir_xyz     = loadAOS4to16f(rayIndex,rdir16.x,rdir16.y,rdir16.z);
		const mic_f org_rdir_xyz = org_xyz * rdir_xyz;
		const mic_f min_dist_xyz = broadcast1to16f(&ray16.tnear[rayIndex]);
		mic_f       max_dist_xyz = broadcast1to16f(&ray16.tfar[rayIndex]);

		const unsigned int leaf_mask = BVH4I_LEAF_MASK;

		while (1) 
		  {
		    NodeRef curNode = stack_node_single[sindex-1];
		    sindex--;
            
		    traverse_single_intersect<ENABLE_COMPRESSED_BVH4I_NODES>(curNode,
									     sindex,
									     rdir_xyz,
									     org_rdir_xyz,
									     min_dist_xyz,
									     max_dist_xyz,
									     stack_node_single,
									     stack_dist_single,
									     nodes,
									     leaf_mask);
	    

		    /* return if stack is empty */
		    if (unlikely(curNode == BVH4i::invalidNode)) break;


		    /* intersect one ray against four triangles */

		    const bool hit = LeafIntersector::intersect(curNode,
								rayIndex,
								dir_xyz,
								org_xyz,
								min_dist_xyz,
								max_dist_xyz,
								ray16,
								accel,
								(Scene*)bvh->geometry);

		    if (hit)
		      compactStack(stack_node_single,stack_dist_single,sindex,max_dist_xyz);
		  }
	      }
	    ray_tfar = select(valid0,ray16.tfar ,neg_inf);
	    continue;
	  }

	///////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////

	const unsigned int leaf_mask = BVH4I_LEAF_MASK;

	const mic3f org = ray16.org;
	const mic3f dir = ray16.dir;

        while (1)
        {
          /* test if this is a leaf node */
          if (unlikely(curNode.isLeaf(leaf_mask))) break;
          
          STAT3(normal.trav_nodes,1,popcnt(ray_tfar > curDist),16);
          const Node* __restrict__ const node = curNode.node(nodes);

	  prefetch<PFHINT_L1>((mic_f*)node + 0); 
	  prefetch<PFHINT_L1>((mic_f*)node + 1); 
          
          /* pop of next node */
          sptr_node--;
          sptr_dist--;
          curNode = *sptr_node; 
          curDist = *sptr_dist;
          

#pragma unroll(4)
          for (unsigned int i=0; i<4; i++)
          {
	    BVH4i::NodeRef child;
	    mic_f lclipMinX,lclipMinY,lclipMinZ;
	    mic_f lclipMaxX,lclipMaxY,lclipMaxZ;

	    if (!ENABLE_COMPRESSED_BVH4I_NODES)
	      {
		child = node->lower[i].child;

		lclipMinX = msub(node->lower[i].x,rdir16.x,org_rdir16.x);
		lclipMinY = msub(node->lower[i].y,rdir16.y,org_rdir16.y);
		lclipMinZ = msub(node->lower[i].z,rdir16.z,org_rdir16.z);
		lclipMaxX = msub(node->upper[i].x,rdir16.x,org_rdir16.x);
		lclipMaxY = msub(node->upper[i].y,rdir16.y,org_rdir16.y);
		lclipMaxZ = msub(node->upper[i].z,rdir16.z,org_rdir16.z);
	      }
	    else
	      {
		BVH4i::QuantizedNode* __restrict__ const compressed_node = (BVH4i::QuantizedNode*)node;
		child = compressed_node->child(i);

		const mic_f startXYZ = compressed_node->decompress_startXYZ();
		const mic_f diffXYZ  = compressed_node->decompress_diffXYZ();
		const mic_f clower   = compressed_node->decompress_lowerXYZ(startXYZ,diffXYZ);
		const mic_f cupper   = compressed_node->decompress_upperXYZ(startXYZ,diffXYZ);

		lclipMinX = msub(mic_f(clower[4*i+0]),rdir16.x,org_rdir16.x);
		lclipMinY = msub(mic_f(clower[4*i+1]),rdir16.y,org_rdir16.y);
		lclipMinZ = msub(mic_f(clower[4*i+2]),rdir16.z,org_rdir16.z);
		lclipMaxX = msub(mic_f(cupper[4*i+0]),rdir16.x,org_rdir16.x);
		lclipMaxY = msub(mic_f(cupper[4*i+1]),rdir16.y,org_rdir16.y);
		lclipMaxZ = msub(mic_f(cupper[4*i+2]),rdir16.z,org_rdir16.z);		
	      }
	    
	    if (unlikely(i >=2 && child == BVH4i::invalidNode)) break;

            const mic_f lnearP = max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
            const mic_f lfarP  = min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
            const mic_m lhit   = max(lnearP,ray_tnear) <= min(lfarP,ray_tfar);   
	    const mic_f childDist = select(lhit,lnearP,inf);
            const mic_m m_child_dist = childDist < curDist;


            /* if we hit the child we choose to continue with that child if it 
               is closer than the current next child, or we push it onto the stack */
            if (likely(any(lhit)))
            {
              sptr_node++;
              sptr_dist++;

              /* push cur node onto stack and continue with hit child */
              if (any(m_child_dist))
              {
                *(sptr_node-1) = curNode;
                *(sptr_dist-1) = curDist; 
                curDist = childDist;
                curNode = child;
              }              
              /* push hit child onto stack*/
              else 
		{
		  *(sptr_node-1) = child;
		  *(sptr_dist-1) = childDist; 

		  const char* __restrict__ const pnode = (char*)child.node(nodes);             
		  prefetch<PFHINT_L2>(pnode + 0);
		  prefetch<PFHINT_L2>(pnode + 64);
		}
              assert(sptr_node - stack_node < BVH4i::maxDepth);
            }	      
          }
#if SWITCH_ON_DOWN_TRAVERSAL == 1
	  const mic_m curUtil = ray_tfar > curDist;
	  if (unlikely(countbits(curUtil) <= BVH4i::hybridSIMDUtilSwitchThreshold))
	    {
	      *sptr_node++ = curNode;
	      *sptr_dist++ = curDist; 
	      goto pop;
	    }
#endif
        }
        
        /* return if stack is empty */
        if (unlikely(curNode == BVH4i::invalidNode)) break;
        

        /* intersect leaf */
        const mic_m m_valid_leaf = ray_tfar > curDist;
        STAT3(normal.trav_leaves,1,popcnt(m_valid_leaf),16);

	LeafIntersector::intersect16(curNode,m_valid_leaf,dir,org,ray16,accel,(Scene*)bvh->geometry);

        ray_tfar = select(m_valid_leaf,ray16.tfar,ray_tfar);
      }
Example #16
0
static
void
check_bitmap(void)
{
	uint8_t bits[SFS_BLOCKSIZE], *found, *tofree, tmp;
	uint32_t alloccount=0, freecount=0, i, j;
	int bchanged;

	for (i=0; i<bitblocks; i++) {
		diskread(bits, SFS_MAP_LOCATION+i);
		swapbits(bits);
		found = bitmapdata + i*SFS_BLOCKSIZE;
		tofree = tofreedata + i*SFS_BLOCKSIZE;
		bchanged = 0;

		for (j=0; j<SFS_BLOCKSIZE; j++) {
			/* we shouldn't have blocks marked both ways */
			assert((found[j] & tofree[j])==0);

			if (bits[j]==found[j]) {
				continue;
			}

			if (bits[j]==(found[j] | tofree[j])) {
				bits[j] = found[j];
				bchanged = 1;
				continue;
			}

			/* free the ones we're freeing */
			bits[j] &= ~tofree[j];

			/* are we short any? */
			if ((bits[j] & found[j]) != found[j]) {
				tmp = found[j] & ~bits[j];
				alloccount += countbits(tmp);
				if (tmp != 0) {
					reportbits(i, j, tmp, "free");
				}
			}

			/* do we have any extra? */
			if ((bits[j] & found[j]) != bits[j]) {
				tmp = bits[j] & ~found[j];
				freecount += countbits(tmp);
				if (tmp != 0) {
					reportbits(i, j, tmp, "allocated");
				}
			}

			bits[j] = found[j];
			bchanged = 1;
		}

		if (bchanged) {
			swapbits(bits);
			diskwrite(bits, SFS_MAP_LOCATION+i);
		}
	}

	if (alloccount > 0) {
		warnx("%lu blocks erroneously shown free in bitmap (fixed)",
		      (unsigned long) alloccount);
		setbadness(EXIT_RECOV);
	}
	if (freecount > 0) {
		warnx("%lu blocks erroneously shown used in bitmap (fixed)",
		      (unsigned long) freecount);
		setbadness(EXIT_RECOV);
	}
}
int main(int argc,char *argv[]) {
    int argno, i;
    char *filebasename;
#define MAXPATH 80
    char filename[MAXPATH];
    unsigned int tonegens_used;  // bitmap of tone generators used
    unsigned int num_tonegens_used;  // count of tone generators used


    printf("MIDITONES_SCROLL V%s, (C) 2011 Len Shustek\n", VERSION);
    printf("See the source code for license information.\n\n");
    if (argc == 1) { /* no arguments */
        SayUsage(argv[0]);
        return 1;
    }

    /* process options */

    argno = HandleOptions(argc,argv);
    filebasename = argv[argno];

    /* Open the input file */

    strlcpy(filename, filebasename, MAXPATH);
    strlcat(filename, ".bin", MAXPATH);
    infile = fopen(filename, "rb");
    if (!infile) {
        fprintf(stderr, "Unable to open input file %s", filename);
        return 1;
    }

    /* Open the output file */

    if (codeoutput) {
        strlcpy(filename, filebasename, MAXPATH);
        strlcat(filename, ".c", MAXPATH);
        outfile = fopen(filename, "w");
        if (!infile) {
            fprintf(stderr, "Unable to open output file %s", filename);
            return 1;
        }
    }
    else outfile = stdout;

    /* Read the whole input file into memory */

    fseek(infile, 0, SEEK_END); /* find file size */
    buflen = ftell(infile);
    fseek(infile, 0, SEEK_SET);

    buffer = (unsigned char *) malloc (buflen+1);
    if (!buffer) {
        fprintf(stderr, "Unable to allocate %ld bytes for the file", buflen);
        return 1;
    }

    fread(buffer, buflen, 1, infile);
    fclose(infile);
    printf("Processing %s.bin, %ld bytes\n", filebasename, buflen);
    if (codeoutput) {
        time_t rawtime;
                    time (&rawtime);
        fprintf(outfile, "// Playtune bytestream for file \"%s.bin\"", filebasename);
        fprintf(outfile, " created by MIDITONES_SCROLL V%s on %s\n", VERSION, asctime(localtime(&rawtime)));

        fprintf(outfile, "byte PROGMEM score [] = {\n");
    }

    /* Process the commmands sequentially */

    fprintf(outfile, "\n");
    if (codeoutput) fprintf(outfile, "//");
    fprintf(outfile, "duration    time   ");
    for (i=0; i< MAX_TONEGENS; ++i) fprintf(outfile, " gen%-2d", i);
    fprintf(outfile,"        bytestream code\n\n");

    for (gen=0; gen<MAX_TONEGENS; ++gen) gen_status[gen] = SILENT;
    tonegens_used = 0;
    lastbufptr = buffer;

    for (bufptr = buffer; bufptr < buffer+buflen; ++bufptr) {
        cmd = *bufptr;
        if (cmd < 0x80) { //*****  delay
            delay = ((unsigned int)cmd << 8) + *++bufptr;
            print_status(); // tone generator status now
            timenow += delay;  // advance time
        }
        else {
            gen = cmd & 0x0f;
            if (gen >= MAX_TONEGENS) file_error ("too many tone generators used", bufptr);
            cmd = cmd & 0xf0;
            if (cmd == 0x90) {//******  note on
                gen_status[gen] = *++bufptr; // note number
                if (gen_status[gen] > 127) file_error ("note higher than 127", bufptr);
                tonegens_used |= 1<<gen;  // record that we used this generator at least once
            }
            else if (cmd == 0x80) { //******  note off
                if (gen_status[gen] == SILENT) file_error ("tone generator not on", bufptr);
                gen_status[gen] = SILENT;
            }
            else if (cmd == 0xf0) { // end of score
            }
            else file_error ("unknownn command", bufptr);
        }
    }

    delay = 0;
    --bufptr;
    if (codeoutput) --bufptr;  //don't do 0xF0 because don't want the trailing comma
    print_status();  // print final status

    if (codeoutput) {
        fprintf(outfile, " 0xf0};\n");
        num_tonegens_used = countbits(tonegens_used);
        fprintf(outfile, "// This score contains %ld bytes, and %d tone generator%s used.\n",
            buflen, num_tonegens_used, num_tonegens_used == 1 ? " is" : "s are");
    }
    printf ("Done.\n");

}