Example #1
0
void Net::Classify(const mxArray *mx_data, mxArray *&mx_pred) {  

  //mexPrintMsg("Start classification...");
  ReadData(mx_data);
  size_t test_num = data_.size1();
  labels_.resize(test_num, layers_.back()->length_);  
  labels_.reorder(true, false);
  size_t numbatches = DIVUP(test_num, params_.batchsize_);
  size_t offset = 0;
  Mat data_batch, pred_batch;    
  for (size_t batch = 0; batch < numbatches; ++batch) {
    size_t batchsize = MIN(test_num - offset, params_.batchsize_);
    data_batch.resize(batchsize, data_.size2());
    SubSet(data_, data_batch, offset, true);    
    InitActiv(data_batch);
    Forward(pred_batch, 0);            
    SubSet(labels_, pred_batch, offset, false);    
    offset += batchsize;
    if (params_.verbose_ == 2) {
      mexPrintInt("Batch", (int) batch + 1);
    }      
  }
  labels_.reorder(kMatlabOrder, true);
  mx_pred = mexSetMatrix(labels_);  
  //mexPrintMsg("Classification finished");
}
Example #2
0
int main(int argc, char **argv) {
  int pf_sock = 0;
  ssize_t ret_size = 0;

  int error;
  struct sadb_msg msg_hdr;

  memset(&msg_hdr, 0, sizeof(msg_hdr));

  msg_hdr.sadb_msg_version = PF_KEY_V2;
  msg_hdr.sadb_msg_type = SADB_FLUSH;
  msg_hdr.sadb_msg_errno = 0;
  msg_hdr.sadb_msg_satype = SADB_SATYPE_UNSPEC;
  msg_hdr.sadb_msg_len = DIVUP(sizeof(msg_hdr), IPSEC_PFKEYv2_ALIGN);
  msg_hdr.sadb_msg_seq = 0;
  msg_hdr.sadb_msg_pid = getpid();

  pf_sock = socket(PF_KEY, SOCK_RAW, PF_KEY_V2);
  if (pf_sock < 0) {
    perror("pf_sock");
    exit(1);
  }

  errno = 0;
  error = write(pf_sock, (char*)&msg_hdr, sizeof(msg_hdr));
  if(error<0){
    fprintf(stderr, "pfkey_send_flush: send error with %s\n", strerror(errno));
    exit(1);
  }

  fprintf(stderr, "succeeded\n");

  return 0;
  
}
Example #3
0
/*-------------------------------------------------------------------------*/
void AzPrepText2::gen_X_bow(const AzIntArr &ia_tokno, int dic_sz, 
                       int pch_sz, int pch_step, int padding,  
                       bool do_skip_stopunk, 
                       /*---  output  ---*/
                       AzSmat *m_feat, 
                       AzIntArr *ia_pos) const /* patch position: may be NULL */
{
  const char *eyec = "AzPrepText2::gen_X_bow"; 
  AzX::throw_if_null(m_feat, eyec, "m_feat"); 
  int t_num; 
  const int *tokno = ia_tokno.point(&t_num); 
    
  int pch_num = DIVUP(t_num+padding*2-pch_sz, pch_step) + 1; 
  m_feat->reform(dic_sz, pch_num);   
  if (ia_pos != NULL) ia_pos->reset(); 
  
  int col = 0; 
  int tx0 = -padding; 
  for (int pch_no = 0; pch_no < pch_num; ++pch_no) {
    int tx1 = tx0 + pch_sz; 
    
    AzIntArr ia_rows; 
    for (int tx = MAX(0, tx0); tx < MIN(t_num, tx1); ++tx) {
      if (tokno[tx] >= 0) ia_rows.put(tokno[tx]); 
    }
    if (!do_skip_stopunk || ia_rows.size() > 0) {
      ia_rows.unique();  /* sorting too */
      m_feat->col_u(col)->load(&ia_rows, 1); 
      if (ia_pos != NULL) ia_pos->put(tx0); 
      ++col; 
    }

    if (tx1 >= t_num+padding) break; 
    
    int dist = 1; 
    if (do_skip_stopunk) {
      /*---  to avoid repeating the same bow  ---*/
      int tx; 
      for (tx = tx0; tx < t_num; ++tx) if (tx >= 0 && tokno[tx] >= 0) break;    
      int dist0 = tx-tx0+1; /* to lose a word, we have to slide a window this much */

      tx = tx1; 
      for (tx = tx1; tx < t_num; ++tx) if (tx >= 0 && tokno[tx] >= 0) break; 
      int dist1 = tx-tx1+1; /* to get a new word, we have to slide a window this much */
      dist = MIN(dist0, dist1); 
    }
    tx0 += MAX(dist, pch_step); 
  }
  m_feat->resize(col);     
} 
Example #4
0
/*-------------------------------------------------------------------------*/
void AzPrepText2::gen_Y_ifeat(int top_num_each, int top_num_total, const AzSmat *m_feat, 
                              const AzIntArr &ia_tokno, const AzIntArr &ia_pos, 
                              int xpch_sz, int min_dist, int max_dist, 
                              bool do_nolr, 
                              int f_pch_sz, int f_pch_step, int f_padding, 
                              AzSmat *m_y, 
                              feat_info fi[2]) const
{
  const char *eyec = "AzPrepText2::gen_neigh_topfeat"; 
  AzX::throw_if_null(m_feat, eyec, "m_feat"); 
  AzX::throw_if_null(m_y, eyec, "m_y"); 
  int t_num; 
  const int *tokno = ia_tokno.point(&t_num); 

  int feat_sz = m_feat->rowNum(); 
  int f_pch_num = DIVUP(t_num+f_padding*2-f_pch_sz, f_pch_step) + 1; 
  if (m_feat->colNum() != f_pch_num) {
    AzBytArr s("#patch mismatch: Expcected: "); s << f_pch_num << " Actual: " << m_feat->colNum(); 
    AzX::throw_if(true, AzInputError, eyec, s.c_str()); 
  }
  
  if (do_nolr) m_y->reform(feat_sz,    ia_pos.size()); 
  else         m_y->reform(feat_sz*2,  ia_pos.size()); 
  for (int ix = 0; ix < ia_pos.size(); ++ix) {
    int xtx0 = ia_pos[ix]; 
    int xtx1 = xtx0 + xpch_sz; 
     
    AzIFarr ifa_ctx; 
    int offs = 0;     
    for (int tx = xtx0+min_dist; tx < xtx0; ++tx) {
      if (tx + f_pch_sz > xtx0) break; 
      set_ifeat(m_feat, top_num_each, (tx+f_padding)/f_pch_step, offs, &ifa_ctx, fi); 
    }
    
    if (!do_nolr) offs = feat_sz; 
    for (int tx = xtx1; tx < xtx1+max_dist; ++tx) {
      if (tx + f_pch_sz > xtx1+max_dist) break; 
      set_ifeat(m_feat, top_num_each, (tx+f_padding)/f_pch_step, offs, &ifa_ctx, fi); 
    }
    ifa_ctx.squeeze_Max(); 
    if (top_num_total > 0 && ifa_ctx.size() > top_num_total) {
      ifa_ctx.sort_Float(false); 
      ifa_ctx.cut(top_num_total);       
    }
    m_y->col_u(ix)->load(&ifa_ctx); 
  }
}                                   
__global__ void kAggShortRows2(const float* mat, float* matSum, const uint width, const uint height, Agg agg, UnaryOp uop, BinaryOp bop) {
    const uint shmemX = AGG_SHORT_ROWS_THREADS_X + 1;
    __shared__ float shmem[AGG_SHORT_ROWS_THREADS_Y*shmemX];
    const uint LOOPS_X = DIVUP(width, AGG_SHORT_ROWS_THREADS_X);
    const uint tidx = hipThreadIdx_y * AGG_SHORT_ROWS_THREADS_X + hipThreadIdx_x;

    const uint bidx = hipBlockIdx_y * hipGridDim_x + hipBlockIdx_x;
    const uint blockRowIdx = bidx * AGG_SHORT_ROWS_LOOPS_Y * AGG_SHORT_ROWS_THREADS_Y;

    float* shmemWrite = shmem + MUL24(hipThreadIdx_y, shmemX) + hipThreadIdx_x;
    matSum += blockRowIdx + tidx;
//    shmem[MUL24(hipThreadIdx_y, shmemX) + hipThreadIdx_x] = 0;
    mat += width * blockRowIdx + MUL24(hipThreadIdx_y, width) + hipThreadIdx_x;

    bool doAgg = tidx < AGG_SHORT_ROWS_THREADS_Y;
    if(blockRowIdx < height) {
        for (uint y = 0; y < AGG_SHORT_ROWS_LOOPS_Y*AGG_SHORT_ROWS_THREADS_Y; y += AGG_SHORT_ROWS_THREADS_Y) {
            doAgg &= tidx + y + blockRowIdx < height;
            const bool heightIdxOK = hipThreadIdx_y + y + blockRowIdx < height;
            float accum = agg.getBaseValue();
            shmemWrite[0] = agg.getBaseValue();

            for(uint x = 0; x < LOOPS_X * AGG_SHORT_ROWS_THREADS_X; x+= AGG_SHORT_ROWS_THREADS_X) {
//                __syncthreads();
                if (heightIdxOK && x + hipThreadIdx_x < width) {
                    shmemWrite[0] = agg(uop(mat[x]), shmemWrite[0]);
                }
            }

            __syncthreads();
            if (doAgg) {
                float* shmemRead = shmem + MUL24(tidx, shmemX);

#pragma unroll
                for (uint i = 0; i < AGG_SHORT_ROWS_THREADS_X; i++) {
                    accum = agg(accum, shmemRead[0]);
                    shmemRead++;
                }

                matSum[0] = bop(matSum[0], accum);
                matSum += AGG_SHORT_ROWS_THREADS_Y;
            }
            __syncthreads();
            mat += width * AGG_SHORT_ROWS_THREADS_Y;
        }
    }
}
Example #6
0
void Net::Train(const mxArray *mx_data, const mxArray *mx_labels) {  

  //mexPrintMsg("Start training...");  
  ReadData(mx_data);
  ReadLabels(mx_labels);
  InitNorm();
  
  size_t train_num = data_.size1();
  size_t numbatches = DIVUP(train_num, params_.batchsize_);
  trainerrors_.resize(params_.epochs_, 2);
  trainerrors_.assign(0);
  for (size_t epoch = 0; epoch < params_.epochs_; ++epoch) {    
    if (params_.shuffle_) {
      Shuffle(data_, labels_);      
    }
    StartTimer();    
    size_t offset = 0;
    Mat data_batch, labels_batch, pred_batch;      
    for (size_t batch = 0; batch < numbatches; ++batch) {
      size_t batchsize = MIN(train_num - offset, params_.batchsize_);      
      UpdateWeights(epoch, false);
      data_batch.resize(batchsize, data_.size2());
      labels_batch.resize(batchsize, labels_.size2());      
      SubSet(data_, data_batch, offset, true);      
      SubSet(labels_, labels_batch, offset, true);
      ftype error1;      
      InitActiv(data_batch);
      Forward(pred_batch, 1);            
      InitDeriv(labels_batch, error1);
      trainerrors_(epoch, 0) += error1;
      Backward();
      UpdateWeights(epoch, true); 
      offset += batchsize;
      if (params_.verbose_ == 2) {
        mexPrintInt("Epoch", (int) epoch + 1);
        mexPrintInt("Batch", (int) batch + 1);
      }
    } // batch  
    MeasureTime("totaltime");
    if (params_.verbose_ == 1) {
      mexPrintInt("Epoch", (int) epoch + 1);
    }        
  } // epoch  
  trainerrors_ /= (ftype) numbatches;
  //mexPrintMsg("Training finished");
}
Example #7
0
/*-------------------------------------------------------------------------*/
void AzPrepText2::gen_X_seq(const AzIntArr &ia_tokno, int dic_sz, 
                       int pch_sz, int pch_step, int padding,  
                       bool do_allow_zero, bool do_skip_stopunk, 
                       /*---  output  ---*/
                       AzSmat *m_feat, 
                       AzIntArr *ia_pos) const /* patch position: may be NULL */
{
  const char *eyec = "AzPrepText2::gen_X_seq"; 
  AzX::throw_if_null(m_feat, eyec, "m_feat"); 
  AzX::no_support(do_skip_stopunk, eyec, "variable strides with Seq"); 
  int t_num; 
  const int *tokno = ia_tokno.point(&t_num); 
    
  int pch_num = DIVUP(t_num+padding*2-pch_sz, pch_step) + 1; 
  m_feat->reform(dic_sz*pch_sz, pch_num);   
  if (ia_pos != NULL) ia_pos->reset(); 

  int col = 0; 
  int tx0 = -padding; 
  for (int pch_no = 0; pch_no < pch_num; ++pch_no) {
    int tx1 = tx0 + pch_sz; 
    
    AzSmat m; 
    for (int tx = tx0; tx < tx1; ++tx) {
      AzSmat m0(dic_sz, 1); 
      if (tx >= 0 && tx < t_num && tokno[tx] >= 0) {
        AzIntArr ia_row; ia_row.put(tokno[tx]); 
        m0.col_u(0)->load(&ia_row, 1); 
      }
      if (tx == tx0) m.set(&m0); 
      else           m.rbind(&m0); 
    }
    if (do_allow_zero || !m.isZero()) {
      m_feat->col_u(col)->set(m.col(0)); 
      if (ia_pos != NULL) ia_pos->put(tx0); 
      ++col; 
    }

    if (tx1 >= t_num+padding) break; 
    tx0 += pch_step; 
  }
  m_feat->resize(col);   
}
Example #8
0
void MStringResize(MString_t *String, size_t Length)
{
	/* Calculate the new byte-count we 
	 * need to encompass with blocks */
	size_t DataLength = DIVUP(Length, MSTRING_BLOCK_SIZE) * MSTRING_BLOCK_SIZE;

	/* Expand and reset buffer */
	void *Data = dsalloc(DataLength);
	memset(Data, 0, DataLength);

	/* Copy old data over */
	memcpy(Data, String->Data, String->Length);

	/* Free the old buffer */
	dsfree(String->Data);

	/* Update string to new buffer */
	String->MaxLength = DataLength;
	String->Data = Data;
}
Example #9
0
__global__ void findMinMedian(
    float* minMedian,
    unsigned* minIdx,
    CParam<float> median,
    CParam<unsigned> idx)
{
    const int tid = threadIdx.x;

    __shared__ float s_minMedian[256];
    __shared__ unsigned s_minIdx[256];

    s_minMedian[tid] = FLT_MAX;
    s_minIdx[tid] = 0;
    __syncthreads();

    const int loop = DIVUP(median.dims[0], blockDim.x);

    for (int i = 0; i < loop; i++) {
        int j = i * blockDim.x + tid;
        if (j < median.dims[0] && median.ptr[j] < s_minMedian[tid]) {
            s_minMedian[tid] = median.ptr[j];
            s_minIdx[tid] = idx.ptr[j];
        }
        __syncthreads();
    }

    for (unsigned t = 128; t > 0; t >>= 1) {
        if (tid < t) {
            if (s_minMedian[tid + t] < s_minMedian[tid]) {
                s_minMedian[tid] = s_minMedian[tid + t];
                s_minIdx[tid]    = s_minIdx[tid + t];
            }
        }
        __syncthreads();
    }

    *minMedian = s_minMedian[0];
    *minIdx = s_minIdx[0];
}
Example #10
0
 void reset(const AzOut &out, const AzxD *input, AzxD *output) {
   if (p.pl_num > 0) {
     AzX::throw_if((input->get_dim() != 1), "AzpPoolingDflt::reset", 
                   "num_pooling is allowed only with 1D data"); 
     p.pl_sz = p.pl_step =  DIVUP(input->sz(0), p.pl_num);                    
     AzBytArr s("Given num_pooling="); s << p.pl_num; 
     s << ", setting pooling size and stride to " << p.pl_sz; 
     AzPrint::writeln(out, s);       
   }
   int minsz = input->get_min_size();   
   if (p.pl_sz > minsz) {
     p.pl_sz = minsz; 
     AzBytArr s("pooling unit size is too large: shrinking to "); s.cn(p.pl_sz); 
     AzPrint::writeln(out, s); 
   }  
   int padding = 0; 
   map.reset_for_pooling(input, p.pl_sz, p.pl_step, padding, p.do_pl_simple_grid, 
                         &pia2_out2inp, &pia2_inp2out, &pia_out2num);     
   innum = input->size(); 
   if (p.ptyp == AzpPoolingDflt_None) {
     AzX::throw_if((!map.no_change_in_shape()), "AzpPoolingDflt::reset", "input and output must be the same for no pooling"); 
   }
   map.output_region(output); 
 }
Example #11
0
int ipsec_sa_init(struct ipsec_sa *ipsp)
{
	int error = 0;
	char sa[SATOT_BUF];
	size_t sa_len;

#ifdef CONFIG_KLIPS_DEBUG
	char ipaddr_txt[ADDRTOA_BUF];
	char ipaddr2_txt[ADDRTOA_BUF];
#endif
#if defined (CONFIG_KLIPS_AUTH_HMAC_MD5) || \
	defined (CONFIG_KLIPS_AUTH_HMAC_SHA1)
	unsigned char kb[AHMD596_BLKLEN];
	int i;
#endif

	if (ipsp == NULL) {
		KLIPS_PRINT(debug_pfkey,
			    "ipsec_sa_init: "
			    "ipsp is NULL, fatal\n");
		SENDERR(EINVAL);
	}

	sa_len = KLIPS_SATOT(debug_pfkey, &ipsp->ips_said, 0, sa, sizeof(sa));

	KLIPS_PRINT(debug_pfkey,
		    "ipsec_sa_init: "
		    "(pfkey defined) called for SA:%s\n",
		    sa_len ? sa : " (error)");

	KLIPS_PRINT(debug_pfkey,
		    "ipsec_sa_init: "
		    "calling init routine of %s%s%s\n",
		    IPS_XFORM_NAME(ipsp));

	switch (ipsp->ips_said.proto) {
#ifdef CONFIG_KLIPS_IPIP
	case IPPROTO_IPIP: {
		ipsp->ips_xformfuncs = ipip_xform_funcs;
#ifdef CONFIG_KLIPS_DEBUG
		sin_addrtot(ipsp->ips_addr_s, 0, ipaddr_txt,
			    sizeof(ipaddr_txt));
		sin_addrtot(ipsp->ips_addr_d, 0, ipaddr2_txt,
			    sizeof(ipaddr2_txt));
		KLIPS_PRINT(debug_pfkey,
			    "ipsec_sa_init: "
			    "(pfkey defined) IPIP ipsec_sa set for %s->%s.\n",
			    ipaddr_txt,
			    ipaddr2_txt);
#endif
	}
	break;
#endif          /* !CONFIG_KLIPS_IPIP */

#ifdef CONFIG_KLIPS_AH
	case IPPROTO_AH:

		ipsp->ips_xformfuncs = ah_xform_funcs;

#ifdef CONFIG_KLIPS_OCF
		if (ipsec_ocf_sa_init(ipsp, ipsp->ips_authalg, 0))
			break;
#endif

#ifdef CONFIG_KLIPS_ALG
		error = ipsec_alg_auth_key_create(ipsp);
		if ((error < 0) && (error != -EPROTO))
			SENDERR(-error);

		if (error == -EPROTO) {
			/* perform manual key generation,
			   ignore this particular error */
			error = 0;
#endif              /* CONFIG_KLIPS_ALG */

		switch (ipsp->ips_authalg) {
# ifdef CONFIG_KLIPS_AUTH_HMAC_MD5
		case AH_MD5: {
			unsigned char *akp;
			unsigned int aks;
			MD5_CTX *ictx;
			MD5_CTX *octx;

			if (ipsp->ips_key_bits_a != (AHMD596_KLEN * 8)) {
				KLIPS_PRINT(debug_pfkey,
					    "ipsec_sa_init: "
					    "incorrect key size: %d bits -- must be %d bits\n" /*octets (bytes)\n"*/,
					    ipsp->ips_key_bits_a, AHMD596_KLEN *
					    8);
				SENDERR(EINVAL);
			}

#  if KLIPS_DIVULGE_HMAC_KEY
			KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose,
				    "ipsec_sa_init: "
				    "hmac md5-96 key is 0x%08x %08x %08x %08x\n",
				    ntohl(*(((__u32 *)ipsp->ips_key_a) + 0)),
				    ntohl(*(((__u32 *)ipsp->ips_key_a) + 1)),
				    ntohl(*(((__u32 *)ipsp->ips_key_a) + 2)),
				    ntohl(*(((__u32 *)ipsp->ips_key_a) + 3)));
#  endif                        /* KLIPS_DIVULGE_HMAC_KEY */

			ipsp->ips_auth_bits = AHMD596_ALEN * 8;

			/* save the pointer to the key material */
			akp = ipsp->ips_key_a;
			aks = ipsp->ips_key_a_size;

			KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose,
				    "ipsec_sa_init: "
				    "allocating %lu bytes for md5_ctx.\n",
				    (unsigned long) sizeof(struct md5_ctx));
			if ((ipsp->ips_key_a = (caddr_t)
					       kmalloc(sizeof(struct md5_ctx),
						       GFP_ATOMIC)) == NULL) {
				ipsp->ips_key_a = akp;
				SENDERR(ENOMEM);
			}
			ipsp->ips_key_a_size = sizeof(struct md5_ctx);

			for (i = 0; i < DIVUP(ipsp->ips_key_bits_a, 8); i++)
				kb[i] = akp[i] ^ HMAC_IPAD;
			for (; i < AHMD596_BLKLEN; i++)
				kb[i] = HMAC_IPAD;

			ictx = &(((struct md5_ctx*)(ipsp->ips_key_a))->ictx);
			osMD5Init(ictx);
			osMD5Update(ictx, kb, AHMD596_BLKLEN);

			for (i = 0; i < AHMD596_BLKLEN; i++)
				kb[i] ^= (HMAC_IPAD ^ HMAC_OPAD);

			octx = &(((struct md5_ctx*)(ipsp->ips_key_a))->octx);
			osMD5Init(octx);
			osMD5Update(octx, kb, AHMD596_BLKLEN);

#  if KLIPS_DIVULGE_HMAC_KEY
			KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose,
				    "ipsec_sa_init: "
				    "MD5 ictx=0x%08x %08x %08x %08x octx=0x%08x %08x %08x %08x\n",
				    ((__u32*)ictx)[0],
				    ((__u32*)ictx)[1],
				    ((__u32*)ictx)[2],
				    ((__u32*)ictx)[3],
				    ((__u32*)octx)[0],
				    ((__u32*)octx)[1],
				    ((__u32*)octx)[2],
				    ((__u32*)octx)[3] );
#  endif                        /* KLIPS_DIVULGE_HMAC_KEY */

			/* zero key buffer -- paranoid */
			memset(akp, 0, aks);
			kfree(akp);
		}
		break;
# endif                 /* CONFIG_KLIPS_AUTH_HMAC_MD5 */
# ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1
		case AH_SHA: {
			unsigned char *akp;
			unsigned int aks;
			SHA1_CTX *ictx;
			SHA1_CTX *octx;

			if (ipsp->ips_key_bits_a != (AHSHA196_KLEN * 8)) {
				KLIPS_PRINT(debug_pfkey,
					    "ipsec_sa_init: "
					    "incorrect key size: %d bits -- must be %d bits\n" /*octets (bytes)\n"*/,
					    ipsp->ips_key_bits_a, AHSHA196_KLEN *
					    8);
				SENDERR(EINVAL);
			}

#  if KLIPS_DIVULGE_HMAC_KEY
			KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose,
				    "ipsec_sa_init: "
				    "hmac sha1-96 key is 0x%08x %08x %08x %08x\n",
				    ntohl(*(((__u32 *)ipsp->ips_key_a) + 0)),
				    ntohl(*(((__u32 *)ipsp->ips_key_a) + 1)),
				    ntohl(*(((__u32 *)ipsp->ips_key_a) + 2)),
				    ntohl(*(((__u32 *)ipsp->ips_key_a) + 3)));
#  endif                        /* KLIPS_DIVULGE_HMAC_KEY */

			ipsp->ips_auth_bits = AHSHA196_ALEN * 8;

			/* save the pointer to the key material */
			akp = ipsp->ips_key_a;
			aks = ipsp->ips_key_a_size;

			KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose,
				    "ipsec_sa_init: "
				    "allocating %lu bytes for sha1_ctx.\n",
				    (unsigned long) sizeof(struct sha1_ctx));
			if ((ipsp->ips_key_a = (caddr_t)
					       kmalloc(sizeof(struct sha1_ctx),
						       GFP_ATOMIC)) == NULL) {
				ipsp->ips_key_a = akp;
				SENDERR(ENOMEM);
			}
			ipsp->ips_key_a_size = sizeof(struct sha1_ctx);

			for (i = 0; i < DIVUP(ipsp->ips_key_bits_a, 8); i++)
				kb[i] = akp[i] ^ HMAC_IPAD;
			for (; i < AHMD596_BLKLEN; i++)
				kb[i] = HMAC_IPAD;

			ictx = &(((struct sha1_ctx*)(ipsp->ips_key_a))->ictx);
			SHA1Init(ictx);
			SHA1Update(ictx, kb, AHSHA196_BLKLEN);

			for (i = 0; i < AHSHA196_BLKLEN; i++)
				kb[i] ^= (HMAC_IPAD ^ HMAC_OPAD);

			octx = &(((struct sha1_ctx*)(ipsp->ips_key_a))->octx);
			SHA1Init(octx);
			SHA1Update(octx, kb, AHSHA196_BLKLEN);

#  if KLIPS_DIVULGE_HMAC_KEY
			KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose,
				    "ipsec_sa_init: "
				    "SHA1 ictx=0x%08x %08x %08x %08x octx=0x%08x %08x %08x %08x\n",
				    ((__u32*)ictx)[0],
				    ((__u32*)ictx)[1],
				    ((__u32*)ictx)[2],
				    ((__u32*)ictx)[3],
				    ((__u32*)octx)[0],
				    ((__u32*)octx)[1],
				    ((__u32*)octx)[2],
				    ((__u32*)octx)[3] );
#  endif                        /* KLIPS_DIVULGE_HMAC_KEY */
			/* zero key buffer -- paranoid */
			memset(akp, 0, aks);
			kfree(akp);
		}
		break;
# endif                 /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */
		default:
			KLIPS_PRINT(debug_pfkey,
				    "ipsec_sa_init: "
				    "authalg=%d support not available in the kernel",
				    ipsp->ips_authalg);
			SENDERR(EINVAL);
		}
#ifdef CONFIG_KLIPS_ALG
			/* closure of the -EPROTO condition above */
		}
#endif
		break;
#endif          /* CONFIG_KLIPS_AH */

#ifdef CONFIG_KLIPS_ESP
	case IPPROTO_ESP:
		ipsp->ips_xformfuncs = esp_xform_funcs;
		{
#ifdef CONFIG_KLIPS_OCF
			if (ipsec_ocf_sa_init(ipsp, ipsp->ips_authalg,
					      ipsp->ips_encalg))
				break;
#endif

#ifdef CONFIG_KLIPS_ALG
			error = ipsec_alg_enc_key_create(ipsp);
			if (error < 0)
				SENDERR(-error);

			error = ipsec_alg_auth_key_create(ipsp);
			if ((error < 0) && (error != -EPROTO))
				SENDERR(-error);

			if (error == -EPROTO) {
				/* perform manual key generation,
				   ignore this particular error */
				error = 0;
#endif                  /* CONFIG_KLIPS_ALG */

			switch (ipsp->ips_authalg) {
#if defined (CONFIG_KLIPS_AUTH_HMAC_MD5) || \
				defined (CONFIG_KLIPS_AUTH_HMAC_SHA1)
				unsigned char *akp;
				unsigned int aks;
#endif
# ifdef CONFIG_KLIPS_AUTH_HMAC_MD5
			case AH_MD5: {
				MD5_CTX *ictx;
				MD5_CTX *octx;

				if (ipsp->ips_key_bits_a !=
				    (AHMD596_KLEN * 8)) {
					KLIPS_PRINT(debug_pfkey,
						    "ipsec_sa_init: "
						    "incorrect authorisation key size: %d bits -- must be %d bits\n" /*octets (bytes)\n"*/,
						    ipsp->ips_key_bits_a,
						    AHMD596_KLEN * 8);
					SENDERR(EINVAL);
				}

#  if KLIPS_DIVULGE_HMAC_KEY
				KLIPS_PRINT(
					debug_pfkey && sysctl_ipsec_debug_verbose,
					"ipsec_sa_init: "
					"hmac md5-96 key is 0x%08x %08x %08x %08x\n",
					ntohl(*(((__u32 *)(ipsp->ips_key_a)) +
						0)),
					ntohl(*(((__u32 *)(ipsp->ips_key_a)) +
						1)),
					ntohl(*(((__u32 *)(ipsp->ips_key_a)) +
						2)),
					ntohl(*(((__u32 *)(ipsp->ips_key_a)) +
						3)));
#  endif                                /* KLIPS_DIVULGE_HMAC_KEY */
				ipsp->ips_auth_bits = AHMD596_ALEN * 8;

				/* save the pointer to the key material */
				akp = ipsp->ips_key_a;
				aks = ipsp->ips_key_a_size;

				KLIPS_PRINT(
					debug_pfkey && sysctl_ipsec_debug_verbose,
					"ipsec_sa_init: "
					"allocating %lu bytes for md5_ctx.\n",
					(unsigned long) sizeof(struct
							       md5_ctx));
				if ((ipsp->ips_key_a = (caddr_t)
					kmalloc(sizeof(struct md5_ctx),
						GFP_ATOMIC)) == NULL) {
					ipsp->ips_key_a = akp;
					SENDERR(ENOMEM);
				}
				ipsp->ips_key_a_size = sizeof(struct md5_ctx);

				for (i = 0; i < DIVUP(ipsp->ips_key_bits_a, 8);
				     i++)
					kb[i] = akp[i] ^ HMAC_IPAD;
				for (; i < AHMD596_BLKLEN; i++)
					kb[i] = HMAC_IPAD;

				ictx = &(((struct md5_ctx*)(ipsp->ips_key_a))
					  ->ictx);
				osMD5Init(ictx);
				osMD5Update(ictx, kb, AHMD596_BLKLEN);

				for (i = 0; i < AHMD596_BLKLEN; i++)
					kb[i] ^= (HMAC_IPAD ^ HMAC_OPAD);

				octx = &(((struct md5_ctx*)(ipsp->ips_key_a))
					  ->octx);
				osMD5Init(octx);
				osMD5Update(octx, kb, AHMD596_BLKLEN);

#  if KLIPS_DIVULGE_HMAC_KEY
				KLIPS_PRINT(
					debug_pfkey && sysctl_ipsec_debug_verbose,
					"ipsec_sa_init: "
					"MD5 ictx=0x%08x %08x %08x %08x octx=0x%08x %08x %08x %08x\n",
					((__u32*)ictx)[0],
					((__u32*)ictx)[1],
					((__u32*)ictx)[2],
					((__u32*)ictx)[3],
					((__u32*)octx)[0],
					((__u32*)octx)[1],
					((__u32*)octx)[2],
					((__u32*)octx)[3] );
#  endif                                /* KLIPS_DIVULGE_HMAC_KEY */
				/* paranoid */
				memset(akp, 0, aks);
				kfree(akp);
				break;
			}
# endif                         /* CONFIG_KLIPS_AUTH_HMAC_MD5 */
# ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1
			case AH_SHA: {
				SHA1_CTX *ictx;
				SHA1_CTX *octx;

				if (ipsp->ips_key_bits_a !=
				    (AHSHA196_KLEN * 8)) {
					KLIPS_PRINT(debug_pfkey,
						    "ipsec_sa_init: "
						    "incorrect authorisation key size: %d bits -- must be %d bits\n" /*octets (bytes)\n"*/,
						    ipsp->ips_key_bits_a,
						    AHSHA196_KLEN * 8);
					SENDERR(EINVAL);
				}

#  if KLIPS_DIVULGE_HMAC_KEY
				KLIPS_PRINT(
					debug_pfkey && sysctl_ipsec_debug_verbose,
					"ipsec_sa_init: "
					"hmac sha1-96 key is 0x%08x %08x %08x %08x\n",
					ntohl(*(((__u32 *)ipsp->ips_key_a) +
						0)),
					ntohl(*(((__u32 *)ipsp->ips_key_a) +
						1)),
					ntohl(*(((__u32 *)ipsp->ips_key_a) +
						2)),
					ntohl(*(((__u32 *)ipsp->ips_key_a) +
						3)));
#  endif                                /* KLIPS_DIVULGE_HMAC_KEY */
				ipsp->ips_auth_bits = AHSHA196_ALEN * 8;

				/* save the pointer to the key material */
				akp = ipsp->ips_key_a;
				aks = ipsp->ips_key_a_size;

				KLIPS_PRINT(
					debug_pfkey && sysctl_ipsec_debug_verbose,
					"ipsec_sa_init: "
					"allocating %lu bytes for sha1_ctx.\n",
					(unsigned long) sizeof(struct
							       sha1_ctx));
				if ((ipsp->ips_key_a = (caddr_t)
					kmalloc(sizeof(struct sha1_ctx),
						GFP_ATOMIC)) == NULL) {
					ipsp->ips_key_a = akp;
					SENDERR(ENOMEM);
				}
				ipsp->ips_key_a_size = sizeof(struct sha1_ctx);

				for (i = 0; i < DIVUP(ipsp->ips_key_bits_a, 8);
				     i++)
					kb[i] = akp[i] ^ HMAC_IPAD;
				for (; i < AHMD596_BLKLEN; i++)
					kb[i] = HMAC_IPAD;

				ictx = &(((struct sha1_ctx*)(ipsp->ips_key_a))
					  ->ictx);
				SHA1Init(ictx);
				SHA1Update(ictx, kb, AHSHA196_BLKLEN);

				for (i = 0; i < AHSHA196_BLKLEN; i++)
					kb[i] ^= (HMAC_IPAD ^ HMAC_OPAD);

				octx = &((struct sha1_ctx*)(ipsp->ips_key_a))
					->octx;
				SHA1Init(octx);
				SHA1Update(octx, kb, AHSHA196_BLKLEN);

#  if KLIPS_DIVULGE_HMAC_KEY
				KLIPS_PRINT(
					debug_pfkey && sysctl_ipsec_debug_verbose,
					"ipsec_sa_init: "
					"SHA1 ictx=0x%08x %08x %08x %08x octx=0x%08x %08x %08x %08x\n",
					((__u32*)ictx)[0],
					((__u32*)ictx)[1],
					((__u32*)ictx)[2],
					((__u32*)ictx)[3],
					((__u32*)octx)[0],
					((__u32*)octx)[1],
					((__u32*)octx)[2],
					((__u32*)octx)[3] );
#  endif                                /* KLIPS_DIVULGE_HMAC_KEY */
				memset(akp, 0, aks);
				kfree(akp);
				break;
			}
# endif                         /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */
			case AH_NONE:
				break;
			default:
				KLIPS_PRINT(debug_pfkey,
					    "ipsec_sa_init: "
					    "authalg=%d support not available in the kernel.\n",
					    ipsp->ips_authalg);
				SENDERR(EINVAL);
			}
#ifdef CONFIG_KLIPS_ALG
			/* closure of the -EPROTO condition above */
		}
#endif

			ipsp->ips_iv_size =
				ipsp->ips_alg_enc->ixt_common.ixt_support.
				ias_ivlen / 8;

			/* Create IV */
			if (ipsp->ips_iv_size) {
				if ((ipsp->ips_iv = (caddr_t)
					kmalloc(ipsp->ips_iv_size,
						GFP_ATOMIC)) == NULL)
					SENDERR(ENOMEM);
				prng_bytes(&ipsec_prng, (char *)ipsp->ips_iv,
					   ipsp->ips_iv_size);
				ipsp->ips_iv_bits = ipsp->ips_iv_size * 8;
			}
		}
		break;
#endif          /* !CONFIG_KLIPS_ESP */
#ifdef CONFIG_KLIPS_IPCOMP
	case IPPROTO_COMP:

		ipsp->ips_xformfuncs = ipcomp_xform_funcs;

		ipsp->ips_comp_adapt_tries = 0;
		ipsp->ips_comp_adapt_skip = 0;
		ipsp->ips_comp_ratio_cbytes = 0;
		ipsp->ips_comp_ratio_dbytes = 0;

#ifdef CONFIG_KLIPS_OCF
		if (ipsec_ocf_comp_sa_init(ipsp, ipsp->ips_encalg))
			break;
#endif

		ipsp->ips_comp_adapt_tries = 0;
		ipsp->ips_comp_adapt_skip = 0;
		ipsp->ips_comp_ratio_cbytes = 0;
		ipsp->ips_comp_ratio_dbytes = 0;
		break;
#endif          /* CONFIG_KLIPS_IPCOMP */
	default:
		printk(KERN_ERR "KLIPS sa initialization: "
		       "proto=%d unknown.\n",
		       ipsp->ips_said.proto);
		SENDERR(EINVAL);
	}

errlab:
	return error;
}
Example #12
0
DEBUG_NO_STATIC int
pfkey_key_parse(struct sadb_ext *pfkey_ext)
{
	int error = 0;
	struct sadb_key *pfkey_key = (struct sadb_key *)pfkey_ext;

	DEBUGGING(PF_KEY_DEBUG_PARSE_FLOW,
		"pfkey_key_parse:enter\n");
	/* sanity checks... */

	if(!pfkey_key) {
		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
			"pfkey_key_parse: "
			"NULL pointer passed in.\n");
		SENDERR(EINVAL);
	}

	if(pfkey_key->sadb_key_len < sizeof(struct sadb_key) / IPSEC_PFKEYv2_ALIGN) {
		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
			"pfkey_key_parse: "
			"size wrong ext_len=%d, key_ext_len=%ld.\n",
			pfkey_key->sadb_key_len,
			sizeof(struct sadb_key));
		SENDERR(EINVAL);
	}

	if(!pfkey_key->sadb_key_bits) {
		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
			"pfkey_key_parse: "
			"key length set to zero, must be non-zero.\n");
		SENDERR(EINVAL);
	}

	if(pfkey_key->sadb_key_len !=
	   DIVUP(sizeof(struct sadb_key) * OCTETBITS + pfkey_key->sadb_key_bits,
		 PFKEYBITS)) {
		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
			"pfkey_key_parse: "
			"key length=%d does not agree with extension length=%d.\n",
			pfkey_key->sadb_key_bits,
			pfkey_key->sadb_key_len);
		SENDERR(EINVAL);
	}
	
	if(pfkey_key->sadb_key_reserved) {
		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
			"pfkey_key_parse: "
			"res=%d, must be zero.\n",
			pfkey_key->sadb_key_reserved);
		SENDERR(EINVAL);
	}

	if(! ( (pfkey_key->sadb_key_exttype == SADB_EXT_KEY_AUTH) ||
	       (pfkey_key->sadb_key_exttype == SADB_EXT_KEY_ENCRYPT))) {
		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
			"pfkey_key_parse: "
			"expecting extension type AUTH or ENCRYPT, got %d.\n",
			pfkey_key->sadb_key_exttype);
		SENDERR(EINVAL);
	}

	DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT,
		"pfkey_key_parse: "
		"success, found len=%d exttype=%d bits=%d reserved=%d.\n",
		pfkey_key->sadb_key_len,
		pfkey_key->sadb_key_exttype,
		pfkey_key->sadb_key_bits,
		pfkey_key->sadb_key_reserved);

errlab:
	return error;
}
Example #13
0
DEBUG_NO_STATIC int
pfkey_address_parse(struct sadb_ext *pfkey_ext)
{
	int error = 0;
	int saddr_len = 0;
	struct sadb_address *pfkey_address = (struct sadb_address *)pfkey_ext;
	struct sockaddr* s = (struct sockaddr*)((char*)pfkey_address + sizeof(*pfkey_address));
	char ipaddr_txt[ADDRTOT_BUF];
	
	DEBUGGING(PF_KEY_DEBUG_PARSE_FLOW,
		"pfkey_address_parse:enter\n");
	/* sanity checks... */
	if(!pfkey_address) {
		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
			"pfkey_address_parse: "
			"NULL pointer passed in.\n");
		SENDERR(EINVAL);
	}
	
	if(pfkey_address->sadb_address_len <
	   (sizeof(struct sadb_address) + sizeof(struct sockaddr))/
	   IPSEC_PFKEYv2_ALIGN) {
		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
			"pfkey_address_parse: "
			"size wrong 1 ext_len=%d, adr_ext_len=%ld, saddr_len=%ld.\n",
			pfkey_address->sadb_address_len,
			sizeof(struct sadb_address),
			sizeof(struct sockaddr));
		SENDERR(EINVAL);
	}
	
	if(pfkey_address->sadb_address_reserved) {
		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
			"pfkey_address_parse: "
			"res=%d, must be zero.\n",
			pfkey_address->sadb_address_reserved);
		SENDERR(EINVAL);
	}
	
	switch(pfkey_address->sadb_address_exttype) {	
	case SADB_EXT_ADDRESS_SRC:
	case SADB_EXT_ADDRESS_DST:
	case SADB_EXT_ADDRESS_PROXY:
	case SADB_X_EXT_ADDRESS_DST2:
	case SADB_X_EXT_ADDRESS_SRC_FLOW:
	case SADB_X_EXT_ADDRESS_DST_FLOW:
	case SADB_X_EXT_ADDRESS_SRC_MASK:
	case SADB_X_EXT_ADDRESS_DST_MASK:
#ifdef NAT_TRAVERSAL
	case SADB_X_EXT_NAT_T_OA:
#endif
		break;
	default:
		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, 
			"pfkey_address_parse: "
			"unexpected ext_type=%d.\n", 
			pfkey_address->sadb_address_exttype); 
		SENDERR(EINVAL); 
	}
	
	switch(s->sa_family) {
	case AF_INET:
		DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT,
			"pfkey_address_parse: "
			"found address family=%d, AF_INET.\n",
			s->sa_family);
		saddr_len = sizeof(struct sockaddr_in);
		sprintf(ipaddr_txt, "%d.%d.%d.%d"
			, (((struct sockaddr_in*)s)->sin_addr.s_addr >>  0) & 0xFF
			, (((struct sockaddr_in*)s)->sin_addr.s_addr >>  8) & 0xFF
			, (((struct sockaddr_in*)s)->sin_addr.s_addr >> 16) & 0xFF
			, (((struct sockaddr_in*)s)->sin_addr.s_addr >> 24) & 0xFF);
		DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT,
			"pfkey_address_parse: "
			"found address=%s.\n",
			ipaddr_txt);
		break;
	case AF_INET6:
		DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT,
			"pfkey_address_parse: "
			"found address family=%d, AF_INET6.\n",
			s->sa_family);
		saddr_len = sizeof(struct sockaddr_in6);
		sprintf(ipaddr_txt, "%x:%x:%x:%x:%x:%x:%x:%x"
			, ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[0])
			, ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[1])
			, ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[2])
			, ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[3])
			, ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[4])
			, ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[5])
			, ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[6])
			, ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[7]));
		DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT,
			"pfkey_address_parse: "
			"found address=%s.\n",
			ipaddr_txt);
		break;
	default:
		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
			"pfkey_address_parse: "
			"s->sa_family=%d not supported.\n",
			s->sa_family);
		SENDERR(EPFNOSUPPORT);
	}
	
	if(pfkey_address->sadb_address_len !=
	   DIVUP(sizeof(struct sadb_address) + saddr_len, IPSEC_PFKEYv2_ALIGN)) {
		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
			"pfkey_address_parse: "
			"size wrong 2 ext_len=%d, adr_ext_len=%ld, saddr_len=%d.\n",
			pfkey_address->sadb_address_len,
			sizeof(struct sadb_address),
			saddr_len);
		SENDERR(EINVAL);
	}
	
	if(pfkey_address->sadb_address_prefixlen != 0) {
		DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM,
			"pfkey_address_parse: "
			"address prefixes not supported yet.\n");
		SENDERR(EAFNOSUPPORT); /* not supported yet */
	}
	
	/* XXX check if port!=0 */
	
	DEBUGGING(PF_KEY_DEBUG_PARSE_FLOW,
		"pfkey_address_parse: successful.\n");
 errlab:
	return error;
}
Example #14
0
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {

  CUresult cudastatus = CUDA_SUCCESS;

  if (nrhs != 3)
    mexErrMsgTxt("Wrong number of arguments");

  if (init == 0) {
    // Initialize function
    //mexLock();

    // load GPUmat
    gm = gmGetGPUmat();

    // load module
    CUmodule *drvmod = gmGetModule("subsample");

    //load appropriate GPU kernel (mangled name)
    CUresult status;

    status = cuModuleGetFunction(&_supersampleMedium_2, *drvmod, "_Z18kSupersampleMediumILi2EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMedium_3, *drvmod, "_Z18kSupersampleMediumILi3EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMedium_4, *drvmod, "_Z18kSupersampleMediumILi4EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMedium_5, *drvmod, "_Z18kSupersampleMediumILi5EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMedium_6, *drvmod, "_Z18kSupersampleMediumILi6EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMedium_7, *drvmod, "_Z18kSupersampleMediumILi7EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMedium_8, *drvmod, "_Z18kSupersampleMediumILi8EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMedium_9, *drvmod, "_Z18kSupersampleMediumILi9EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMedium_10, *drvmod, "_Z18kSupersampleMediumILi10EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMedium_11, *drvmod, "_Z18kSupersampleMediumILi11EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMedium_12, *drvmod, "_Z18kSupersampleMediumILi12EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMedium_13, *drvmod, "_Z18kSupersampleMediumILi13EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMedium_14, *drvmod, "_Z18kSupersampleMediumILi14EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMedium_15, *drvmod, "_Z18kSupersampleMediumILi15EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMedium_16, *drvmod, "_Z18kSupersampleMediumILi16EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMediumLoopy_2, *drvmod, "_Z23kSupersampleMediumLoopyILi2EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMediumLoopy_3, *drvmod, "_Z23kSupersampleMediumLoopyILi3EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMediumLoopy_4, *drvmod, "_Z23kSupersampleMediumLoopyILi4EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMediumLoopy_5, *drvmod, "_Z23kSupersampleMediumLoopyILi5EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMediumLoopy_6, *drvmod, "_Z23kSupersampleMediumLoopyILi6EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMediumLoopy_7, *drvmod, "_Z23kSupersampleMediumLoopyILi7EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMediumLoopy_8, *drvmod, "_Z23kSupersampleMediumLoopyILi8EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMediumLoopy_9, *drvmod, "_Z23kSupersampleMediumLoopyILi9EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMediumLoopy_10, *drvmod, "_Z23kSupersampleMediumLoopyILi10EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMediumLoopy_11, *drvmod, "_Z23kSupersampleMediumLoopyILi11EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMediumLoopy_12, *drvmod, "_Z23kSupersampleMediumLoopyILi12EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMediumLoopy_13, *drvmod, "_Z23kSupersampleMediumLoopyILi13EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMediumLoopy_14, *drvmod, "_Z23kSupersampleMediumLoopyILi14EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMediumLoopy_15, *drvmod, "_Z23kSupersampleMediumLoopyILi15EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&_supersampleMediumLoopy_16, *drvmod, "_Z23kSupersampleMediumLoopyILi16EEvPfS0_ii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }

    init = 1;
  }

  // mex parameters are:

  // 1. IN1
  // 2. OUT
  // 3. supersampling factor

  bool avoidBankConflicts = true; //hard-coded
  bool trans = false; //hard-coded

  //IN1 is the input GPU array
  GPUtype IN1 = gm->gputype.getGPUtype(prhs[0]);

  //OUT is the output GPU array (result)
  GPUtype OUT = gm->gputype.getGPUtype(prhs[1]);

  //last parameter is the filterSize (int)
  int factor = (int) mxGetScalar(prhs[2]);

  // number of elements
  int nin1 = gm->gputype.getNumel(IN1);
  int nout = gm->gputype.getNumel(OUT);

  //dimensions 
  const int * sin1 = gm->gputype.getSize(IN1);
  const int * sout = gm->gputype.getSize(OUT);

  int imgPixels = sin1[0];
  if ( floor(sqrt(float(imgPixels))) != sqrt(float(imgPixels)) )
    mexErrMsgTxt("Images not square");
  int imgSize = int(sqrt(imgPixels));
  int numImages = sin1[1];
  

  /* if (imgSize <= factor)
     mexErrMsgTxt("imgSize must be > factor"); */
  if (factor > 16)
    mexErrMsgTxt("factor > 16");
  if (factor < 2)
    mexErrMsgTxt("factor < 2");
  if (imgSize > 512)
    mexErrMsgTxt("max imgSize: 512");
  if (imgSize < 1)
    mexErrMsgTxt("min imgSize: 1");

  int targetPixels = sout[0];
  if ( floor(sqrt(float(targetPixels))) != sqrt(float(targetPixels)) )
    mexErrMsgTxt("Targets not square");
  int targetSize = int(sqrt(targetPixels));
  if (targetSize % factor !=0)
    mexErrMsgTxt("imgSize must be evenly divisible by factor");
  if (targetSize / factor != imgSize)
    mexErrMsgTxt("targetSize/ factor must = imgSize");
  if (nout != nin1 * factor*factor)
    mexErrMsgTxt("Target dimensions not consistent");

  int threadsX, threadsY;
  int SHMEM_MAX = 8192; // don't use more than this much shmem
  int shmemX, shmemY, blocksX, blocksY;
  bool useLoopy = false;
  int THREADS_MAX_LOOPY = 512, THREADS_MAX = trans ? 256 : 512;
  if (!trans) {
    threadsX = imgSize;
    threadsY = factor * MIN(THREADS_MAX / (factor*threadsX), SHMEM_MAX / (4*threadsX*factor)); // to avoid running out of shmem

    if(threadsY == 0) {
      if (factor > 32)
	mexErrMsgTxt("factor can't be > 32");
      //assert(factor <= 32); // yes this is covered by assert above but in case i ever remove that
      THREADS_MAX = 512;
      useLoopy = true;
      threadsX = MIN(16, imgSize); // not that imgsize can be < 16 here under current conditions
      threadsY = factor * MIN(THREADS_MAX_LOOPY / (factor*threadsX), SHMEM_MAX / (4*threadsX*factor)); // to avoid running out of shmem
    }

    shmemY = threadsY;
    shmemX = threadsX;
    blocksX = imgSize;
    blocksY = DIVUP(numImages, threadsY);
    //        printf("boundary problems: %u\n", numImages % threadsY != 0);
  } else {

    threadsY = imgSize;
    threadsX = factor * MIN(THREADS_MAX / (factor*threadsY), SHMEM_MAX / (4*threadsY*factor)); // to avoid running out of shmem

    if(threadsX < 8) {
      useLoopy = true;
      int xFactorMult = DIVUP(16, factor);
      threadsX = xFactorMult * factor;
      threadsY = THREADS_MAX / threadsX;
      int newThreadsX = threadsX, newThreadsY = threadsY;
      while (newThreadsY > 0 && imgSize % newThreadsY != 0) { // let's see if we can make threadsY divide imgSize
	newThreadsX += factor;
	newThreadsY = THREADS_MAX / newThreadsX;
      }
      if (newThreadsY > 0) {
	threadsY = newThreadsY;
	threadsX = newThreadsX;
      }

      if (threadsY <=0)
	mexErrMsgTxt("threadsY <=0; not expected");
      //assert(threadsY > 0);
    }

    shmemY = threadsX;
    shmemX = threadsY + (1 - (threadsY % 2));
    blocksX = DIVUP(numImages, threadsX);
    blocksY = imgSize;
    //        printf("boundary problems: %u\n", numImages % threadsX != 0);
  }
  int shmem = 4 * shmemX * shmemY;
  if (shmem == 0 || shmem > 16300) {
    // this really shouldn't happen and i've only put this here as a precautionary measure
    // to avoid getting mysteriously wrong results.
    mexErrMsgTxt("supersample: not enough shared memory!");
    //exit(EXIT_FAILURE);
  }

  dim3 grid(blocksX, blocksY);
  dim3 threads(threadsX, threadsY);
  //mexPrintf("blocks: %dx%d, threads: %dx%d\n", blocksY, blocksX, threadsY, threadsX);
  //mexPrintf("using %dx%d = %d bytes of shmem\n", shmemY, shmemX, shmem);


  gpuTYPE_t tin1 = gm->gputype.getType(IN1);
  gpuTYPE_t tout = gm->gputype.getType(OUT);

  // check input/out size and type
  if (tin1!=tout)
    mexErrMsgTxt("Input and output arguments must be of the same type.");

  // I need the pointers to GPU memory
  CUdeviceptr d_IN1  = (CUdeviceptr) (UINTPTR gm->gputype.getGPUptr(IN1));
  CUdeviceptr d_OUT = (CUdeviceptr) (UINTPTR gm->gputype.getGPUptr(OUT));

  // // The GPU kernel depends on the type of input/output
  // CUfunction drvfun;
  // if (tin1 == gpuFLOAT) {
  //   drvfun = drvfunf;
  // } else 
  //   mexErrMsgTxt("Currently only single types supported.");

  hostdrv_pars_t gpuprhs[2];
  int gpunrhs = 2;
  gpuprhs[0] = hostdrv_pars(&d_IN1,sizeof(d_IN1));
  gpuprhs[1] = hostdrv_pars(&d_OUT,sizeof(d_OUT));


  /* trans not implemented; so always !trans    if(!trans) { */
        if(!useLoopy) {
            if(factor == 2) {
	      hostDriver(_supersampleMedium_2, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
	      //kSupersampleMedium<2><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), imgSize, numImages*imgSize);
            } else if(factor == 3) {
                hostDriver(_supersampleMedium_3, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 4) {
                hostDriver(_supersampleMedium_4, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 5) {
                hostDriver(_supersampleMedium_5, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 6) {
                hostDriver(_supersampleMedium_6, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 7) {
                hostDriver(_supersampleMedium_7, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 8) {
                hostDriver(_supersampleMedium_8, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 9) {
                hostDriver(_supersampleMedium_9, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 10) {
                hostDriver(_supersampleMedium_10, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 11) {
                hostDriver(_supersampleMedium_11, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 12) {
                hostDriver(_supersampleMedium_12, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 13) {
                hostDriver(_supersampleMedium_13, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 14) {
                hostDriver(_supersampleMedium_14, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 15) {
                hostDriver(_supersampleMedium_15, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 16) {
                hostDriver(_supersampleMedium_16, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            }
        } else {
            if(factor == 2) {
                hostDriver(_supersampleMediumLoopy_2, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 3) {
                hostDriver(_supersampleMediumLoopy_3, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 4) {
                hostDriver(_supersampleMediumLoopy_4, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 5) {
                hostDriver(_supersampleMediumLoopy_5, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 6) {
                hostDriver(_supersampleMediumLoopy_6, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 7) {
                hostDriver(_supersampleMediumLoopy_7, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 8) {
                hostDriver(_supersampleMediumLoopy_8, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 9) {
                hostDriver(_supersampleMediumLoopy_9, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 10) {
                hostDriver(_supersampleMediumLoopy_10, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 11) {
                hostDriver(_supersampleMediumLoopy_11, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 12) {
                hostDriver(_supersampleMediumLoopy_12, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 13) {
                hostDriver(_supersampleMediumLoopy_13, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 14) {
                hostDriver(_supersampleMediumLoopy_14, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 15) {
                hostDriver(_supersampleMediumLoopy_15, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            } else if(factor == 16) {
                hostDriver(_supersampleMediumLoopy_16, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs);
            }
        }
	/* } else {
        if(!useLoopy) {
            if(factor == 2) {
                kSupersampleMediumTrans<2><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 3) {
                kSupersampleMediumTrans<3><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 4) {
                kSupersampleMediumTrans<4><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 5) {
                kSupersampleMediumTrans<5><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 6) {
                kSupersampleMediumTrans<6><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 7) {
                kSupersampleMediumTrans<7><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 8) {
                kSupersampleMediumTrans<8><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 9) {
                kSupersampleMediumTrans<9><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 10) {
                kSupersampleMediumTrans<10><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 11) {
                kSupersampleMediumTrans<11><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 12) {
                kSupersampleMediumTrans<12><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 13) {
                kSupersampleMediumTrans<13><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 14) {
                kSupersampleMediumTrans<14><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 15) {
                kSupersampleMediumTrans<15><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 16) {
                kSupersampleMediumTrans<16><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            }
        } else {
            if(factor == 2) {
                kSupersampleMediumTransLoopy<2><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 3) {
                kSupersampleMediumTransLoopy<3><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 4) {
                kSupersampleMediumTransLoopy<4><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 5) {
                kSupersampleMediumTransLoopy<5><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 6) {
                kSupersampleMediumTransLoopy<6><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 7) {
                kSupersampleMediumTransLoopy<7><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 8) {
                kSupersampleMediumTransLoopy<8><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 9) {
                kSupersampleMediumTransLoopy<9><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 10) {
                kSupersampleMediumTransLoopy<10><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 11) {
                kSupersampleMediumTransLoopy<11><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 12) {
                kSupersampleMediumTransLoopy<12><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 13) {
                kSupersampleMediumTransLoopy<13><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 14) {
                kSupersampleMediumTransLoopy<14><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 15) {
                kSupersampleMediumTransLoopy<15><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            } else if(factor == 16) {
                kSupersampleMediumTransLoopy<16><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX);
            }
        }
    }
	*/

}
int
pfkey_key_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr)
{
        int error = 0;
        struct sadb_key *pfkey_key = (struct sadb_key *)pfkey_ext;
	
	KLIPS_PRINT(debug_pfkey,
		    "klips_debug:pfkey_key_process: .\n");

	if(!extr || !extr->ips) {
		KLIPS_PRINT(debug_pfkey,
			    "klips_debug:pfkey_key_process: "
			    "extr or extr->ips is NULL, fatal\n");
		SENDERR(EINVAL);
	}

        switch(pfkey_key->sadb_key_exttype) {
        case SADB_EXT_KEY_AUTH:
		KLIPS_PRINT(debug_pfkey,
			    "klips_debug:pfkey_key_process: "
			    "allocating %d bytes for authkey.\n",
			    DIVUP(pfkey_key->sadb_key_bits, 8));
		if(!(extr->ips->ips_key_a = kmalloc(DIVUP(pfkey_key->sadb_key_bits, 8), GFP_KERNEL))) {
			KLIPS_PRINT(debug_pfkey,
				    "klips_debug:pfkey_key_process: "
				    "memory allocation error.\n");
			SENDERR(ENOMEM);
		}
                extr->ips->ips_key_bits_a = pfkey_key->sadb_key_bits;
                extr->ips->ips_key_a_size = DIVUP(pfkey_key->sadb_key_bits, 8);
		memcpy(extr->ips->ips_key_a,
		       (char*)pfkey_key + sizeof(struct sadb_key),
		       extr->ips->ips_key_a_size);
		break;
	case SADB_EXT_KEY_ENCRYPT: /* Key(s) */
		KLIPS_PRINT(debug_pfkey,
			    "klips_debug:pfkey_key_process: "
			    "allocating %d bytes for enckey.\n",
			    DIVUP(pfkey_key->sadb_key_bits, 8));
		if(!(extr->ips->ips_key_e = kmalloc(DIVUP(pfkey_key->sadb_key_bits, 8), GFP_KERNEL))) {
			KLIPS_PRINT(debug_pfkey,
				    "klips_debug:pfkey_key_process: "
				    "memory allocation error.\n");
			SENDERR(ENOMEM);
		}
		extr->ips->ips_key_bits_e = pfkey_key->sadb_key_bits;
		extr->ips->ips_key_e_size = DIVUP(pfkey_key->sadb_key_bits, 8);
		memcpy(extr->ips->ips_key_e,
		       (char*)pfkey_key + sizeof(struct sadb_key),
		       extr->ips->ips_key_e_size);
		break;
	default:
		SENDERR(EINVAL);
 	}

	KLIPS_PRINT(debug_pfkey,
		    "klips_debug:pfkey_key_process: "
		    "success.\n");
errlab:
	return error;
}
UINT32 ipsec_glue_encapalgo(struct ipsec_sa *ips)
{
    UINT32 status = STATUS_SUCCESS;

    switch(ips->ips_encalg)
    {
#ifdef CONFIG_IPSEC_ENC_AES
	case ESP_AES:
	    cryptoAccCtx.cipherCtx.cipherAlgo = IX_CRYPTO_ACC_CIPHER_AES;

	    switch (DIVUP(ips->ips_key_bits_e, BITS))
	    {
	    case EMT_ESPAES128_KEY_SZ:
		cryptoAccCtx.cipherCtx.cipherKeyLen = IX_CRYPTO_ACC_AES_KEY_128;
		memcpy(cryptoAccCtx.cipherCtx.key.aesKey128,
		    (UINT8 *)(ips->ips_key_e), IX_CRYPTO_ACC_AES_KEY_128);
		break;
	    case EMT_ESPAES192_KEY_SZ:
		cryptoAccCtx.cipherCtx.cipherKeyLen = IX_CRYPTO_ACC_AES_KEY_192;
		memcpy(cryptoAccCtx.cipherCtx.key.aesKey192,
		    (UINT8 *)(ips->ips_key_e), IX_CRYPTO_ACC_AES_KEY_192);
		break;
	    case EMT_ESPAES256_KEY_SZ:
		cryptoAccCtx.cipherCtx.cipherKeyLen = IX_CRYPTO_ACC_AES_KEY_256;
		memcpy(cryptoAccCtx.cipherCtx.key.aesKey256,
		    (UINT8 *)(ips->ips_key_e), IX_CRYPTO_ACC_AES_KEY_256);
		break;
	    default:
		status = STATUS_FAIL;
		KLIPS_PRINT(debug_xform, "klips_error:glue_encapalgo: "
		    "Invalid AES length!\n");
	    }

	    cryptoAccCtx.cipherCtx.cipherBlockLen = IX_CRYPTO_ACC_AES_BLOCK_128;
	    cryptoAccCtx.cipherCtx.cipherMode = IX_CRYPTO_ACC_MODE_CBC;

	    if (EMT_ESPAES_IV_SZ == (DIVUP(ips->ips_iv_bits, BITS))) 
	    {
		cryptoAccCtx.cipherCtx.cipherInitialVectorLen =
		    IX_CRYPTO_ACC_AES_CBC_IV_128;
	    }
	    else
	    {
		status = STATUS_FAIL;
		KLIPS_PRINT(debug_xform, "klips_error:glue_encapalgo: "
		    "Invalid IV length!\n");
	    }

	    break;
#endif
#ifdef CONFIG_IPSEC_ENC_3DES
	case ESP_3DES:
	    /* The cipher algorith, 3DES */
	    cryptoAccCtx.cipherCtx.cipherAlgo = IX_CRYPTO_ACC_CIPHER_3DES;
		
	    /* The cipher key length	 		*/
	    /* check the cipher length, 3DES = 24 bytes	*/
	    if (EMT_ESP3DES_KEY_SZ == (DIVUP(ips->ips_key_bits_e, BITS))) 
	    {
		cryptoAccCtx.cipherCtx.cipherKeyLen = IX_CRYPTO_ACC_3DES_KEY_192;
	    }
	    else
	    {
		status = STATUS_FAIL;
		KLIPS_PRINT(debug_xform,
			    "klips_error:glue_encapalgo: "
			    "Invalid 3DES length!\n");
		break;
	    }
		
	    /* The cipher key  */
	    memcpy (cryptoAccCtx.cipherCtx.key.desKey, (UINT8 *)(ips->ips_key_e), 
		    IX_CRYPTO_ACC_3DES_KEY_192); 

	    /* The cipher block length */
	    cryptoAccCtx.cipherCtx.cipherBlockLen = IPSEC_DES_BLOCK_LENGTH;
		
	    /* The cipher mode, supported cipher mode: CBC	*/
	    cryptoAccCtx.cipherCtx.cipherMode = IX_CRYPTO_ACC_MODE_CBC;
 	
	    /* The cipher IV length */
	    if (EMT_ESPDES_IV_SZ == (DIVUP(ips->ips_iv_bits, BITS))) 
	    {
		cryptoAccCtx.cipherCtx.cipherInitialVectorLen = IX_CRYPTO_ACC_DES_IV_64;
	    }
	    else
	    {
		status = STATUS_FAIL;
		KLIPS_PRINT(debug_xform,
			   "klips_error:glue_encapalgo: "
			   "Invalid IV length!\n");
	    }
	
	    break;
#endif /* CONFIG_IPSEC_ENC_3DES */

#ifdef USE_SINGLE_DES
	case ESP_DES:
	    /* The cipher algorith, DES */
	    cryptoAccCtx.cipherCtx.cipherAlgo = IX_CRYPTO_ACC_CIPHER_DES;
		
	    /* The cipher key length, DES = 8 bytes */
	    if (EMT_ESPDES_KEY_SZ == (DIVUP(ips->ips_key_bits_e, BITS))) 
	    {
		cryptoAccCtx.cipherCtx.cipherKeyLen = IX_CRYPTO_ACC_DES_KEY_64;
	    }
	    else
	    {
		status = STATUS_FAIL;
		KLIPS_PRINT(debug_xform,
			    "klips_error:glue_encapalgo: "
			    "Invalid DES length!\n");
		break;
	    }
		
	    /* The cipher key  */
	    memcpy (cryptoAccCtx.cipherCtx.key.desKey, (UINT8 *)(ips->ips_key_e), 
		    IX_CRYPTO_ACC_DES_KEY_64);
		
	    /* The cipher block length */
	    cryptoAccCtx.cipherCtx.cipherBlockLen = IPSEC_DES_BLOCK_LENGTH;
		
	    /* The cipher mode, supported cipher mode: CBC	*/
	    cryptoAccCtx.cipherCtx.cipherMode = IX_CRYPTO_ACC_MODE_CBC;
 	
	    /* The cipher IV length */
	    if (EMT_ESPDES_IV_SZ == (DIVUP(ips->ips_iv_bits, BITS))) 
	    {
		cryptoAccCtx.cipherCtx.cipherInitialVectorLen = IX_CRYPTO_ACC_DES_IV_64;
	    }
	    else
	    {
		status = STATUS_FAIL;
		KLIPS_PRINT(debug_xform,
			   "klips_error:glue_encapalgo: "
			   "Invalid IV length!\n");
	    }
	
	    break;
#endif /* USE_SINGLE_DES */

	case ESP_NULL:
	    break;

	default:
	    /* Encryption not supported */
	    status = STATUS_FAIL;
	    KLIPS_PRINT(debug_xform,
			"klips_error:glue_encapalgo: "
			"Encap. Algorithm not supported!\n");
	    return status;
    }
	
    return status;
}
Example #17
0
int burnAPP(TCHAR *path) {
   int errorcode = 0;
   unsigned char *buf = (unsigned char *)0x80000000;
   unsigned char *buftemp = buf;
   unsigned char headbuf[512];
   unsigned int rdlen;
   APPHEADER *header = (APPHEADER * )headbuf;
   FIL file;
   FRESULT fret;
   int ret;
   char disbuf[32];
   unsigned int  percent = 0, percentold = 0;
   unsigned int  filesize, count;
   APPPACKHEAD *apppackhead = (APPPACKHEAD * )(buf + 32);
   APPSETCTION *appsection1 = &(apppackhead->appsec1);
   APPSETCTION *appsection2 = &(apppackhead->appsec2);
   fret = f_open(&file, path, FA_READ);
   if (fret != FR_OK) {
      errorcode = BURNAPP_READERROR;
      return errorcode;
   }
   filesize = file.fsize;
   if ((filesize > APP_MAX_SIZE) || (filesize <= 1024)) {
      errorcode = BURNAPP_FILE_ERROR;
      goto ERROR;
   }

   count = DIVUP(filesize, 512);
   memset(headbuf, 0, sizeof headbuf);
   statBarPrint(0, "reading");
   delay(300);
   for (int i = 0; i < count; i++, buftemp+=512) {
      fret = f_read(&file, buftemp, 512, &rdlen);
      if (fret != FR_OK)  goto ERROR;
      for (int j = 0; j < 256; j++) {
         buftemp[j] ^= ProgramTable[j];
         buftemp[j + 256] ^= ProgramTable[j];
      }
      if (i == 0) {
        /*
         if ((buftemp[0] != 'T') || (buftemp[1] != 'H') || (buftemp[2] != 'J')) {
            errorcode = BURNAPP_FILE_ERROR;
            goto ERROR;
         }
         if ((buftemp[10] != 'A') || (buftemp[11] != 'R') || (buftemp[12] != 'A')) {
            errorcode = BURNAPP_FILE_ERROR;
            goto ERROR;
         }
        */
         apppackhead = (APPPACKHEAD * )(buftemp + 32);
         appsection1 = &(apppackhead->appsec1);
         appsection2 = &(apppackhead->appsec2);
         if ((apppackhead->secflag & 0x01) &&
             ((appsection1->imageaddr + appsection1->imageSize) > (filesize - 16))) {
            errorcode = BURNAPP_FILE_ERROR;
            goto ERROR;
         }
         if ((apppackhead->secflag & 0x02) &&
             ((appsection2->imageaddr + appsection2->imageSize) > (filesize - 16))) {
            errorcode = BURNAPP_FILE_ERROR;
            goto ERROR;
         }
      } else {
         percent = i * 100 / count;
         if (percent / 5 != percentold / 5) {
            sprintf(disbuf, "%d%%", percent);
            statBarPrint(0, disbuf);
            percentold = percent;
         }
      }
   }
   MD5_CTX md5context;
   unsigned char decrypt[16];
   statBarPrint(0, "processing please waite");
   MD5Init(& md5context);
   MD5Update(& md5context, buf, filesize-16);
   MD5Final(& md5context, decrypt);
   if (memcmp(buf + filesize - 16, decrypt, 16)) {
      errorcode = BURNAPP_FILE_ERROR;
      goto ERROR;
   }
   statBarPrint(0, "write file please waite");
   delay(300);
   header->magic = APP_MAGIC_NO;
   header->secflag = apppackhead->secflag;
   if (apppackhead->secflag & 0x01) {
      header->appsec1.imageaddr = APP_BEGIN_SECTOR;
      header->appsec1.imageSize = DIVUP(appsection1->imageSize, 512);
      header->appsec1.imageRevPrefix = appsection1->imageRevPrefix;
      header->appsec1.imageMainRev = appsection1->imageMainRev;
      header->appsec1.imageMidRev = appsection1->imageMidRev;
      header->appsec1.imageMinRev = appsection1->imageMinRev;
   }
   if (apppackhead->secflag & 0x02) {
      header->appsec2.imageaddr = BAG_BEGIN_SETCTOR;
      header->appsec2.imageSize = DIVUP(appsection2->imageSize, 512);
      header->appsec2.imageRevPrefix = appsection2->imageRevPrefix;
      header->appsec2.imageMainRev = appsection2->imageMainRev;
      header->appsec2.imageMidRev = appsection2->imageMidRev;
      header->appsec2.imageMinRev = appsection2->imageMinRev;
   }
   ret = MMCSDP_Write(mmcsdctr, headbuf, APP_HEAD_SECTOR, 1);
   if (FALSE == ret) {
      errorcode = BURNAPP_WRITEERROR;
      goto ERROR;
   }
   if (apppackhead->secflag & 0x01) {
      ret = MMCSDP_Write(mmcsdctr, (void *)(buf + appsection1->imageaddr), header->appsec1.imageaddr, header->appsec1.imageSize);
      if (FALSE == ret) {
         errorcode = BURNAPP_WRITEERROR;
         goto ERROR;
      }
      header->magic = APP_MAGIC_OK;
   }
   if (apppackhead->secflag & 0x02) {
      ret = MMCSDP_Write(mmcsdctr, (void *)(buf + appsection2->imageaddr), header->appsec2.imageaddr, header->appsec2.imageSize);
      if (FALSE == ret) {
         errorcode = BURNAPP_WRITEERROR;
         goto ERROR;
      }
   }
   MMCSDP_Write(mmcsdctr, headbuf, APP_HEAD_SECTOR, 1);
   return 0;
ERROR:
   f_close(&file);
   return errorcode;
}
Example #18
0
FileSystemCode_t
FsWriteToFile(
    _In_  FileSystemDescriptor_t*   FileSystem,
    _In_  MfsEntryHandle_t*         Handle,
    _In_  DmaBuffer_t*              BufferObject,
    _In_  size_t                    Length,
    _Out_ size_t*                   BytesWritten)
{
    MfsInstance_t*   Mfs             = (MfsInstance_t*)FileSystem->ExtensionData;
    MfsEntry_t*      Entry           = (MfsEntry_t*)Handle->Base.Entry;
    FileSystemCode_t Result          = FsOk;
    uint64_t         Position        = Handle->Base.Position;
    size_t           BucketSizeBytes = Mfs->SectorsPerBucket * FileSystem->Disk.Descriptor.SectorSize;
    size_t           BytesToWrite    = Length;

    TRACE("FsWriteEntry(Id 0x%x, Position %u, Length %u)",
        Handle->Base.Id, LODWORD(Position), Length);

    *BytesWritten = 0;
    Result        = MfsEnsureRecordSpace(FileSystem, Entry, Position + BytesToWrite);
    if (Result != FsOk) {
        return Result;
    }

    // Guard against newly allocated files
    if (Handle->DataBucketPosition == MFS_ENDOFCHAIN) {
        Handle->DataBucketPosition  = Entry->StartBucket;
        Handle->DataBucketLength    = Entry->StartLength;
        Handle->BucketByteBoundary  = 0;
    }
    
    // Write in a loop to make sure we write all requested bytes
    while (BytesToWrite) {
        // Calculate which bucket, then the sector offset
        // Then calculate how many sectors of the bucket we need to read
        uint64_t Sector         = MFS_GETSECTOR(Mfs, Handle->DataBucketPosition);
        uint64_t SectorOffset   = (Position - Handle->BucketByteBoundary) % FileSystem->Disk.Descriptor.SectorSize;
        size_t SectorIndex      = (size_t)((Position - Handle->BucketByteBoundary) / FileSystem->Disk.Descriptor.SectorSize);
        size_t SectorsLeft      = MFS_GETSECTOR(Mfs, Handle->DataBucketLength) - SectorIndex;
        size_t SectorCount      = 0, ByteCount = 0;

        // Ok - so sectorindex contains the index in the bucket
        // and sector offset contains the byte-offset in that sector

        // Calculate the sector index into bucket
        Sector += SectorIndex;

        // Calculate how many sectors we should read in
        SectorCount = DIVUP(BytesToWrite, FileSystem->Disk.Descriptor.SectorSize);

        // Do we cross a boundary?
        if (SectorOffset + BytesToWrite > FileSystem->Disk.Descriptor.SectorSize) {
            SectorCount++;
        }

        // Adjust for bucket boundary
        SectorCount = MIN(SectorsLeft, SectorCount);

        // Adjust for number of bytes read
        ByteCount = (size_t)MIN(BytesToWrite, (SectorCount * FileSystem->Disk.Descriptor.SectorSize) - SectorOffset);

        // Ex pos 490 - length 50
        // SectorIndex = 0, SectorOffset = 490, SectorCount = 2 - ByteCount = 50 (Capacity 4096)
        // Ex pos 1109 - length 450
        // SectorIndex = 2, SectorOffset = 85, SectorCount = 2 - ByteCount = 450 (Capacity 4096)
        // Ex pos 490 - length 4000
        // SectorIndex = 0, SectorOffset = 490, SectorCount = 8 - ByteCount = 3606 (Capacity 4096)
        TRACE("Write metrics - Sector %u + %u, Count %u, ByteOffset %u, ByteCount %u",
            LODWORD(Sector), SectorIndex, SectorCount, LODWORD(SectorOffset), ByteCount);

        // First of all, calculate the bounds as we might need to read
        // in existing data - Start out by clearing our combination buffer
        ZeroBuffer(Mfs->TransferBuffer);

        // Case 1 - Handle padding
        if (SectorOffset != 0 || ByteCount != FileSystem->Disk.Descriptor.SectorSize) {
            // Start building the sector
            if (MfsReadSectors(FileSystem, Mfs->TransferBuffer, Sector, 
                SectorCount, &SectorCount) != OsSuccess) {
                ERROR("Failed to read sector %u for combination step", 
                    LODWORD(Sector));
                Result = FsDiskError;
                break;
            }
            
            // Adjust the bytecount if we are not able to read all in one go
            if ((FileSystem->Disk.Descriptor.SectorSize * SectorCount) < ByteCount) {
                ByteCount = FileSystem->Disk.Descriptor.SectorSize * SectorCount;
            }
        }

        // Now write the data to the sector
        SeekBuffer(Mfs->TransferBuffer, (size_t)SectorOffset);
        CombineBuffer(Mfs->TransferBuffer, BufferObject, ByteCount, NULL);

        // Perform the write (Raw - as we need to pass the datapointer)
        if (MfsWriteSectors(FileSystem, Mfs->TransferBuffer, Sector, 
            SectorCount, &SectorCount) != OsSuccess) {
            ERROR("Failed to write sector %u", LODWORD(Sector));
            Result = FsDiskError;
            break;
        }

        // Increase the pointers and decrease with bytes read
        // Adjust the bytecount if we are not able to read all in one go
        if ((FileSystem->Disk.Descriptor.SectorSize * SectorCount) < ByteCount) {
            ByteCount = FileSystem->Disk.Descriptor.SectorSize * SectorCount;
        }
        Position      += ByteCount;
        *BytesWritten += ByteCount;
        BytesToWrite  -= ByteCount;

        // Do we need to switch bucket?
        // We do if the position we have read to equals end of bucket
        if (Position == (Handle->BucketByteBoundary + (Handle->DataBucketLength * BucketSizeBytes))) {
            MapRecord_t Link;

            // We have to lookup the link for current bucket
            if (MfsGetBucketLink(FileSystem, Handle->DataBucketPosition, &Link) != OsSuccess) {
                ERROR("Failed to get link for bucket %u", Handle->DataBucketPosition);
                Result = FsDiskError;
                break;
            }

            // Check for EOL
            if (Link.Link == MFS_ENDOFCHAIN) {
                break;
            }
            Handle->DataBucketPosition = Link.Link;

            // Lookup length of link
            if (MfsGetBucketLink(FileSystem, Handle->DataBucketPosition, &Link) != OsSuccess) {
                ERROR("Failed to get length for bucket %u", Handle->DataBucketPosition);
                Result = FsDiskError;
                break;
            }
            Handle->DataBucketLength    = Link.Length;
            Handle->BucketByteBoundary  += (Link.Length * BucketSizeBytes);
        }
    }

    // entry->modified = now
    Entry->ActionOnClose = MFS_ACTION_UPDATE;
    return Result;
}
UINT32 ipsec_glue_authalg(struct ipsec_sa *ips)
{
    UINT32 status = STATUS_SUCCESS;

    switch(ips->ips_authalg) {
#ifdef CONFIG_IPSEC_AUTH_HMAC_MD5
	case AH_MD5:
	    /* Tne the authentication algorithm - MD5*/
	    cryptoAccCtx.authCtx.authAlgo = IX_CRYPTO_ACC_AUTH_MD5;
			
	    /* The digest length, in bytes */
	    cryptoAccCtx.authCtx.authDigestLen = AHHMAC_HASHLEN;
		
	    /* The authentication key length */
	    if (AHMD596_KLEN == (DIVUP(ips->ips_key_bits_a, BITS))) 
	    {
		cryptoAccCtx.authCtx.authKeyLen = IX_CRYPTO_ACC_MD5_KEY_128;
	    }
	    else
	    {
		status = STATUS_FAIL;
		KLIPS_PRINT(debug_xform,
			    "klips_error:glue_encapalgo: "
			    "Invalid MD5 length!\n");
		break;
	    }
	
	    /* The authentication key */
	    memcpy(cryptoAccCtx.authCtx.key.authKey, (UINT8 *)(ips->ips_key_a), 
		    IX_CRYPTO_ACC_MD5_KEY_128);
	    break;
#endif /* CONFIG_IPSEC_AUTH_HMAC_MD5 */

#ifdef CONFIG_IPSEC_AUTH_HMAC_SHA1
	case AH_SHA:
	    cryptoAccCtx.authCtx.authAlgo = IX_CRYPTO_ACC_AUTH_SHA1;
		
	    /* The digest length, in bytes */
	    cryptoAccCtx.authCtx.authDigestLen = AHHMAC_HASHLEN;
		
	    /* The authentication key length */
	    if (AHSHA196_KLEN == (DIVUP(ips->ips_key_bits_a, BITS))) 
	    {
		cryptoAccCtx.authCtx.authKeyLen = IX_CRYPTO_ACC_SHA1_KEY_160;
	    }
	    else
	    {
		status = STATUS_FAIL;
		KLIPS_PRINT(debug_xform,
			    "klips_error:glue_encapalgo: "
			    "Invalid SHA1 length!\n");
		break;
	    }
		
	    /* The authentication key, SHA1 */
	    memcpy(cryptoAccCtx.authCtx.key.authKey, (UINT8 *)(ips->ips_key_a), 
		    IX_CRYPTO_ACC_SHA1_KEY_160);
		
	    break;
#endif /* CONFIG_IPSEC_AUTH_HMAC_SHA1 */

	case AH_NONE:
	    break;
	
	default:
	    /* Authentication algo. not supported */
	    status = STATUS_FAIL;
	    KLIPS_PRINT(debug_xform,
			"klips_error:glue_authalgo: "
			"Authen. Algorithm not supported!\n");
	    return status;
    }
    return status;
}
Example #20
0
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {

  CUresult cudastatus = CUDA_SUCCESS;

  if (nrhs != 3)
    mexErrMsgTxt("Wrong number of arguments");

  if (init == 0) {
    // Initialize function
    //mexLock();

    // load GPUmat
    gm = gmGetGPUmat();

    // load module
    CUmodule *drvmod = gmGetModule("subsample");

    //load appropriate GPU kernel (mangled name)
    CUresult status;

    status = cuModuleGetFunction(&subsample_noreduc_2T, *drvmod, "_Z18kSubsample_noreducILi2ELb1EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_2F, *drvmod, "_Z18kSubsample_noreducILi2ELb0EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_3T, *drvmod, "_Z18kSubsample_noreducILi3ELb1EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_3F, *drvmod, "_Z18kSubsample_noreducILi3ELb0EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_4T, *drvmod, "_Z18kSubsample_noreducILi4ELb1EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_4F, *drvmod, "_Z18kSubsample_noreducILi4ELb0EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_5T, *drvmod, "_Z18kSubsample_noreducILi5ELb1EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_5F, *drvmod, "_Z18kSubsample_noreducILi5ELb0EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_6T, *drvmod, "_Z18kSubsample_noreducILi6ELb1EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_6F, *drvmod, "_Z18kSubsample_noreducILi6ELb0EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_7T, *drvmod, "_Z18kSubsample_noreducILi7ELb1EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_7F, *drvmod, "_Z18kSubsample_noreducILi7ELb0EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_8T, *drvmod, "_Z18kSubsample_noreducILi8ELb1EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_8F, *drvmod, "_Z18kSubsample_noreducILi8ELb0EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_9T, *drvmod, "_Z18kSubsample_noreducILi9ELb1EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_9F, *drvmod, "_Z18kSubsample_noreducILi9ELb0EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_10T, *drvmod, "_Z18kSubsample_noreducILi10ELb1EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_10F, *drvmod, "_Z18kSubsample_noreducILi10ELb0EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_11T, *drvmod, "_Z18kSubsample_noreducILi11ELb1EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_11F, *drvmod, "_Z18kSubsample_noreducILi11ELb0EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_12T, *drvmod, "_Z18kSubsample_noreducILi12ELb1EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_12F, *drvmod, "_Z18kSubsample_noreducILi12ELb0EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_13T, *drvmod, "_Z18kSubsample_noreducILi13ELb1EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_13F, *drvmod, "_Z18kSubsample_noreducILi13ELb0EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_14T, *drvmod, "_Z18kSubsample_noreducILi14ELb1EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_14F, *drvmod, "_Z18kSubsample_noreducILi14ELb0EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_15T, *drvmod, "_Z18kSubsample_noreducILi15ELb1EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_15F, *drvmod, "_Z18kSubsample_noreducILi15ELb0EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_16T, *drvmod, "_Z18kSubsample_noreducILi16ELb1EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }
    status = cuModuleGetFunction(&subsample_noreduc_16F, *drvmod, "_Z18kSubsample_noreducILi16ELb0EEvPfS0_iii");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }

    init = 1;
  }

  // mex parameters are:

  // 1. IN1
  // 2. OUT
  // 3. subsampling factor

  bool avoidBankConflicts = true; //hard-coded

  //IN1 is the input GPU array
  GPUtype IN1 = gm->gputype.getGPUtype(prhs[0]);

  //OUT is the output GPU array (result)
  GPUtype OUT = gm->gputype.getGPUtype(prhs[1]);

  //last parameter is the filterSize (int)
  int factor = (int) mxGetScalar(prhs[2]);

  // number of elements
  int nin1 = gm->gputype.getNumel(IN1);
  int nout = gm->gputype.getNumel(OUT);

  //dimensions 
  const int * sin1 = gm->gputype.getSize(IN1);
  const int * sout = gm->gputype.getSize(OUT);

  int imgPixels = sin1[0];
  if ( floor(sqrt(float(imgPixels))) != sqrt(float(imgPixels)) )
    mexErrMsgTxt("Images not square");
  int imgSize = int(sqrt(imgPixels));

  if (imgSize <= factor)
    mexErrMsgTxt("imgSize must be > factor");
  if (imgSize % factor !=0)
    mexErrMsgTxt("imgSize must be evenly divisible by factor");
  if (factor > 16)
    mexErrMsgTxt("factor > 16");
  if (factor < 2)
    mexErrMsgTxt("factor < 2");
  if (imgSize > 512)
    mexErrMsgTxt("max imgSize: 512");

  int numRegions = nin1 / (factor*factor);
  int numRegionsY = (imgSize / factor) * sin1[1];
  if (nout != numRegions)
    mexErrMsgTxt("Target dimensions not consistent");


  int regionsXPerBlock = imgSize / factor;
  int numThreadsX = imgSize;
  int SHMEM_MAX = 8192; // don't use more than this much shmem
  int regionsYPerBlock = MIN(512 / numThreadsX, SHMEM_MAX / (4*imgSize)); // to avoid running out of shmem
//    regionsYPerBlock--;
  int regionsPerBlock = regionsYPerBlock * regionsXPerBlock;

  // this will avoid all bank conflicts but may (?) use up too much shmem
  int shmemPadX = avoidBankConflicts * (DIVUP(16,factor) + (regionsPerBlock % 16 == 0 ? 0 : 16 - regionsPerBlock % 16));
  //    shmemPadX = 0;
  int shmemY = factor, shmemX = regionsPerBlock + shmemPadX;
  int shmem = 4 * shmemX * shmemY;
  if (shmem == 0 || shmem > 16300) {
    // this really shouldn't happen and i've only put this here as a precautionary measure
    // to avoid getting mysteriously wrong results.
    mexErrMsgTxt("subsample: not enough shared memory!");
  }

  int numThreadsY = regionsYPerBlock;
  //    int blocks = numRegionsY / regionsYPerBlock;
  int blocksX = imgSize / factor, blocksY = DIVUP(sin1[1], regionsYPerBlock);
  if (blocksX >=65535 || blocksY >= 65535)
    mexErrMsgTxt("Exceeded max block size");

  //    assert(numRegionsY % regionsYPerBlock == 0);
  bool checkThreadBounds = numRegionsY % regionsYPerBlock != 0;
  //    printf("num regions y: %d, regions y per block: %d\n", numRegionsY, regionsYPerBlock);
  dim3 grid(blocksX, blocksY);
  dim3 threads(numThreadsX, numThreadsY);
  /*
  mexPrintf("grid: %ux%u, threads: %ux%u\n", grid.y, grid.x, threads.y, threads.x);
  mexPrintf("check bounds: %u\n", checkThreadBounds);
  mexPrintf("using %u bytes of shmem\n", shmem);
  */

  gpuTYPE_t tin1 = gm->gputype.getType(IN1);
  gpuTYPE_t tout = gm->gputype.getType(OUT);

  // check input/out size and type
  if (tin1!=tout)
    mexErrMsgTxt("Input and output arguments must be of the same type.");

  // I need the pointers to GPU memory
  CUdeviceptr d_IN1  = (CUdeviceptr) (UINTPTR gm->gputype.getGPUptr(IN1));
  CUdeviceptr d_OUT = (CUdeviceptr) (UINTPTR gm->gputype.getGPUptr(OUT));

  // // The GPU kernel depends on the type of input/output
  // CUfunction drvfun;
  // if (tin1 == gpuFLOAT) {
  //   drvfun = drvfunf;
  // } else 
  //   mexErrMsgTxt("Currently only single types supported.");

  hostdrv_pars_t gpuprhs[2];
  int gpunrhs = 2;
  gpuprhs[0] = hostdrv_pars(&d_IN1,sizeof(d_IN1));
  gpuprhs[1] = hostdrv_pars(&d_OUT,sizeof(d_OUT));

  //int N = nin1;

  if (factor == 2) {
    if (checkThreadBounds) {
      hostDriver(subsample_noreduc_2T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    } else {
      hostDriver(subsample_noreduc_2F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    }
  } else if (factor == 3) {
    if (checkThreadBounds) {
      hostDriver(subsample_noreduc_3T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    } else {
      hostDriver(subsample_noreduc_3F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    }
  } else if (factor == 4) {
    if (checkThreadBounds) {
      hostDriver(subsample_noreduc_4T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    } else {
      hostDriver(subsample_noreduc_4F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    }
  } else if (factor == 5) {
    if (checkThreadBounds) {
      hostDriver(subsample_noreduc_5T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    } else {
      hostDriver(subsample_noreduc_5F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    }
  } else if (factor == 6) {
    if (checkThreadBounds) {
      hostDriver(subsample_noreduc_6T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    } else {
      hostDriver(subsample_noreduc_6F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    }
  } else if (factor == 7) {
    if (checkThreadBounds) {
      hostDriver(subsample_noreduc_7T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    } else {
      hostDriver(subsample_noreduc_7F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    }
  } else if (factor == 8) {
    if (checkThreadBounds) {
      hostDriver(subsample_noreduc_8T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    } else {
      hostDriver(subsample_noreduc_8F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    }
  } else if (factor == 9) {
    if (checkThreadBounds) {
      hostDriver(subsample_noreduc_9T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    } else {
      hostDriver(subsample_noreduc_9F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    }
  } else if (factor == 10) {
    if (checkThreadBounds) {
      hostDriver(subsample_noreduc_10T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    } else {
      hostDriver(subsample_noreduc_10F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    }
  } else if (factor == 11) {
    if (checkThreadBounds) {
      hostDriver(subsample_noreduc_11T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    } else {
      hostDriver(subsample_noreduc_11F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    }
  } else if (factor == 12) {
    if (checkThreadBounds) {
      hostDriver(subsample_noreduc_12T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    } else {
      hostDriver(subsample_noreduc_12F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    }
  } else if (factor == 13) {
    if (checkThreadBounds) {
      hostDriver(subsample_noreduc_13T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    } else {
      hostDriver(subsample_noreduc_13F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    }
  } else if (factor == 14) {
    if (checkThreadBounds) {
      hostDriver(subsample_noreduc_14T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    } else {
      hostDriver(subsample_noreduc_14F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    }
  } else if (factor == 15) {
    if (checkThreadBounds) {
      hostDriver(subsample_noreduc_15T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    } else {
      hostDriver(subsample_noreduc_15F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    }
  } else if (factor == 16) {
    if (checkThreadBounds) {
      hostDriver(subsample_noreduc_16T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    } else {
      hostDriver(subsample_noreduc_16F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs);
    }
  }
    //cutilCheckMsg("kernel execution failed");
    
    //    if(factor == 4) {
    ////        kSubsample_reduc<4><<<grid, threads,4*numThreadsX*numThreadsY>>>(images->getDevData(), targets->getDevData(), imgSize, numRegionsY);
    //    }

}
Example #21
0
FileSystemCode_t
FsReadFromFile(
    _In_  FileSystemDescriptor_t* FileSystem,
    _In_  MfsEntryHandle_t*       Handle,
    _In_  DmaBuffer_t*            BufferObject,
    _In_  size_t                  Length,
    _Out_ size_t*                 BytesAt,
    _Out_ size_t*                 BytesRead)
{
    MfsInstance_t*   Mfs             = (MfsInstance_t*)FileSystem->ExtensionData;
    MfsEntry_t*      Entry           = (MfsEntry_t*)Handle->Base.Entry;
    FileSystemCode_t Result          = FsOk;
    uintptr_t        DataPointer     = GetBufferDma(BufferObject);
    uint64_t         Position        = Handle->Base.Position;
    size_t           BucketSizeBytes = Mfs->SectorsPerBucket * FileSystem->Disk.Descriptor.SectorSize;
    size_t           BytesToRead     = Length;

    TRACE("FsReadFile(Id 0x%x, Position %u, Length %u)",
        Handle->Base.Id, LODWORD(Handle->Base.Position), Length);

    *BytesRead = 0;
    *BytesAt   = Handle->Base.Position % FileSystem->Disk.Descriptor.SectorSize;

    // Sanitize the amount of bytes we want to read, cap it at bytes available
    if ((Position + BytesToRead) > Entry->Base.Descriptor.Size.QuadPart) {
        if (Position == Entry->Base.Descriptor.Size.QuadPart) {
            return FsOk;
        }
        BytesToRead = (size_t)(Entry->Base.Descriptor.Size.QuadPart - Position);
    }

    // Debug counter values
    TRACE(" > dma: 0x%x, fpos %u, bytes-total %u, bytes-at %u", DataPointer, 
        LODWORD(Position), BytesToRead, *BytesAt);

    // Read the current sector, update index to where data starts
    // Keep reading consecutive after that untill all bytes requested have
    // been read

    // Read in a loop to make sure we read all requested bytes
    while (BytesToRead) {
        // Calculate which bucket, then the sector offset
        // Then calculate how many sectors of the bucket we need to read
        uint64_t Sector         = MFS_GETSECTOR(Mfs, Handle->DataBucketPosition);        // Start-sector of current bucket
        uint64_t SectorOffset   = Position % FileSystem->Disk.Descriptor.SectorSize;        // Byte-offset into the current sector
        size_t SectorIndex      = (size_t)((Position - Handle->BucketByteBoundary) / FileSystem->Disk.Descriptor.SectorSize); // The sector-index into the current bucket
        size_t SectorsLeft      = MFS_GETSECTOR(Mfs, Handle->DataBucketLength) - SectorIndex; // How many sectors are left in this bucket
        size_t SectorCount;
        size_t SectorsFitInBuffer;
        size_t ByteCount;
        
        // Calculate the sector index into bucket
        Sector += SectorIndex;

        // Calculate how many sectors we should read in
        SectorCount         = DIVUP(BytesToRead, FileSystem->Disk.Descriptor.SectorSize);
        SectorsFitInBuffer  = (GetBufferSize(BufferObject) - *BytesRead) / FileSystem->Disk.Descriptor.SectorSize;
        if (SectorOffset != 0 && (SectorOffset + BytesToRead > FileSystem->Disk.Descriptor.SectorSize)) {
            SectorCount++; // Take into account the extra sector we have to read
        }

        // Adjust for bucket boundary, and adjust again for buffer size
        SectorCount = MIN(SectorCount, SectorsLeft);
        SectorCount = MIN(SectorCount, SectorsFitInBuffer);
        if (SectorCount == 0) {
            break;
        }

        // Adjust for number of bytes already consumed in the active sector
        ByteCount = MIN(BytesToRead, (SectorCount * FileSystem->Disk.Descriptor.SectorSize) - SectorOffset);

        // Ex pos 490 - length 50
        // SectorIndex = 0, SectorOffset = 490, SectorCount = 2 - ByteCount = 50 (Capacity 4096)
        // Ex pos 1109 - length 450
        // SectorIndex = 2, SectorOffset = 85, SectorCount = 2 - ByteCount = 450 (Capacity 4096)
        // Ex pos 490 - length 4000
        // SectorIndex = 0, SectorOffset = 490, SectorCount = 8 - ByteCount = 3606 (Capacity 4096)
        TRACE(" > sector %u (b-start %u, b-index %u), num-sectors %u, sector-byte-offset %u, bytecount %u",
            LODWORD(Sector), LODWORD(Sector) - SectorIndex, SectorIndex, SectorCount, LODWORD(SectorOffset), ByteCount);
        if ((GetBufferSize(BufferObject) - *BytesRead) < (SectorCount * FileSystem->Disk.Descriptor.SectorSize)) {
            WARNING(" > not enough room in buffer for transfer");
            break;
        }

        // Perform the read (Raw - as we need to pass the datapointer)
        if (StorageRead(FileSystem->Disk.Driver, FileSystem->Disk.Device, 
            FileSystem->SectorStart + Sector, DataPointer, SectorCount, &SectorCount) != OsSuccess) {
            ERROR("Failed to read sector");
            Result = FsDiskError;
            break;
        }

        // Increase the pointers and decrease with bytes read, take into account
        // we might not have been able to read all data in one go
        if ((FileSystem->Disk.Descriptor.SectorSize * SectorCount) < ByteCount) {
            ByteCount = FileSystem->Disk.Descriptor.SectorSize * SectorCount;
        }
        DataPointer += FileSystem->Disk.Descriptor.SectorSize * SectorCount;
        *BytesRead  += ByteCount;
        Position    += ByteCount;
        BytesToRead -= ByteCount;

        // Do we need to switch bucket?
        // We do if the position we have read to equals end of bucket
        if (Position == (Handle->BucketByteBoundary + (Handle->DataBucketLength * BucketSizeBytes))) {
            Result = MfsSwitchToNextBucketLink(FileSystem, Handle, BucketSizeBytes);
            if (Result == FsPathNotFound || Result != FsOk) {
                if (Result == FsPathNotFound) {
                    Result = FsOk;
                }
                break;
            }
        }
    }

    // if (update_when_accessed) @todo
    // entry->accessed = now
    // entry->action_on_close = update

    TRACE(" > bytes read %u/%u", *BytesRead, Length);
    return Result;
}
Example #22
0
BOOL burnBootloader(const TCHAR *path) {
   unsigned char *buf = (unsigned char *)0x80000000;
   unsigned int rdlen;
   FIL file;
   FRESULT fret;
   int ret;
   if (!strendwith(path, ".MBT")) {
      return FALSE;
   }
   fret = f_open(&file, path, FA_READ);
   if (fret != FR_OK) {
      return FALSE;
   }
   if (file.fsize > 109 * 1024) { //109KB
      goto ERROR;
   }
  
   memset(buf, 0, 512);
   memcpy(buf, emmcheader, sizeof emmcheader);
   *(unsigned int *)(buf + 512) = file.fsize;
   *(unsigned int *)(buf + 512 + 4) = BOOTLOADER_ENTRY;
   fret = f_read(&file, buf + 512 + 8, file.fsize, &rdlen);
   if (fret != FR_OK) {
      goto ERROR;
   }
   if (rdlen != file.fsize) {
      goto ERROR;
   }
   //memset(buf,0,file.fsize+8+512);
   ret = MMCSDP_Write(mmcsdctr, buf, BOOTLOADER_BEGIN_SECTOR, DIVUP(file.fsize + 8 + 512,512));
   if (FALSE == ret) {
      goto ERROR;
   }
   
   long long flashid;
   spiFlashReadId(&flashid);
   if ((flashid!=0)&&(flashid!=-1L)){
      unsigned char flashstatus = spiFlashReadStatus();
      if(!(flashstatus & 0x01)){
         spiFlashSwitch256PageSize();
         flashstatus = spiFlashReadStatus();
         if(!(flashstatus & 0x01)){
           goto ERROR;
         }
      }
   }
   unsigned int flashwcont = DIVUP(file.fsize+8,256);
   unsigned int byteswapcont = DIVUP(file.fsize+8,4);
   if ((flashid!=0)&&(flashid!=-1L)) {
      statBarPrint(0, "found dataflash chip ,burn to dataflash");
      for(int i=0;i<byteswapcont;i++){
         *(unsigned int *)(buf+512+4*i) = htonl(*(unsigned int *)(buf+512+4*i));
      }
      delay(500);
      unsigned char percent1 = 0,percent2 = 0;
      char checkbuf[256];
      for (int i=0;i<flashwcont;i++) {
         ret = spiFlashPageWrite(256*i,(void *)(buf + 512+256*i),256 );
         percent1 = (i+1)*100/flashwcont;
         if(percent1/5 != percent2/5){
            percent2 = percent1;
            char  printbuf[200]; 
            sprintf(printbuf,"dataflash write percent %d%%",percent2);
            statBarPrint(0, printbuf);
         }
         if (FALSE==ret) {
            statBarPrint(1, "data flash write  error");
            delay(1000);
            goto ERROR;
         }
         delay(45);
         spiFlashRead(256*i,checkbuf,256);
         if(memcmp(checkbuf,(void *)(buf + 512+256*i),256)!=0){
           statBarPrint(1, "data flash write check error");
           delay(500);
           goto ERROR;
         }
      } 
   } 
   f_close(&file);
   return TRUE;
ERROR:
   f_close(&file);
   return FALSE;
}