void Net::Classify(const mxArray *mx_data, mxArray *&mx_pred) { //mexPrintMsg("Start classification..."); ReadData(mx_data); size_t test_num = data_.size1(); labels_.resize(test_num, layers_.back()->length_); labels_.reorder(true, false); size_t numbatches = DIVUP(test_num, params_.batchsize_); size_t offset = 0; Mat data_batch, pred_batch; for (size_t batch = 0; batch < numbatches; ++batch) { size_t batchsize = MIN(test_num - offset, params_.batchsize_); data_batch.resize(batchsize, data_.size2()); SubSet(data_, data_batch, offset, true); InitActiv(data_batch); Forward(pred_batch, 0); SubSet(labels_, pred_batch, offset, false); offset += batchsize; if (params_.verbose_ == 2) { mexPrintInt("Batch", (int) batch + 1); } } labels_.reorder(kMatlabOrder, true); mx_pred = mexSetMatrix(labels_); //mexPrintMsg("Classification finished"); }
int main(int argc, char **argv) { int pf_sock = 0; ssize_t ret_size = 0; int error; struct sadb_msg msg_hdr; memset(&msg_hdr, 0, sizeof(msg_hdr)); msg_hdr.sadb_msg_version = PF_KEY_V2; msg_hdr.sadb_msg_type = SADB_FLUSH; msg_hdr.sadb_msg_errno = 0; msg_hdr.sadb_msg_satype = SADB_SATYPE_UNSPEC; msg_hdr.sadb_msg_len = DIVUP(sizeof(msg_hdr), IPSEC_PFKEYv2_ALIGN); msg_hdr.sadb_msg_seq = 0; msg_hdr.sadb_msg_pid = getpid(); pf_sock = socket(PF_KEY, SOCK_RAW, PF_KEY_V2); if (pf_sock < 0) { perror("pf_sock"); exit(1); } errno = 0; error = write(pf_sock, (char*)&msg_hdr, sizeof(msg_hdr)); if(error<0){ fprintf(stderr, "pfkey_send_flush: send error with %s\n", strerror(errno)); exit(1); } fprintf(stderr, "succeeded\n"); return 0; }
/*-------------------------------------------------------------------------*/ void AzPrepText2::gen_X_bow(const AzIntArr &ia_tokno, int dic_sz, int pch_sz, int pch_step, int padding, bool do_skip_stopunk, /*--- output ---*/ AzSmat *m_feat, AzIntArr *ia_pos) const /* patch position: may be NULL */ { const char *eyec = "AzPrepText2::gen_X_bow"; AzX::throw_if_null(m_feat, eyec, "m_feat"); int t_num; const int *tokno = ia_tokno.point(&t_num); int pch_num = DIVUP(t_num+padding*2-pch_sz, pch_step) + 1; m_feat->reform(dic_sz, pch_num); if (ia_pos != NULL) ia_pos->reset(); int col = 0; int tx0 = -padding; for (int pch_no = 0; pch_no < pch_num; ++pch_no) { int tx1 = tx0 + pch_sz; AzIntArr ia_rows; for (int tx = MAX(0, tx0); tx < MIN(t_num, tx1); ++tx) { if (tokno[tx] >= 0) ia_rows.put(tokno[tx]); } if (!do_skip_stopunk || ia_rows.size() > 0) { ia_rows.unique(); /* sorting too */ m_feat->col_u(col)->load(&ia_rows, 1); if (ia_pos != NULL) ia_pos->put(tx0); ++col; } if (tx1 >= t_num+padding) break; int dist = 1; if (do_skip_stopunk) { /*--- to avoid repeating the same bow ---*/ int tx; for (tx = tx0; tx < t_num; ++tx) if (tx >= 0 && tokno[tx] >= 0) break; int dist0 = tx-tx0+1; /* to lose a word, we have to slide a window this much */ tx = tx1; for (tx = tx1; tx < t_num; ++tx) if (tx >= 0 && tokno[tx] >= 0) break; int dist1 = tx-tx1+1; /* to get a new word, we have to slide a window this much */ dist = MIN(dist0, dist1); } tx0 += MAX(dist, pch_step); } m_feat->resize(col); }
/*-------------------------------------------------------------------------*/ void AzPrepText2::gen_Y_ifeat(int top_num_each, int top_num_total, const AzSmat *m_feat, const AzIntArr &ia_tokno, const AzIntArr &ia_pos, int xpch_sz, int min_dist, int max_dist, bool do_nolr, int f_pch_sz, int f_pch_step, int f_padding, AzSmat *m_y, feat_info fi[2]) const { const char *eyec = "AzPrepText2::gen_neigh_topfeat"; AzX::throw_if_null(m_feat, eyec, "m_feat"); AzX::throw_if_null(m_y, eyec, "m_y"); int t_num; const int *tokno = ia_tokno.point(&t_num); int feat_sz = m_feat->rowNum(); int f_pch_num = DIVUP(t_num+f_padding*2-f_pch_sz, f_pch_step) + 1; if (m_feat->colNum() != f_pch_num) { AzBytArr s("#patch mismatch: Expcected: "); s << f_pch_num << " Actual: " << m_feat->colNum(); AzX::throw_if(true, AzInputError, eyec, s.c_str()); } if (do_nolr) m_y->reform(feat_sz, ia_pos.size()); else m_y->reform(feat_sz*2, ia_pos.size()); for (int ix = 0; ix < ia_pos.size(); ++ix) { int xtx0 = ia_pos[ix]; int xtx1 = xtx0 + xpch_sz; AzIFarr ifa_ctx; int offs = 0; for (int tx = xtx0+min_dist; tx < xtx0; ++tx) { if (tx + f_pch_sz > xtx0) break; set_ifeat(m_feat, top_num_each, (tx+f_padding)/f_pch_step, offs, &ifa_ctx, fi); } if (!do_nolr) offs = feat_sz; for (int tx = xtx1; tx < xtx1+max_dist; ++tx) { if (tx + f_pch_sz > xtx1+max_dist) break; set_ifeat(m_feat, top_num_each, (tx+f_padding)/f_pch_step, offs, &ifa_ctx, fi); } ifa_ctx.squeeze_Max(); if (top_num_total > 0 && ifa_ctx.size() > top_num_total) { ifa_ctx.sort_Float(false); ifa_ctx.cut(top_num_total); } m_y->col_u(ix)->load(&ifa_ctx); } }
__global__ void kAggShortRows2(const float* mat, float* matSum, const uint width, const uint height, Agg agg, UnaryOp uop, BinaryOp bop) { const uint shmemX = AGG_SHORT_ROWS_THREADS_X + 1; __shared__ float shmem[AGG_SHORT_ROWS_THREADS_Y*shmemX]; const uint LOOPS_X = DIVUP(width, AGG_SHORT_ROWS_THREADS_X); const uint tidx = hipThreadIdx_y * AGG_SHORT_ROWS_THREADS_X + hipThreadIdx_x; const uint bidx = hipBlockIdx_y * hipGridDim_x + hipBlockIdx_x; const uint blockRowIdx = bidx * AGG_SHORT_ROWS_LOOPS_Y * AGG_SHORT_ROWS_THREADS_Y; float* shmemWrite = shmem + MUL24(hipThreadIdx_y, shmemX) + hipThreadIdx_x; matSum += blockRowIdx + tidx; // shmem[MUL24(hipThreadIdx_y, shmemX) + hipThreadIdx_x] = 0; mat += width * blockRowIdx + MUL24(hipThreadIdx_y, width) + hipThreadIdx_x; bool doAgg = tidx < AGG_SHORT_ROWS_THREADS_Y; if(blockRowIdx < height) { for (uint y = 0; y < AGG_SHORT_ROWS_LOOPS_Y*AGG_SHORT_ROWS_THREADS_Y; y += AGG_SHORT_ROWS_THREADS_Y) { doAgg &= tidx + y + blockRowIdx < height; const bool heightIdxOK = hipThreadIdx_y + y + blockRowIdx < height; float accum = agg.getBaseValue(); shmemWrite[0] = agg.getBaseValue(); for(uint x = 0; x < LOOPS_X * AGG_SHORT_ROWS_THREADS_X; x+= AGG_SHORT_ROWS_THREADS_X) { // __syncthreads(); if (heightIdxOK && x + hipThreadIdx_x < width) { shmemWrite[0] = agg(uop(mat[x]), shmemWrite[0]); } } __syncthreads(); if (doAgg) { float* shmemRead = shmem + MUL24(tidx, shmemX); #pragma unroll for (uint i = 0; i < AGG_SHORT_ROWS_THREADS_X; i++) { accum = agg(accum, shmemRead[0]); shmemRead++; } matSum[0] = bop(matSum[0], accum); matSum += AGG_SHORT_ROWS_THREADS_Y; } __syncthreads(); mat += width * AGG_SHORT_ROWS_THREADS_Y; } } }
void Net::Train(const mxArray *mx_data, const mxArray *mx_labels) { //mexPrintMsg("Start training..."); ReadData(mx_data); ReadLabels(mx_labels); InitNorm(); size_t train_num = data_.size1(); size_t numbatches = DIVUP(train_num, params_.batchsize_); trainerrors_.resize(params_.epochs_, 2); trainerrors_.assign(0); for (size_t epoch = 0; epoch < params_.epochs_; ++epoch) { if (params_.shuffle_) { Shuffle(data_, labels_); } StartTimer(); size_t offset = 0; Mat data_batch, labels_batch, pred_batch; for (size_t batch = 0; batch < numbatches; ++batch) { size_t batchsize = MIN(train_num - offset, params_.batchsize_); UpdateWeights(epoch, false); data_batch.resize(batchsize, data_.size2()); labels_batch.resize(batchsize, labels_.size2()); SubSet(data_, data_batch, offset, true); SubSet(labels_, labels_batch, offset, true); ftype error1; InitActiv(data_batch); Forward(pred_batch, 1); InitDeriv(labels_batch, error1); trainerrors_(epoch, 0) += error1; Backward(); UpdateWeights(epoch, true); offset += batchsize; if (params_.verbose_ == 2) { mexPrintInt("Epoch", (int) epoch + 1); mexPrintInt("Batch", (int) batch + 1); } } // batch MeasureTime("totaltime"); if (params_.verbose_ == 1) { mexPrintInt("Epoch", (int) epoch + 1); } } // epoch trainerrors_ /= (ftype) numbatches; //mexPrintMsg("Training finished"); }
/*-------------------------------------------------------------------------*/ void AzPrepText2::gen_X_seq(const AzIntArr &ia_tokno, int dic_sz, int pch_sz, int pch_step, int padding, bool do_allow_zero, bool do_skip_stopunk, /*--- output ---*/ AzSmat *m_feat, AzIntArr *ia_pos) const /* patch position: may be NULL */ { const char *eyec = "AzPrepText2::gen_X_seq"; AzX::throw_if_null(m_feat, eyec, "m_feat"); AzX::no_support(do_skip_stopunk, eyec, "variable strides with Seq"); int t_num; const int *tokno = ia_tokno.point(&t_num); int pch_num = DIVUP(t_num+padding*2-pch_sz, pch_step) + 1; m_feat->reform(dic_sz*pch_sz, pch_num); if (ia_pos != NULL) ia_pos->reset(); int col = 0; int tx0 = -padding; for (int pch_no = 0; pch_no < pch_num; ++pch_no) { int tx1 = tx0 + pch_sz; AzSmat m; for (int tx = tx0; tx < tx1; ++tx) { AzSmat m0(dic_sz, 1); if (tx >= 0 && tx < t_num && tokno[tx] >= 0) { AzIntArr ia_row; ia_row.put(tokno[tx]); m0.col_u(0)->load(&ia_row, 1); } if (tx == tx0) m.set(&m0); else m.rbind(&m0); } if (do_allow_zero || !m.isZero()) { m_feat->col_u(col)->set(m.col(0)); if (ia_pos != NULL) ia_pos->put(tx0); ++col; } if (tx1 >= t_num+padding) break; tx0 += pch_step; } m_feat->resize(col); }
void MStringResize(MString_t *String, size_t Length) { /* Calculate the new byte-count we * need to encompass with blocks */ size_t DataLength = DIVUP(Length, MSTRING_BLOCK_SIZE) * MSTRING_BLOCK_SIZE; /* Expand and reset buffer */ void *Data = dsalloc(DataLength); memset(Data, 0, DataLength); /* Copy old data over */ memcpy(Data, String->Data, String->Length); /* Free the old buffer */ dsfree(String->Data); /* Update string to new buffer */ String->MaxLength = DataLength; String->Data = Data; }
__global__ void findMinMedian( float* minMedian, unsigned* minIdx, CParam<float> median, CParam<unsigned> idx) { const int tid = threadIdx.x; __shared__ float s_minMedian[256]; __shared__ unsigned s_minIdx[256]; s_minMedian[tid] = FLT_MAX; s_minIdx[tid] = 0; __syncthreads(); const int loop = DIVUP(median.dims[0], blockDim.x); for (int i = 0; i < loop; i++) { int j = i * blockDim.x + tid; if (j < median.dims[0] && median.ptr[j] < s_minMedian[tid]) { s_minMedian[tid] = median.ptr[j]; s_minIdx[tid] = idx.ptr[j]; } __syncthreads(); } for (unsigned t = 128; t > 0; t >>= 1) { if (tid < t) { if (s_minMedian[tid + t] < s_minMedian[tid]) { s_minMedian[tid] = s_minMedian[tid + t]; s_minIdx[tid] = s_minIdx[tid + t]; } } __syncthreads(); } *minMedian = s_minMedian[0]; *minIdx = s_minIdx[0]; }
void reset(const AzOut &out, const AzxD *input, AzxD *output) { if (p.pl_num > 0) { AzX::throw_if((input->get_dim() != 1), "AzpPoolingDflt::reset", "num_pooling is allowed only with 1D data"); p.pl_sz = p.pl_step = DIVUP(input->sz(0), p.pl_num); AzBytArr s("Given num_pooling="); s << p.pl_num; s << ", setting pooling size and stride to " << p.pl_sz; AzPrint::writeln(out, s); } int minsz = input->get_min_size(); if (p.pl_sz > minsz) { p.pl_sz = minsz; AzBytArr s("pooling unit size is too large: shrinking to "); s.cn(p.pl_sz); AzPrint::writeln(out, s); } int padding = 0; map.reset_for_pooling(input, p.pl_sz, p.pl_step, padding, p.do_pl_simple_grid, &pia2_out2inp, &pia2_inp2out, &pia_out2num); innum = input->size(); if (p.ptyp == AzpPoolingDflt_None) { AzX::throw_if((!map.no_change_in_shape()), "AzpPoolingDflt::reset", "input and output must be the same for no pooling"); } map.output_region(output); }
int ipsec_sa_init(struct ipsec_sa *ipsp) { int error = 0; char sa[SATOT_BUF]; size_t sa_len; #ifdef CONFIG_KLIPS_DEBUG char ipaddr_txt[ADDRTOA_BUF]; char ipaddr2_txt[ADDRTOA_BUF]; #endif #if defined (CONFIG_KLIPS_AUTH_HMAC_MD5) || \ defined (CONFIG_KLIPS_AUTH_HMAC_SHA1) unsigned char kb[AHMD596_BLKLEN]; int i; #endif if (ipsp == NULL) { KLIPS_PRINT(debug_pfkey, "ipsec_sa_init: " "ipsp is NULL, fatal\n"); SENDERR(EINVAL); } sa_len = KLIPS_SATOT(debug_pfkey, &ipsp->ips_said, 0, sa, sizeof(sa)); KLIPS_PRINT(debug_pfkey, "ipsec_sa_init: " "(pfkey defined) called for SA:%s\n", sa_len ? sa : " (error)"); KLIPS_PRINT(debug_pfkey, "ipsec_sa_init: " "calling init routine of %s%s%s\n", IPS_XFORM_NAME(ipsp)); switch (ipsp->ips_said.proto) { #ifdef CONFIG_KLIPS_IPIP case IPPROTO_IPIP: { ipsp->ips_xformfuncs = ipip_xform_funcs; #ifdef CONFIG_KLIPS_DEBUG sin_addrtot(ipsp->ips_addr_s, 0, ipaddr_txt, sizeof(ipaddr_txt)); sin_addrtot(ipsp->ips_addr_d, 0, ipaddr2_txt, sizeof(ipaddr2_txt)); KLIPS_PRINT(debug_pfkey, "ipsec_sa_init: " "(pfkey defined) IPIP ipsec_sa set for %s->%s.\n", ipaddr_txt, ipaddr2_txt); #endif } break; #endif /* !CONFIG_KLIPS_IPIP */ #ifdef CONFIG_KLIPS_AH case IPPROTO_AH: ipsp->ips_xformfuncs = ah_xform_funcs; #ifdef CONFIG_KLIPS_OCF if (ipsec_ocf_sa_init(ipsp, ipsp->ips_authalg, 0)) break; #endif #ifdef CONFIG_KLIPS_ALG error = ipsec_alg_auth_key_create(ipsp); if ((error < 0) && (error != -EPROTO)) SENDERR(-error); if (error == -EPROTO) { /* perform manual key generation, ignore this particular error */ error = 0; #endif /* CONFIG_KLIPS_ALG */ switch (ipsp->ips_authalg) { # ifdef CONFIG_KLIPS_AUTH_HMAC_MD5 case AH_MD5: { unsigned char *akp; unsigned int aks; MD5_CTX *ictx; MD5_CTX *octx; if (ipsp->ips_key_bits_a != (AHMD596_KLEN * 8)) { KLIPS_PRINT(debug_pfkey, "ipsec_sa_init: " "incorrect key size: %d bits -- must be %d bits\n" /*octets (bytes)\n"*/, ipsp->ips_key_bits_a, AHMD596_KLEN * 8); SENDERR(EINVAL); } # if KLIPS_DIVULGE_HMAC_KEY KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose, "ipsec_sa_init: " "hmac md5-96 key is 0x%08x %08x %08x %08x\n", ntohl(*(((__u32 *)ipsp->ips_key_a) + 0)), ntohl(*(((__u32 *)ipsp->ips_key_a) + 1)), ntohl(*(((__u32 *)ipsp->ips_key_a) + 2)), ntohl(*(((__u32 *)ipsp->ips_key_a) + 3))); # endif /* KLIPS_DIVULGE_HMAC_KEY */ ipsp->ips_auth_bits = AHMD596_ALEN * 8; /* save the pointer to the key material */ akp = ipsp->ips_key_a; aks = ipsp->ips_key_a_size; KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose, "ipsec_sa_init: " "allocating %lu bytes for md5_ctx.\n", (unsigned long) sizeof(struct md5_ctx)); if ((ipsp->ips_key_a = (caddr_t) kmalloc(sizeof(struct md5_ctx), GFP_ATOMIC)) == NULL) { ipsp->ips_key_a = akp; SENDERR(ENOMEM); } ipsp->ips_key_a_size = sizeof(struct md5_ctx); for (i = 0; i < DIVUP(ipsp->ips_key_bits_a, 8); i++) kb[i] = akp[i] ^ HMAC_IPAD; for (; i < AHMD596_BLKLEN; i++) kb[i] = HMAC_IPAD; ictx = &(((struct md5_ctx*)(ipsp->ips_key_a))->ictx); osMD5Init(ictx); osMD5Update(ictx, kb, AHMD596_BLKLEN); for (i = 0; i < AHMD596_BLKLEN; i++) kb[i] ^= (HMAC_IPAD ^ HMAC_OPAD); octx = &(((struct md5_ctx*)(ipsp->ips_key_a))->octx); osMD5Init(octx); osMD5Update(octx, kb, AHMD596_BLKLEN); # if KLIPS_DIVULGE_HMAC_KEY KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose, "ipsec_sa_init: " "MD5 ictx=0x%08x %08x %08x %08x octx=0x%08x %08x %08x %08x\n", ((__u32*)ictx)[0], ((__u32*)ictx)[1], ((__u32*)ictx)[2], ((__u32*)ictx)[3], ((__u32*)octx)[0], ((__u32*)octx)[1], ((__u32*)octx)[2], ((__u32*)octx)[3] ); # endif /* KLIPS_DIVULGE_HMAC_KEY */ /* zero key buffer -- paranoid */ memset(akp, 0, aks); kfree(akp); } break; # endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */ # ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1 case AH_SHA: { unsigned char *akp; unsigned int aks; SHA1_CTX *ictx; SHA1_CTX *octx; if (ipsp->ips_key_bits_a != (AHSHA196_KLEN * 8)) { KLIPS_PRINT(debug_pfkey, "ipsec_sa_init: " "incorrect key size: %d bits -- must be %d bits\n" /*octets (bytes)\n"*/, ipsp->ips_key_bits_a, AHSHA196_KLEN * 8); SENDERR(EINVAL); } # if KLIPS_DIVULGE_HMAC_KEY KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose, "ipsec_sa_init: " "hmac sha1-96 key is 0x%08x %08x %08x %08x\n", ntohl(*(((__u32 *)ipsp->ips_key_a) + 0)), ntohl(*(((__u32 *)ipsp->ips_key_a) + 1)), ntohl(*(((__u32 *)ipsp->ips_key_a) + 2)), ntohl(*(((__u32 *)ipsp->ips_key_a) + 3))); # endif /* KLIPS_DIVULGE_HMAC_KEY */ ipsp->ips_auth_bits = AHSHA196_ALEN * 8; /* save the pointer to the key material */ akp = ipsp->ips_key_a; aks = ipsp->ips_key_a_size; KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose, "ipsec_sa_init: " "allocating %lu bytes for sha1_ctx.\n", (unsigned long) sizeof(struct sha1_ctx)); if ((ipsp->ips_key_a = (caddr_t) kmalloc(sizeof(struct sha1_ctx), GFP_ATOMIC)) == NULL) { ipsp->ips_key_a = akp; SENDERR(ENOMEM); } ipsp->ips_key_a_size = sizeof(struct sha1_ctx); for (i = 0; i < DIVUP(ipsp->ips_key_bits_a, 8); i++) kb[i] = akp[i] ^ HMAC_IPAD; for (; i < AHMD596_BLKLEN; i++) kb[i] = HMAC_IPAD; ictx = &(((struct sha1_ctx*)(ipsp->ips_key_a))->ictx); SHA1Init(ictx); SHA1Update(ictx, kb, AHSHA196_BLKLEN); for (i = 0; i < AHSHA196_BLKLEN; i++) kb[i] ^= (HMAC_IPAD ^ HMAC_OPAD); octx = &(((struct sha1_ctx*)(ipsp->ips_key_a))->octx); SHA1Init(octx); SHA1Update(octx, kb, AHSHA196_BLKLEN); # if KLIPS_DIVULGE_HMAC_KEY KLIPS_PRINT(debug_pfkey && sysctl_ipsec_debug_verbose, "ipsec_sa_init: " "SHA1 ictx=0x%08x %08x %08x %08x octx=0x%08x %08x %08x %08x\n", ((__u32*)ictx)[0], ((__u32*)ictx)[1], ((__u32*)ictx)[2], ((__u32*)ictx)[3], ((__u32*)octx)[0], ((__u32*)octx)[1], ((__u32*)octx)[2], ((__u32*)octx)[3] ); # endif /* KLIPS_DIVULGE_HMAC_KEY */ /* zero key buffer -- paranoid */ memset(akp, 0, aks); kfree(akp); } break; # endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */ default: KLIPS_PRINT(debug_pfkey, "ipsec_sa_init: " "authalg=%d support not available in the kernel", ipsp->ips_authalg); SENDERR(EINVAL); } #ifdef CONFIG_KLIPS_ALG /* closure of the -EPROTO condition above */ } #endif break; #endif /* CONFIG_KLIPS_AH */ #ifdef CONFIG_KLIPS_ESP case IPPROTO_ESP: ipsp->ips_xformfuncs = esp_xform_funcs; { #ifdef CONFIG_KLIPS_OCF if (ipsec_ocf_sa_init(ipsp, ipsp->ips_authalg, ipsp->ips_encalg)) break; #endif #ifdef CONFIG_KLIPS_ALG error = ipsec_alg_enc_key_create(ipsp); if (error < 0) SENDERR(-error); error = ipsec_alg_auth_key_create(ipsp); if ((error < 0) && (error != -EPROTO)) SENDERR(-error); if (error == -EPROTO) { /* perform manual key generation, ignore this particular error */ error = 0; #endif /* CONFIG_KLIPS_ALG */ switch (ipsp->ips_authalg) { #if defined (CONFIG_KLIPS_AUTH_HMAC_MD5) || \ defined (CONFIG_KLIPS_AUTH_HMAC_SHA1) unsigned char *akp; unsigned int aks; #endif # ifdef CONFIG_KLIPS_AUTH_HMAC_MD5 case AH_MD5: { MD5_CTX *ictx; MD5_CTX *octx; if (ipsp->ips_key_bits_a != (AHMD596_KLEN * 8)) { KLIPS_PRINT(debug_pfkey, "ipsec_sa_init: " "incorrect authorisation key size: %d bits -- must be %d bits\n" /*octets (bytes)\n"*/, ipsp->ips_key_bits_a, AHMD596_KLEN * 8); SENDERR(EINVAL); } # if KLIPS_DIVULGE_HMAC_KEY KLIPS_PRINT( debug_pfkey && sysctl_ipsec_debug_verbose, "ipsec_sa_init: " "hmac md5-96 key is 0x%08x %08x %08x %08x\n", ntohl(*(((__u32 *)(ipsp->ips_key_a)) + 0)), ntohl(*(((__u32 *)(ipsp->ips_key_a)) + 1)), ntohl(*(((__u32 *)(ipsp->ips_key_a)) + 2)), ntohl(*(((__u32 *)(ipsp->ips_key_a)) + 3))); # endif /* KLIPS_DIVULGE_HMAC_KEY */ ipsp->ips_auth_bits = AHMD596_ALEN * 8; /* save the pointer to the key material */ akp = ipsp->ips_key_a; aks = ipsp->ips_key_a_size; KLIPS_PRINT( debug_pfkey && sysctl_ipsec_debug_verbose, "ipsec_sa_init: " "allocating %lu bytes for md5_ctx.\n", (unsigned long) sizeof(struct md5_ctx)); if ((ipsp->ips_key_a = (caddr_t) kmalloc(sizeof(struct md5_ctx), GFP_ATOMIC)) == NULL) { ipsp->ips_key_a = akp; SENDERR(ENOMEM); } ipsp->ips_key_a_size = sizeof(struct md5_ctx); for (i = 0; i < DIVUP(ipsp->ips_key_bits_a, 8); i++) kb[i] = akp[i] ^ HMAC_IPAD; for (; i < AHMD596_BLKLEN; i++) kb[i] = HMAC_IPAD; ictx = &(((struct md5_ctx*)(ipsp->ips_key_a)) ->ictx); osMD5Init(ictx); osMD5Update(ictx, kb, AHMD596_BLKLEN); for (i = 0; i < AHMD596_BLKLEN; i++) kb[i] ^= (HMAC_IPAD ^ HMAC_OPAD); octx = &(((struct md5_ctx*)(ipsp->ips_key_a)) ->octx); osMD5Init(octx); osMD5Update(octx, kb, AHMD596_BLKLEN); # if KLIPS_DIVULGE_HMAC_KEY KLIPS_PRINT( debug_pfkey && sysctl_ipsec_debug_verbose, "ipsec_sa_init: " "MD5 ictx=0x%08x %08x %08x %08x octx=0x%08x %08x %08x %08x\n", ((__u32*)ictx)[0], ((__u32*)ictx)[1], ((__u32*)ictx)[2], ((__u32*)ictx)[3], ((__u32*)octx)[0], ((__u32*)octx)[1], ((__u32*)octx)[2], ((__u32*)octx)[3] ); # endif /* KLIPS_DIVULGE_HMAC_KEY */ /* paranoid */ memset(akp, 0, aks); kfree(akp); break; } # endif /* CONFIG_KLIPS_AUTH_HMAC_MD5 */ # ifdef CONFIG_KLIPS_AUTH_HMAC_SHA1 case AH_SHA: { SHA1_CTX *ictx; SHA1_CTX *octx; if (ipsp->ips_key_bits_a != (AHSHA196_KLEN * 8)) { KLIPS_PRINT(debug_pfkey, "ipsec_sa_init: " "incorrect authorisation key size: %d bits -- must be %d bits\n" /*octets (bytes)\n"*/, ipsp->ips_key_bits_a, AHSHA196_KLEN * 8); SENDERR(EINVAL); } # if KLIPS_DIVULGE_HMAC_KEY KLIPS_PRINT( debug_pfkey && sysctl_ipsec_debug_verbose, "ipsec_sa_init: " "hmac sha1-96 key is 0x%08x %08x %08x %08x\n", ntohl(*(((__u32 *)ipsp->ips_key_a) + 0)), ntohl(*(((__u32 *)ipsp->ips_key_a) + 1)), ntohl(*(((__u32 *)ipsp->ips_key_a) + 2)), ntohl(*(((__u32 *)ipsp->ips_key_a) + 3))); # endif /* KLIPS_DIVULGE_HMAC_KEY */ ipsp->ips_auth_bits = AHSHA196_ALEN * 8; /* save the pointer to the key material */ akp = ipsp->ips_key_a; aks = ipsp->ips_key_a_size; KLIPS_PRINT( debug_pfkey && sysctl_ipsec_debug_verbose, "ipsec_sa_init: " "allocating %lu bytes for sha1_ctx.\n", (unsigned long) sizeof(struct sha1_ctx)); if ((ipsp->ips_key_a = (caddr_t) kmalloc(sizeof(struct sha1_ctx), GFP_ATOMIC)) == NULL) { ipsp->ips_key_a = akp; SENDERR(ENOMEM); } ipsp->ips_key_a_size = sizeof(struct sha1_ctx); for (i = 0; i < DIVUP(ipsp->ips_key_bits_a, 8); i++) kb[i] = akp[i] ^ HMAC_IPAD; for (; i < AHMD596_BLKLEN; i++) kb[i] = HMAC_IPAD; ictx = &(((struct sha1_ctx*)(ipsp->ips_key_a)) ->ictx); SHA1Init(ictx); SHA1Update(ictx, kb, AHSHA196_BLKLEN); for (i = 0; i < AHSHA196_BLKLEN; i++) kb[i] ^= (HMAC_IPAD ^ HMAC_OPAD); octx = &((struct sha1_ctx*)(ipsp->ips_key_a)) ->octx; SHA1Init(octx); SHA1Update(octx, kb, AHSHA196_BLKLEN); # if KLIPS_DIVULGE_HMAC_KEY KLIPS_PRINT( debug_pfkey && sysctl_ipsec_debug_verbose, "ipsec_sa_init: " "SHA1 ictx=0x%08x %08x %08x %08x octx=0x%08x %08x %08x %08x\n", ((__u32*)ictx)[0], ((__u32*)ictx)[1], ((__u32*)ictx)[2], ((__u32*)ictx)[3], ((__u32*)octx)[0], ((__u32*)octx)[1], ((__u32*)octx)[2], ((__u32*)octx)[3] ); # endif /* KLIPS_DIVULGE_HMAC_KEY */ memset(akp, 0, aks); kfree(akp); break; } # endif /* CONFIG_KLIPS_AUTH_HMAC_SHA1 */ case AH_NONE: break; default: KLIPS_PRINT(debug_pfkey, "ipsec_sa_init: " "authalg=%d support not available in the kernel.\n", ipsp->ips_authalg); SENDERR(EINVAL); } #ifdef CONFIG_KLIPS_ALG /* closure of the -EPROTO condition above */ } #endif ipsp->ips_iv_size = ipsp->ips_alg_enc->ixt_common.ixt_support. ias_ivlen / 8; /* Create IV */ if (ipsp->ips_iv_size) { if ((ipsp->ips_iv = (caddr_t) kmalloc(ipsp->ips_iv_size, GFP_ATOMIC)) == NULL) SENDERR(ENOMEM); prng_bytes(&ipsec_prng, (char *)ipsp->ips_iv, ipsp->ips_iv_size); ipsp->ips_iv_bits = ipsp->ips_iv_size * 8; } } break; #endif /* !CONFIG_KLIPS_ESP */ #ifdef CONFIG_KLIPS_IPCOMP case IPPROTO_COMP: ipsp->ips_xformfuncs = ipcomp_xform_funcs; ipsp->ips_comp_adapt_tries = 0; ipsp->ips_comp_adapt_skip = 0; ipsp->ips_comp_ratio_cbytes = 0; ipsp->ips_comp_ratio_dbytes = 0; #ifdef CONFIG_KLIPS_OCF if (ipsec_ocf_comp_sa_init(ipsp, ipsp->ips_encalg)) break; #endif ipsp->ips_comp_adapt_tries = 0; ipsp->ips_comp_adapt_skip = 0; ipsp->ips_comp_ratio_cbytes = 0; ipsp->ips_comp_ratio_dbytes = 0; break; #endif /* CONFIG_KLIPS_IPCOMP */ default: printk(KERN_ERR "KLIPS sa initialization: " "proto=%d unknown.\n", ipsp->ips_said.proto); SENDERR(EINVAL); } errlab: return error; }
DEBUG_NO_STATIC int pfkey_key_parse(struct sadb_ext *pfkey_ext) { int error = 0; struct sadb_key *pfkey_key = (struct sadb_key *)pfkey_ext; DEBUGGING(PF_KEY_DEBUG_PARSE_FLOW, "pfkey_key_parse:enter\n"); /* sanity checks... */ if(!pfkey_key) { DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, "pfkey_key_parse: " "NULL pointer passed in.\n"); SENDERR(EINVAL); } if(pfkey_key->sadb_key_len < sizeof(struct sadb_key) / IPSEC_PFKEYv2_ALIGN) { DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, "pfkey_key_parse: " "size wrong ext_len=%d, key_ext_len=%ld.\n", pfkey_key->sadb_key_len, sizeof(struct sadb_key)); SENDERR(EINVAL); } if(!pfkey_key->sadb_key_bits) { DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, "pfkey_key_parse: " "key length set to zero, must be non-zero.\n"); SENDERR(EINVAL); } if(pfkey_key->sadb_key_len != DIVUP(sizeof(struct sadb_key) * OCTETBITS + pfkey_key->sadb_key_bits, PFKEYBITS)) { DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, "pfkey_key_parse: " "key length=%d does not agree with extension length=%d.\n", pfkey_key->sadb_key_bits, pfkey_key->sadb_key_len); SENDERR(EINVAL); } if(pfkey_key->sadb_key_reserved) { DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, "pfkey_key_parse: " "res=%d, must be zero.\n", pfkey_key->sadb_key_reserved); SENDERR(EINVAL); } if(! ( (pfkey_key->sadb_key_exttype == SADB_EXT_KEY_AUTH) || (pfkey_key->sadb_key_exttype == SADB_EXT_KEY_ENCRYPT))) { DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, "pfkey_key_parse: " "expecting extension type AUTH or ENCRYPT, got %d.\n", pfkey_key->sadb_key_exttype); SENDERR(EINVAL); } DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT, "pfkey_key_parse: " "success, found len=%d exttype=%d bits=%d reserved=%d.\n", pfkey_key->sadb_key_len, pfkey_key->sadb_key_exttype, pfkey_key->sadb_key_bits, pfkey_key->sadb_key_reserved); errlab: return error; }
DEBUG_NO_STATIC int pfkey_address_parse(struct sadb_ext *pfkey_ext) { int error = 0; int saddr_len = 0; struct sadb_address *pfkey_address = (struct sadb_address *)pfkey_ext; struct sockaddr* s = (struct sockaddr*)((char*)pfkey_address + sizeof(*pfkey_address)); char ipaddr_txt[ADDRTOT_BUF]; DEBUGGING(PF_KEY_DEBUG_PARSE_FLOW, "pfkey_address_parse:enter\n"); /* sanity checks... */ if(!pfkey_address) { DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, "pfkey_address_parse: " "NULL pointer passed in.\n"); SENDERR(EINVAL); } if(pfkey_address->sadb_address_len < (sizeof(struct sadb_address) + sizeof(struct sockaddr))/ IPSEC_PFKEYv2_ALIGN) { DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, "pfkey_address_parse: " "size wrong 1 ext_len=%d, adr_ext_len=%ld, saddr_len=%ld.\n", pfkey_address->sadb_address_len, sizeof(struct sadb_address), sizeof(struct sockaddr)); SENDERR(EINVAL); } if(pfkey_address->sadb_address_reserved) { DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, "pfkey_address_parse: " "res=%d, must be zero.\n", pfkey_address->sadb_address_reserved); SENDERR(EINVAL); } switch(pfkey_address->sadb_address_exttype) { case SADB_EXT_ADDRESS_SRC: case SADB_EXT_ADDRESS_DST: case SADB_EXT_ADDRESS_PROXY: case SADB_X_EXT_ADDRESS_DST2: case SADB_X_EXT_ADDRESS_SRC_FLOW: case SADB_X_EXT_ADDRESS_DST_FLOW: case SADB_X_EXT_ADDRESS_SRC_MASK: case SADB_X_EXT_ADDRESS_DST_MASK: #ifdef NAT_TRAVERSAL case SADB_X_EXT_NAT_T_OA: #endif break; default: DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, "pfkey_address_parse: " "unexpected ext_type=%d.\n", pfkey_address->sadb_address_exttype); SENDERR(EINVAL); } switch(s->sa_family) { case AF_INET: DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT, "pfkey_address_parse: " "found address family=%d, AF_INET.\n", s->sa_family); saddr_len = sizeof(struct sockaddr_in); sprintf(ipaddr_txt, "%d.%d.%d.%d" , (((struct sockaddr_in*)s)->sin_addr.s_addr >> 0) & 0xFF , (((struct sockaddr_in*)s)->sin_addr.s_addr >> 8) & 0xFF , (((struct sockaddr_in*)s)->sin_addr.s_addr >> 16) & 0xFF , (((struct sockaddr_in*)s)->sin_addr.s_addr >> 24) & 0xFF); DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT, "pfkey_address_parse: " "found address=%s.\n", ipaddr_txt); break; case AF_INET6: DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT, "pfkey_address_parse: " "found address family=%d, AF_INET6.\n", s->sa_family); saddr_len = sizeof(struct sockaddr_in6); sprintf(ipaddr_txt, "%x:%x:%x:%x:%x:%x:%x:%x" , ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[0]) , ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[1]) , ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[2]) , ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[3]) , ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[4]) , ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[5]) , ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[6]) , ntohs(((struct sockaddr_in6*)s)->sin6_addr.s6_addr16[7])); DEBUGGING(PF_KEY_DEBUG_PARSE_STRUCT, "pfkey_address_parse: " "found address=%s.\n", ipaddr_txt); break; default: DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, "pfkey_address_parse: " "s->sa_family=%d not supported.\n", s->sa_family); SENDERR(EPFNOSUPPORT); } if(pfkey_address->sadb_address_len != DIVUP(sizeof(struct sadb_address) + saddr_len, IPSEC_PFKEYv2_ALIGN)) { DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, "pfkey_address_parse: " "size wrong 2 ext_len=%d, adr_ext_len=%ld, saddr_len=%d.\n", pfkey_address->sadb_address_len, sizeof(struct sadb_address), saddr_len); SENDERR(EINVAL); } if(pfkey_address->sadb_address_prefixlen != 0) { DEBUGGING(PF_KEY_DEBUG_PARSE_PROBLEM, "pfkey_address_parse: " "address prefixes not supported yet.\n"); SENDERR(EAFNOSUPPORT); /* not supported yet */ } /* XXX check if port!=0 */ DEBUGGING(PF_KEY_DEBUG_PARSE_FLOW, "pfkey_address_parse: successful.\n"); errlab: return error; }
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { CUresult cudastatus = CUDA_SUCCESS; if (nrhs != 3) mexErrMsgTxt("Wrong number of arguments"); if (init == 0) { // Initialize function //mexLock(); // load GPUmat gm = gmGetGPUmat(); // load module CUmodule *drvmod = gmGetModule("subsample"); //load appropriate GPU kernel (mangled name) CUresult status; status = cuModuleGetFunction(&_supersampleMedium_2, *drvmod, "_Z18kSupersampleMediumILi2EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMedium_3, *drvmod, "_Z18kSupersampleMediumILi3EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMedium_4, *drvmod, "_Z18kSupersampleMediumILi4EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMedium_5, *drvmod, "_Z18kSupersampleMediumILi5EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMedium_6, *drvmod, "_Z18kSupersampleMediumILi6EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMedium_7, *drvmod, "_Z18kSupersampleMediumILi7EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMedium_8, *drvmod, "_Z18kSupersampleMediumILi8EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMedium_9, *drvmod, "_Z18kSupersampleMediumILi9EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMedium_10, *drvmod, "_Z18kSupersampleMediumILi10EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMedium_11, *drvmod, "_Z18kSupersampleMediumILi11EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMedium_12, *drvmod, "_Z18kSupersampleMediumILi12EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMedium_13, *drvmod, "_Z18kSupersampleMediumILi13EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMedium_14, *drvmod, "_Z18kSupersampleMediumILi14EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMedium_15, *drvmod, "_Z18kSupersampleMediumILi15EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMedium_16, *drvmod, "_Z18kSupersampleMediumILi16EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMediumLoopy_2, *drvmod, "_Z23kSupersampleMediumLoopyILi2EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMediumLoopy_3, *drvmod, "_Z23kSupersampleMediumLoopyILi3EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMediumLoopy_4, *drvmod, "_Z23kSupersampleMediumLoopyILi4EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMediumLoopy_5, *drvmod, "_Z23kSupersampleMediumLoopyILi5EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMediumLoopy_6, *drvmod, "_Z23kSupersampleMediumLoopyILi6EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMediumLoopy_7, *drvmod, "_Z23kSupersampleMediumLoopyILi7EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMediumLoopy_8, *drvmod, "_Z23kSupersampleMediumLoopyILi8EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMediumLoopy_9, *drvmod, "_Z23kSupersampleMediumLoopyILi9EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMediumLoopy_10, *drvmod, "_Z23kSupersampleMediumLoopyILi10EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMediumLoopy_11, *drvmod, "_Z23kSupersampleMediumLoopyILi11EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMediumLoopy_12, *drvmod, "_Z23kSupersampleMediumLoopyILi12EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMediumLoopy_13, *drvmod, "_Z23kSupersampleMediumLoopyILi13EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMediumLoopy_14, *drvmod, "_Z23kSupersampleMediumLoopyILi14EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMediumLoopy_15, *drvmod, "_Z23kSupersampleMediumLoopyILi15EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&_supersampleMediumLoopy_16, *drvmod, "_Z23kSupersampleMediumLoopyILi16EEvPfS0_ii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } init = 1; } // mex parameters are: // 1. IN1 // 2. OUT // 3. supersampling factor bool avoidBankConflicts = true; //hard-coded bool trans = false; //hard-coded //IN1 is the input GPU array GPUtype IN1 = gm->gputype.getGPUtype(prhs[0]); //OUT is the output GPU array (result) GPUtype OUT = gm->gputype.getGPUtype(prhs[1]); //last parameter is the filterSize (int) int factor = (int) mxGetScalar(prhs[2]); // number of elements int nin1 = gm->gputype.getNumel(IN1); int nout = gm->gputype.getNumel(OUT); //dimensions const int * sin1 = gm->gputype.getSize(IN1); const int * sout = gm->gputype.getSize(OUT); int imgPixels = sin1[0]; if ( floor(sqrt(float(imgPixels))) != sqrt(float(imgPixels)) ) mexErrMsgTxt("Images not square"); int imgSize = int(sqrt(imgPixels)); int numImages = sin1[1]; /* if (imgSize <= factor) mexErrMsgTxt("imgSize must be > factor"); */ if (factor > 16) mexErrMsgTxt("factor > 16"); if (factor < 2) mexErrMsgTxt("factor < 2"); if (imgSize > 512) mexErrMsgTxt("max imgSize: 512"); if (imgSize < 1) mexErrMsgTxt("min imgSize: 1"); int targetPixels = sout[0]; if ( floor(sqrt(float(targetPixels))) != sqrt(float(targetPixels)) ) mexErrMsgTxt("Targets not square"); int targetSize = int(sqrt(targetPixels)); if (targetSize % factor !=0) mexErrMsgTxt("imgSize must be evenly divisible by factor"); if (targetSize / factor != imgSize) mexErrMsgTxt("targetSize/ factor must = imgSize"); if (nout != nin1 * factor*factor) mexErrMsgTxt("Target dimensions not consistent"); int threadsX, threadsY; int SHMEM_MAX = 8192; // don't use more than this much shmem int shmemX, shmemY, blocksX, blocksY; bool useLoopy = false; int THREADS_MAX_LOOPY = 512, THREADS_MAX = trans ? 256 : 512; if (!trans) { threadsX = imgSize; threadsY = factor * MIN(THREADS_MAX / (factor*threadsX), SHMEM_MAX / (4*threadsX*factor)); // to avoid running out of shmem if(threadsY == 0) { if (factor > 32) mexErrMsgTxt("factor can't be > 32"); //assert(factor <= 32); // yes this is covered by assert above but in case i ever remove that THREADS_MAX = 512; useLoopy = true; threadsX = MIN(16, imgSize); // not that imgsize can be < 16 here under current conditions threadsY = factor * MIN(THREADS_MAX_LOOPY / (factor*threadsX), SHMEM_MAX / (4*threadsX*factor)); // to avoid running out of shmem } shmemY = threadsY; shmemX = threadsX; blocksX = imgSize; blocksY = DIVUP(numImages, threadsY); // printf("boundary problems: %u\n", numImages % threadsY != 0); } else { threadsY = imgSize; threadsX = factor * MIN(THREADS_MAX / (factor*threadsY), SHMEM_MAX / (4*threadsY*factor)); // to avoid running out of shmem if(threadsX < 8) { useLoopy = true; int xFactorMult = DIVUP(16, factor); threadsX = xFactorMult * factor; threadsY = THREADS_MAX / threadsX; int newThreadsX = threadsX, newThreadsY = threadsY; while (newThreadsY > 0 && imgSize % newThreadsY != 0) { // let's see if we can make threadsY divide imgSize newThreadsX += factor; newThreadsY = THREADS_MAX / newThreadsX; } if (newThreadsY > 0) { threadsY = newThreadsY; threadsX = newThreadsX; } if (threadsY <=0) mexErrMsgTxt("threadsY <=0; not expected"); //assert(threadsY > 0); } shmemY = threadsX; shmemX = threadsY + (1 - (threadsY % 2)); blocksX = DIVUP(numImages, threadsX); blocksY = imgSize; // printf("boundary problems: %u\n", numImages % threadsX != 0); } int shmem = 4 * shmemX * shmemY; if (shmem == 0 || shmem > 16300) { // this really shouldn't happen and i've only put this here as a precautionary measure // to avoid getting mysteriously wrong results. mexErrMsgTxt("supersample: not enough shared memory!"); //exit(EXIT_FAILURE); } dim3 grid(blocksX, blocksY); dim3 threads(threadsX, threadsY); //mexPrintf("blocks: %dx%d, threads: %dx%d\n", blocksY, blocksX, threadsY, threadsX); //mexPrintf("using %dx%d = %d bytes of shmem\n", shmemY, shmemX, shmem); gpuTYPE_t tin1 = gm->gputype.getType(IN1); gpuTYPE_t tout = gm->gputype.getType(OUT); // check input/out size and type if (tin1!=tout) mexErrMsgTxt("Input and output arguments must be of the same type."); // I need the pointers to GPU memory CUdeviceptr d_IN1 = (CUdeviceptr) (UINTPTR gm->gputype.getGPUptr(IN1)); CUdeviceptr d_OUT = (CUdeviceptr) (UINTPTR gm->gputype.getGPUptr(OUT)); // // The GPU kernel depends on the type of input/output // CUfunction drvfun; // if (tin1 == gpuFLOAT) { // drvfun = drvfunf; // } else // mexErrMsgTxt("Currently only single types supported."); hostdrv_pars_t gpuprhs[2]; int gpunrhs = 2; gpuprhs[0] = hostdrv_pars(&d_IN1,sizeof(d_IN1)); gpuprhs[1] = hostdrv_pars(&d_OUT,sizeof(d_OUT)); /* trans not implemented; so always !trans if(!trans) { */ if(!useLoopy) { if(factor == 2) { hostDriver(_supersampleMedium_2, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); //kSupersampleMedium<2><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), imgSize, numImages*imgSize); } else if(factor == 3) { hostDriver(_supersampleMedium_3, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 4) { hostDriver(_supersampleMedium_4, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 5) { hostDriver(_supersampleMedium_5, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 6) { hostDriver(_supersampleMedium_6, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 7) { hostDriver(_supersampleMedium_7, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 8) { hostDriver(_supersampleMedium_8, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 9) { hostDriver(_supersampleMedium_9, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 10) { hostDriver(_supersampleMedium_10, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 11) { hostDriver(_supersampleMedium_11, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 12) { hostDriver(_supersampleMedium_12, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 13) { hostDriver(_supersampleMedium_13, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 14) { hostDriver(_supersampleMedium_14, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 15) { hostDriver(_supersampleMedium_15, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 16) { hostDriver(_supersampleMedium_16, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } } else { if(factor == 2) { hostDriver(_supersampleMediumLoopy_2, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 3) { hostDriver(_supersampleMediumLoopy_3, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 4) { hostDriver(_supersampleMediumLoopy_4, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 5) { hostDriver(_supersampleMediumLoopy_5, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 6) { hostDriver(_supersampleMediumLoopy_6, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 7) { hostDriver(_supersampleMediumLoopy_7, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 8) { hostDriver(_supersampleMediumLoopy_8, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 9) { hostDriver(_supersampleMediumLoopy_9, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 10) { hostDriver(_supersampleMediumLoopy_10, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 11) { hostDriver(_supersampleMediumLoopy_11, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 12) { hostDriver(_supersampleMediumLoopy_12, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 13) { hostDriver(_supersampleMediumLoopy_13, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 14) { hostDriver(_supersampleMediumLoopy_14, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 15) { hostDriver(_supersampleMediumLoopy_15, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } else if(factor == 16) { hostDriver(_supersampleMediumLoopy_16, grid, threads, shmem, imgSize, numImages*imgSize, gpunrhs, gpuprhs); } } /* } else { if(!useLoopy) { if(factor == 2) { kSupersampleMediumTrans<2><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 3) { kSupersampleMediumTrans<3><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 4) { kSupersampleMediumTrans<4><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 5) { kSupersampleMediumTrans<5><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 6) { kSupersampleMediumTrans<6><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 7) { kSupersampleMediumTrans<7><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 8) { kSupersampleMediumTrans<8><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 9) { kSupersampleMediumTrans<9><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 10) { kSupersampleMediumTrans<10><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 11) { kSupersampleMediumTrans<11><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 12) { kSupersampleMediumTrans<12><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 13) { kSupersampleMediumTrans<13><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 14) { kSupersampleMediumTrans<14><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 15) { kSupersampleMediumTrans<15><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 16) { kSupersampleMediumTrans<16><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } } else { if(factor == 2) { kSupersampleMediumTransLoopy<2><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 3) { kSupersampleMediumTransLoopy<3><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 4) { kSupersampleMediumTransLoopy<4><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 5) { kSupersampleMediumTransLoopy<5><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 6) { kSupersampleMediumTransLoopy<6><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 7) { kSupersampleMediumTransLoopy<7><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 8) { kSupersampleMediumTransLoopy<8><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 9) { kSupersampleMediumTransLoopy<9><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 10) { kSupersampleMediumTransLoopy<10><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 11) { kSupersampleMediumTransLoopy<11><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 12) { kSupersampleMediumTransLoopy<12><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 13) { kSupersampleMediumTransLoopy<13><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 14) { kSupersampleMediumTransLoopy<14><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 15) { kSupersampleMediumTransLoopy<15><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } else if(factor == 16) { kSupersampleMediumTransLoopy<16><<<grid, threads, shmem>>>(images->getDevData(), targets->getDevData(), numImages*imgSize, imgSize, shmemX); } } } */ }
int pfkey_key_process(struct sadb_ext *pfkey_ext, struct pfkey_extracted_data* extr) { int error = 0; struct sadb_key *pfkey_key = (struct sadb_key *)pfkey_ext; KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_key_process: .\n"); if(!extr || !extr->ips) { KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_key_process: " "extr or extr->ips is NULL, fatal\n"); SENDERR(EINVAL); } switch(pfkey_key->sadb_key_exttype) { case SADB_EXT_KEY_AUTH: KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_key_process: " "allocating %d bytes for authkey.\n", DIVUP(pfkey_key->sadb_key_bits, 8)); if(!(extr->ips->ips_key_a = kmalloc(DIVUP(pfkey_key->sadb_key_bits, 8), GFP_KERNEL))) { KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_key_process: " "memory allocation error.\n"); SENDERR(ENOMEM); } extr->ips->ips_key_bits_a = pfkey_key->sadb_key_bits; extr->ips->ips_key_a_size = DIVUP(pfkey_key->sadb_key_bits, 8); memcpy(extr->ips->ips_key_a, (char*)pfkey_key + sizeof(struct sadb_key), extr->ips->ips_key_a_size); break; case SADB_EXT_KEY_ENCRYPT: /* Key(s) */ KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_key_process: " "allocating %d bytes for enckey.\n", DIVUP(pfkey_key->sadb_key_bits, 8)); if(!(extr->ips->ips_key_e = kmalloc(DIVUP(pfkey_key->sadb_key_bits, 8), GFP_KERNEL))) { KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_key_process: " "memory allocation error.\n"); SENDERR(ENOMEM); } extr->ips->ips_key_bits_e = pfkey_key->sadb_key_bits; extr->ips->ips_key_e_size = DIVUP(pfkey_key->sadb_key_bits, 8); memcpy(extr->ips->ips_key_e, (char*)pfkey_key + sizeof(struct sadb_key), extr->ips->ips_key_e_size); break; default: SENDERR(EINVAL); } KLIPS_PRINT(debug_pfkey, "klips_debug:pfkey_key_process: " "success.\n"); errlab: return error; }
UINT32 ipsec_glue_encapalgo(struct ipsec_sa *ips) { UINT32 status = STATUS_SUCCESS; switch(ips->ips_encalg) { #ifdef CONFIG_IPSEC_ENC_AES case ESP_AES: cryptoAccCtx.cipherCtx.cipherAlgo = IX_CRYPTO_ACC_CIPHER_AES; switch (DIVUP(ips->ips_key_bits_e, BITS)) { case EMT_ESPAES128_KEY_SZ: cryptoAccCtx.cipherCtx.cipherKeyLen = IX_CRYPTO_ACC_AES_KEY_128; memcpy(cryptoAccCtx.cipherCtx.key.aesKey128, (UINT8 *)(ips->ips_key_e), IX_CRYPTO_ACC_AES_KEY_128); break; case EMT_ESPAES192_KEY_SZ: cryptoAccCtx.cipherCtx.cipherKeyLen = IX_CRYPTO_ACC_AES_KEY_192; memcpy(cryptoAccCtx.cipherCtx.key.aesKey192, (UINT8 *)(ips->ips_key_e), IX_CRYPTO_ACC_AES_KEY_192); break; case EMT_ESPAES256_KEY_SZ: cryptoAccCtx.cipherCtx.cipherKeyLen = IX_CRYPTO_ACC_AES_KEY_256; memcpy(cryptoAccCtx.cipherCtx.key.aesKey256, (UINT8 *)(ips->ips_key_e), IX_CRYPTO_ACC_AES_KEY_256); break; default: status = STATUS_FAIL; KLIPS_PRINT(debug_xform, "klips_error:glue_encapalgo: " "Invalid AES length!\n"); } cryptoAccCtx.cipherCtx.cipherBlockLen = IX_CRYPTO_ACC_AES_BLOCK_128; cryptoAccCtx.cipherCtx.cipherMode = IX_CRYPTO_ACC_MODE_CBC; if (EMT_ESPAES_IV_SZ == (DIVUP(ips->ips_iv_bits, BITS))) { cryptoAccCtx.cipherCtx.cipherInitialVectorLen = IX_CRYPTO_ACC_AES_CBC_IV_128; } else { status = STATUS_FAIL; KLIPS_PRINT(debug_xform, "klips_error:glue_encapalgo: " "Invalid IV length!\n"); } break; #endif #ifdef CONFIG_IPSEC_ENC_3DES case ESP_3DES: /* The cipher algorith, 3DES */ cryptoAccCtx.cipherCtx.cipherAlgo = IX_CRYPTO_ACC_CIPHER_3DES; /* The cipher key length */ /* check the cipher length, 3DES = 24 bytes */ if (EMT_ESP3DES_KEY_SZ == (DIVUP(ips->ips_key_bits_e, BITS))) { cryptoAccCtx.cipherCtx.cipherKeyLen = IX_CRYPTO_ACC_3DES_KEY_192; } else { status = STATUS_FAIL; KLIPS_PRINT(debug_xform, "klips_error:glue_encapalgo: " "Invalid 3DES length!\n"); break; } /* The cipher key */ memcpy (cryptoAccCtx.cipherCtx.key.desKey, (UINT8 *)(ips->ips_key_e), IX_CRYPTO_ACC_3DES_KEY_192); /* The cipher block length */ cryptoAccCtx.cipherCtx.cipherBlockLen = IPSEC_DES_BLOCK_LENGTH; /* The cipher mode, supported cipher mode: CBC */ cryptoAccCtx.cipherCtx.cipherMode = IX_CRYPTO_ACC_MODE_CBC; /* The cipher IV length */ if (EMT_ESPDES_IV_SZ == (DIVUP(ips->ips_iv_bits, BITS))) { cryptoAccCtx.cipherCtx.cipherInitialVectorLen = IX_CRYPTO_ACC_DES_IV_64; } else { status = STATUS_FAIL; KLIPS_PRINT(debug_xform, "klips_error:glue_encapalgo: " "Invalid IV length!\n"); } break; #endif /* CONFIG_IPSEC_ENC_3DES */ #ifdef USE_SINGLE_DES case ESP_DES: /* The cipher algorith, DES */ cryptoAccCtx.cipherCtx.cipherAlgo = IX_CRYPTO_ACC_CIPHER_DES; /* The cipher key length, DES = 8 bytes */ if (EMT_ESPDES_KEY_SZ == (DIVUP(ips->ips_key_bits_e, BITS))) { cryptoAccCtx.cipherCtx.cipherKeyLen = IX_CRYPTO_ACC_DES_KEY_64; } else { status = STATUS_FAIL; KLIPS_PRINT(debug_xform, "klips_error:glue_encapalgo: " "Invalid DES length!\n"); break; } /* The cipher key */ memcpy (cryptoAccCtx.cipherCtx.key.desKey, (UINT8 *)(ips->ips_key_e), IX_CRYPTO_ACC_DES_KEY_64); /* The cipher block length */ cryptoAccCtx.cipherCtx.cipherBlockLen = IPSEC_DES_BLOCK_LENGTH; /* The cipher mode, supported cipher mode: CBC */ cryptoAccCtx.cipherCtx.cipherMode = IX_CRYPTO_ACC_MODE_CBC; /* The cipher IV length */ if (EMT_ESPDES_IV_SZ == (DIVUP(ips->ips_iv_bits, BITS))) { cryptoAccCtx.cipherCtx.cipherInitialVectorLen = IX_CRYPTO_ACC_DES_IV_64; } else { status = STATUS_FAIL; KLIPS_PRINT(debug_xform, "klips_error:glue_encapalgo: " "Invalid IV length!\n"); } break; #endif /* USE_SINGLE_DES */ case ESP_NULL: break; default: /* Encryption not supported */ status = STATUS_FAIL; KLIPS_PRINT(debug_xform, "klips_error:glue_encapalgo: " "Encap. Algorithm not supported!\n"); return status; } return status; }
int burnAPP(TCHAR *path) { int errorcode = 0; unsigned char *buf = (unsigned char *)0x80000000; unsigned char *buftemp = buf; unsigned char headbuf[512]; unsigned int rdlen; APPHEADER *header = (APPHEADER * )headbuf; FIL file; FRESULT fret; int ret; char disbuf[32]; unsigned int percent = 0, percentold = 0; unsigned int filesize, count; APPPACKHEAD *apppackhead = (APPPACKHEAD * )(buf + 32); APPSETCTION *appsection1 = &(apppackhead->appsec1); APPSETCTION *appsection2 = &(apppackhead->appsec2); fret = f_open(&file, path, FA_READ); if (fret != FR_OK) { errorcode = BURNAPP_READERROR; return errorcode; } filesize = file.fsize; if ((filesize > APP_MAX_SIZE) || (filesize <= 1024)) { errorcode = BURNAPP_FILE_ERROR; goto ERROR; } count = DIVUP(filesize, 512); memset(headbuf, 0, sizeof headbuf); statBarPrint(0, "reading"); delay(300); for (int i = 0; i < count; i++, buftemp+=512) { fret = f_read(&file, buftemp, 512, &rdlen); if (fret != FR_OK) goto ERROR; for (int j = 0; j < 256; j++) { buftemp[j] ^= ProgramTable[j]; buftemp[j + 256] ^= ProgramTable[j]; } if (i == 0) { /* if ((buftemp[0] != 'T') || (buftemp[1] != 'H') || (buftemp[2] != 'J')) { errorcode = BURNAPP_FILE_ERROR; goto ERROR; } if ((buftemp[10] != 'A') || (buftemp[11] != 'R') || (buftemp[12] != 'A')) { errorcode = BURNAPP_FILE_ERROR; goto ERROR; } */ apppackhead = (APPPACKHEAD * )(buftemp + 32); appsection1 = &(apppackhead->appsec1); appsection2 = &(apppackhead->appsec2); if ((apppackhead->secflag & 0x01) && ((appsection1->imageaddr + appsection1->imageSize) > (filesize - 16))) { errorcode = BURNAPP_FILE_ERROR; goto ERROR; } if ((apppackhead->secflag & 0x02) && ((appsection2->imageaddr + appsection2->imageSize) > (filesize - 16))) { errorcode = BURNAPP_FILE_ERROR; goto ERROR; } } else { percent = i * 100 / count; if (percent / 5 != percentold / 5) { sprintf(disbuf, "%d%%", percent); statBarPrint(0, disbuf); percentold = percent; } } } MD5_CTX md5context; unsigned char decrypt[16]; statBarPrint(0, "processing please waite"); MD5Init(& md5context); MD5Update(& md5context, buf, filesize-16); MD5Final(& md5context, decrypt); if (memcmp(buf + filesize - 16, decrypt, 16)) { errorcode = BURNAPP_FILE_ERROR; goto ERROR; } statBarPrint(0, "write file please waite"); delay(300); header->magic = APP_MAGIC_NO; header->secflag = apppackhead->secflag; if (apppackhead->secflag & 0x01) { header->appsec1.imageaddr = APP_BEGIN_SECTOR; header->appsec1.imageSize = DIVUP(appsection1->imageSize, 512); header->appsec1.imageRevPrefix = appsection1->imageRevPrefix; header->appsec1.imageMainRev = appsection1->imageMainRev; header->appsec1.imageMidRev = appsection1->imageMidRev; header->appsec1.imageMinRev = appsection1->imageMinRev; } if (apppackhead->secflag & 0x02) { header->appsec2.imageaddr = BAG_BEGIN_SETCTOR; header->appsec2.imageSize = DIVUP(appsection2->imageSize, 512); header->appsec2.imageRevPrefix = appsection2->imageRevPrefix; header->appsec2.imageMainRev = appsection2->imageMainRev; header->appsec2.imageMidRev = appsection2->imageMidRev; header->appsec2.imageMinRev = appsection2->imageMinRev; } ret = MMCSDP_Write(mmcsdctr, headbuf, APP_HEAD_SECTOR, 1); if (FALSE == ret) { errorcode = BURNAPP_WRITEERROR; goto ERROR; } if (apppackhead->secflag & 0x01) { ret = MMCSDP_Write(mmcsdctr, (void *)(buf + appsection1->imageaddr), header->appsec1.imageaddr, header->appsec1.imageSize); if (FALSE == ret) { errorcode = BURNAPP_WRITEERROR; goto ERROR; } header->magic = APP_MAGIC_OK; } if (apppackhead->secflag & 0x02) { ret = MMCSDP_Write(mmcsdctr, (void *)(buf + appsection2->imageaddr), header->appsec2.imageaddr, header->appsec2.imageSize); if (FALSE == ret) { errorcode = BURNAPP_WRITEERROR; goto ERROR; } } MMCSDP_Write(mmcsdctr, headbuf, APP_HEAD_SECTOR, 1); return 0; ERROR: f_close(&file); return errorcode; }
FileSystemCode_t FsWriteToFile( _In_ FileSystemDescriptor_t* FileSystem, _In_ MfsEntryHandle_t* Handle, _In_ DmaBuffer_t* BufferObject, _In_ size_t Length, _Out_ size_t* BytesWritten) { MfsInstance_t* Mfs = (MfsInstance_t*)FileSystem->ExtensionData; MfsEntry_t* Entry = (MfsEntry_t*)Handle->Base.Entry; FileSystemCode_t Result = FsOk; uint64_t Position = Handle->Base.Position; size_t BucketSizeBytes = Mfs->SectorsPerBucket * FileSystem->Disk.Descriptor.SectorSize; size_t BytesToWrite = Length; TRACE("FsWriteEntry(Id 0x%x, Position %u, Length %u)", Handle->Base.Id, LODWORD(Position), Length); *BytesWritten = 0; Result = MfsEnsureRecordSpace(FileSystem, Entry, Position + BytesToWrite); if (Result != FsOk) { return Result; } // Guard against newly allocated files if (Handle->DataBucketPosition == MFS_ENDOFCHAIN) { Handle->DataBucketPosition = Entry->StartBucket; Handle->DataBucketLength = Entry->StartLength; Handle->BucketByteBoundary = 0; } // Write in a loop to make sure we write all requested bytes while (BytesToWrite) { // Calculate which bucket, then the sector offset // Then calculate how many sectors of the bucket we need to read uint64_t Sector = MFS_GETSECTOR(Mfs, Handle->DataBucketPosition); uint64_t SectorOffset = (Position - Handle->BucketByteBoundary) % FileSystem->Disk.Descriptor.SectorSize; size_t SectorIndex = (size_t)((Position - Handle->BucketByteBoundary) / FileSystem->Disk.Descriptor.SectorSize); size_t SectorsLeft = MFS_GETSECTOR(Mfs, Handle->DataBucketLength) - SectorIndex; size_t SectorCount = 0, ByteCount = 0; // Ok - so sectorindex contains the index in the bucket // and sector offset contains the byte-offset in that sector // Calculate the sector index into bucket Sector += SectorIndex; // Calculate how many sectors we should read in SectorCount = DIVUP(BytesToWrite, FileSystem->Disk.Descriptor.SectorSize); // Do we cross a boundary? if (SectorOffset + BytesToWrite > FileSystem->Disk.Descriptor.SectorSize) { SectorCount++; } // Adjust for bucket boundary SectorCount = MIN(SectorsLeft, SectorCount); // Adjust for number of bytes read ByteCount = (size_t)MIN(BytesToWrite, (SectorCount * FileSystem->Disk.Descriptor.SectorSize) - SectorOffset); // Ex pos 490 - length 50 // SectorIndex = 0, SectorOffset = 490, SectorCount = 2 - ByteCount = 50 (Capacity 4096) // Ex pos 1109 - length 450 // SectorIndex = 2, SectorOffset = 85, SectorCount = 2 - ByteCount = 450 (Capacity 4096) // Ex pos 490 - length 4000 // SectorIndex = 0, SectorOffset = 490, SectorCount = 8 - ByteCount = 3606 (Capacity 4096) TRACE("Write metrics - Sector %u + %u, Count %u, ByteOffset %u, ByteCount %u", LODWORD(Sector), SectorIndex, SectorCount, LODWORD(SectorOffset), ByteCount); // First of all, calculate the bounds as we might need to read // in existing data - Start out by clearing our combination buffer ZeroBuffer(Mfs->TransferBuffer); // Case 1 - Handle padding if (SectorOffset != 0 || ByteCount != FileSystem->Disk.Descriptor.SectorSize) { // Start building the sector if (MfsReadSectors(FileSystem, Mfs->TransferBuffer, Sector, SectorCount, &SectorCount) != OsSuccess) { ERROR("Failed to read sector %u for combination step", LODWORD(Sector)); Result = FsDiskError; break; } // Adjust the bytecount if we are not able to read all in one go if ((FileSystem->Disk.Descriptor.SectorSize * SectorCount) < ByteCount) { ByteCount = FileSystem->Disk.Descriptor.SectorSize * SectorCount; } } // Now write the data to the sector SeekBuffer(Mfs->TransferBuffer, (size_t)SectorOffset); CombineBuffer(Mfs->TransferBuffer, BufferObject, ByteCount, NULL); // Perform the write (Raw - as we need to pass the datapointer) if (MfsWriteSectors(FileSystem, Mfs->TransferBuffer, Sector, SectorCount, &SectorCount) != OsSuccess) { ERROR("Failed to write sector %u", LODWORD(Sector)); Result = FsDiskError; break; } // Increase the pointers and decrease with bytes read // Adjust the bytecount if we are not able to read all in one go if ((FileSystem->Disk.Descriptor.SectorSize * SectorCount) < ByteCount) { ByteCount = FileSystem->Disk.Descriptor.SectorSize * SectorCount; } Position += ByteCount; *BytesWritten += ByteCount; BytesToWrite -= ByteCount; // Do we need to switch bucket? // We do if the position we have read to equals end of bucket if (Position == (Handle->BucketByteBoundary + (Handle->DataBucketLength * BucketSizeBytes))) { MapRecord_t Link; // We have to lookup the link for current bucket if (MfsGetBucketLink(FileSystem, Handle->DataBucketPosition, &Link) != OsSuccess) { ERROR("Failed to get link for bucket %u", Handle->DataBucketPosition); Result = FsDiskError; break; } // Check for EOL if (Link.Link == MFS_ENDOFCHAIN) { break; } Handle->DataBucketPosition = Link.Link; // Lookup length of link if (MfsGetBucketLink(FileSystem, Handle->DataBucketPosition, &Link) != OsSuccess) { ERROR("Failed to get length for bucket %u", Handle->DataBucketPosition); Result = FsDiskError; break; } Handle->DataBucketLength = Link.Length; Handle->BucketByteBoundary += (Link.Length * BucketSizeBytes); } } // entry->modified = now Entry->ActionOnClose = MFS_ACTION_UPDATE; return Result; }
UINT32 ipsec_glue_authalg(struct ipsec_sa *ips) { UINT32 status = STATUS_SUCCESS; switch(ips->ips_authalg) { #ifdef CONFIG_IPSEC_AUTH_HMAC_MD5 case AH_MD5: /* Tne the authentication algorithm - MD5*/ cryptoAccCtx.authCtx.authAlgo = IX_CRYPTO_ACC_AUTH_MD5; /* The digest length, in bytes */ cryptoAccCtx.authCtx.authDigestLen = AHHMAC_HASHLEN; /* The authentication key length */ if (AHMD596_KLEN == (DIVUP(ips->ips_key_bits_a, BITS))) { cryptoAccCtx.authCtx.authKeyLen = IX_CRYPTO_ACC_MD5_KEY_128; } else { status = STATUS_FAIL; KLIPS_PRINT(debug_xform, "klips_error:glue_encapalgo: " "Invalid MD5 length!\n"); break; } /* The authentication key */ memcpy(cryptoAccCtx.authCtx.key.authKey, (UINT8 *)(ips->ips_key_a), IX_CRYPTO_ACC_MD5_KEY_128); break; #endif /* CONFIG_IPSEC_AUTH_HMAC_MD5 */ #ifdef CONFIG_IPSEC_AUTH_HMAC_SHA1 case AH_SHA: cryptoAccCtx.authCtx.authAlgo = IX_CRYPTO_ACC_AUTH_SHA1; /* The digest length, in bytes */ cryptoAccCtx.authCtx.authDigestLen = AHHMAC_HASHLEN; /* The authentication key length */ if (AHSHA196_KLEN == (DIVUP(ips->ips_key_bits_a, BITS))) { cryptoAccCtx.authCtx.authKeyLen = IX_CRYPTO_ACC_SHA1_KEY_160; } else { status = STATUS_FAIL; KLIPS_PRINT(debug_xform, "klips_error:glue_encapalgo: " "Invalid SHA1 length!\n"); break; } /* The authentication key, SHA1 */ memcpy(cryptoAccCtx.authCtx.key.authKey, (UINT8 *)(ips->ips_key_a), IX_CRYPTO_ACC_SHA1_KEY_160); break; #endif /* CONFIG_IPSEC_AUTH_HMAC_SHA1 */ case AH_NONE: break; default: /* Authentication algo. not supported */ status = STATUS_FAIL; KLIPS_PRINT(debug_xform, "klips_error:glue_authalgo: " "Authen. Algorithm not supported!\n"); return status; } return status; }
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { CUresult cudastatus = CUDA_SUCCESS; if (nrhs != 3) mexErrMsgTxt("Wrong number of arguments"); if (init == 0) { // Initialize function //mexLock(); // load GPUmat gm = gmGetGPUmat(); // load module CUmodule *drvmod = gmGetModule("subsample"); //load appropriate GPU kernel (mangled name) CUresult status; status = cuModuleGetFunction(&subsample_noreduc_2T, *drvmod, "_Z18kSubsample_noreducILi2ELb1EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_2F, *drvmod, "_Z18kSubsample_noreducILi2ELb0EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_3T, *drvmod, "_Z18kSubsample_noreducILi3ELb1EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_3F, *drvmod, "_Z18kSubsample_noreducILi3ELb0EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_4T, *drvmod, "_Z18kSubsample_noreducILi4ELb1EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_4F, *drvmod, "_Z18kSubsample_noreducILi4ELb0EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_5T, *drvmod, "_Z18kSubsample_noreducILi5ELb1EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_5F, *drvmod, "_Z18kSubsample_noreducILi5ELb0EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_6T, *drvmod, "_Z18kSubsample_noreducILi6ELb1EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_6F, *drvmod, "_Z18kSubsample_noreducILi6ELb0EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_7T, *drvmod, "_Z18kSubsample_noreducILi7ELb1EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_7F, *drvmod, "_Z18kSubsample_noreducILi7ELb0EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_8T, *drvmod, "_Z18kSubsample_noreducILi8ELb1EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_8F, *drvmod, "_Z18kSubsample_noreducILi8ELb0EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_9T, *drvmod, "_Z18kSubsample_noreducILi9ELb1EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_9F, *drvmod, "_Z18kSubsample_noreducILi9ELb0EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_10T, *drvmod, "_Z18kSubsample_noreducILi10ELb1EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_10F, *drvmod, "_Z18kSubsample_noreducILi10ELb0EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_11T, *drvmod, "_Z18kSubsample_noreducILi11ELb1EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_11F, *drvmod, "_Z18kSubsample_noreducILi11ELb0EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_12T, *drvmod, "_Z18kSubsample_noreducILi12ELb1EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_12F, *drvmod, "_Z18kSubsample_noreducILi12ELb0EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_13T, *drvmod, "_Z18kSubsample_noreducILi13ELb1EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_13F, *drvmod, "_Z18kSubsample_noreducILi13ELb0EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_14T, *drvmod, "_Z18kSubsample_noreducILi14ELb1EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_14F, *drvmod, "_Z18kSubsample_noreducILi14ELb0EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_15T, *drvmod, "_Z18kSubsample_noreducILi15ELb1EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_15F, *drvmod, "_Z18kSubsample_noreducILi15ELb0EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_16T, *drvmod, "_Z18kSubsample_noreducILi16ELb1EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } status = cuModuleGetFunction(&subsample_noreduc_16F, *drvmod, "_Z18kSubsample_noreducILi16ELb0EEvPfS0_iii"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } init = 1; } // mex parameters are: // 1. IN1 // 2. OUT // 3. subsampling factor bool avoidBankConflicts = true; //hard-coded //IN1 is the input GPU array GPUtype IN1 = gm->gputype.getGPUtype(prhs[0]); //OUT is the output GPU array (result) GPUtype OUT = gm->gputype.getGPUtype(prhs[1]); //last parameter is the filterSize (int) int factor = (int) mxGetScalar(prhs[2]); // number of elements int nin1 = gm->gputype.getNumel(IN1); int nout = gm->gputype.getNumel(OUT); //dimensions const int * sin1 = gm->gputype.getSize(IN1); const int * sout = gm->gputype.getSize(OUT); int imgPixels = sin1[0]; if ( floor(sqrt(float(imgPixels))) != sqrt(float(imgPixels)) ) mexErrMsgTxt("Images not square"); int imgSize = int(sqrt(imgPixels)); if (imgSize <= factor) mexErrMsgTxt("imgSize must be > factor"); if (imgSize % factor !=0) mexErrMsgTxt("imgSize must be evenly divisible by factor"); if (factor > 16) mexErrMsgTxt("factor > 16"); if (factor < 2) mexErrMsgTxt("factor < 2"); if (imgSize > 512) mexErrMsgTxt("max imgSize: 512"); int numRegions = nin1 / (factor*factor); int numRegionsY = (imgSize / factor) * sin1[1]; if (nout != numRegions) mexErrMsgTxt("Target dimensions not consistent"); int regionsXPerBlock = imgSize / factor; int numThreadsX = imgSize; int SHMEM_MAX = 8192; // don't use more than this much shmem int regionsYPerBlock = MIN(512 / numThreadsX, SHMEM_MAX / (4*imgSize)); // to avoid running out of shmem // regionsYPerBlock--; int regionsPerBlock = regionsYPerBlock * regionsXPerBlock; // this will avoid all bank conflicts but may (?) use up too much shmem int shmemPadX = avoidBankConflicts * (DIVUP(16,factor) + (regionsPerBlock % 16 == 0 ? 0 : 16 - regionsPerBlock % 16)); // shmemPadX = 0; int shmemY = factor, shmemX = regionsPerBlock + shmemPadX; int shmem = 4 * shmemX * shmemY; if (shmem == 0 || shmem > 16300) { // this really shouldn't happen and i've only put this here as a precautionary measure // to avoid getting mysteriously wrong results. mexErrMsgTxt("subsample: not enough shared memory!"); } int numThreadsY = regionsYPerBlock; // int blocks = numRegionsY / regionsYPerBlock; int blocksX = imgSize / factor, blocksY = DIVUP(sin1[1], regionsYPerBlock); if (blocksX >=65535 || blocksY >= 65535) mexErrMsgTxt("Exceeded max block size"); // assert(numRegionsY % regionsYPerBlock == 0); bool checkThreadBounds = numRegionsY % regionsYPerBlock != 0; // printf("num regions y: %d, regions y per block: %d\n", numRegionsY, regionsYPerBlock); dim3 grid(blocksX, blocksY); dim3 threads(numThreadsX, numThreadsY); /* mexPrintf("grid: %ux%u, threads: %ux%u\n", grid.y, grid.x, threads.y, threads.x); mexPrintf("check bounds: %u\n", checkThreadBounds); mexPrintf("using %u bytes of shmem\n", shmem); */ gpuTYPE_t tin1 = gm->gputype.getType(IN1); gpuTYPE_t tout = gm->gputype.getType(OUT); // check input/out size and type if (tin1!=tout) mexErrMsgTxt("Input and output arguments must be of the same type."); // I need the pointers to GPU memory CUdeviceptr d_IN1 = (CUdeviceptr) (UINTPTR gm->gputype.getGPUptr(IN1)); CUdeviceptr d_OUT = (CUdeviceptr) (UINTPTR gm->gputype.getGPUptr(OUT)); // // The GPU kernel depends on the type of input/output // CUfunction drvfun; // if (tin1 == gpuFLOAT) { // drvfun = drvfunf; // } else // mexErrMsgTxt("Currently only single types supported."); hostdrv_pars_t gpuprhs[2]; int gpunrhs = 2; gpuprhs[0] = hostdrv_pars(&d_IN1,sizeof(d_IN1)); gpuprhs[1] = hostdrv_pars(&d_OUT,sizeof(d_OUT)); //int N = nin1; if (factor == 2) { if (checkThreadBounds) { hostDriver(subsample_noreduc_2T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } else { hostDriver(subsample_noreduc_2F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } } else if (factor == 3) { if (checkThreadBounds) { hostDriver(subsample_noreduc_3T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } else { hostDriver(subsample_noreduc_3F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } } else if (factor == 4) { if (checkThreadBounds) { hostDriver(subsample_noreduc_4T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } else { hostDriver(subsample_noreduc_4F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } } else if (factor == 5) { if (checkThreadBounds) { hostDriver(subsample_noreduc_5T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } else { hostDriver(subsample_noreduc_5F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } } else if (factor == 6) { if (checkThreadBounds) { hostDriver(subsample_noreduc_6T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } else { hostDriver(subsample_noreduc_6F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } } else if (factor == 7) { if (checkThreadBounds) { hostDriver(subsample_noreduc_7T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } else { hostDriver(subsample_noreduc_7F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } } else if (factor == 8) { if (checkThreadBounds) { hostDriver(subsample_noreduc_8T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } else { hostDriver(subsample_noreduc_8F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } } else if (factor == 9) { if (checkThreadBounds) { hostDriver(subsample_noreduc_9T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } else { hostDriver(subsample_noreduc_9F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } } else if (factor == 10) { if (checkThreadBounds) { hostDriver(subsample_noreduc_10T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } else { hostDriver(subsample_noreduc_10F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } } else if (factor == 11) { if (checkThreadBounds) { hostDriver(subsample_noreduc_11T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } else { hostDriver(subsample_noreduc_11F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } } else if (factor == 12) { if (checkThreadBounds) { hostDriver(subsample_noreduc_12T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } else { hostDriver(subsample_noreduc_12F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } } else if (factor == 13) { if (checkThreadBounds) { hostDriver(subsample_noreduc_13T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } else { hostDriver(subsample_noreduc_13F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } } else if (factor == 14) { if (checkThreadBounds) { hostDriver(subsample_noreduc_14T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } else { hostDriver(subsample_noreduc_14F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } } else if (factor == 15) { if (checkThreadBounds) { hostDriver(subsample_noreduc_15T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } else { hostDriver(subsample_noreduc_15F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } } else if (factor == 16) { if (checkThreadBounds) { hostDriver(subsample_noreduc_16T, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } else { hostDriver(subsample_noreduc_16F, grid, threads, imgSize, numRegionsY, shmemX, shmem, gpunrhs, gpuprhs); } } //cutilCheckMsg("kernel execution failed"); // if(factor == 4) { //// kSubsample_reduc<4><<<grid, threads,4*numThreadsX*numThreadsY>>>(images->getDevData(), targets->getDevData(), imgSize, numRegionsY); // } }
FileSystemCode_t FsReadFromFile( _In_ FileSystemDescriptor_t* FileSystem, _In_ MfsEntryHandle_t* Handle, _In_ DmaBuffer_t* BufferObject, _In_ size_t Length, _Out_ size_t* BytesAt, _Out_ size_t* BytesRead) { MfsInstance_t* Mfs = (MfsInstance_t*)FileSystem->ExtensionData; MfsEntry_t* Entry = (MfsEntry_t*)Handle->Base.Entry; FileSystemCode_t Result = FsOk; uintptr_t DataPointer = GetBufferDma(BufferObject); uint64_t Position = Handle->Base.Position; size_t BucketSizeBytes = Mfs->SectorsPerBucket * FileSystem->Disk.Descriptor.SectorSize; size_t BytesToRead = Length; TRACE("FsReadFile(Id 0x%x, Position %u, Length %u)", Handle->Base.Id, LODWORD(Handle->Base.Position), Length); *BytesRead = 0; *BytesAt = Handle->Base.Position % FileSystem->Disk.Descriptor.SectorSize; // Sanitize the amount of bytes we want to read, cap it at bytes available if ((Position + BytesToRead) > Entry->Base.Descriptor.Size.QuadPart) { if (Position == Entry->Base.Descriptor.Size.QuadPart) { return FsOk; } BytesToRead = (size_t)(Entry->Base.Descriptor.Size.QuadPart - Position); } // Debug counter values TRACE(" > dma: 0x%x, fpos %u, bytes-total %u, bytes-at %u", DataPointer, LODWORD(Position), BytesToRead, *BytesAt); // Read the current sector, update index to where data starts // Keep reading consecutive after that untill all bytes requested have // been read // Read in a loop to make sure we read all requested bytes while (BytesToRead) { // Calculate which bucket, then the sector offset // Then calculate how many sectors of the bucket we need to read uint64_t Sector = MFS_GETSECTOR(Mfs, Handle->DataBucketPosition); // Start-sector of current bucket uint64_t SectorOffset = Position % FileSystem->Disk.Descriptor.SectorSize; // Byte-offset into the current sector size_t SectorIndex = (size_t)((Position - Handle->BucketByteBoundary) / FileSystem->Disk.Descriptor.SectorSize); // The sector-index into the current bucket size_t SectorsLeft = MFS_GETSECTOR(Mfs, Handle->DataBucketLength) - SectorIndex; // How many sectors are left in this bucket size_t SectorCount; size_t SectorsFitInBuffer; size_t ByteCount; // Calculate the sector index into bucket Sector += SectorIndex; // Calculate how many sectors we should read in SectorCount = DIVUP(BytesToRead, FileSystem->Disk.Descriptor.SectorSize); SectorsFitInBuffer = (GetBufferSize(BufferObject) - *BytesRead) / FileSystem->Disk.Descriptor.SectorSize; if (SectorOffset != 0 && (SectorOffset + BytesToRead > FileSystem->Disk.Descriptor.SectorSize)) { SectorCount++; // Take into account the extra sector we have to read } // Adjust for bucket boundary, and adjust again for buffer size SectorCount = MIN(SectorCount, SectorsLeft); SectorCount = MIN(SectorCount, SectorsFitInBuffer); if (SectorCount == 0) { break; } // Adjust for number of bytes already consumed in the active sector ByteCount = MIN(BytesToRead, (SectorCount * FileSystem->Disk.Descriptor.SectorSize) - SectorOffset); // Ex pos 490 - length 50 // SectorIndex = 0, SectorOffset = 490, SectorCount = 2 - ByteCount = 50 (Capacity 4096) // Ex pos 1109 - length 450 // SectorIndex = 2, SectorOffset = 85, SectorCount = 2 - ByteCount = 450 (Capacity 4096) // Ex pos 490 - length 4000 // SectorIndex = 0, SectorOffset = 490, SectorCount = 8 - ByteCount = 3606 (Capacity 4096) TRACE(" > sector %u (b-start %u, b-index %u), num-sectors %u, sector-byte-offset %u, bytecount %u", LODWORD(Sector), LODWORD(Sector) - SectorIndex, SectorIndex, SectorCount, LODWORD(SectorOffset), ByteCount); if ((GetBufferSize(BufferObject) - *BytesRead) < (SectorCount * FileSystem->Disk.Descriptor.SectorSize)) { WARNING(" > not enough room in buffer for transfer"); break; } // Perform the read (Raw - as we need to pass the datapointer) if (StorageRead(FileSystem->Disk.Driver, FileSystem->Disk.Device, FileSystem->SectorStart + Sector, DataPointer, SectorCount, &SectorCount) != OsSuccess) { ERROR("Failed to read sector"); Result = FsDiskError; break; } // Increase the pointers and decrease with bytes read, take into account // we might not have been able to read all data in one go if ((FileSystem->Disk.Descriptor.SectorSize * SectorCount) < ByteCount) { ByteCount = FileSystem->Disk.Descriptor.SectorSize * SectorCount; } DataPointer += FileSystem->Disk.Descriptor.SectorSize * SectorCount; *BytesRead += ByteCount; Position += ByteCount; BytesToRead -= ByteCount; // Do we need to switch bucket? // We do if the position we have read to equals end of bucket if (Position == (Handle->BucketByteBoundary + (Handle->DataBucketLength * BucketSizeBytes))) { Result = MfsSwitchToNextBucketLink(FileSystem, Handle, BucketSizeBytes); if (Result == FsPathNotFound || Result != FsOk) { if (Result == FsPathNotFound) { Result = FsOk; } break; } } } // if (update_when_accessed) @todo // entry->accessed = now // entry->action_on_close = update TRACE(" > bytes read %u/%u", *BytesRead, Length); return Result; }
BOOL burnBootloader(const TCHAR *path) { unsigned char *buf = (unsigned char *)0x80000000; unsigned int rdlen; FIL file; FRESULT fret; int ret; if (!strendwith(path, ".MBT")) { return FALSE; } fret = f_open(&file, path, FA_READ); if (fret != FR_OK) { return FALSE; } if (file.fsize > 109 * 1024) { //109KB goto ERROR; } memset(buf, 0, 512); memcpy(buf, emmcheader, sizeof emmcheader); *(unsigned int *)(buf + 512) = file.fsize; *(unsigned int *)(buf + 512 + 4) = BOOTLOADER_ENTRY; fret = f_read(&file, buf + 512 + 8, file.fsize, &rdlen); if (fret != FR_OK) { goto ERROR; } if (rdlen != file.fsize) { goto ERROR; } //memset(buf,0,file.fsize+8+512); ret = MMCSDP_Write(mmcsdctr, buf, BOOTLOADER_BEGIN_SECTOR, DIVUP(file.fsize + 8 + 512,512)); if (FALSE == ret) { goto ERROR; } long long flashid; spiFlashReadId(&flashid); if ((flashid!=0)&&(flashid!=-1L)){ unsigned char flashstatus = spiFlashReadStatus(); if(!(flashstatus & 0x01)){ spiFlashSwitch256PageSize(); flashstatus = spiFlashReadStatus(); if(!(flashstatus & 0x01)){ goto ERROR; } } } unsigned int flashwcont = DIVUP(file.fsize+8,256); unsigned int byteswapcont = DIVUP(file.fsize+8,4); if ((flashid!=0)&&(flashid!=-1L)) { statBarPrint(0, "found dataflash chip ,burn to dataflash"); for(int i=0;i<byteswapcont;i++){ *(unsigned int *)(buf+512+4*i) = htonl(*(unsigned int *)(buf+512+4*i)); } delay(500); unsigned char percent1 = 0,percent2 = 0; char checkbuf[256]; for (int i=0;i<flashwcont;i++) { ret = spiFlashPageWrite(256*i,(void *)(buf + 512+256*i),256 ); percent1 = (i+1)*100/flashwcont; if(percent1/5 != percent2/5){ percent2 = percent1; char printbuf[200]; sprintf(printbuf,"dataflash write percent %d%%",percent2); statBarPrint(0, printbuf); } if (FALSE==ret) { statBarPrint(1, "data flash write error"); delay(1000); goto ERROR; } delay(45); spiFlashRead(256*i,checkbuf,256); if(memcmp(checkbuf,(void *)(buf + 512+256*i),256)!=0){ statBarPrint(1, "data flash write check error"); delay(500); goto ERROR; } } } f_close(&file); return TRUE; ERROR: f_close(&file); return FALSE; }