template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
void copy(const CPU_MATRIX & cpu_matrix,
          compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
 {
   if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 )
   {
     gpu_matrix.resize(static_cast<unsigned int>(cpu_matrix.size1()), static_cast<unsigned int>(cpu_matrix.size2()), false);
     
     //determine nonzeros:
     long num_entries = 0;
     for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
           row_it != cpu_matrix.end1();
           ++row_it)
     {
       unsigned int entries_per_row = 0;
       for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
             col_it != row_it.end();
             ++col_it)
       {
         ++entries_per_row;
       }
       num_entries += viennacl::tools::roundUpToNextMultiple<unsigned int>(entries_per_row, ALIGNMENT);
     }
     
     //std::cout << "CPU->GPU, Number of entries: " << num_entries << std::endl;
     
     //set up matrix entries:
     std::vector<unsigned int> row_buffer(cpu_matrix.size1() + 1);
     std::vector<unsigned int> col_buffer(num_entries);
     std::vector<SCALARTYPE> elements(num_entries);
     
     unsigned int row_index = 0;
     unsigned int data_index = 0;
     
     for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
           row_it != cpu_matrix.end1();
           ++row_it)
     {
       row_buffer[row_index] = data_index;
       ++row_index;
       
       for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
             col_it != row_it.end();
             ++col_it)
       {
         col_buffer[data_index] = static_cast<unsigned int>(col_it.index2());
         elements[data_index] = *col_it;
         ++data_index;
       }
       data_index = viennacl::tools::roundUpToNextMultiple<unsigned int>(data_index, ALIGNMENT); //take care of alignment
     }
     row_buffer[row_index] = data_index;
     
     /*gpu_matrix._row_buffer = viennacl::ocl::device().createMemory(CL_MEM_READ_WRITE, row_buffer);
     gpu_matrix._col_buffer = viennacl::ocl::device().createMemory(CL_MEM_READ_WRITE, col_buffer);
     gpu_matrix._elements = viennacl::ocl::device().createMemory(CL_MEM_READ_WRITE, elements);
     
     gpu_matrix._nonzeros = num_entries;*/
     gpu_matrix.set(&row_buffer[0], &col_buffer[0], &elements[0], static_cast<unsigned int>(cpu_matrix.size1()), num_entries);
   }
 }
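A minimal usage sketch for the CPU-to-GPU direction (assuming ViennaCL together with Boost.uBLAS; the 5x5 size and the entries are illustrative only):

#include <boost/numeric/ublas/matrix_sparse.hpp>
#include "viennacl/compressed_matrix.hpp"

void example_cpu_to_gpu()
{
  //a small sparse host matrix (illustrative values)
  boost::numeric::ublas::compressed_matrix<float> cpu_A(5, 5);
  cpu_A(0, 0) =  1.0f;
  cpu_A(1, 3) = -2.5f;
  cpu_A(4, 4) =  3.0f;

  //viennacl::copy dispatches to the CPU->GPU copy() shown above
  viennacl::compressed_matrix<float> gpu_A(5, 5);
  viennacl::copy(cpu_A, gpu_A);
}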
Example #2
template<typename Dtype>
void BaseConvolutionLayer<Dtype>::forward_gpu_gemm(const Dtype* input,
                                                   const int_tp input_off,
                                                   const Dtype* weights,
                                                   Dtype* output,
                                                   const int_tp output_off,
                                                   bool skip_im2col) {
  const Dtype* col_buff = input;
  if (this->device_->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
    if (!is_1x1_) {
      if (!skip_im2col) {
        conv_im2col_gpu(input + input_off, col_buffer()->mutable_gpu_data());
      }
      col_buff = col_buffer()->gpu_data();
    }
    for (int_tp g = 0; g < group_; ++g) {
      caffe_gpu_gemm<Dtype>(
          CblasNoTrans, CblasNoTrans, conv_out_channels_ / group_,
          conv_out_spatial_dim_, kernel_dim_, (Dtype) 1.,
          weights + weight_offset_ * g,
          col_buff + (is_1x1_ ? input_off : 0) + col_offset_ * g, (Dtype) 0.,
          output + output_off + output_offset_ * g);
    }
#endif  // USE_CUDA
  } else {
#ifdef USE_GREENTEA
    if (!is_1x1_) {
      if (!skip_im2col) {
        greentea_conv_im2col_gpu(input, input_off,
                                 col_buffer()->mutable_gpu_data(), 0);
      }
      col_buff = col_buffer()->gpu_data();
    }
    for (int_tp g = 0; g < group_; ++g) {
      greentea_gpu_gemm<Dtype>(this->device_->id(), CblasNoTrans,
                               CblasNoTrans, conv_out_channels_ / group_,
                               conv_out_spatial_dim_, kernel_dim_,
                               (Dtype) 1., (cl_mem) weights, weight_offset_ * g,
                               (cl_mem) col_buff,
                               (is_1x1_ ? input_off : 0) + col_offset_ * g,
                               (Dtype) 0., (cl_mem) output,
                               output_off + output_offset_ * g);
    }
#endif  // USE_GREENTEA
  }
}
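The routine above is the standard im2col + GEMM formulation of convolution: the column buffer holds one row per kernel element and one column per output location, so each group's output is a (conv_out_channels_/group_) x kernel_dim_ weight slice multiplied by a kernel_dim_ x conv_out_spatial_dim_ column buffer. A minimal single-channel, stride-1, no-padding im2col sketch (the name im2col_simple is illustrative, not Caffe's conv_im2col_gpu):

#include <vector>

void im2col_simple(const std::vector<float>& img, int height, int width,
                   int kernel, std::vector<float>& col)
{
  const int out_h = height - kernel + 1;
  const int out_w = width - kernel + 1;
  col.assign(static_cast<size_t>(kernel) * kernel * out_h * out_w, 0.0f);

  //row index = kernel element, column index = output pixel, so a
  //convolution over the image becomes one dense matrix product
  for (int kh = 0; kh < kernel; ++kh)
    for (int kw = 0; kw < kernel; ++kw)
      for (int oh = 0; oh < out_h; ++oh)
        for (int ow = 0; ow < out_w; ++ow)
          col[((kh * kernel + kw) * out_h + oh) * out_w + ow] =
              img[(oh + kh) * width + (ow + kw)];
}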
 template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
 void copy(const compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
           CPU_MATRIX & cpu_matrix )
 {
   if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
   {
     cpu_matrix.resize(gpu_matrix.size1(), gpu_matrix.size2());
     
     //get raw data from memory:
     std::vector<unsigned int> row_buffer(gpu_matrix.size1() + 1);
     std::vector<unsigned int> col_buffer(gpu_matrix.nnz());
     std::vector<SCALARTYPE> elements(gpu_matrix.nnz());
     
     //std::cout << "GPU->CPU, nonzeros: " << gpu_matrix.nnz() << std::endl;
     
     cl_int err;
     err = clEnqueueReadBuffer(viennacl::ocl::device().queue().get(), gpu_matrix.handle1().get(), CL_TRUE, 0, sizeof(unsigned int)*(gpu_matrix.size1() + 1), &(row_buffer[0]), 0, NULL, NULL);
     CL_ERR_CHECK(err);
     err = clEnqueueReadBuffer(viennacl::ocl::device().queue().get(), gpu_matrix.handle2().get(), CL_TRUE, 0, sizeof(unsigned int)*gpu_matrix.nnz(), &(col_buffer[0]), 0, NULL, NULL);
     CL_ERR_CHECK(err);
     err = clEnqueueReadBuffer(viennacl::ocl::device().queue().get(), gpu_matrix.handle().get(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
     CL_ERR_CHECK(err);
     viennacl::ocl::finish();
     
     //fill the cpu_matrix:
     unsigned int data_index = 0;
     for (unsigned int row = 1; row <= gpu_matrix.size1(); ++row)
     {
       while (data_index < row_buffer[row])
       {
          if (col_buffer[data_index] >= gpu_matrix.size2()) //column index must stay within the number of columns
         {
           std::cerr << "ViennaCL encountered invalid data at colbuffer[" << data_index << "]: " << col_buffer[data_index] << std::endl;
           return;
         }
         
         if (elements[data_index] != static_cast<SCALARTYPE>(0.0))
           cpu_matrix(row-1, col_buffer[data_index]) = elements[data_index];
         ++data_index;
       }
     }
   }
 }
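The reverse direction reads the CSR buffers back from the device and repopulates the host matrix; continuing the sketch after the first copy() (gpu_A and the 5x5 size carry over from that example):

boost::numeric::ublas::compressed_matrix<float> cpu_B(5, 5);
viennacl::copy(gpu_A, cpu_B);   //dispatches to the GPU->CPU copy() shown above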
Example #4
template<typename Dtype>
void BaseConvolutionLayer<Dtype>::backward_gpu_gemm(const Dtype* output,
                                                    const int output_off,
                                                    const Dtype* weights,
                                                    Dtype* input,
                                                    const int input_off) {
  Dtype* col_buff = col_buffer()->mutable_gpu_data();
  if (is_1x1_) {
    col_buff = input;
  }
  if (this->device_context_->backend() == BACKEND_CUDA) {
#ifdef USE_CUDA
    for (int g = 0; g < group_; ++g) {
      caffe_gpu_gemm<Dtype>(
          CblasTrans, CblasNoTrans, kernel_dim_ / group_, conv_out_spatial_dim_,
          conv_out_channels_ / group_, (Dtype) 1., weights + weight_offset_ * g,
          output + output_off + output_offset_ * g, (Dtype) 0.,
          col_buff + (is_1x1_ ? input_off : 0) + col_offset_ * g);
    }
    if (!is_1x1_) {
      conv_col2im_gpu(col_buff, input + input_off);
    }
#endif  // USE_CUDA
  } else {
#ifdef USE_GREENTEA
    for (int g = 0; g < group_; ++g) {
      greentea_gpu_gemm<Dtype>(this->device_context_->id(), CblasTrans,
                               CblasNoTrans, kernel_dim_ / group_,
                               conv_out_spatial_dim_,
                               conv_out_channels_ / group_, (Dtype) 1.,
                               (cl_mem) weights, weight_offset_ * g,
                               (cl_mem) output, output_off + output_offset_ * g,
                               (Dtype) 0., (cl_mem) col_buff,
                               (is_1x1_ ? input_off : 0) + col_offset_ * g);
    }
    if (!is_1x1_) {
      greentea_conv_col2im_gpu(col_buff, 0, input, input_off);
    }
#endif  // USE_GREENTEA
  }
}
      template <typename NumericT>
      void copy(viennashe::math::sparse_matrix<NumericT> const & assembled_matrix,
                viennacl::compressed_matrix<NumericT>          &       vcl_matrix)
      {
        std::size_t nonzeros = assembled_matrix.nnz();
        viennacl::backend::typesafe_host_array<unsigned int> row_buffer(vcl_matrix.handle1(), assembled_matrix.size1() + 1);
        viennacl::backend::typesafe_host_array<unsigned int> col_buffer(vcl_matrix.handle2(), nonzeros);
        std::vector<NumericT> elements(nonzeros);

        std::size_t data_index = 0;

        for (std::size_t i  = 0;
                         i != assembled_matrix.size1();
                       ++i)
        {
          typedef typename viennashe::math::sparse_matrix<NumericT>::const_iterator2   AlongRowIterator;
          typedef typename viennashe::math::sparse_matrix<NumericT>::row_type          RowType;

          row_buffer.set(i, data_index);
          RowType const & row_i = assembled_matrix.row(i);

          for (AlongRowIterator col_it  = row_i.begin();
                                col_it != row_i.end();
                              ++col_it)
          {
            col_buffer.set(data_index, col_it->first);
            elements[data_index] = col_it->second;
            ++data_index;
          }
        }
        row_buffer.set(assembled_matrix.size1(), data_index);

        vcl_matrix.set(row_buffer.get(),
                       col_buffer.get(),
                       &elements[0],
                       assembled_matrix.size1(),
                       assembled_matrix.size2(),
                       nonzeros);
      }
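The loops above emit the usual CSR layout: row_buffer[i] is the offset of the first entry of row i, col_buffer holds the column indices, and elements holds the values. As a small worked example, the 3x3 matrix

  4 0 1
  0 2 0
  3 0 5

is stored as row_buffer = {0, 2, 3, 5}, col_buffer = {0, 2, 1, 0, 2}, elements = {4, 1, 2, 3, 5}.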
Example #6
File: dbpivot.c  Project: RQZeng/freetds
/** 
 * Pivot the rows, creating a new resultset
 *
 * Call dbpivot() immediately after dbresults().  It calls dbnextrow() as long as
 * it returns REG_ROW, transforming the results into a cross-tab report.  
 * dbpivot() modifies the metadata such that DB-Library can be used transparently: 
 * retrieve the rows as usual with dbnumcols(), dbnextrow(), etc. 
 *
 * @dbproc, our old friend
 * @nkeys the number of left-edge columns to group by
 * @keys  an array of left-edge columns to group by
 * @ncols the number of top-edge columns to group by
 * @cols  an array of top-edge columns to group by
 * @func  the aggregation function to use
 * @val   the number of the column to which @func is applied
 *
 * @returns the return code from the final call to dbnextrow().  
 *  Success is normally indicated by NO_MORE_ROWS.  
 */
RETCODE
dbpivot(DBPROCESS *dbproc, int nkeys, int *keys, int ncols, int *cols, DBPIVOT_FUNC func, int val)
{
	enum { logalot = 1 };
	struct pivot_t P, *pp;
	struct agg_t input, *pout = NULL;
	struct key_t *pacross;
	struct metadata_t *metadata, *pmeta;
	size_t i, nmeta = 0;

	tdsdump_log(TDS_DBG_FUNC, "dbpivot(%p, %d,%p, %d,%p, %p, %d)\n", dbproc, nkeys, keys, ncols, cols, func, val);
	if (logalot) {
		char buffer[1024] = {'\0'}, *s = buffer;
		const static char *names[2] = { "\tkeys (down)", "\n\tcols (across)" };
		int *p = keys, *pend = p + nkeys;
		
		for (i=0; i < 2; i++) {
			const char *sep = "";
			s += sprintf(s, "%s: ", names[i]);
			for ( ; p < pend; p++) {
				s += sprintf(s, "%s%d", sep, *p);
				sep = ", ";
			}
			p = cols;
			pend = p + ncols;
			assert(s < buffer + sizeof(buffer));
		}
		tdsdump_log(TDS_DBG_FUNC, "%s\n", buffer);
	}
	
	memset(&input,  0, sizeof(input));
	
	P.dbproc = dbproc;
	if ((pp = tds_find(&P, pivots, npivots, sizeof(*pivots), pivot_key_equal)) == NULL ) {
		pp = realloc(pivots, (1 + npivots) * sizeof(*pivots)); 
		if (!pp)
			return FAIL;
		pivots = pp;
		pp += npivots++;
	} else {
		agg_free(pp->output);
		key_free(pp->across);		
	}
	memset(pp, 0, sizeof(*pp));

	if ((input.row_key.keys = calloc(nkeys, sizeof(*input.row_key.keys))) == NULL)
		return FAIL;
	input.row_key.nkeys = nkeys;
	for (i=0; i < nkeys; i++) {
		int type = dbcoltype(dbproc, keys[i]);
		int len = dbcollen(dbproc, keys[i]);
		assert(type && len);
		
		col_init(input.row_key.keys+i, type, len);
		if (FAIL == dbbind(dbproc, keys[i], bind_type(type), input.row_key.keys[i].len, col_buffer(input.row_key.keys+i)))
			return FAIL;
		if (FAIL == dbnullbind(dbproc, keys[i], &input.row_key.keys[i].null_indicator))
			return FAIL;
	}
	
	if ((input.col_key.keys = calloc(ncols, sizeof(*input.col_key.keys))) == NULL)
		return FAIL;
	input.col_key.nkeys = ncols;
	for (i=0; i < ncols; i++) {
		int type = dbcoltype(dbproc, cols[i]);
		int len = dbcollen(dbproc, cols[i]);
		assert(type && len);
		
		col_init(input.col_key.keys+i, type, len);
		if (FAIL == dbbind(dbproc, cols[i], bind_type(type), input.col_key.keys[i].len, col_buffer(input.col_key.keys+i)))
			return FAIL;
		if (FAIL == dbnullbind(dbproc, cols[i], &input.col_key.keys[i].null_indicator))
			return FAIL;
	}
	
	/* value */ {
		int type = dbcoltype(dbproc, val);
		int len = dbcollen(dbproc, val);
		assert(type && len);
		
		col_init(&input.value, type, len);
		if (FAIL == dbbind(dbproc, val, bind_type(type), input.value.len, col_buffer(&input.value)))
			return FAIL;
		if (FAIL == dbnullbind(dbproc, val, &input.value.null_indicator))
			return FAIL;
	}
	
	while ((pp->status = dbnextrow(dbproc)) == REG_ROW) {
		/* add to unique list of crosstab columns */
		if ((pacross = tds_find(&input.col_key, pp->across, pp->nacross, sizeof(*pp->across), key_equal)) == NULL ) {
			pacross = realloc(pp->across, (1 + pp->nacross) * sizeof(*pp->across)); 
			if (!pacross)
				return FAIL;
			pp->across = pacross;
			pacross += pp->nacross++;
			key_cpy(pacross, &input.col_key);
		}
		assert(pp->across);
		
		if ((pout = tds_find(&input, pp->output, pp->nout, sizeof(*pp->output), agg_equal)) == NULL ) {
			pout = realloc(pp->output, (1 + pp->nout) * sizeof(*pp->output)); 
			if (!pout)
				return FAIL;
			pp->output = pout;
			pout += pp->nout++;

			
			if ((pout->row_key.keys = calloc(input.row_key.nkeys, sizeof(*pout->row_key.keys))) == NULL)
				return FAIL;
			key_cpy(&pout->row_key, &input.row_key);

			if ((pout->col_key.keys = calloc(input.col_key.nkeys, sizeof(*pout->col_key.keys))) == NULL)
				return FAIL;
			key_cpy(&pout->col_key, &input.col_key);

			col_init(&pout->value, input.value.type, input.value.len);
		}
		
		func(&pout->value, &input.value);

	}

	/* Mark this proc as pivoted, so that dbnextrow() sees it when the application calls it */
	pp->dbproc = dbproc;
	pp->dbresults_state = dbproc->dbresults_state;
	dbproc->dbresults_state = pp->output < pout? _DB_RES_RESULTSET_ROWS : _DB_RES_RESULTSET_EMPTY;
	
	/*
	 * Initialize new metadata
	 */
	nmeta = input.row_key.nkeys + pp->nacross;	
	metadata = calloc(nmeta, sizeof(*metadata));
	if (!metadata)
		return FAIL;
	
	assert(pp->across || pp->nacross == 0);
	
	/* key columns are passed through as-is, verbatim */
	for (i=0; i < input.row_key.nkeys; i++) {
		assert(i < nkeys);
		metadata[i].name = strdup(dbcolname(dbproc, keys[i]));
		metadata[i].pacross = NULL;
		col_cpy(&metadata[i].col, input.row_key.keys+i);
	}

	/* pivoted columns are found in the "across" data */
	for (i=0, pmeta = metadata + input.row_key.nkeys; i < pp->nacross; i++) {
		struct col_t col;
		col_init(&col, SYBFLT8, sizeof(double));
		assert(pmeta + i < metadata + nmeta);
		pmeta[i].name = make_col_name(pp->across+i);
		assert(pp->across);
		pmeta[i].pacross = pp->across + i;
		col_cpy(&pmeta[i].col, pp->nout? &pp->output[0].value : &col);
	}

	if (!reinit_results(dbproc->tds_socket, nmeta, metadata)) {
		return FAIL;
	}
	
	return SUCCEED;
	
#if 0
	for (pp->pout=pp->output; pp->pout < pp->output + pp->nout; pp->pout++) {
		char name[256] = {0};
	
		assert(pp->pout->col_key.keys[0].len < sizeof(name));
		memset(name, '\0', sizeof(name));
		memcpy(name, pp->pout->col_key.keys[0].s, pp->pout->col_key.keys[0].len), 
		printf("%5d  %-30s  %5d\n", pp->pout->row_key.keys[0].i, 
					    name, 
					    pp->pout->value.i );
	}
	exit(1);
#endif
}
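A hedged usage sketch following the doc comment above (the column numbers and the aggregation callback my_agg are illustrative placeholders, not part of the FreeTDS API):

	/* after dbcmd()/dbsqlexec() and a successful dbresults(): */
	int keys[] = { 1 };                 /* group rows by column 1 (left edge)  */
	int cols[] = { 2 };                 /* spread column 2 across the top edge */
	RETCODE erc = dbpivot(dbproc, 1, keys, 1, cols, my_agg /* DBPIVOT_FUNC */, 3);

	if (erc != FAIL) {
		/* the pivoted resultset is read back with the usual DB-Library calls */
		while (dbnextrow(dbproc) == REG_ROW) {
			/* dbnumcols(), dbcolname(), dbdata(), ... as for any other resultset */
		}
	}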
Example #7
File: dbpivot.c  Project: RQZeng/freetds
STATUS
dbnextrow_pivoted(DBPROCESS *dbproc, struct pivot_t *pp)
{
	int i;
	struct agg_t candidate, *pout;

	assert(pp);
	assert(dbproc && dbproc->tds_socket);
	assert(dbproc->tds_socket->res_info);
	assert(dbproc->tds_socket->res_info->columns || 0 == dbproc->tds_socket->res_info->num_cols);
	
	for (pout = pp->output; pout < pp->output + pp->nout; pout++) {
		if (pout->row_key.keys != NULL)
			break;
	}

	if (pout == pp->output + pp->nout) {
		dbproc->dbresults_state = _DB_RES_NEXT_RESULT;
		return NO_MORE_ROWS;
	}

	memset(&candidate, 0, sizeof(candidate));
	key_cpy(&candidate.row_key, &pout->row_key);
	
	/* "buffer_transfer_bound_data" */
	for (i = 0; i < dbproc->tds_socket->res_info->num_cols; i++) {
		struct col_t *pval = NULL;
		TDSCOLUMN *pcol = dbproc->tds_socket->res_info->columns[i];
		assert(pcol);
		
		if (pcol->column_nullbind) {
			if (pcol->column_cur_size < 0) {
				*(DBINT *)(pcol->column_nullbind) = -1;
			} else {
				*(DBINT *)(pcol->column_nullbind) = 0;
			}
		}
		if (!pcol->column_varaddr) {
			fprintf(stderr, "no pcol->column_varaddr in col %d\n", i);
			continue;
		}

		/* find column in output */
		if (pcol->bcp_terminator == NULL) { /* not a cross-tab column */
			pval = &candidate.row_key.keys[i];
		} else {
			struct agg_t *pcan;
			key_cpy(&candidate.col_key, (struct key_t *) pcol->bcp_terminator);
			if ((pcan = tds_find(&candidate, pout, pp->output + pp->nout - pout, 
						sizeof(*pp->output), agg_next)) != NULL) {
				/* flag this output as used */
				pout->row_key.keys = NULL;
				pval = &pcan->value;
			}
		}
		
		if (!pval || col_null(pval)) {  /* nothing in output for this x,y location */
			dbgetnull(dbproc, pcol->column_bindtype, pcol->column_bindlen, (BYTE *) pcol->column_varaddr);
			continue;
		}
		
		assert(pval);
		
#if 0
		printf("\ncopying col %d, type %d/%d, len %d to %p ", i, pval->type, pcol->column_type, pval->len, pcol->column_varaddr);
		switch (pval->type) {
		case 48:
			printf("value %d", (int)pval->ti);	break;
		case 56:
			printf("value %d", (int)pval->si);	break;
		}
		printf("\n");
#endif		
		pcol->column_size = pval->len;
		pcol->column_data = col_buffer(pval);
		
		copy_data_to_host_var(	dbproc, 
					pval->type, 
					col_buffer(pval), 
					pval->len, 
					(BYTE *) pcol->column_varaddr,  
					pcol->column_bindlen,
					pcol->column_bindtype, 
					(DBINT*) pcol->column_nullbind
					);
	}

	return REG_ROW;
}