void av1_highbd_quantize_fp_avx2(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
    const int16_t *round_ptr, const int16_t *quant_ptr,
    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
    const int16_t *scan, const int16_t *iscan, int log_scale) {
  (void)scan;
  (void)zbin_ptr;
  (void)quant_shift_ptr;
  const unsigned int step = 8;
  __m256i qp[3], coeff;

  init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, qp);
  coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);

  __m256i eob = _mm256_setzero_si256();
  quantize(qp, &coeff, iscan, log_scale, qcoeff_ptr, dqcoeff_ptr, &eob);

  coeff_ptr += step;
  qcoeff_ptr += step;
  dqcoeff_ptr += step;
  iscan += step;
  n_coeffs -= step;

  update_qp(qp);
  while (n_coeffs > 0) {
    coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
    quantize(qp, &coeff, iscan, log_scale, qcoeff_ptr, dqcoeff_ptr, &eob);

    coeff_ptr += step;
    qcoeff_ptr += step;
    dqcoeff_ptr += step;
    iscan += step;
    n_coeffs -= step;
  }
  {
    __m256i eob_s;
    eob_s = _mm256_shuffle_epi32(eob, 0xe);
    eob = _mm256_max_epi16(eob, eob_s);
    eob_s = _mm256_shufflelo_epi16(eob, 0xe);
    eob = _mm256_max_epi16(eob, eob_s);
    eob_s = _mm256_shufflelo_epi16(eob, 1);
    eob = _mm256_max_epi16(eob, eob_s);
    const __m128i final_eob = _mm_max_epi16(_mm256_castsi256_si128(eob),
                                            _mm256_extractf128_si256(eob, 1));
    *eob_ptr = _mm_extract_epi16(final_eob, 0);
  }
}
Exemple #2
0
/**
 * hinic_io_create_qps - Create Queue Pairs
 * @func_to_io: func to io channel that holds the IO components
 * @base_qpn: base qp number
 * @num_qps: number queue pairs to create
 * @sq_msix_entry: msix entries for sq
 * @rq_msix_entry: msix entries for rq
 *
 * Return 0 - Success, negative - Failure
 **/
int hinic_io_create_qps(struct hinic_func_to_io *func_to_io,
			u16 base_qpn, int num_qps,
			struct msix_entry *sq_msix_entries,
			struct msix_entry *rq_msix_entries)
{
	struct hinic_hwif *hwif = func_to_io->hwif;
	struct pci_dev *pdev = hwif->pdev;
	size_t qps_size, wq_size, db_size;
	void *ci_addr_base;
	int i, j, err;

	qps_size = num_qps * sizeof(*func_to_io->qps);
	func_to_io->qps = devm_kzalloc(&pdev->dev, qps_size, GFP_KERNEL);
	if (!func_to_io->qps)
		return -ENOMEM;

	wq_size = num_qps * sizeof(*func_to_io->sq_wq);
	func_to_io->sq_wq = devm_kzalloc(&pdev->dev, wq_size, GFP_KERNEL);
	if (!func_to_io->sq_wq) {
		err = -ENOMEM;
		goto err_sq_wq;
	}

	wq_size = num_qps * sizeof(*func_to_io->rq_wq);
	func_to_io->rq_wq = devm_kzalloc(&pdev->dev, wq_size, GFP_KERNEL);
	if (!func_to_io->rq_wq) {
		err = -ENOMEM;
		goto err_rq_wq;
	}

	db_size = num_qps * sizeof(*func_to_io->sq_db);
	func_to_io->sq_db = devm_kzalloc(&pdev->dev, db_size, GFP_KERNEL);
	if (!func_to_io->sq_db) {
		err = -ENOMEM;
		goto err_sq_db;
	}

	ci_addr_base = dma_zalloc_coherent(&pdev->dev, CI_TABLE_SIZE(num_qps),
					   &func_to_io->ci_dma_base,
					   GFP_KERNEL);
	if (!ci_addr_base) {
		dev_err(&pdev->dev, "Failed to allocate CI area\n");
		err = -ENOMEM;
		goto err_ci_base;
	}

	func_to_io->ci_addr_base = ci_addr_base;

	for (i = 0; i < num_qps; i++) {
		err = init_qp(func_to_io, &func_to_io->qps[i], i,
			      &sq_msix_entries[i], &rq_msix_entries[i]);
		if (err) {
			dev_err(&pdev->dev, "Failed to create QP %d\n", i);
			goto err_init_qp;
		}
	}

	err = write_qp_ctxts(func_to_io, base_qpn, num_qps);
	if (err) {
		dev_err(&pdev->dev, "Failed to init QP ctxts\n");
		goto err_write_qp_ctxts;
	}

	return 0;

err_write_qp_ctxts:
err_init_qp:
	for (j = 0; j < i; j++)
		destroy_qp(func_to_io, &func_to_io->qps[j]);

	dma_free_coherent(&pdev->dev, CI_TABLE_SIZE(num_qps),
			  func_to_io->ci_addr_base, func_to_io->ci_dma_base);

err_ci_base:
	devm_kfree(&pdev->dev, func_to_io->sq_db);

err_sq_db:
	devm_kfree(&pdev->dev, func_to_io->rq_wq);

err_rq_wq:
	devm_kfree(&pdev->dev, func_to_io->sq_wq);

err_sq_wq:
	devm_kfree(&pdev->dev, func_to_io->qps);
	return err;
}
Exemple #3
0
static void verbs_add_device (struct ib_device *dev)
{
	int ret;
	struct ib_qp_init_attr attrs;

	if (ib_dev)
		return;

	/* durty hack for ib_dma_map_single not to segfault */
	dev->dma_ops = NULL;
	ib_dev = dev;

	printk (KERN_INFO "IB add device called. Name = %s\n", dev->name);

	ret = ib_query_device (dev, &dev_attr);
	if (ret) {
		printk (KERN_INFO "ib_quer_device failed: %d\n", ret);
		return;
	}

	printk (KERN_INFO "IB device caps: max_qp %d, max_mcast_grp: %d, max_pkeys: %d\n",
		dev_attr.max_qp, dev_attr.max_mcast_grp, (int)dev_attr.max_pkeys);

	/* We'll work with first port. It's a sample module, anyway. Who is that moron which decided
	 * to count ports from one? */
	ret = ib_query_port (dev, 1, &port_attr);
	if (ret) {
		printk (KERN_INFO "ib_query_port failed: %d\n", ret);
		return;
	}

	printk (KERN_INFO "Port info: lid: %u, sm_lid: %u, max_msg_size: %u\n",
		(unsigned)port_attr.lid, (unsigned)port_attr.sm_lid, port_attr.max_msg_sz);

	pd = ib_alloc_pd (dev);
	if (IS_ERR (pd)) {
		ret = PTR_ERR (pd);
		printk (KERN_INFO "pd allocation failed: %d\n", ret);
		return;
	}

	printk (KERN_INFO "PD allocated\n");

	mr = ib_get_dma_mr (pd, IB_ACCESS_LOCAL_WRITE);
	if (IS_ERR (mr)) {
		ret = PTR_ERR (mr);
		printk (KERN_INFO "get_dma_mr failed: %d\n", ret);
		return;
	}

	send_cq = ib_create_cq (dev, NULL, NULL, NULL, 1, 1);
	if (IS_ERR (send_cq)) {
		ret = PTR_ERR (send_cq);
		printk (KERN_INFO "ib_create_cq failed: %d\n", ret);
		return;
	}

	recv_cq = ib_create_cq (dev, verbs_comp_handler_recv, NULL, NULL, 1, 1);
	if (IS_ERR (recv_cq)) {
		ret = PTR_ERR (recv_cq);
		printk (KERN_INFO "ib_create_cq failed: %d\n", ret);
		return;
	}

	ib_req_notify_cq (recv_cq, IB_CQ_NEXT_COMP);
	printk (KERN_INFO "CQs allocated\n");

	ib_query_pkey (dev, 1, 0, &pkey);

	/* allocate memory */
	send_buf = kmalloc (buf_size + 40, GFP_KERNEL);
	recv_buf = kmalloc (buf_size + 40, GFP_KERNEL);

	if (!send_buf || !recv_buf) {
		printk (KERN_INFO "Memory allocation error\n");
		return;
	}

	printk (KERN_INFO "Trying to register regions\n");
	if (ib_dev->dma_ops)
		printk (KERN_INFO "DMA ops are defined\n");

	memset (send_buf, 0, buf_size+40);
	memset (send_buf, 0, buf_size+40);

	send_key = ib_dma_map_single (ib_dev, send_buf, buf_size, DMA_FROM_DEVICE);
	printk (KERN_INFO "send_key obtained %llx\n", send_key);
	recv_key = ib_dma_map_single (ib_dev, recv_buf, buf_size, DMA_TO_DEVICE);
	printk (KERN_INFO "recv_key obtained %llx\n", recv_key);

	if (ib_dma_mapping_error (ib_dev, send_key)) {
		printk (KERN_INFO "Error mapping send buffer\n");
		return;
	}

	if (ib_dma_mapping_error (ib_dev, recv_key)) {
		printk (KERN_INFO "Error mapping recv buffer\n");
		return;
	}

	memset (&attrs, 0, sizeof (attrs));
	attrs.qp_type = IB_QPT_UD;
	attrs.sq_sig_type = IB_SIGNAL_ALL_WR;
	attrs.event_handler = verbs_qp_event;
	attrs.cap.max_send_wr = CQ_SIZE;
	attrs.cap.max_recv_wr = CQ_SIZE;
	attrs.cap.max_send_sge = 1;
	attrs.cap.max_recv_sge = 1;
	attrs.send_cq = send_cq;
	attrs.recv_cq = recv_cq;

	qp = ib_create_qp (pd, &attrs);
	if (IS_ERR (qp)) {
		ret = PTR_ERR (qp);
		printk (KERN_INFO "qp allocation failed: %d\n", ret);
		return;
	}

	printk (KERN_INFO "Create QP with num %x\n", qp->qp_num);

	if (init_qp (qp)) {
		printk (KERN_INFO "Failed to initialize QP\n");
		return;
	}

	ret = ib_query_gid (ib_dev, 1, 0, &local_info.gid);
	if (ret) {
		printk (KERN_INFO "query_gid failed %d\n", ret);
		return;
	}

	local_info.qp_num = qp->qp_num;
	local_info.lid = port_attr.lid;

	/* now we are ready to send our QP number and other stuff to other party */
	if (!server_addr) {
		schedule_work (&sock_accept);
		flush_scheduled_work ();
	}
	else
		exchange_info (server_addr);

	if (!have_remote_info) {
		printk (KERN_INFO "Have no remote info, give up\n");
		return;
	}

	ret = path_rec_lookup_start ();
	if (ret) {
		printk (KERN_INFO "path_rec lookup start failed: %d\n", ret);
		return;
	}

	/* post receive request */
	verbs_post_recv_req ();

	mod_timer (&verbs_timer, NEXTJIFF(1));
}