// AVX2 high bit-depth "fp" quantizer.
//
// Walks coeff_ptr in groups of 8 elements (one unaligned 256-bit load per
// group), handing each group to quantize() together with the matching
// positions of iscan, qcoeff_ptr and dqcoeff_ptr. quantize() also receives a
// running eob vector; once all groups are done, the signed maximum over the
// sixteen 16-bit lanes of that vector is stored in *eob_ptr.
//
// Notes:
//  - scan, zbin_ptr and quant_shift_ptr are part of the common quantizer
//    signature but are never read here.
//  - The first group is always processed, even if n_coeffs is below 8.
//  - The first group runs with the parameters exactly as init_qp() set them;
//    all later groups run with them after a single update_qp() call
//    (presumably a DC -> AC parameter switch; confirm against update_qp()).
void av1_highbd_quantize_fp_avx2(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
    const int16_t *round_ptr, const int16_t *quant_ptr,
    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
    const int16_t *scan, const int16_t *iscan, int log_scale) {
  const unsigned int group = 8;  // elements consumed per quantize() call
  __m256i params[3];
  __m256i eob_acc = _mm256_setzero_si256();
  __m256i vals;

  (void)scan;
  (void)zbin_ptr;
  (void)quant_shift_ptr;

  init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, params);

  // Leading group: uses the freshly initialised parameters.
  vals = _mm256_loadu_si256((const __m256i *)coeff_ptr);
  quantize(params, &vals, iscan, log_scale, qcoeff_ptr, dqcoeff_ptr,
           &eob_acc);

  // Everything after the leading group uses the updated parameters.
  update_qp(params);

  for (n_coeffs -= group; n_coeffs > 0; n_coeffs -= group) {
    coeff_ptr += group;
    qcoeff_ptr += group;
    dqcoeff_ptr += group;
    iscan += group;

    vals = _mm256_loadu_si256((const __m256i *)coeff_ptr);
    quantize(params, &vals, iscan, log_scale, qcoeff_ptr, dqcoeff_ptr,
             &eob_acc);
  }

  // Horizontal max, step by step inside each 128-bit half:
  //   dwords 2,3 onto 0,1  ->  words 2,3 onto 0,1  ->  word 1 onto 0.
  eob_acc = _mm256_max_epi16(eob_acc, _mm256_shuffle_epi32(eob_acc, 0xe));
  eob_acc = _mm256_max_epi16(eob_acc, _mm256_shufflelo_epi16(eob_acc, 0xe));
  eob_acc = _mm256_max_epi16(eob_acc, _mm256_shufflelo_epi16(eob_acc, 1));

  // Combine the two halves; the overall maximum ends up in word 0.
  {
    const __m128i lower = _mm256_castsi256_si128(eob_acc);
    const __m128i upper = _mm256_extractf128_si256(eob_acc, 1);
    *eob_ptr = _mm_extract_epi16(_mm_max_epi16(lower, upper), 0);
  }
}
/**
 * hinic_io_create_qps - Create Queue Pairs
 * @func_to_io: func to io channel that holds the IO components
 * @base_qpn: base qp number
 * @num_qps: number queue pairs to create
 * @sq_msix_entries: msix entries for sq, one per queue pair
 * @rq_msix_entries: msix entries for rq, one per queue pair
 *
 * Allocates the per-QP bookkeeping arrays (qps, sq_wq, rq_wq, sq_db) as
 * device-managed memory, allocates the coherent CI table, initialises each
 * QP with its pair of msix entries and finally writes the QP contexts.
 * On any failure everything acquired so far is released again before
 * returning.
 *
 * Return 0 - Success, negative - Failure
 **/
int hinic_io_create_qps(struct hinic_func_to_io *func_to_io,
			u16 base_qpn, int num_qps,
			struct msix_entry *sq_msix_entries,
			struct msix_entry *rq_msix_entries)
{
	struct hinic_hwif *hwif = func_to_io->hwif;
	struct pci_dev *pdev = hwif->pdev;
	void *ci_addr_base;
	int i, j, err;

	/*
	 * devm_kcalloc() rejects a count * size product that would overflow
	 * (which also covers a negative num_qps converted to size_t), instead
	 * of silently allocating a too-small array.
	 */
	func_to_io->qps = devm_kcalloc(&pdev->dev, num_qps,
				       sizeof(*func_to_io->qps), GFP_KERNEL);
	if (!func_to_io->qps)
		return -ENOMEM;

	func_to_io->sq_wq = devm_kcalloc(&pdev->dev, num_qps,
					 sizeof(*func_to_io->sq_wq),
					 GFP_KERNEL);
	if (!func_to_io->sq_wq) {
		err = -ENOMEM;
		goto err_sq_wq;
	}

	func_to_io->rq_wq = devm_kcalloc(&pdev->dev, num_qps,
					 sizeof(*func_to_io->rq_wq),
					 GFP_KERNEL);
	if (!func_to_io->rq_wq) {
		err = -ENOMEM;
		goto err_rq_wq;
	}

	func_to_io->sq_db = devm_kcalloc(&pdev->dev, num_qps,
					 sizeof(*func_to_io->sq_db),
					 GFP_KERNEL);
	if (!func_to_io->sq_db) {
		err = -ENOMEM;
		goto err_sq_db;
	}

	ci_addr_base = dma_zalloc_coherent(&pdev->dev, CI_TABLE_SIZE(num_qps),
					   &func_to_io->ci_dma_base,
					   GFP_KERNEL);
	if (!ci_addr_base) {
		dev_err(&pdev->dev, "Failed to allocate CI area\n");
		err = -ENOMEM;
		goto err_ci_base;
	}

	func_to_io->ci_addr_base = ci_addr_base;

	for (i = 0; i < num_qps; i++) {
		err = init_qp(func_to_io, &func_to_io->qps[i], i,
			      &sq_msix_entries[i], &rq_msix_entries[i]);
		if (err) {
			dev_err(&pdev->dev, "Failed to create QP %d\n", i);
			goto err_init_qp;
		}
	}

	err = write_qp_ctxts(func_to_io, base_qpn, num_qps);
	if (err) {
		dev_err(&pdev->dev, "Failed to init QP ctxts\n");
		goto err_write_qp_ctxts;
	}

	return 0;

err_write_qp_ctxts:
err_init_qp:
	/*
	 * i is the number of QPs that were initialised successfully: the
	 * index of the failing QP when init_qp() failed, or num_qps when
	 * write_qp_ctxts() failed after the loop completed.
	 */
	for (j = 0; j < i; j++)
		destroy_qp(func_to_io, &func_to_io->qps[j]);

	dma_free_coherent(&pdev->dev, CI_TABLE_SIZE(num_qps),
			  func_to_io->ci_addr_base, func_to_io->ci_dma_base);

err_ci_base:
	devm_kfree(&pdev->dev, func_to_io->sq_db);

err_sq_db:
	devm_kfree(&pdev->dev, func_to_io->rq_wq);

err_rq_wq:
	devm_kfree(&pdev->dev, func_to_io->sq_wq);

err_sq_wq:
	devm_kfree(&pdev->dev, func_to_io->qps);
	return err;
}
static void verbs_add_device (struct ib_device *dev) { int ret; struct ib_qp_init_attr attrs; if (ib_dev) return; /* durty hack for ib_dma_map_single not to segfault */ dev->dma_ops = NULL; ib_dev = dev; printk (KERN_INFO "IB add device called. Name = %s\n", dev->name); ret = ib_query_device (dev, &dev_attr); if (ret) { printk (KERN_INFO "ib_quer_device failed: %d\n", ret); return; } printk (KERN_INFO "IB device caps: max_qp %d, max_mcast_grp: %d, max_pkeys: %d\n", dev_attr.max_qp, dev_attr.max_mcast_grp, (int)dev_attr.max_pkeys); /* We'll work with first port. It's a sample module, anyway. Who is that moron which decided * to count ports from one? */ ret = ib_query_port (dev, 1, &port_attr); if (ret) { printk (KERN_INFO "ib_query_port failed: %d\n", ret); return; } printk (KERN_INFO "Port info: lid: %u, sm_lid: %u, max_msg_size: %u\n", (unsigned)port_attr.lid, (unsigned)port_attr.sm_lid, port_attr.max_msg_sz); pd = ib_alloc_pd (dev); if (IS_ERR (pd)) { ret = PTR_ERR (pd); printk (KERN_INFO "pd allocation failed: %d\n", ret); return; } printk (KERN_INFO "PD allocated\n"); mr = ib_get_dma_mr (pd, IB_ACCESS_LOCAL_WRITE); if (IS_ERR (mr)) { ret = PTR_ERR (mr); printk (KERN_INFO "get_dma_mr failed: %d\n", ret); return; } send_cq = ib_create_cq (dev, NULL, NULL, NULL, 1, 1); if (IS_ERR (send_cq)) { ret = PTR_ERR (send_cq); printk (KERN_INFO "ib_create_cq failed: %d\n", ret); return; } recv_cq = ib_create_cq (dev, verbs_comp_handler_recv, NULL, NULL, 1, 1); if (IS_ERR (recv_cq)) { ret = PTR_ERR (recv_cq); printk (KERN_INFO "ib_create_cq failed: %d\n", ret); return; } ib_req_notify_cq (recv_cq, IB_CQ_NEXT_COMP); printk (KERN_INFO "CQs allocated\n"); ib_query_pkey (dev, 1, 0, &pkey); /* allocate memory */ send_buf = kmalloc (buf_size + 40, GFP_KERNEL); recv_buf = kmalloc (buf_size + 40, GFP_KERNEL); if (!send_buf || !recv_buf) { printk (KERN_INFO "Memory allocation error\n"); return; } printk (KERN_INFO "Trying to register regions\n"); if (ib_dev->dma_ops) printk 
(KERN_INFO "DMA ops are defined\n"); memset (send_buf, 0, buf_size+40); memset (send_buf, 0, buf_size+40); send_key = ib_dma_map_single (ib_dev, send_buf, buf_size, DMA_FROM_DEVICE); printk (KERN_INFO "send_key obtained %llx\n", send_key); recv_key = ib_dma_map_single (ib_dev, recv_buf, buf_size, DMA_TO_DEVICE); printk (KERN_INFO "recv_key obtained %llx\n", recv_key); if (ib_dma_mapping_error (ib_dev, send_key)) { printk (KERN_INFO "Error mapping send buffer\n"); return; } if (ib_dma_mapping_error (ib_dev, recv_key)) { printk (KERN_INFO "Error mapping recv buffer\n"); return; } memset (&attrs, 0, sizeof (attrs)); attrs.qp_type = IB_QPT_UD; attrs.sq_sig_type = IB_SIGNAL_ALL_WR; attrs.event_handler = verbs_qp_event; attrs.cap.max_send_wr = CQ_SIZE; attrs.cap.max_recv_wr = CQ_SIZE; attrs.cap.max_send_sge = 1; attrs.cap.max_recv_sge = 1; attrs.send_cq = send_cq; attrs.recv_cq = recv_cq; qp = ib_create_qp (pd, &attrs); if (IS_ERR (qp)) { ret = PTR_ERR (qp); printk (KERN_INFO "qp allocation failed: %d\n", ret); return; } printk (KERN_INFO "Create QP with num %x\n", qp->qp_num); if (init_qp (qp)) { printk (KERN_INFO "Failed to initialize QP\n"); return; } ret = ib_query_gid (ib_dev, 1, 0, &local_info.gid); if (ret) { printk (KERN_INFO "query_gid failed %d\n", ret); return; } local_info.qp_num = qp->qp_num; local_info.lid = port_attr.lid; /* now we are ready to send our QP number and other stuff to other party */ if (!server_addr) { schedule_work (&sock_accept); flush_scheduled_work (); } else exchange_info (server_addr); if (!have_remote_info) { printk (KERN_INFO "Have no remote info, give up\n"); return; } ret = path_rec_lookup_start (); if (ret) { printk (KERN_INFO "path_rec lookup start failed: %d\n", ret); return; } /* post receive request */ verbs_post_recv_req (); mod_timer (&verbs_timer, NEXTJIFF(1)); }