/**
 * rvt_post_recv - post a receive on a QP
 * @ibqp: the QP to post the receive on
 * @wr: the WR to post
 * @bad_wr: the first bad WR is put here
 *
 * This may be called from interrupt context.
 *
 * Return: 0 on success otherwise errno
 */
int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
		  struct ib_recv_wr **bad_wr)
{
	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
	struct rvt_rwq *wq = qp->r_rq.wq;
	unsigned long flags;
	int qp_err_flush = (ib_rvt_state_ops[qp->state] & RVT_FLUSH_RECV) &&
				!qp->ibqp.srq;

	/* Check that state is OK to post receive. */
	if (!(ib_rvt_state_ops[qp->state] & RVT_POST_RECV_OK) || !wq) {
		*bad_wr = wr;
		return -EINVAL;
	}

	for (; wr; wr = wr->next) {
		struct rvt_rwqe *wqe;
		u32 next;
		int i;

		if ((unsigned)wr->num_sge > qp->r_rq.max_sge) {
			*bad_wr = wr;
			return -EINVAL;
		}

		spin_lock_irqsave(&qp->r_rq.lock, flags);
		next = wq->head + 1;
		if (next >= qp->r_rq.size)
			next = 0;
		if (next == wq->tail) {
			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
			*bad_wr = wr;
			return -ENOMEM;
		}
		if (unlikely(qp_err_flush)) {
			struct ib_wc wc;

			memset(&wc, 0, sizeof(wc));
			wc.qp = &qp->ibqp;
			wc.opcode = IB_WC_RECV;
			wc.wr_id = wr->wr_id;
			wc.status = IB_WC_WR_FLUSH_ERR;
			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
		} else {
			wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head);
			wqe->wr_id = wr->wr_id;
			wqe->num_sge = wr->num_sge;
			for (i = 0; i < wr->num_sge; i++)
				wqe->sg_list[i] = wr->sg_list[i];
			/*
			 * Make sure queue entry is written
			 * before the head index.
			 */
			smp_wmb();
			wq->head = next;
		}
		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
	}
	return 0;
}
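
/*
 * Caller-side sketch (illustrative, not part of this file): a minimal
 * example of how a kernel ULP might post the single-SGE receive that
 * rvt_post_recv() above consumes. The buffer DMA address, length, and
 * lkey are assumed to come from a memory region registered by the
 * caller; example_post_one_recv() itself is a hypothetical helper.
 */
static int example_post_one_recv(struct ib_qp *qp, u64 rx_dma, u32 rx_len,
				 u32 lkey)
{
	struct ib_sge sge = {
		.addr = rx_dma,		/* DMA address of the receive buffer */
		.length = rx_len,
		.lkey = lkey,		/* from the caller's registered MR */
	};
	struct ib_recv_wr wr = {
		.wr_id = rx_dma,	/* comes back in wc.wr_id */
		.sg_list = &sge,
		.num_sge = 1,
	};
	struct ib_recv_wr *bad_wr;

	/*
	 * ib_post_recv() dispatches through the device's post_recv hook,
	 * which for an rdmavt device is rvt_post_recv() above; -ENOMEM
	 * here means the receive ring (wq->head/wq->tail) is full.
	 */
	return ib_post_recv(qp, &wr, &bad_wr);
}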
/*
 * Validate a RWQE and fill in the SGE state.
 * Return 1 if OK.
 */
static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
{
	int i, j, ret;
	struct ib_wc wc;
	struct rvt_lkey_table *rkt;
	struct rvt_pd *pd;
	struct rvt_sge_state *ss;

	rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
	pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
	ss = &qp->r_sge;
	ss->sg_list = qp->r_sg_list;
	qp->r_len = 0;
	for (i = j = 0; i < wqe->num_sge; i++) {
		if (wqe->sg_list[i].length == 0)
			continue;
		/* Check LKEY */
		ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
				  NULL, &wqe->sg_list[i],
				  IB_ACCESS_LOCAL_WRITE);
		if (unlikely(ret <= 0))
			goto bad_lkey;
		qp->r_len += wqe->sg_list[i].length;
		j++;
	}
	ss->num_sge = j;
	ss->total_len = qp->r_len;
	ret = 1;
	goto bail;

bad_lkey:
	while (j) {
		struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;

		rvt_put_mr(sge->mr);
	}
	ss->num_sge = 0;
	memset(&wc, 0, sizeof(wc));
	wc.wr_id = wqe->wr_id;
	wc.status = IB_WC_LOC_PROT_ERR;
	wc.opcode = IB_WC_RECV;
	wc.qp = &qp->ibqp;
	/* Signal solicited completion event. */
	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
	ret = 0;
bail:
	return ret;
}
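
/*
 * Illustrative sketch (not from the driver): the rvt_sge_state that
 * qib_init_sge() fills keeps the first validated SGE in ss->sge and
 * SGEs 2..n in ss->sg_list[0..n-2], which is why both the validation
 * loop and the bad_lkey unwind index with j - 1. This hypothetical
 * walker makes that layout explicit by re-summing the per-SGE lengths;
 * the result matches the ss->total_len that qib_init_sge() computed.
 */
static u32 example_sge_state_total(struct rvt_sge_state *ss)
{
	u32 total = 0;
	u8 i;

	for (i = 0; i < ss->num_sge; i++) {
		/* SGE 0 lives in ss->sge; the rest in ss->sg_list[] */
		struct rvt_sge *sge = i ? &ss->sg_list[i - 1] : &ss->sge;

		/* sge_length is the full SGE length set by rvt_lkey_ok() */
		total += sge->sge_length;
	}
	return total;
}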
/**
 * rvt_error_qp - put a QP into the error state
 * @qp: the QP to put into the error state
 * @err: the receive completion error to signal if a RWQE is active
 *
 * Flushes both send and receive work queues.
 *
 * Return: true if last WQE event should be generated.
 * The QP r_lock and s_lock should be held and interrupts disabled.
 * If we are already in error state, just return.
 */
int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err)
{
	struct ib_wc wc;
	int ret = 0;
	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);

	if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
		goto bail;

	qp->state = IB_QPS_ERR;

	if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
		del_timer(&qp->s_timer);
	}

	if (qp->s_flags & RVT_S_ANY_WAIT_SEND)
		qp->s_flags &= ~RVT_S_ANY_WAIT_SEND;

	rdi->driver_f.notify_error_qp(qp);

	/* Schedule the sending tasklet to drain the send work queue. */
	if (READ_ONCE(qp->s_last) != qp->s_head)
		rdi->driver_f.schedule_send(qp);

	rvt_clear_mr_refs(qp, 0);

	memset(&wc, 0, sizeof(wc));
	wc.qp = &qp->ibqp;
	wc.opcode = IB_WC_RECV;

	if (test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) {
		wc.wr_id = qp->r_wr_id;
		wc.status = err;
		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
	}
	wc.status = IB_WC_WR_FLUSH_ERR;

	if (qp->r_rq.wq) {
		struct rvt_rwq *wq;
		u32 head;
		u32 tail;

		spin_lock(&qp->r_rq.lock);

		/* sanity check pointers before trusting them */
		wq = qp->r_rq.wq;
		head = wq->head;
		if (head >= qp->r_rq.size)
			head = 0;
		tail = wq->tail;
		if (tail >= qp->r_rq.size)
			tail = 0;
		while (tail != head) {
			wc.wr_id = rvt_get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
			if (++tail >= qp->r_rq.size)
				tail = 0;
			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
		}
		wq->tail = tail;

		spin_unlock(&qp->r_rq.lock);
	} else if (qp->ibqp.event_handler) {
		ret = 1;
	}

bail:
	return ret;
}
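
/*
 * Consumer-side sketch (illustrative, not part of this file): after
 * rvt_error_qp() moves a QP to IB_QPS_ERR, every flushed WR surfaces as
 * a completion with status IB_WC_WR_FLUSH_ERR. A ULP draining its CQ
 * might handle that like this; the per-WR bookkeeping keyed by wc.wr_id
 * is assumed to be the caller's own.
 */
static void example_drain_cq(struct ib_cq *cq)
{
	struct ib_wc wc;

	while (ib_poll_cq(cq, 1, &wc) > 0) {
		if (wc.status == IB_WC_WR_FLUSH_ERR) {
			/* reclaim the buffer identified by wc.wr_id */
			continue;
		}
		/* handle successful or genuinely failed completions */
	}
}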
/**
 * hfi1_uc_rcv - handle an incoming UC packet
 * @packet: the packet struct holding the received header, payload,
 *          length, and the QP this packet arrived on
 *
 * This is called from qp_rcv() to process an incoming UC packet
 * for the given QP.
 * Called at interrupt level.
 */
void hfi1_uc_rcv(struct hfi1_packet *packet)
{
	struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
	void *data = packet->payload;
	u32 tlen = packet->tlen;
	struct rvt_qp *qp = packet->qp;
	struct ib_other_headers *ohdr = packet->ohdr;
	u32 opcode = packet->opcode;
	u32 hdrsize = packet->hlen;
	u32 psn;
	u32 pad = packet->pad;
	struct ib_wc wc;
	u32 pmtu = qp->pmtu;
	struct ib_reth *reth;
	int ret;
	u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);

	if (hfi1_ruc_check_hdr(ibp, packet))
		return;

	process_ecn(qp, packet, true);

	psn = ib_bth_get_psn(ohdr);
	/* Compare the PSN versus the expected PSN. */
	if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) {
		/*
		 * Handle a sequence error.
		 * Silently drop any current message.
		 */
		qp->r_psn = psn;
inv:
		if (qp->r_state == OP(SEND_FIRST) ||
		    qp->r_state == OP(SEND_MIDDLE)) {
			set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
			qp->r_sge.num_sge = 0;
		} else {
			rvt_put_ss(&qp->r_sge);
		}
		qp->r_state = OP(SEND_LAST);
		switch (opcode) {
		case OP(SEND_FIRST):
		case OP(SEND_ONLY):
		case OP(SEND_ONLY_WITH_IMMEDIATE):
			goto send_first;

		case OP(RDMA_WRITE_FIRST):
		case OP(RDMA_WRITE_ONLY):
		case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
			goto rdma_first;

		default:
			goto drop;
		}
	}

	/* Check for opcode sequence errors. */
	switch (qp->r_state) {
	case OP(SEND_FIRST):
	case OP(SEND_MIDDLE):
		if (opcode == OP(SEND_MIDDLE) ||
		    opcode == OP(SEND_LAST) ||
		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
			break;
		goto inv;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_MIDDLE):
		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
		    opcode == OP(RDMA_WRITE_LAST) ||
		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
			break;
		goto inv;

	default:
		if (opcode == OP(SEND_FIRST) ||
		    opcode == OP(SEND_ONLY) ||
		    opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
		    opcode == OP(RDMA_WRITE_FIRST) ||
		    opcode == OP(RDMA_WRITE_ONLY) ||
		    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
			break;
		goto inv;
	}

	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
		rvt_comm_est(qp);

	/* OK, process the packet. */
	switch (opcode) {
	case OP(SEND_FIRST):
	case OP(SEND_ONLY):
	case OP(SEND_ONLY_WITH_IMMEDIATE):
send_first:
		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
			qp->r_sge = qp->s_rdma_read_sge;
		} else {
			ret = hfi1_rvt_get_rwqe(qp, 0);
			if (ret < 0)
				goto op_err;
			if (!ret)
				goto drop;
			/*
			 * qp->s_rdma_read_sge will be the owner
			 * of the mr references.
			 */
			qp->s_rdma_read_sge = qp->r_sge;
		}
		qp->r_rcv_len = 0;
		if (opcode == OP(SEND_ONLY))
			goto no_immediate_data;
		else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
			goto send_last_imm;
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
		/* Check for invalid length PMTU or posted rwqe len. */
		/*
		 * There will be no padding for 9B packet but 16B packets
		 * will come in with some padding since we always add
		 * CRC and LT bytes which will need to be flit aligned
		 */
		if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
			goto rewind;
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len))
			goto rewind;
		hfi1_copy_sge(&qp->r_sge, data, pmtu, false, false);
		break;

	case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
		wc.ex.imm_data = ohdr->u.imm_data;
		wc.wc_flags = IB_WC_WITH_IMM;
		goto send_last;
	case OP(SEND_LAST):
no_immediate_data:
		wc.ex.imm_data = 0;
		wc.wc_flags = 0;
send_last:
		/* Check for invalid length. */
		/* LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + extra_bytes)))
			goto rewind;
		/* Don't count the CRC. */
		tlen -= (hdrsize + extra_bytes);
		wc.byte_len = tlen + qp->r_rcv_len;
		if (unlikely(wc.byte_len > qp->r_len))
			goto rewind;
		wc.opcode = IB_WC_RECV;
		hfi1_copy_sge(&qp->r_sge, data, tlen, false, false);
		rvt_put_ss(&qp->s_rdma_read_sge);
last_imm:
		wc.wr_id = qp->r_wr_id;
		wc.status = IB_WC_SUCCESS;
		wc.qp = &qp->ibqp;
		wc.src_qp = qp->remote_qpn;
		wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
		/*
		 * It seems that IB mandates the presence of an SL in a
		 * work completion only for the UD transport (see section
		 * 11.4.2 of IBTA Vol. 1).
		 *
		 * However, the way the SL is chosen below is consistent
		 * with the way that IB/qib works and is trying to avoid
		 * introducing incompatibilities.
		 *
		 * See also OPA Vol. 1, section 9.7.6, and table 9-17.
		 */
		wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
		/* zero fields that are N/A */
		wc.vendor_err = 0;
		wc.pkey_index = 0;
		wc.dlid_path_bits = 0;
		wc.port_num = 0;
		/* Signal completion event if the solicited bit is set. */
		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
			     ib_bth_is_solicited(ohdr));
		break;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_ONLY):
	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
rdma_first:
		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_WRITE)))
			goto drop;
		reth = &ohdr->u.rc.reth;
		qp->r_len = be32_to_cpu(reth->length);
		qp->r_rcv_len = 0;
		qp->r_sge.sg_list = NULL;
		if (qp->r_len != 0) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			/* Check rkey */
			ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
					 vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
			if (unlikely(!ok))
				goto drop;
			qp->r_sge.num_sge = 1;
		} else {
			qp->r_sge.num_sge = 0;
			qp->r_sge.sge.mr = NULL;
			qp->r_sge.sge.vaddr = NULL;
			qp->r_sge.sge.length = 0;
			qp->r_sge.sge.sge_length = 0;
		}
		if (opcode == OP(RDMA_WRITE_ONLY)) {
			goto rdma_last;
		} else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) {
			wc.ex.imm_data = ohdr->u.rc.imm_data;
			goto rdma_last_imm;
		}
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_MIDDLE):
		/* Check for invalid length PMTU or posted rwqe len. */
		if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
			goto drop;
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len))
			goto drop;
		hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false);
		break;

	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
		wc.ex.imm_data = ohdr->u.imm_data;
rdma_last_imm:
		wc.wc_flags = IB_WC_WITH_IMM;

		/* Check for invalid length. */
		/* LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + extra_bytes)))
			goto drop;
		/* Don't count the CRC. */
		tlen -= (hdrsize + extra_bytes);
		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
			goto drop;
		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
			rvt_put_ss(&qp->s_rdma_read_sge);
		} else {
			ret = hfi1_rvt_get_rwqe(qp, 1);
			if (ret < 0)
				goto op_err;
			if (!ret)
				goto drop;
		}
		wc.byte_len = qp->r_len;
		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
		hfi1_copy_sge(&qp->r_sge, data, tlen, true, false);
		rvt_put_ss(&qp->r_sge);
		goto last_imm;

	case OP(RDMA_WRITE_LAST):
rdma_last:
		/* Check for invalid length. */
		/* LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + extra_bytes)))
			goto drop;
		/* Don't count the CRC. */
		tlen -= (hdrsize + extra_bytes);
		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
			goto drop;
		hfi1_copy_sge(&qp->r_sge, data, tlen, true, false);
		rvt_put_ss(&qp->r_sge);
		break;

	default:
		/* Drop packet for unknown opcodes. */
		goto drop;
	}
	qp->r_psn++;
	qp->r_state = opcode;
	return;

rewind:
	set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
	qp->r_sge.num_sge = 0;
drop:
	ibp->rvp.n_pkt_drops++;
	return;

op_err:
	rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
}
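
/*
 * Standalone sketch of the wraparound-safe PSN compare used in
 * hfi1_uc_rcv() above. PSNs occupy only the low 24 bits of the BTH, so
 * a plain subtraction cannot order values across the 0xffffff -> 0
 * wrap; shifting the signed difference left by 8 moves bit 23 into the
 * sign bit. This mirrors the driver's cmp_psn() (negative: a precedes
 * b, zero: equal, positive: a follows b).
 */
static inline int example_cmp_psn(u32 a, u32 b)
{
	return (((int)a) - ((int)b)) << 8;
}

/*
 * For example, example_cmp_psn(0x000001, 0xffffff) > 0: PSN 1 is
 * "after" PSN 0xffffff once the 24-bit counter wraps.
 */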