static void _check_inputs(std::vector<at::Tensor> &inputs, std::vector<at::Tensor> &outputs, int input_multiplier, int output_multiplier) { // len(inputs) == len(outputs) size_t len = inputs.size(); if (len <= 0) { throw std::runtime_error("input sequence can't be empty"); } if (len != outputs.size()) { std::stringstream err; err << "inputs and outputs sequences have to be of the same length, but got input of length " << len << " and output of length " << outputs.size(); throw std::runtime_error(err.str()); } std::unordered_set<int> devices; devices.reserve(len); int64_t numel = inputs[0].numel(); auto type = inputs[0].type().ID(); for (size_t i = 0; i < len; i++) { auto input = inputs[i]; auto output = outputs[i]; if (!(input.type().is_cuda() && !input.type().is_sparse() && output.type().is_cuda() && !output.type().is_sparse())) { throw std::runtime_error("input and output elements have to be cuda dense Tensors"); } if (type != input.type().ID() || type != output.type().ID()) { throw std::runtime_error("all inputs and outputs must be of the same Tensor type"); } if (!input.is_contiguous() || !output.is_contiguous()) { throw std::runtime_error("all inputs and outputs have to be contiguous"); } auto input_device = input.get_device(); // inputs must be on unique devices if (devices.find(input_device) != devices.end()) { throw std::runtime_error("inputs must be on unique devices"); } devices.insert(input_device); // inputs and outputs must be on same device respectively if (input_device != output.get_device()) { throw std::runtime_error("input and output must be on the same device"); } // all inputs must be same size if (input.numel() != numel) { throw std::runtime_error("all inputs must have the same number of elements"); } if (output.numel() * output_multiplier != numel * input_multiplier) { throw std::runtime_error("output must be of size input_size * size_multiplier"); } } }
int pqos_l3ca_set(const unsigned socket, const unsigned num_cos, const struct pqos_l3ca *ca) { int ret; unsigned i; if (ca == NULL || num_cos == 0) return PQOS_RETVAL_PARAM; _pqos_api_lock(); ret = _pqos_check_init(1); if (ret != PQOS_RETVAL_OK) { _pqos_api_unlock(); return ret; } /** * Check if class bitmasks are contiguous. */ for (i = 0; i < num_cos; i++) { int is_contig = 0; if (ca[i].cdp) { is_contig = is_contiguous(ca[i].u.s.data_mask) && is_contiguous(ca[i].u.s.code_mask); } else is_contig = is_contiguous(ca[i].u.ways_mask); if (!is_contig) { LOG_ERROR("L3 COS%u bit mask is not contiguous!\n", ca[i].class_id); _pqos_api_unlock(); return PQOS_RETVAL_PARAM; } } if (m_interface == PQOS_INTER_MSR) ret = hw_l3ca_set(socket, num_cos, ca); else { #ifdef __linux__ ret = os_l3ca_set(socket, num_cos, ca); #else LOG_INFO("OS interface not supported!\n"); ret = PQOS_RETVAL_RESOURCE; #endif } _pqos_api_unlock(); return ret; }
// Absorbs `other` into this block when the two are adjacent in memory.
// On success grows this block's size by other.size and returns true;
// otherwise leaves this block untouched and returns false.
bool merge( const mem_block_t& other) {
    if ( is_contiguous( other) ) {
        size += other.size;
        return true;
    }
    return false;
}
int hw_l2ca_set(const unsigned l2id, const unsigned num_ca, const struct pqos_l2ca *ca) { int ret = PQOS_RETVAL_OK; unsigned i = 0, count = 0, core = 0; if (ca == NULL || num_ca == 0) return PQOS_RETVAL_PARAM; /** * Check if L2 CAT is supported */ ASSERT(m_cap != NULL); ret = pqos_l2ca_get_cos_num(m_cap, &count); if (ret != PQOS_RETVAL_OK) return PQOS_RETVAL_RESOURCE; /* L2 CAT not supported */ /** * Check if class bitmasks are contiguous and * if class id's are within allowed range. */ for (i = 0; i < num_ca; i++) { if (!is_contiguous(ca[i].ways_mask)) { LOG_ERROR("L2 COS%u bit mask is not contiguous!\n", ca[i].class_id); return PQOS_RETVAL_PARAM; } if (ca[i].class_id >= count) { LOG_ERROR("L2 COS%u is out of range (COS%u is max)!\n", ca[i].class_id, count - 1); return PQOS_RETVAL_PARAM; } } /** * Pick one core from the L2 cluster and * perform MSR writes to COS registers on the cluster. */ ASSERT(m_cpu != NULL); ret = pqos_cpu_get_one_by_l2id(m_cpu, l2id, &core); if (ret != PQOS_RETVAL_OK) return ret; for (i = 0; i < num_ca; i++) { uint32_t reg = ca[i].class_id + PQOS_MSR_L2CA_MASK_START; uint64_t val = ca[i].ways_mask; int retval = MACHINE_RETVAL_OK; retval = msr_write(core, reg, val); if (retval != MACHINE_RETVAL_OK) return PQOS_RETVAL_ERROR; } return ret; }
// Reserves space for a stub with room for `requested_code_size` bytes of code
// in the circular stub buffer and returns it uninitialized-but-prepared, or
// NULL if there is not enough room.
//
// Locking contract (visible in the code below): _mutex is acquired on entry
// and is released ONLY on the failure path. On success this returns with the
// lock still held — presumably the caller finishes filling in the stub and
// then commits/unlocks via a companion routine. NOTE(review): confirm the
// matching commit/unlock call site.
Stub* StubQueue::request(int requested_code_size) {
  assert(requested_code_size > 0, "requested_code_size must be > 0");
  if (_mutex != NULL) _mutex->lock();
  Stub* s = current_stub();
  // Total slot size: code size converted to full stub size, rounded up to the
  // required code entry alignment.
  int requested_size = round_to(stub_code_size_to_size(requested_code_size), CodeEntryAlignment);
  if (requested_size <= available_space()) {
    if (is_contiguous()) {
      // Queue: |...|XXXXXXX|.............|
      //        ^0  ^begin  ^end          ^size = limit
      assert(_buffer_limit == _buffer_size, "buffer must be fully usable");
      if (_queue_end + requested_size <= _buffer_size) {
        // code fits in at the end => nothing to do
        CodeStrings strings;
        stub_initialize(s, requested_size, strings);
        return s;
      } else {
        // stub doesn't fit in at the queue end
        // => reduce buffer limit & wrap around
        assert(!is_empty(), "just checkin'");
        _buffer_limit = _queue_end;
        _queue_end = 0;
      }
    }
  }
  // Second chance: either the queue was already wrapped, or the wrap performed
  // above freed up the region at the start of the buffer.
  if (requested_size <= available_space()) {
    assert(!is_contiguous(), "just checkin'");
    assert(_buffer_limit <= _buffer_size, "queue invariant broken");
    // Queue: |XXX|.......|XXXXXXX|.......|
    //        ^0  ^end    ^begin  ^limit  ^size
    // Re-read current_stub(): _queue_end may have been reset to 0 by the
    // wrap-around branch above, so `s` must be recomputed.
    s = current_stub();
    CodeStrings strings;
    stub_initialize(s, requested_size, strings);
    return s;
  }
  // Not enough space left — only here is the lock released.
  if (_mutex != NULL) _mutex->unlock();
  return NULL;
}
int pqos_l3ca_set(const unsigned socket, const unsigned num_ca, const struct pqos_l3ca *ca) { int ret = PQOS_RETVAL_OK; unsigned i = 0, count = 0, core = 0; int cdp_enabled = 0; _pqos_api_lock(); ret = _pqos_check_init(1); if (ret != PQOS_RETVAL_OK) { _pqos_api_unlock(); return ret; } if (ca == NULL || num_ca == 0) { _pqos_api_unlock(); return PQOS_RETVAL_PARAM; } /** * Check if class bitmasks are contiguous. */ for (i = 0; i < num_ca; i++) { int is_contig = 0; if (ca[i].cdp) { is_contig = is_contiguous(ca[i].u.s.data_mask) && is_contiguous(ca[i].u.s.code_mask); } else { is_contig = is_contiguous(ca[i].u.ways_mask); } if (!is_contig) { LOG_ERROR("L3 COS%u bit mask is not contiguous!\n", ca[i].class_id); _pqos_api_unlock(); return PQOS_RETVAL_PARAM; } } ASSERT(m_cap != NULL); ret = pqos_l3ca_get_cos_num(m_cap, &count); if (ret != PQOS_RETVAL_OK) { _pqos_api_unlock(); return ret; /**< perhaps no L3CA capability */ } if (num_ca > count) { _pqos_api_unlock(); return PQOS_RETVAL_ERROR; } ret = pqos_l3ca_cdp_enabled(m_cap, NULL, &cdp_enabled); if (ret != PQOS_RETVAL_OK) { _pqos_api_unlock(); return ret; } ASSERT(m_cpu != NULL); ret = pqos_cpu_get_cores(m_cpu, socket, 1, &count, &core); if (ret != PQOS_RETVAL_OK) { _pqos_api_unlock(); return ret; } if (cdp_enabled) { for (i = 0; i < num_ca; i++) { uint32_t reg = (ca[i].class_id*2) + PQOS_MSR_L3CA_MASK_START; int retval = MACHINE_RETVAL_OK; uint64_t cmask = 0, dmask = 0; if (ca[i].cdp) { dmask = ca[i].u.s.data_mask; cmask = ca[i].u.s.code_mask; } else { dmask = ca[i].u.ways_mask; cmask = ca[i].u.ways_mask; } retval = msr_write(core, reg, dmask); if (retval != MACHINE_RETVAL_OK) { _pqos_api_unlock(); return PQOS_RETVAL_ERROR; } retval = msr_write(core, reg+1, cmask); if (retval != MACHINE_RETVAL_OK) { _pqos_api_unlock(); return PQOS_RETVAL_ERROR; } } } else { for (i = 0; i < num_ca; i++) { uint32_t reg = ca[i].class_id + PQOS_MSR_L3CA_MASK_START; uint64_t val = ca[i].u.ways_mask; int retval = MACHINE_RETVAL_OK; if 
(ca[i].cdp) { LOG_ERROR("Attempting to set CDP COS " "while CDP is disabled!\n"); _pqos_api_unlock(); return PQOS_RETVAL_ERROR; } retval = msr_write(core, reg, val); if (retval != MACHINE_RETVAL_OK) { _pqos_api_unlock(); return PQOS_RETVAL_ERROR; } } } _pqos_api_unlock(); return ret; }
/* Validates that (dpto, dpfr, xform) are a legal target/source/matrix triple
 * for a linear transformation: xform must be a single-component image whose
 * row count matches the target's component count and whose column count
 * matches the source's component count; target and source must have the same
 * number of pixels; and (for now) all three objects must be contiguous.
 *
 * Returns 0 when everything checks out, -1 (after NWARN-ing a diagnostic)
 * on the first failed check. A NULL object is rejected silently.
 */
int xform_chk(Data_Obj *dpto,Data_Obj *dpfr,Data_Obj *xform)
{
	if( dpto==NO_OBJ || dpfr==NO_OBJ || xform==NO_OBJ )
		return(-1);

	if( !IS_IMAGE(xform) ){
		sprintf(DEFAULT_ERROR_STRING,
	"xform_chk: transformation %s must be a matrix (image)",
			OBJ_NAME(xform));
		NWARN(DEFAULT_ERROR_STRING);
		return(-1);
	}
	if( OBJ_COMPS(xform) != 1 ){
		sprintf(DEFAULT_ERROR_STRING,
	"xform_chk: transform matrix %s must have single-component elements",OBJ_NAME(xform));
		NWARN(DEFAULT_ERROR_STRING);
		return(-1);
	}
	if( OBJ_COMPS(dpto) != OBJ_ROWS(xform) ){
		sprintf(DEFAULT_ERROR_STRING,
	"xform_chk: target %s component dimension (%d) must match # rows of xform %s (%d)",
			OBJ_NAME(dpto),OBJ_COMPS(dpto),OBJ_NAME(xform),OBJ_ROWS(xform));
		NWARN(DEFAULT_ERROR_STRING);
		return(-1);
	}
	if( OBJ_COMPS(dpfr) != OBJ_COLS(xform) ){
		/* BUG FIX: this diagnostic used to print the target's name and
		 * component count and the xform's ROW count, although the check
		 * that failed is source-components vs. xform-columns. Report
		 * the values actually compared. */
		sprintf(DEFAULT_ERROR_STRING,
	"xform_chk: source %s component dimension (%d) must match # columns of xform %s (%d)",
			OBJ_NAME(dpfr),OBJ_COMPS(dpfr),OBJ_NAME(xform),OBJ_COLS(xform));
		NWARN(DEFAULT_ERROR_STRING);
		return(-1);
	}
	/* same number of pixels (elements per component) on both sides */
	if( OBJ_N_TYPE_ELTS(dpto)/OBJ_COMPS(dpto) != OBJ_N_TYPE_ELTS(dpfr)/OBJ_COMPS(dpfr) ){
		sprintf(DEFAULT_ERROR_STRING,
	"xform_chk: target %s (%d/%d) and source %s (%d/%d) must have same # of elements",
			OBJ_NAME(dpto),OBJ_N_TYPE_ELTS(dpto),OBJ_COMPS(dpto),
			OBJ_NAME(dpfr),OBJ_N_TYPE_ELTS(dpfr),OBJ_COMPS(dpfr));
		NWARN(DEFAULT_ERROR_STRING);
		return(-1);
	}

	/* BUG these contiguity requirements may no longer be necessary... */
	if( !is_contiguous(DEFAULT_QSP_ARG  dpto) ){
		sprintf(DEFAULT_ERROR_STRING,
	"xform_chk: xform target %s must be contiguous",OBJ_NAME(dpto));
		NWARN(DEFAULT_ERROR_STRING);
		return(-1);
	}
	if( !is_contiguous(DEFAULT_QSP_ARG  dpfr) ){
		sprintf(DEFAULT_ERROR_STRING,
	"xform_chk: xform source %s must be contiguous",OBJ_NAME(dpfr));
		NWARN(DEFAULT_ERROR_STRING);
		return(-1);
	}
	if( !is_contiguous(DEFAULT_QSP_ARG  xform) ){
		sprintf(DEFAULT_ERROR_STRING,
	"xform_chk: xform %s must be contiguous",OBJ_NAME(xform));
		NWARN(DEFAULT_ERROR_STRING);
		return(-1);
	}
	return(0);
} // end xform_chk