/*
 * Combine every element of v with OP, unrolled 8 ways with 8 separate
 * accumulators, and store the result in *dest.
 *
 * Accumulator accN folds the element at offset N of each 8-element group.
 * Elements left over after the last full group are folded into acc0, and
 * the eight accumulators are merged at the end.
 */
void unroll8x8_combine(vec_ptr v, data_t *dest)
{
    long int length = vec_length(v);
    data_t *data = get_vec_start(v);
    /* Track the number of unprocessed elements rather than comparing
       against data+length-7, which would point before the start of the
       array whenever length < 7. */
    long int remaining = length;
    data_t acc0 = IDENT;
    data_t acc1 = IDENT;
    data_t acc2 = IDENT;
    data_t acc3 = IDENT;
    data_t acc4 = IDENT;
    data_t acc5 = IDENT;
    data_t acc6 = IDENT;
    data_t acc7 = IDENT;

    /* Full groups of 8 */
    while (remaining >= 8) {
        acc0 = acc0 OP data[0];
        acc1 = acc1 OP data[1];
        acc2 = acc2 OP data[2];
        acc3 = acc3 OP data[3];
        acc4 = acc4 OP data[4];
        /* Must update acc5 here; writing the result into acc6 would drop
           acc5's contribution and clobber acc6 on every iteration. */
        acc5 = acc5 OP data[5];
        acc6 = acc6 OP data[6];
        acc7 = acc7 OP data[7];
        data += 8;
        remaining -= 8;
    }

    /* Finish any remaining elements */
    while (remaining > 0) {
        acc0 = acc0 OP *data;
        data++;
        remaining--;
    }

    *dest = acc0 OP acc1 OP acc2 OP acc3 OP acc4 OP acc5 OP acc6 OP acc7;
}
/* $begin combine5px8-ans */
/*
 * Combine the elements of v with OPER, 8 elements per loop iteration, using
 * a single accumulator.  The result is written to *dest.
 */
void combine5px8(vec_ptr v, data_t *dest)
{
    long n = vec_length(v);
    long last_full = n - 8;   /* largest start index of a full group of 8 */
    data_t *elems = get_vec_start(v);
    data_t result = IDENT;
    long pos = 0;

    /* Combine 8 elements at a time */
    while (pos <= last_full) {
        result = result OPER elems[pos] OPER elems[pos+1]
                        OPER elems[pos+2] OPER elems[pos+3]
                        OPER elems[pos+4] OPER elems[pos+5]
                        OPER elems[pos+6] OPER elems[pos+7];
        pos += 8;
    }

    /* Finish any remaining elements */
    while (pos < n) {
        result = result OPER elems[pos];
        pos++;
    }

    *dest = result;
}
/*
 * Combine the elements of v with OP, 12 elements per iteration.
 *
 * Inside each group of 12 the elements are first paired, the pairs are
 * merged into three partial results, and only then is the single
 * accumulator updated.  The result is written to *dest.
 */
void unroll12aa_combine(vec_ptr v, data_t *dest)
{
    long int n = vec_length(v);
    long int stop = n - 11;          /* a full group needs indices k .. k+11 */
    data_t *elems = get_vec_start(v);
    data_t total = IDENT;
    long int k = 0;

    /* Combine 12 elements at a time */
    while (k < stop) {
        data_t *g = elems + k;

        data_t p01 = g[0] OP g[1];
        data_t p23 = g[2] OP g[3];
        data_t q0 = p01 OP p23;

        data_t p45 = g[4] OP g[5];
        data_t p67 = g[6] OP g[7];
        data_t q1 = p45 OP p67;

        data_t p89 = g[8] OP g[9];
        data_t pab = g[10] OP g[11];
        data_t q2 = p89 OP pab;

        total = total OP (q0 OP q1 OP q2);
        k += 12;
    }

    /* Finish any remaining elements */
    while (k < n) {
        total = total OP elems[k];
        k++;
    }

    *dest = total;
}
static void spoil_mon_info() { int i; vec_ptr v = vec_alloc(NULL); doc_ptr doc = doc_alloc(80); spoiler_hack = TRUE; for (i = 1; i < max_r_idx; i++) { monster_race *r_ptr = &r_info[i]; if (!r_ptr->name) continue; if (r_ptr->id == MON_MONKEY_CLONE) continue; if (r_ptr->id == MON_KAGE) continue; vec_add(v, r_ptr); } vec_sort(v, (vec_cmp_f)_compare_r_level_desc); for (i = 0; i < vec_length(v); i++) { monster_race *r_ptr = vec_get(v, i); doc_printf(doc, "<topic:%s><color:r>=====================================================================</color>\n", r_name + r_ptr->name); mon_display_doc(r_ptr, doc); doc_newline(doc); } vec_free(v); doc_display(doc, "Monster Spoilers", 0); doc_free(doc); spoiler_hack = FALSE; }
/*
 * Combine the elements of v with OP using vector operations, consuming two
 * vectors (2*VSIZE elements) per iteration.  The two chunks are combined
 * with each other first and then folded into a single vector accumulator.
 * The result is written to *dest.
 */
void simd_v2a_combine(vec_ptr v, data_t *dest)
{
    pack_t xfer;   /* presumably overlays d[] (elements) and v (vector) */
    vec_t accum;
    data_t *data = get_vec_start(v);
    int cnt = vec_length(v);
    data_t result = IDENT;
    long int lane;

    /* Initialize every lane of accum to IDENT */
    for (lane = 0; lane < VSIZE; lane++) {
        xfer.d[lane] = IDENT;
    }
    accum = xfer.v;

    /* Scalar steps until data is a multiple of VBYTES (or input runs out) */
    while (cnt != 0 && ((long) data) % VBYTES != 0) {
        result = result OP *data;
        data++;
        cnt--;
    }

    /* Two vectors per iteration */
    for (; cnt >= 2*VSIZE; cnt -= 2*VSIZE) {
        vec_t lo = *((vec_t *) data);
        vec_t hi = *((vec_t *) (data+VSIZE));

        accum = accum OP (lo OP hi);
        data += 2*VSIZE;
    }

    /* Scalar tail */
    for (; cnt != 0; cnt--) {
        result = result OP *data;
        data++;
    }

    /* Fold the lanes of the vector accumulator into the scalar result */
    xfer.v = accum;
    for (lane = 0; lane < VSIZE; lane++) {
        result = result OP xfer.d[lane];
    }

    *dest = result;
}
/*
 * Combine every element of v with OP, unrolled 9 ways over 3 accumulators,
 * and store the result in *dest.
 *
 * Within each 9-element group, element k is folded into accumulator k % 3.
 * Elements left over after the last full group are folded into acc0, and
 * the three accumulators are merged at the end.
 */
void unroll9x3_combine(vec_ptr v, data_t *dest)
{
    long int length = vec_length(v);
    data_t *data = get_vec_start(v);
    /* Count unprocessed elements instead of comparing against
       data+length-8: for vectors shorter than 8 elements that expression
       forms a pointer before the start of the array, which is undefined. */
    long int remaining = length;
    data_t acc0 = IDENT;
    data_t acc1 = IDENT;
    data_t acc2 = IDENT;

    /* Full groups of 9 */
    while (remaining >= 9) {
        acc0 = acc0 OP data[0];
        acc1 = acc1 OP data[1];
        acc2 = acc2 OP data[2];
        acc0 = acc0 OP data[3];
        acc1 = acc1 OP data[4];
        acc2 = acc2 OP data[5];
        acc0 = acc0 OP data[6];
        acc1 = acc1 OP data[7];
        acc2 = acc2 OP data[8];
        data += 9;
        remaining -= 9;
    }

    /* Finish any remaining elements */
    while (remaining > 0) {
        acc0 = acc0 OP *data;
        data++;
        remaining--;
    }

    *dest = acc0 OP acc1 OP acc2;
}
/*
 * Combine the elements of v with OP, 7 elements per iteration.
 *
 * Each group of 7 is reduced as ((e0 OP e1) OP (e2 OP e3)) and
 * ((e4 OP e5) OP e6) before the single accumulator is updated.
 * The result is written to *dest.
 */
void unroll7aa_combine(vec_ptr v, data_t *dest)
{
    long int n = vec_length(v);
    long int stop = n - 6;           /* a full group needs indices k .. k+6 */
    data_t *elems = get_vec_start(v);
    data_t total = IDENT;
    long int k = 0;

    /* Combine 7 elements at a time */
    while (k < stop) {
        data_t *g = elems + k;

        data_t p01 = g[0] OP g[1];
        data_t p23 = g[2] OP g[3];
        data_t q0 = p01 OP p23;

        data_t p45 = g[4] OP g[5];
        data_t last = g[6];
        data_t q1 = p45 OP last;

        total = total OP (q0 OP q1);
        k += 7;
    }

    /* Finish any remaining elements */
    while (k < n) {
        total = total OP elems[k];
        k++;
    }

    *dest = total;
}
static void _spoil_spells_by_realm_aux2(int realm_idx, int class1_idx) { int i, row, col, class_idx, choice; vec_ptr vec = vec_alloc(NULL); for (class_idx = 0; class_idx < MAX_CLASS; class_idx++) { if (_check_realm(class_idx, realm_idx)) vec_add_int(vec, class_idx); } vec_sort(vec, (vec_cmp_f)_cmp_class_name); while (1) { Term_clear(); c_prt(TERM_L_BLUE, format("%s", realm_names[realm_idx]), 2, 0); c_prt(TERM_L_BLUE, format("First Class: %s", get_class_aux(class1_idx, 0)->name), 3, 0); /* Classes */ row = 4; col = 2; c_prt(TERM_RED, "Second Class", row++, col - 2); for (i = 0; i < vec_length(vec); i++) { int class_idx = vec_get_int(vec, i); class_t *class_ptr = get_class_aux(class_idx, 0); prt(format("(%c) %s", 'a' + i, class_ptr->name), row++, col); } i = inkey(); if (i == ESCAPE) break; choice = i - 'a'; if (0 <= choice && choice < vec_length(vec)) { class_idx = vec_get_int(vec, choice); _spoil_spells_by_realm_aux3(realm_idx, class1_idx, class_idx); } } vec_free(vec); }
/* Squared Euclidean distance between two sparse vectors.

   Each vector is given as an index array (svi*) and a value array (sv*) of
   equal length.  The two index arrays are walked in parallel as in a merge:
   an index present in both contributes the squared difference of the two
   values, an index present in only one contributes the square of its value.
   The merge compares indices with == and <, so it relies on both index
   arrays being in increasing order. */
double spvec_distance_sqr (ivec svi1, vec sv1, ivec svi2, vec sv2)
{
  double sum = 0;
  idx_t a = 0, b = 0;

  assert (ivec_length (svi1) == vec_length (sv1));
  assert (ivec_length (svi2) == vec_length (sv2));

  /* both vectors still have entries */
  while (a < ivec_length (svi1) && b < ivec_length (svi2)) {
    if (svi1[a] == svi2[b]) {
      sum += (sv1[a] - sv2[b]) * (sv1[a] - sv2[b]);
      a++;
      b++;
    }
    else if (svi1[a] < svi2[b]) {
      sum += sv1[a] * sv1[a];
      a++;
    }
    else {
      sum += sv2[b] * sv2[b];
      b++;
    }
  }

  /* at most one of the two loops below has anything left to do */
  for (; b < ivec_length (svi2); b++)
    sum += sv2[b] * sv2[b];

  for (; a < ivec_length (svi1); a++)
    sum += sv1[a] * sv1[a];

  return sum;
}
/* Kullback-Leibler pseudo-distance from pdf1 to pdf2, in bits.

   Sums pdf1[i] * log (pdf1[i] / pdf2[i]) over the entries where pdf1[i] is
   non-zero and divides the total by log (2).  If pdf2[i] is zero at such an
   entry the function returns INT_MAX immediately as its "unbounded" value.
   Both arguments must have the same length and pass is_valid_pdf (., 1e-10);
   this is only checked through assert. */
double vec_distance_kullback_leibler (vec pdf1, vec pdf2)
{
  double nats = 0;
  idx_t k = 0;

  assert (vec_length (pdf1) == vec_length (pdf2));
  assert (is_valid_pdf (pdf1, 1e-10) && is_valid_pdf (pdf2, 1e-10));

  while (k < vec_length (pdf1)) {
    if (pdf1[k] != 0) {
      /* mass in pdf1 where pdf2 has none */
      if (pdf2[k] == 0)
        return INT_MAX;
      nats += pdf1[k] * log (pdf1[k] / pdf2[k]);
    }
    k++;
  }

  /* natural log -> base 2 */
  return nats / log (2);
}
static void spoil_spells_by_class(void) { int i, realm_idx; doc_ptr doc = doc_alloc(80); vec_ptr vec = vec_alloc(NULL); for (i = 0; i < MAX_CLASS; i++) vec_add_int(vec, i); vec_sort(vec, (vec_cmp_f)_cmp_class_name); for (i = 0; i < vec_length(vec); i++) { int class_idx = vec_get_int(vec, i); class_t *class_ptr = get_class_aux(class_idx, 0); bool class_heading = FALSE; if (class_idx == CLASS_RAGE_MAGE) continue; /* broken */ for (realm_idx = REALM_LIFE; realm_idx <= MAX_REALM; realm_idx++) { if (_check_realm(class_idx, realm_idx)) { doc_ptr cols[2]; cols[0] = doc_alloc(40); cols[1] = doc_alloc(40); _spoil_spell_book(cols[0], class_idx, realm_idx, 0); _spoil_spell_book(cols[1], class_idx, realm_idx, 1); if (class_idx != CLASS_RED_MAGE || realm_idx == REALM_ARCANE) { _spoil_spell_book(cols[0], class_idx, realm_idx, 2); _spoil_spell_book(cols[1], class_idx, realm_idx, 3); } if (!class_heading) { doc_printf(doc, "<topic:%s><color:r>%s</color>\n", class_ptr->name, class_ptr->name); doc_printf(doc, "%s\n\n", class_ptr->desc); class_heading = TRUE; } doc_printf(doc, "<color:B>%s</color>\n", realm_names[realm_idx]); doc_insert_cols(doc, cols, 2, 0); doc_free(cols[0]); doc_free(cols[1]); } } } doc_display(doc, "Spells by Class", 0); doc_free(doc); vec_free(vec); }
/*
 * Combine the elements of v with OP using vector operations and eight
 * independent vector accumulators, consuming 8*VSIZE elements per
 * iteration.  The result is written to *dest.
 */
void simd_v8_combine(vec_ptr v, data_t *dest)
{
    pack_t xfer;   /* presumably overlays d[] (elements) and v (vector) */
    vec_t accum0, accum1, accum2, accum3;
    vec_t accum4, accum5, accum6, accum7;
    data_t *data = get_vec_start(v);
    int cnt = vec_length(v);
    data_t result = IDENT;
    long int lane;

    /* Initialize every lane of each accumulator to IDENT */
    for (lane = 0; lane < VSIZE; lane++) {
        xfer.d[lane] = IDENT;
    }
    accum0 = xfer.v;
    accum1 = xfer.v;
    accum2 = xfer.v;
    accum3 = xfer.v;
    accum4 = xfer.v;
    accum5 = xfer.v;
    accum6 = xfer.v;
    accum7 = xfer.v;

    /* Scalar steps until data is a multiple of VBYTES (or input runs out) */
    while (cnt != 0 && ((long) data) % VBYTES != 0) {
        result = result OP *data;
        data++;
        cnt--;
    }

    /* Eight vectors per iteration, one per accumulator */
    for (; cnt >= 8*VSIZE; cnt -= 8*VSIZE) {
        vec_t c0 = *((vec_t *) data);
        vec_t c1 = *((vec_t *) (data+VSIZE));
        vec_t c2 = *((vec_t *) (data+2*VSIZE));
        vec_t c3 = *((vec_t *) (data+3*VSIZE));
        vec_t c4 = *((vec_t *) (data+4*VSIZE));
        vec_t c5 = *((vec_t *) (data+5*VSIZE));
        vec_t c6 = *((vec_t *) (data+6*VSIZE));
        vec_t c7 = *((vec_t *) (data+7*VSIZE));

        accum0 = accum0 OP c0;
        accum1 = accum1 OP c1;
        accum2 = accum2 OP c2;
        accum3 = accum3 OP c3;
        accum4 = accum4 OP c4;
        accum5 = accum5 OP c5;
        accum6 = accum6 OP c6;
        accum7 = accum7 OP c7;

        data += 8*VSIZE;
    }

    /* Scalar tail */
    for (; cnt != 0; cnt--) {
        result = result OP *data;
        data++;
    }

    /* Merge the eight accumulators, then fold the lanes into result */
    xfer.v = (accum0 OP accum1) OP (accum2 OP accum3);
    xfer.v = xfer.v OP (accum4 OP accum5) OP (accum6 OP accum7);
    for (lane = 0; lane < VSIZE; lane++) {
        result = result OP xfer.d[lane];
    }

    *dest = result;
}
/*
 * Combine all elements of v with OP, starting from IDENT, in a local
 * accumulator; the final value is stored in *dest.
 */
void process(vec_t *v, data_t *dest)
{
    int n = vec_length(v);
    data_t *elems = get_vec_start(v);
    data_t total = IDENT;
    int k = 0;

    while (k < n) {
        total = total OP elems[k];
        k++;
    }

    *dest = total;
}
/*
 * Combine the elements of v with OP, reading the element array directly
 * through get_vec_start().  The running value is kept in *dest, which is
 * read and written on every iteration.
 */
void combine3(vec_ptr v, data_t *dest)
{
    long int n = vec_length(v);
    data_t *elems = get_vec_start(v);
    long int k = 0;

    *dest = IDENT;
    while (k < n) {
        *dest = *dest OP elems[k];
        k++;
    }
}
/*
 * Return the last entry of doc->style_stack.
 * Asserts that the stack is non-empty and that the entry is non-NULL.
 */
doc_style_ptr doc_current_style(doc_ptr doc)
{
    int           depth = vec_length(doc->style_stack);
    doc_style_ptr top = NULL;

    if (depth >= 1)
        top = vec_get(doc->style_stack, depth - 1);

    assert(top);
    return top;
}
/*
 * Combine the elements of v with OP in a local accumulator, walking the
 * element array with a pointer rather than an index.  The final value is
 * stored in *dest.
 */
void combine4p(vec_ptr v, data_t *dest)
{
    long int n = vec_length(v);
    data_t *cur = get_vec_start(v);
    data_t *end = cur + n;           /* one past the last element */
    data_t total = IDENT;

    while (cur < end) {
        total = total OP *cur;
        cur++;
    }

    *dest = total;
}
/*
 * Combine the elements of v with OP in a local accumulator and store the
 * final value in *dest once, after the loop.
 */
void combine4(vec_ptr v, data_t *dest)
{
    long int n = vec_length(v);
    data_t *elems = get_vec_start(v);
    data_t total = IDENT;
    long int k = 0;

    while (k < n) {
        total = total OP elems[k];
        k++;
    }

    *dest = total;
}
/*
 * Return v scaled to unit length (all four components x, y, z, w are
 * divided by vec_length(v)).
 *
 * If the length is not greater than SMALL_EPS the vector cannot be
 * normalized meaningfully; in that case the components of v are returned
 * unchanged, so the result is always fully initialized.
 */
vec vec_normalize(avec v)
{
    vec t;
    const float n = vec_length(v);

    if (n > SMALL_EPS) {
        t.x = v.x / n;
        t.y = v.y / n;
        t.z = v.z / n;
        t.w = v.w / n;
    } else {
        /* Degenerate input: never return an uninitialized struct. */
        t.x = v.x;
        t.y = v.y;
        t.z = v.z;
        t.w = v.w;
    }

    return t;
}
/*
 * Dot product of two vectors, abstract version.
 *
 * Every element is fetched through get_vec_element() and vec_length(u) is
 * re-evaluated on each iteration; the running sum is kept in *dest.
 * The loop runs over the length of u; v is indexed with the same indices.
 */
void dotproduct1(vec_ptr u, vec_ptr v, data_t *dest)
{
    long int i;

    /* A sum starts from 0; seeding it with 1.0 yields 1 + u.v */
    *dest = (data_t) 0;
    for (i = 0; i < vec_length(u); i++) {
        data_t val1;
        data_t val2;

        get_vec_element(u, i, &val1);
        get_vec_element(v, i, &val2);
        *dest = *dest + val1 * val2;
    }
}
/*
 * Dot product of two vectors with direct access to the element arrays.
 *
 * The length of u is read once and both arrays are obtained through
 * get_vec_start(); the running sum is still kept in *dest, which is read
 * and written on every iteration.
 */
void dotproduct3(vec_ptr u, vec_ptr v, data_t *dest)
{
    long int i;
    long int len = vec_length(u);    /* same width as the loop index */
    data_t *data1 = get_vec_start(u);
    data_t *data2 = get_vec_start(v);

    /* A sum starts from 0; seeding it with 1.0 yields 1 + u.v */
    *dest = (data_t) 0;
    for (i = 0; i < len; i++) {
        *dest = *dest + data1[i] * data2[i];
    }
}
/*
 * Combine the elements of v with OP.  vec_length() is called once, before
 * the loop; each element is still fetched through get_vec_element() and the
 * running value is kept in *dest.
 */
void combine2(vec_ptr v, data_t *dest)
{
    long int n = vec_length(v);
    long int k = 0;

    *dest = IDENT;
    while (k < n) {
        data_t elem;

        get_vec_element(v, k, &elem);
        *dest = *dest OP elem;
        k++;
    }
}
/*
 * Combine the elements of v with OP in a local accumulator, checking every
 * index against [0, v->len) before reading v->data directly.  Indices that
 * fail the check are skipped.  The final value is stored in *dest.
 */
void combine4b(vec_ptr v, data_t *dest)
{
    long int n = vec_length(v);
    data_t total = IDENT;
    long int k;

    for (k = 0; k < n; k++) {
        if (k < 0 || k >= v->len) {
            continue;                /* out of bounds */
        }
        total = total OP v->data[k];
    }

    *dest = total;
}
/*
 * Combine the elements of v with OP using only the vector abstraction:
 * vec_length() is re-evaluated on every iteration, each element is fetched
 * through get_vec_element(), and the running value is kept in *dest.
 */
void combine1(vec_ptr v, data_t *dest)
{
    long int k = 0;

    *dest = IDENT;
    while (k < vec_length(v)) {
        data_t elem;

        get_vec_element(v, k, &elem);
        /* $begin combineline */
        *dest = *dest OP elem;
        /* $end combineline */
        k++;
    }
}
/*
 * Return the position of the first entry in doc->bookmarks for which
 * doc_pos_compare(pos, mark->pos) is negative, scanning in stored order.
 * Returns doc_pos_invalid() when there is no such bookmark.
 */
doc_pos_t doc_next_bookmark(doc_ptr doc, doc_pos_t pos)
{
    int n = 0;

    while (n < vec_length(doc->bookmarks))
    {
        doc_bookmark_ptr candidate = vec_get(doc->bookmarks, n);

        if (doc_pos_compare(pos, candidate->pos) < 0)
            return candidate->pos;
        n++;
    }

    return doc_pos_invalid();
}
/*
 * Store the normalized cross product v1 x v2 in *res.
 *
 * The raw cross product is written to *res and then each component is
 * divided by vec_length(*res).  When that length is zero (for example when
 * v1 and v2 are parallel) the division is skipped and *res is left as the
 * zero vector rather than being filled with NaN/inf.
 */
void cross_prod(cart_vec v1, cart_vec v2, cart_vec *res)
{
    float length;

    res->x = (v1.y * v2.z) - (v1.z * v2.y);
    res->y = (v1.z * v2.x) - (v1.x * v2.z);
    res->z = (v1.x * v2.y) - (v1.y * v2.x);

    length = vec_length(*res);
    if (length > 0.0f) {
        res->x /= length;
        res->y /= length;
        res->z /= length;
    }
}
/*
 * Return the position of the first entry in doc->bookmarks whose name
 * compares equal to `name` under strcmp(), or doc_pos_invalid() when no
 * bookmark matches.
 */
doc_pos_t doc_find_bookmark(doc_ptr doc, cptr name)
{
    int n = 0;

    while (n < vec_length(doc->bookmarks))
    {
        doc_bookmark_ptr candidate = vec_get(doc->bookmarks, n);

        if (!strcmp(name, string_buffer(candidate->name)))
            return candidate->pos;
        n++;
    }

    return doc_pos_invalid();
}
/*
 * Combine the elements of v with OP using vector operations and four
 * independent vector accumulators, consuming 4*VSIZE elements per
 * iteration.  The result is written to *dest.
 */
void simd_v4_combine(vec_ptr v, data_t *dest)
{
    pack_t xfer;   /* presumably overlays d[] (elements) and v (vector) */
    data_t *data = get_vec_start(v);
    int cnt = vec_length(v);
    data_t result = IDENT;
    long int lane;

    /* Create 4 accumulators and initialize elements to IDENT */
    vec_t accum0, accum1, accum2, accum3;
    for (lane = 0; lane < VSIZE; lane++) {
        xfer.d[lane] = IDENT;
    }
    accum0 = xfer.v;
    accum1 = xfer.v;
    accum2 = xfer.v;
    accum3 = xfer.v;

    /* Scalar steps until data is a multiple of VBYTES (or input runs out) */
    while (cnt != 0 && ((long) data) % VBYTES != 0) {
        result = result OP *data;
        data++;
        cnt--;
    }

    /* $begin simd_v4_loop-c */
    /* Accumulate with 4x VSIZE parallelism */
    for (; cnt >= 4*VSIZE; cnt -= 4*VSIZE) {
        vec_t c0 = *((vec_t *) data);
        vec_t c1 = *((vec_t *) (data+VSIZE));
        vec_t c2 = *((vec_t *) (data+2*VSIZE));
        vec_t c3 = *((vec_t *) (data+3*VSIZE));

        accum0 = accum0 OP c0;
        accum1 = accum1 OP c1;
        accum2 = accum2 OP c2;
        accum3 = accum3 OP c3;

        data += 4*VSIZE;
    }
    /* $end simd_v4_loop-c */

    /* Scalar tail */
    for (; cnt != 0; cnt--) {
        result = result OP *data;
        data++;
    }

    /* $begin simd_v4_accum-c */
    /* Combine into single accumulator */
    xfer.v = (accum0 OP accum1) OP (accum2 OP accum3);

    /* Combine results from accumulators within vector */
    for (lane = 0; lane < VSIZE; lane++) {
        result = result OP xfer.d[lane];
    }
    /* $end simd_v4_accum-c */

    *dest = result;
}
/* L1 distance between two sparse vectors.

   Each vector is given as an index array (svi*) and a value array (sv*) of
   equal length.  The two index arrays are walked in parallel as in a merge:
   an index present in both contributes |value1 - value2|, an index present
   in only one contributes the absolute value of its entry.  The merge
   compares indices with == and <, so it relies on both index arrays being
   in increasing order. */
double spvec_distance_norm1 (ivec svi1, vec sv1, ivec svi2, vec sv2)
{
  double sum = 0;
  idx_t a = 0, b = 0;

  assert (ivec_length (svi1) == vec_length (sv1));
  assert (ivec_length (svi2) == vec_length (sv2));

  /* both vectors still have entries */
  while (a < ivec_length (svi1) && b < ivec_length (svi2)) {
    if (svi1[a] == svi2[b]) {
      sum += fabs (sv1[a] - sv2[b]);
      a++;
      b++;
    }
    else if (svi1[a] < svi2[b]) {
      sum += fabs (sv1[a]);
      a++;
    }
    else {
      sum += fabs (sv2[b]);
      b++;
    }
  }

  /* at most one of the two loops below has anything left to do */
  for (; b < ivec_length (svi2); b++)
    sum += fabs (sv2[b]);

  for (; a < ivec_length (svi1); a++)
    sum += fabs (sv1[a]);

  return sum;
}
/*
 * Inner product of u and v.  The products are summed in a local variable
 * starting from 0, and *dest is written once after the loop.  The loop runs
 * over the length of u; v is indexed with the same indices.
 */
void inner4(vec_ptr u, vec_ptr v, data_t *dest)
{
    int n = vec_length(u);
    data_t *lhs = get_vec_start(u);
    data_t *rhs = get_vec_start(v);
    data_t total = (data_t) 0;
    long int k = 0;

    /* critical path of the function */
    while (k < n) {
        total = total + lhs[k] * rhs[k];
        k++;
    }

    /* single store through the pointer, outside the loop */
    *dest = total;
}
/*
 * Dot product of two vectors, accumulating in a local variable.
 *
 * The length of u is read once, both element arrays are obtained through
 * get_vec_start(), and *dest is written once after the loop.
 */
void dotproduct4(vec_ptr u, vec_ptr v, data_t *dest)
{
    long int i;
    long int length = vec_length(u);
    data_t *data1 = get_vec_start(u);
    data_t *data2 = get_vec_start(v);
    /* A sum starts from 0; seeding it with 1.0 yields 1 + u.v */
    data_t acc = (data_t) 0;

    for (i = 0; i < length; i++) {
        acc = acc + data1[i] * data2[i];
    }

    *dest = acc;
}