forked from iNuSys/SIMD
/
simd_example.cpp
83 lines (75 loc) · 2.58 KB
/
simd_example.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
/**
* SIMD (single instruction, multiple data) examples for element-wise vector computation.
* Processing several elements per instruction with these vector capabilities can provide significant speedups in certain algorithms.
*
* compile: g++ -msse2 simd_example.cpp
* run: ./a.out
*/
#include <cstdio>
#include <xmmintrin.h>
/**
 * Element-wise arithmetic on float arrays using SSE intrinsics.
 *
 * Every operation computes c[i] = a[i] (op) b[i] for i in [0, n).
 *
 * - The arrays do not need to be 16-byte aligned: unaligned loads/stores
 *   are used.
 * - n does not need to be a multiple of 4: whole groups of four floats are
 *   processed with SSE and the remaining 0-3 elements with scalar code, so
 *   nothing outside [0, n) is ever read or written.
 * - c may be the same array as a or b (in-place operation); partially
 *   overlapping arrays are not supported.
 */
class vectorManip {
public:
    /**
     * c[i] = a[i] + b[i] for i in [0, n).
     * @param a first operand array (at least n floats)
     * @param b second operand array (at least n floats)
     * @param c output array (at least n floats)
     * @param n number of elements to process
     */
    static void vectorAdd(float* a, float* b, float* c, size_t n) {
        // Largest multiple of 4 that is <= n: the part handled 4 lanes at a time.
        const size_t vecEnd = n - (n % 4);
        size_t i = 0;
        for (; i < vecEnd; i += 4) {
            const __m128 A = _mm_loadu_ps(a + i);
            const __m128 B = _mm_loadu_ps(b + i);
            _mm_storeu_ps(c + i, _mm_add_ps(A, B));
        }
        // Scalar tail for the last n % 4 elements.
        for (; i < n; ++i) {
            c[i] = a[i] + b[i];
        }
    }

    /**
     * c[i] = a[i] - b[i] for i in [0, n).
     * @param a minuend array (at least n floats)
     * @param b subtrahend array (at least n floats)
     * @param c output array (at least n floats)
     * @param n number of elements to process
     */
    static void vectorSub(float* a, float* b, float* c, size_t n) {
        const size_t vecEnd = n - (n % 4);
        size_t i = 0;
        for (; i < vecEnd; i += 4) {
            const __m128 A = _mm_loadu_ps(a + i);
            const __m128 B = _mm_loadu_ps(b + i);
            _mm_storeu_ps(c + i, _mm_sub_ps(A, B));
        }
        for (; i < n; ++i) {
            c[i] = a[i] - b[i];
        }
    }

    /**
     * c[i] = a[i] * b[i] for i in [0, n).
     * @param a first factor array (at least n floats)
     * @param b second factor array (at least n floats)
     * @param c output array (at least n floats)
     * @param n number of elements to process
     */
    static void vectorMultiply(float* a, float* b, float* c, size_t n) {
        const size_t vecEnd = n - (n % 4);
        size_t i = 0;
        for (; i < vecEnd; i += 4) {
            const __m128 A = _mm_loadu_ps(a + i);
            const __m128 B = _mm_loadu_ps(b + i);
            _mm_storeu_ps(c + i, _mm_mul_ps(A, B));
        }
        for (; i < n; ++i) {
            c[i] = a[i] * b[i];
        }
    }

    /**
     * c[i] = a[i] / b[i] for i in [0, n).
     * @param a dividend array (at least n floats)
     * @param b divisor array (at least n floats)
     * @param c output array (at least n floats)
     * @param n number of elements to process
     */
    static void vectorDiv(float* a, float* b, float* c, size_t n) {
        const size_t vecEnd = n - (n % 4);
        size_t i = 0;
        for (; i < vecEnd; i += 4) {
            const __m128 A = _mm_loadu_ps(a + i);
            const __m128 B = _mm_loadu_ps(b + i);
            _mm_storeu_ps(c + i, _mm_div_ps(A, B));
        }
        for (; i < n; ++i) {
            c[i] = a[i] / b[i];
        }
    }
};
/**
 * Print the first n elements of v on one line, each formatted with "%f ",
 * followed by a newline.
 */
static void printVector(const float* v, size_t n) {
    for (size_t i = 0; i < n; ++i) {
        printf("%f ", v[i]);
    }
    printf("\n");
}

/**
 * Demo driver: applies each vectorManip operation (add, subtract, multiply,
 * divide) to two 7-element inputs and prints the 7 results of every
 * operation on its own line.
 */
int main() {
    const size_t num = 7;
    // Round the storage up to a whole number of 4-float groups and align it to
    // 16 bytes, so that a kernel working on four floats at a time with aligned
    // SSE loads/stores stays inside the arrays. Only the first `num` elements
    // are meaningful and printed.
    const size_t padded = (num + 3) / 4 * 4;
    alignas(16) float a[padded] = {1, 2, 3, 4, 5, 6, 7};
    // The padding element of b is 1 so a 4-wide division never divides by zero there.
    alignas(16) float b[padded] = {1, -1, -2, 1, -3, -2, 5, 1};
    alignas(16) float c[padded] = {};

    vectorManip::vectorAdd(a, b, c, num);
    printVector(c, num);

    vectorManip::vectorSub(a, b, c, num);
    printVector(c, num);

    vectorManip::vectorMultiply(a, b, c, num);
    printVector(c, num);

    vectorManip::vectorDiv(a, b, c, num);
    printVector(c, num);

    return 0;
}