-
Notifications
You must be signed in to change notification settings - Fork 0
/
MIcomputation.cpp
229 lines (192 loc) · 7.18 KB
/
MIcomputation.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
////////////////////////////////////////////////////////////////
// GdI 3 - WS 11/12 - Praktikum 2
////////////////////////////////////////////////////////////////
// Please enter the names and matriculation numbers of your group
// members here (maximum 3).
//
// Example:
// Boris Baumstumpf 0999999133
// Axel Axt 12345678910
// Bruno Schneewittchen 66666336
////////////////////////////////////////////////////////////////
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <vector>

#include <sys/time.h>
#include <CL/opencl.h>

#include "MIcomputation.h"
#include "Fasta.h"
#include "oclutil.h"
#define LOOPCOUNT 1
// Entry point for the MI computation: announces the run on stdout and forwards
// to the CPU implementation when `cpu` is set, otherwise to the OpenCL
// implementation (which receives `oclgpu` as its device selector flag).
void computeMI(bool cpu, bool oclgpu, SequenceSet& sequences, Matrix<float>& MI)
{
    std::cout << "computing MI" << std::endl;

    if (!cpu) {
        // OpenCL path
        computeMIonGPU(sequences, MI, oclgpu);
        return;
    }

    computeMIonCPU(sequences, MI);
}
// CPU implementation of the MI computation.
//
// For every column pair (i, j) with i <= j, counts how often each character
// pair (x, y) occurs across all sequences, turns the counts into two-point
// probabilities and accumulates
//     p_ij(x,y) * log2( p_ij(x,y) / (p_i(x) * p_j(y)) )
// over all x, y. The sum is stored via MI.set(i, j, ...). Characters whose
// one-point probability is below epsilon, and pairs that never occur, are
// skipped. The whole computation is repeated LOOPCOUNT times and the elapsed
// wall-clock time is printed in milliseconds.
void computeMIonCPU(SequenceSet& sequences, Matrix<float>& MI) {
    const int numChars = NUMPROTEINCHARS;
    const int sequenceLength = sequences.getSequenceLength();
    const int numSequences = sequences.getNumberOfSequences();
    const double epsilon = 1e-6;

    timeval start, end;
    gettimeofday(&start, 0);

    for (int run = 0; run < LOOPCOUNT; ++run) {
        // visit every column combination (colI <= colJ)
        for (int colJ = 0; colJ < sequenceLength; ++colJ) {
            for (int colI = 0; colI <= colJ; ++colI) {
                // absolute number of occurrences of character pairs x,y: N_ij(x,y)
                int pairCounts[numChars][numChars];
                memset(pairCounts, 0, sizeof(pairCounts));

                // one pass over all sequences fills the two-point histogram
                for (int s = 0; s < numSequences; ++s) {
                    pairCounts[sequences.getData(s, colI)][sequences.getData(s, colJ)] += 1;
                }

                /*
                puts("===START===");
                for (int m=0; m<numChars; m++) {
                    for (int n=0; n<numChars; n++)
                        printf("%d %d: %d\n", m, n, pairCounts[m][n]);
                    puts("");
                }
                puts("===STOP ===");
                */

                // sum the MI contributions over all x and y
                double mutualInfo = 0;
                for (int x = 0; x < numChars; ++x) {
                    if (!(sequences.getOnePointProb(x, colI) < epsilon)) {
                        for (int y = 0; y < numChars; ++y) {
                            if (!(sequences.getOnePointProb(y, colJ) < epsilon) && pairCounts[x][y] != 0) {
                                const double jointProb = double(pairCounts[x][y]) / double(numSequences);
                                mutualInfo += jointProb * log2(jointProb / (sequences.getOnePointProb(x, colI) * sequences.getOnePointProb(y, colJ)));
                            }
                        }
                    }
                }

                MI.set(colI, colJ, mutualInfo);
            }
        }
    }

    gettimeofday(&end, 0);

    std::cout << "execution time: "
              << (end.tv_sec - start.tv_sec ) * 1000 + ( end.tv_usec - start.tv_usec) / 1000
              << " milliseconds" << std::endl;
}
void computeMIonGPU(SequenceSet& sequence, Matrix<float>& MI, bool GPU)
{
// initializes context and kernel and stores them
OCL ocl(GPU);
cl_int oclError1, oclError2;
timeval start, end;
// memory sizes
size_t sequenceLength = sequence.getSequenceLength();
size_t numSequences = sequence.getNumberOfSequences();
// matrix MI is of size numElements
size_t numElements = sequenceLength * sequenceLength;
size_t sequenceSize = sequence.getNumberOfSequences() * sequenceLength;
size_t onePointProbsSize = sequenceLength * NUMPROTEINCHARS;
// host memory
float * dst = new float[MI.size()];
memset(dst, 0, MI.size());
// device memory for sequences, one point probablities and resulting matrix
cl_mem oclDevSrcSequence, oclDevSrcOnePointProbs, oclDevDstMI;
// size for a work group: each workgroup computes one matrix entry, thus computes the correlation
// one time for each character => 25 work items are sufficient
size_t localWorkSize[2] = { 5, 5 };
if (sequenceLength % localWorkSize[0] != 0) throw std::runtime_error("sequence length ^ 2 not divisable by local work size");
// global work size defines the total amount of threads over all work group, thus needs to be a multiple of the local
// work size in each dimension.
size_t globalWorkSize[2] = { sequenceLength, sequenceLength };
// create buffer on device, one for each input array
oclDevSrcSequence = clCreateBuffer( ocl.oclContext,
CL_MEM_READ_ONLY,
sizeof(cl_uchar) * sequenceSize,
0, &oclError1);
oclDevSrcOnePointProbs = clCreateBuffer(ocl.oclContext,
CL_MEM_READ_ONLY,
sizeof(cl_float) * onePointProbsSize,
0, &oclError2);
oclError1 |= oclError2;
oclDevDstMI = clCreateBuffer( ocl.oclContext,
CL_MEM_WRITE_ONLY,
sizeof(cl_float) * numElements,
0, &oclError2);
oclError1 |= oclError2;
if (oclError1 != CL_SUCCESS) {
std::cout << "error while allocating buffers" << std::endl;
exit(1);
}
// set buffer to appropriate kernel arguments
oclError1 = clSetKernelArg(ocl.oclKernel, 0, sizeof(cl_mem), (void*)&oclDevSrcSequence);
oclError1 |= clSetKernelArg(ocl.oclKernel, 1, sizeof(cl_mem), (void*)&oclDevSrcOnePointProbs);
oclError1 |= clSetKernelArg(ocl.oclKernel, 2, sizeof(cl_mem), (void*)&oclDevDstMI);
oclError1 |= clSetKernelArg(ocl.oclKernel, 3, sizeof(cl_uint), &sequenceLength);
oclError1 |= clSetKernelArg(ocl.oclKernel, 4, sizeof(cl_uint), &numSequences);
if (oclError1 != CL_SUCCESS) {
std::cout << "error while setting arguments: " << ocl.oclErrorString(oclError1) << std::endl;
exit(1);
}
// copy host memory to device, non-blocking copy
oclError1 = clEnqueueWriteBuffer( ocl.oclCmdQueue,
oclDevSrcSequence,
CL_FALSE,
0,
sizeof(cl_uchar) * sequenceSize,
(const void *) sequence.getData(),
0, 0, 0);
oclError1 |= clEnqueueWriteBuffer( ocl.oclCmdQueue,
oclDevSrcOnePointProbs,
CL_FALSE,
0,
sizeof(cl_float) * onePointProbsSize,
(const void *) sequence.getOnePointProbs(),
0, 0, 0);
if (oclError1 != CL_SUCCESS) {
std::cout << "error while writing to device " << ocl.oclErrorString(oclError1) << std::endl;
exit(1);
}
// execute kernel LOOPCOUNT times and measure execution time
// TODO LOOPCOUNT aendern, um Kernel mehrfach auszufuehren
gettimeofday(&start, 0);
for (int i = 0; i < LOOPCOUNT; ++i) {
oclError1 = clEnqueueNDRangeKernel( ocl.oclCmdQueue,
ocl.oclKernel,
2, // dimension
0,
globalWorkSize,
localWorkSize,
0, 0, 0);
if (oclError1 != CL_SUCCESS) {
std::cout << "error while executing kernel: " << ocl.oclErrorString(oclError1) << std::endl;
exit(1);
}
}
// clFinish blocks until all issued commands so far are completed, necessary for computing execution time
oclError1 = clFinish(ocl.oclCmdQueue);
gettimeofday(&end, 0);
// read memory from device, store in temporary array and if no error happend copy to result matrix
oclError1 = clEnqueueReadBuffer( ocl.oclCmdQueue,
oclDevDstMI,
CL_TRUE,
0,
sizeof(cl_float) * numElements,
dst,
0, 0, 0);
if (oclError1 != CL_SUCCESS) {
std::cout << "error while reading from device: " << ocl.oclErrorString(oclError1) << std::endl;
exit(1);
}
std::cout << "execution time: "
<< (end.tv_sec - start.tv_sec ) * 1000 + ( end.tv_usec - start.tv_usec) / 1000
<< " milliseconds" << std::endl;
// fill the matrix with the computed results
MI.copyElements(dst);
// release used memory, can cause really bad crashes otherwise
clReleaseMemObject(oclDevSrcSequence);
clReleaseMemObject(oclDevSrcOnePointProbs);
clReleaseMemObject(oclDevDstMI);
}