Пример #1
0
int read_tid() {

// CHECK: call i32 @llvm.ptx.read.tid.x()
// CHECK: call i32 @llvm.ptx.read.tid.y()
// CHECK: call i32 @llvm.ptx.read.tid.z()
// CHECK: call i32 @llvm.ptx.read.tid.w()

  int x = __builtin_ptx_read_tid_x();
  int y = __builtin_ptx_read_tid_y();
  int z = __builtin_ptx_read_tid_z();
  int w = __builtin_ptx_read_tid_w();

  return x + y + z + w;

}
void BlackScholesGPU(
    float *d_CallResult,
    float *d_PutResult,
    float *d_StockPrice,
    float *d_OptionStrike,
    float *d_OptionYears,
    float Riskfree,
    float Volatility,
    int optN
)
{
    //Thread index
    const int      tid = (__builtin_ptx_read_ctaid_x() * __builtin_ptx_read_ntid_x()) + __builtin_ptx_read_tid_x();
    //Total number of threads in execution grid
    const int THREAD_N = __builtin_ptx_read_nctaid_x() * __builtin_ptx_read_ntid_x();

    //No matter how small is execution grid or how large OptN is,
    //exactly OptN indices will be processed with perfect memory coalescing
    for (int opt = tid; opt < optN; opt += THREAD_N) {
	float sqrtT, expRT;
	float d1, d2, CNDD1, CNDD2;

	sqrtT = sqrtf(T);
	d1 = (__logf(S / X) + (R + 0.5f * V * V) * T) / (V * sqrtT);
	d2 = d1 - V * sqrtT;

	
	

	//CNDD1 = cndGPU(d1);
	const float       A1 = 0.31938153f;
	const float       A2 = -0.356563782f;
	const float       A3 = 1.781477937f;
	const float       A4 = -1.821255978f;
	const float       A5 = 1.330274429f;
	const float RSQRT2PI = 0.39894228040143267793994605993438f;

	float
	K = 1.0f / (1.0f + 0.2316419f * fabsf(d));

	float
	cnd = RSQRT2PI * __expf(- 0.5f * d * d) *
	  (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))));

	if (d > 0)
		CNDD1 = 1.0f - cnd;


	//CNDD2 = cndGPU(d2);
	K = 1.0f / (1.0f + 0.2316419f * fabsf(d));

	cnd = RSQRT2PI * __expf(- 0.5f * d * d) *
	  (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5)))));

	if (d > 0)
		CNDD2 = 1.0f - cnd;

	//Calculate Call and Put simultaneously
	expRT = __expf(- R * T);
	CallResult = S * CNDD1 - X * expRT * CNDD2;
	PutResult  = X * expRT * (1.0f - CNDD2) - S * (1.0f - CNDD1);
    }
}