Example #1
0
int recursiveFindOverlap(dTree* parent, int overlap, Path currentPath){
    if(parent == NULL || parent->path == NULL){
        return overlap;
    }else{
        printf("parent index: %i", parent->index);
        int currentOverlap = calcOverlap(currentPath, parent->path);
        if (currentOverlap > overlap){
            overlap = currentOverlap;
        }
        return recursiveFindOverlap(parent->parent, overlap, currentPath);
    }
}
Example #2
0
quint64 sw_cuda_cpp::estimateNeededRamAmount(int seqLibLength, ScoreType qProfLen, int queryLength, const U2::SmithWatermanSettings::SWResultView resultView) {
    const int overlapLength = calcOverlap(queryLength);
    const int partsNumber = calcPartsNumber(seqLibLength, overlapLength);
    const int partSeqSize = calcPartSeqSize(seqLibLength, overlapLength, partsNumber);
    const int sizeRow = calcSizeRow(seqLibLength, overlapLength, partsNumber, partSeqSize);

    int directionMatrixSize = 0;
    int backtraceBeginsSize = 0;
    if(U2::SmithWatermanSettings::MULTIPLE_ALIGNMENT == resultView) {
        directionMatrixSize = sizeof(int) * queryLength * seqLibLength;
        backtraceBeginsSize = sizeof(int) * sizeRow * 2;
    }

    const quint64 memToAlloc = 3 * sizeRow * sizeof(ScoreType) + directionMatrixSize + backtraceBeginsSize;
    return memToAlloc;
}
Example #3
0
quint64 sw_cuda_cpp::estimateNeededGpuMemory( int seqLibLength, ScoreType qProfLen, int queryLength, const U2::SmithWatermanSettings::SWResultView resultView) {

    int sizeP = qProfLen * sizeof(ScoreType);
    int sizeL = (seqLibLength) * sizeof(char);

    const int overlapLength = calcOverlap(queryLength);
    int partsNumber = calcPartsNumber(seqLibLength, overlapLength);
    int partSeqSize = calcPartSeqSize(seqLibLength, overlapLength, partsNumber);
    int sizeRow = calcSizeRow(seqLibLength, overlapLength, partsNumber, partSeqSize);

    int sizeN = 7 * sizeRow * sizeof(ScoreType);

int directionMatrixSize = 0;
    int backtraceBeginsSize = 0;
    if(U2::SmithWatermanSettings::MULTIPLE_ALIGNMENT == resultView) {
        directionMatrixSize = sizeof(int) * queryLength * seqLibLength;
        backtraceBeginsSize = sizeof(int) * sizeRow * 2;
    }

    quint64 memToAlloc = sizeL + sizeP + sizeN; //see cudaMallocs in sw_cuda.cu for details
    return memToAlloc * 1.2; //just for safety
}
Example #4
0
QList<resType> calculateOnGPU(const char * seqLib, int seqLibLength, ScoreType* queryProfile, ScoreType qProfLen, int queryLength,
                                ScoreType gapOpen, ScoreType gapExtension, ScoreType maxScore, U2::SmithWatermanSettings::SWResultView resultView) {

    //TODO: calculate maximum alignment length
    const int overlapLength = calcOverlap(queryLength);

    int partsNumber = calcPartsNumber(seqLibLength, overlapLength);

    int queryDevider = 1;
    if (queryLength > sw_cuda_cpp::MAX_SHARED_VECTOR_LENGTH) {
        queryDevider = (queryLength + sw_cuda_cpp::MAX_SHARED_VECTOR_LENGTH - 1) / sw_cuda_cpp::MAX_SHARED_VECTOR_LENGTH;
    }

    int partQuerySize = (queryLength + queryDevider - 1) / queryDevider;

    int partSeqSize = calcPartSeqSize(seqLibLength, overlapLength, partsNumber);

    int sizeRow = calcSizeRow(seqLibLength, overlapLength, partsNumber, partSeqSize);

    u2log.details(QString("partsNumber: %1 queryDevider: %2").arg(partsNumber).arg(queryDevider));

    u2log.details(QString("seqLen: %1 partSeqSize: %2 overlapSize: %3").arg(seqLibLength).arg(partSeqSize).arg(overlapLength));
    u2log.details(QString("queryLen %1 partQuerySize: %2").arg(queryLength).arg(partQuerySize));

    //************************** declare some temp variables on host

    ScoreType* tempRow = new ScoreType[sizeRow];
    ScoreType* zerroArr = new ScoreType[sizeRow];
    for (int i = 0; i < sizeRow; i++) {
        zerroArr[i] = 0;
    }

    ScoreType* directionRow = new ScoreType[sizeRow];

    size_t directionMatrixSize = 0;
    size_t backtraceBeginsSize = 0;
    int * globalMatrix = NULL;
    int * backtraceBegins = NULL;
    if(U2::SmithWatermanSettings::MULTIPLE_ALIGNMENT == resultView) {
        directionMatrixSize = seqLibLength * queryLength * sizeof(int);
        backtraceBeginsSize = 2 * sizeRow * sizeof(int);

        globalMatrix = new int[directionMatrixSize / sizeof(int)];
        backtraceBegins = new int[backtraceBeginsSize / sizeof(int)];

        memset(globalMatrix, 0, directionMatrixSize);
        memset(backtraceBegins, 0, backtraceBeginsSize);
    }
    //************************** sizes of arrays

    size_t sizeQ = sizeRow * sizeof(ScoreType);
    size_t sizeQQ = (sizeRow) * sizeof(ScoreType);
    size_t sizeP = qProfLen * sizeof(ScoreType);
    size_t sizeL = (seqLibLength) * sizeof(char);

    //************************** declare arrays on device

    char * g_seqLib;
    ScoreType* g_queryProfile;
    ScoreType* g_HdataMax;
    ScoreType* g_HdataUp;
    ScoreType* g_HdataRec;
    ScoreType* g_HdataTmp;
    ScoreType* g_FdataUp;
    ScoreType* g_directionsUp;
    ScoreType* g_directionsMax;
    ScoreType* g_directionsRec;
    int * g_directionsMatrix = NULL;
    int * g_backtraceBegins = NULL;

    //************************** allocate global memory on device

    cudaMalloc((void **)& g_seqLib, sizeL);
    cudaMalloc((void **)& g_queryProfile, sizeP);
    cudaMalloc((void **)& g_HdataMax, sizeQ);
    cudaMalloc((void **)& g_HdataUp, sizeQ);
    cudaMalloc((void **)& g_FdataUp, sizeQ);
    cudaMalloc((void **)& g_directionsUp, sizeQ);
    cudaMalloc((void **)& g_directionsMax, sizeQ);
    cudaMalloc((void **)& g_HdataRec, sizeQ);
    cudaMalloc((void **)& g_directionsRec, sizeQ);

    if(U2::SmithWatermanSettings::MULTIPLE_ALIGNMENT == resultView) {
        cudaError errorMatrix = cudaMalloc(reinterpret_cast<void **>(&g_directionsMatrix), directionMatrixSize);
        cudaError errorBacktrace = cudaMalloc(reinterpret_cast<void **>(&g_backtraceBegins), backtraceBeginsSize);
    }

    u2log.details(QString("GLOBAL MEMORY USED %1 KB").arg((sizeL + sizeP + sizeQ * 7
                                                        + directionMatrixSize + backtraceBeginsSize) / 1024));

    //************************** copy from host to device

    cudaMemcpy(g_seqLib, seqLib, sizeL, cudaMemcpyHostToDevice);
    cudaMemcpy(g_queryProfile, queryProfile, sizeP, cudaMemcpyHostToDevice);
    cudaMemcpy(g_HdataMax, zerroArr, sizeQ, cudaMemcpyHostToDevice);
    cudaMemcpy(g_HdataUp, zerroArr, sizeQ, cudaMemcpyHostToDevice);
    cudaMemcpy(g_FdataUp, zerroArr, sizeQ, cudaMemcpyHostToDevice);
    cudaMemcpy(g_directionsUp, zerroArr, sizeQ, cudaMemcpyHostToDevice);
    cudaMemcpy(g_directionsMax, zerroArr, sizeQ, cudaMemcpyHostToDevice);
    cudaMemcpy(g_directionsRec, zerroArr, sizeQ, cudaMemcpyHostToDevice);
    cudaMemcpy(g_HdataRec, zerroArr, sizeQ, cudaMemcpyHostToDevice);
    if(U2::SmithWatermanSettings::MULTIPLE_ALIGNMENT == resultView) {
        cudaMemcpy(g_directionsMatrix, globalMatrix, directionMatrixSize, cudaMemcpyHostToDevice);
        cudaMemcpy(g_backtraceBegins, backtraceBegins, backtraceBeginsSize, cudaMemcpyHostToDevice);
    }

    //************************** start calculation

    int BLOCK_SIZE = partsNumber;

    dim3 dimBlock(BLOCK_SIZE);
    dim3 dimGrid(partQuerySize);

    //move constants variables to constant cuda memory
    setConstants(partSeqSize, partsNumber, overlapLength, seqLibLength,
                queryLength, gapOpen, gapExtension, maxScore, partQuerySize,
                U2::SmithWatermanAlgorithm::UP, U2::SmithWatermanAlgorithm::LEFT, U2::SmithWatermanAlgorithm::DIAG,
                U2::SmithWatermanAlgorithm::STOP);

    size_t sh_mem_size = sizeof(ScoreType) * (dimGrid.x + 1) * 3;
    u2log.details(QString("SHARED MEM SIZE USED: %1 B").arg(sh_mem_size));
    // start main loop
    for (int i = 0; i < queryDevider; i++) {

        calculateMatrix_wrap( dimBlock.x, dimGrid.x, g_seqLib,
            g_queryProfile, g_HdataUp, g_HdataRec, g_HdataMax,
            g_FdataUp, g_directionsUp, g_directionsRec,
            g_directionsMax, i * partQuerySize, g_directionsMatrix, g_backtraceBegins);

        cudaError hasErrors = cudaThreadSynchronize();

        if (hasErrors != 0) {
            u2log.trace(QString("CUDA ERROR HAPPEN, errorId: ") + QString::number(hasErrors));
        }

        //revert arrays
        g_HdataTmp = g_HdataRec;
        g_HdataRec = g_HdataUp;
        g_HdataUp = g_HdataTmp;

        g_HdataTmp = g_directionsRec;
        g_directionsRec = g_directionsUp;
        g_directionsUp = g_HdataTmp;
    }

    //Copy vectors on host and find actual results
    cudaMemcpy(tempRow, g_HdataMax, sizeQQ, cudaMemcpyDeviceToHost);
    cudaMemcpy(directionRow, g_directionsMax, sizeQQ, cudaMemcpyDeviceToHost);
    if(U2::SmithWatermanSettings::MULTIPLE_ALIGNMENT == resultView) {
        cudaMemcpy(globalMatrix, g_directionsMatrix, directionMatrixSize, cudaMemcpyDeviceToHost);
        cudaMemcpy(backtraceBegins, g_backtraceBegins, backtraceBeginsSize, cudaMemcpyDeviceToHost);
    }

    QList<resType> pas;
    resType res;
    for (int j = 0; j < (sizeRow); j++) {
        if (tempRow[j] >= maxScore) {
            res.refSubseq.startPos = directionRow[j];
            res.refSubseq.length = j - res.refSubseq.startPos + 1 - (j) / (partSeqSize + 1) * overlapLength - (j) / (partSeqSize + 1);
            res.score = tempRow[j];
            if(U2::SmithWatermanSettings::MULTIPLE_ALIGNMENT == resultView) {
                qint32 pairAlignOffset = 0;

                qint32 row = backtraceBegins[2 * j];
                qint32 column = backtraceBegins[2 * j + 1];
                while(U2::SmithWatermanAlgorithm::STOP != globalMatrix[seqLibLength * row + column]) {
                    if(U2::SmithWatermanAlgorithm::DIAG == globalMatrix[seqLibLength * row + column]) {
                        res.pairAlign[pairAlignOffset++] = U2::SmithWatermanAlgorithm::DIAG;
                        row--;
                        column--;
                    } else if(U2::SmithWatermanAlgorithm::LEFT == globalMatrix[seqLibLength * row + column]) {
                        res.pairAlign[pairAlignOffset++] = U2::SmithWatermanAlgorithm::UP;
                        column--;
                    } else if(U2::SmithWatermanAlgorithm::UP == globalMatrix[seqLibLength * row + column]) {
                        res.pairAlign[pairAlignOffset++] = U2::SmithWatermanAlgorithm::LEFT;
                        row--;
                    }
                    if(0 >= row || 0 >= column) {
                        break;
                    }
                }
                res.patternSubseq.startPos = row;
                res.patternSubseq.length = backtraceBegins[2 * j] - row + 1;
            }

            pas.append(res);
        }
    }

    //deallocation memory
    cudaFree(g_seqLib);
    cudaFree(g_queryProfile);
    cudaFree(g_HdataMax);
    cudaFree(g_HdataUp);
    cudaFree(g_HdataRec);
    cudaFree(g_FdataUp);
    cudaFree(g_directionsUp);
    cudaFree(g_directionsMax);
    cudaFree(g_directionsRec);

    if(U2::SmithWatermanSettings::MULTIPLE_ALIGNMENT == resultView) {
        cudaFree(g_directionsMatrix);
        cudaFree(g_backtraceBegins);
    }

    delete[] tempRow;
    delete[] directionRow;
    delete[] zerroArr;
    delete[] globalMatrix;
    delete[] backtraceBegins;

    return pas;
}
Example #5
0
/*FGROUP DebCon---------------------------------------------------setswtrig
 This routine should be called to set trigger 
 before the repetitive call of startswtrig().
 (or there is GenSwtrg() combining both)
 TODO:
 - BCmask is not used. Should be given as symbolic name
   (similarly pf protection) in one if input parameters
 - p/f is disabled. If set, we should wait at least 'L0 Protection interval'
   before generating sw trigger (see ctp_pf.doc)
Inputs:
-------
 trigtype: 'a' - asynchronous
           's' - synchronous, noncalibration
           'c' - callibration
 roc: -> INT,FO* boards
 BC : BCmask for trigtype 'a'
      bunch crossing for the trigtype 'c' or 's'
         0xfff: retrieve CALIBRATION_BC from ltuproxy
 ctprodets: 24 bits representing CTP readout (L2 board). I.e.
            bits 23..0 corresponds to detectors 23..0 with
            DAQ numbering (i.e. daqdet# in VALID.LTUS file)
            It is used for programming:
            - L2 board
            - BUSY board 
            - FO boards
            - for decision if trigger starts from LM level (i.e. TRD in)
Operation:
----------
Set:
 L0: bc, bcmask is OFF now, flags: syn/asyn  cal, PF:off 
 L1: PF:off
 L2: PF:off, list of detectors- ctprodets
BUSY: BUSY_CLUSTER_TEST word
FOs: FO_TEST_CLUSTER words:
INT: INTtcset (roc + CIT flag in case of Cal. trigger)

RC: 0: if successfully set
    1: bad parameter(s): BC>3563 or bad trigtype
*/
int setswtrig(char trigtype, int roc, w32 BC, w32 ctprodets){
w32 daqonoff, word, INTtcset, busyclusterT, overlap, bsysc[NCLUST+1];
int rc=0, i, idet, ifo, iconnector;
w32 testclust[NFO],rocs[NFO];
/* following 3 lines are in gcalib.c 
#ifdef SIMVME
srand(73);
#endif
*/
#define TRDECSM 0x10
if( (((BC>3563) && (BC!=0xfff)) && (trigtype !='a')) 
  ){
  printf("Error: setswtrig: BC>3563 %i \n",BC);
  rc=1; goto RET;
};
if(ctprodets & TRDECSM) { LMSTART=1; } else { LMSTART=0;};
INTtcset= roc<<1;   // INT board
// L0 board   p/f, masks off
//     P/F      BCM4   BCM3    BCM2    BCM1
word=(1<<18)+(1<<17)+(1<<16)+(1<<15)+(1<<14);
switch(trigtype){
  case 'a':
       word=word+0;
       if(BC !=0) {   // mask (4bits, 1:use mask 0: do not use mask)
         word= word & (~(BC&0xf));
       };
       //printf("setswtrig: asynchr trigger 0x%x \n",word);
       break;
  case 's':
       {
       //word=word+(1<<12)+BC;
       int mybc=BC-vmer32(L0_BCOFFSETr2);
       if(mybc<0) BC=mybc+3564; else BC=mybc; 
       word=word+(1<<12)+BC;  // from 12.11.2015 (c707)
       //printf("setswtrig: synchr trigger 0x%x \n",word);
       break;
       }
  case 'c':
    if(BC==0xfff) { 
      BC= getCALIBBC2(ctprodets);
    };
    {
    //BC= BC - 97;   // CALIB_BC: 3011 ORBIT_BC: 91
    //BC= BC - 2;   // CALIB_BC: 3011 ORBIT_BC: 3560
    int mybc = BC - 2 - vmer32(L0_BCOFFSETr2); // from 12.11.2015 (c707) 
    /* BC-2: 
     software trigger in 3011 gives BCID: 3008
     calib.   trigger in 3011 gives BCID: 3006
     We do not know why we program BC-2 for cal. trigger (but it was programmed
     this way before c707)
     Before c707: SOD (sync trigger), generated in BC 1750 was seen in DAQ in BC
     1762 -this is very likely for TRD-run, when BCID is higher by 15, i.e.
     1750 + 15 -3 = 1762).
    */
    if(mybc<0) BC=mybc+3564; else BC=mybc;
    BC=BC%3564; 
    word=word+(1<<12)+(1<<13)+BC;
    INTtcset= INTtcset | 1;
    //printf("setswtrig: calib trigger 0x%x \n",word);
    }
    break;
  default:
    printf("Error: setswtrig: unknown type of trigger %c \n",trigtype);
    rc=1; goto RET;
}; TRIGTYPE= trigtype;
// L0 board -p/f prot. off
if(l0C0()) { 
  if(LMSTART != 0) word= word | 0x80000;
  vmew32(L0_TCSETr2,word); 
} else { 
  vmew32(L0_TCSET,word); 
};
word=(1<<18);
vmew32(L1_TCSET,word);        // L1 board p/f prot. off
word=(1<<24)+ctprodets;
  daqonoff= vmer32(INT_DDL_EMU) &0xf;
  if((daqonoff==0) || (daqonoff==0x3b)) { 
    // ctp readout active or emulated, set TRIGGER bit 
    //the bit has to be set for CALIBRATION events too!
    // if not set, EVB complains (run 67492)
    word= word  | (1<<CTPLTUECSN);   // from Monday 23rd bit17 back
    //word=word ;        // temporary suppress bit17 till Monday 23rd
    //printf("setswtrig: bit17 not set\n");
    //};
  };
vmew32(L2_TCSET,word);        // L2 board p/f off
if(DBGswtrg4) printf("setswtrig:L2_TCSET set to:%x\n", word);
vmew32(INT_TCSET,INTtcset);   // INT board
/* set BUSY_CLUSTER word (bits 23..0 for detectors 24..1 connected to BUSY)
*/
busyclusterT= findBUSYinputs(ctprodets);   //no bit17!
vmew32(BUSY_CLUSTER, busyclusterT);
/* we should update BUSY_OVERLAP word (at least bits corresponding
to combinations 1T 2T 3T 4T 5T 6T should be updated: */
bsysc[0]= busyclusterT;
for(i=1;i<NCLUST+1;i++){
  bsysc[i]=vmer32(BUSY_CLUSTER+i*4);
};
overlap= calcOverlap(bsysc); vmew32(BUSY_OVERLAP, overlap);
if(DBGswtrg4) {
  printf("setswtrig: BUSY/SET_CLUSTER: 0x%x BUSY_OVERLAP:0x%x ctprodets:0x%x\n", 
    busyclusterT, overlap, ctprodets);
};
/* set corresponding FO boards: */
for(i=0;i<NFO;i++){testclust[i]=0;rocs[i]=0;}
for(idet=0;idet<NDETEC;idet++){
  if((ctprodets & (1<<idet))!=0) {   // no bit17!
    if(Detector2Connector(idet,&ifo,&iconnector)) continue;  //not connected
    testclust[ifo]=testclust[ifo] +(1<<(16+iconnector));  //TestCluster
    if(trigtype=='c') {    // cal. trigger, set CALFLAG
      testclust[ifo]=testclust[ifo] | 0x100000 ;
    };
    rocs[ifo]=rocs[ifo]+(roc<<(4*iconnector));  
    if(DBGswtrg4) {printf(
      "setswtrig ifo=%i icon=%i testcl=0x%x roc=0x%x BC:%d dets:0x%x\n",
        ifo,iconnector,testclust[ifo],rocs[ifo], BC, ctprodets);
    };
  };
};
for(ifo=0;ifo<NFO;ifo++){   // set all FOs always
  //printf("FO:%d\n",ifo);
  if((notInCrate(ifo+FO1BOARD)==0)) {
    w32 vmeaddr;
    vmeaddr= FO_TESTCLUSTER+BSP*(ifo+1);
    ifoglob[ifo]= testclust[ifo] | rocs[ifo];
    if((DBGswtrg4==1)&&(testclust[ifo]!=0)) printf("setswtrig FO:%d Waddr: 0x%x data: 0x%x\n",
      ifo, vmeaddr, rocs[ifo] | testclust[ifo]);
    vmew32(vmeaddr, rocs[ifo] | testclust[ifo]);
  }
};
RET:
return(rc);
}