void *
Process_Overlaps(void *ptr){
  Work_Area_t  *WA = (Work_Area_t *)ptr;

  gkReadData   *readData = new gkReadData;

  char         *bases = new char [AS_MAX_READLEN + 1];
  char         *quals = new char [AS_MAX_READLEN + 1];

  while (WA->bgnID < G.endRefID) {
    WA->overlapsLen                = 0;

    WA->Total_Overlaps             = 0;
    WA->Contained_Overlap_Ct       = 0;
    WA->Dovetail_Overlap_Ct        = 0;

    WA->Kmer_Hits_Without_Olap_Ct  = 0;
    WA->Kmer_Hits_With_Olap_Ct     = 0;
    WA->Multi_Overlap_Ct           = 0;

    fprintf(stderr, "Thread %02u processes reads "F_U32"-"F_U32"\n",
            WA->thread_id, WA->bgnID, WA->endID);

    for (uint32 fi=WA->bgnID; fi<=WA->endID; fi++) {

      //  Load sequence/quality data
      //  Duplicated in Build_Hash_Index()

      gkRead   *read = WA->gkpStore->gkStore_getRead(fi);

      if ((read->gkRead_libraryID() < G.minLibToRef) ||
          (read->gkRead_libraryID() > G.maxLibToRef))
        continue;

      uint32 len = read->gkRead_sequenceLength();

      if (len < G.Min_Olap_Len)
        continue;

      WA->gkpStore->gkStore_loadReadData(read, readData);

      char   *seqptr   = readData->gkReadData_getSequence();
      char   *qltptr   = readData->gkReadData_getQualities();

      for (uint32 i=0; i<len; i++) {
        bases[i] = tolower(seqptr[i]);
        quals[i] = qltptr[i];
      }

      bases[len] = 0;
      quals[len] = 0;

      //  Generate overlaps.

      Find_Overlaps(bases, len, quals, read->gkRead_readID(), FORWARD, WA);

      reverseComplement(bases, quals, len);

      Find_Overlaps(bases, len, quals, read->gkRead_readID(), REVERSE, WA);
    }

    //  Write out this block of overlaps, no need to keep them in core!
    //  While we have a mutex, also find the next block of things to process.

    fprintf(stderr, "Thread %02u writes    reads "F_U32"-"F_U32" (%u overlaps %u/%u kmer hits with/without overlap)\n",
            WA->thread_id, WA->bgnID, WA->endID,
            WA->overlapsLen,
            WA->Kmer_Hits_With_Olap_Ct, WA->Kmer_Hits_Without_Olap_Ct);

    pthread_mutex_lock(& Write_Proto_Mutex);

    //  Flush any remaining overlaps.

    for (int zz=0; zz<WA->overlapsLen; zz++)
      Out_BOF->writeOverlap(WA->overlaps + zz);
    WA->overlapsLen = 0;

    //  Update stats

    Total_Overlaps            += WA->Total_Overlaps;
    Contained_Overlap_Ct      += WA->Contained_Overlap_Ct;
    Dovetail_Overlap_Ct       += WA->Dovetail_Overlap_Ct;

    Kmer_Hits_Without_Olap_Ct += WA->Kmer_Hits_Without_Olap_Ct;
    Kmer_Hits_With_Olap_Ct    += WA->Kmer_Hits_With_Olap_Ct;
    Multi_Overlap_Ct          += WA->Multi_Overlap_Ct;

    WA->bgnID = G.curRefID;
    WA->endID = G.curRefID + G.perThread - 1;

    if (WA->endID > G.endRefID)
      WA->endID = G.endRefID;

    G.curRefID = WA->endID + 1;

    pthread_mutex_unlock(& Write_Proto_Mutex);

  }

  delete readData;

  delete [] bases;
  delete [] quals;

  return(ptr);
}
Example #2
0
int main(int argc, char *argv[])
{ FILE *output;

  Process_Arguments(argc,argv,Spec,0);

#ifdef PROGRESS
  printf("\nParameters: c=%g e=%g s=%d\n",
         Get_Double_Arg("-c"),Get_Double_Arg("-e"),Get_Int_Arg("-s"));
  printf("SubFolder:  %s\n",Get_String_Arg("folder"));
  printf("CoreName:   %s\n",Get_String_Arg("core"));
  fflush(stdout);
#endif

  RezFolder = strdup(Get_String_Arg("folder"));
  if (RezFolder[strlen(RezFolder)-1] == '/')
    RezFolder[strlen(RezFolder)-1] = '\0';

  if (mkdir(RezFolder,S_IRWXU|S_IRWXG|S_IRWXO))
    { if (errno != EEXIST)
        { fprintf(stderr,"Error trying to create directory %s: %s\n",RezFolder,strerror(errno)); 
          exit (1);
        }
    }

  CoreName = strdup(Get_String_Arg("core"));

  sprintf(NameBuf,"%s.neu",CoreName);
  output = fopen(NameBuf,"w");
  fprintf(output,"NEUSEP: Version 0.9\n");

  { Histogram *hist;
    int        curchan;
    int        maxchans;
    int        i, n;

    n = Get_Repeat_Count("inputs");
    fwrite(&n,sizeof(int),1,output);

    hist = Make_Histogram(UVAL,0x10000,VALU(1),VALU(0));

    maxchans = 0;
    for (i = 0; i < n; i++)
      { 
	curchan  = NumChans;
        maxchans = Read_All_Channels(Get_String_Arg("inputs",i),maxchans);
	int channelsInCurrentFile=NumChans-curchan;


        { Size_Type sum, max;
          Indx_Type p;
          int       j, wch;
          uint16   *val;

          max = -1;
          for (j = curchan; j < NumChans; j++)
            { val = AUINT16(Images[j]);
              sum = 0;
              for (p = 0; p < Images[j]->size; p++)
                sum += val[p];
              if (sum > max)
                { max = sum;
                  wch = j;
                }
            }

          fprintf(output,"%s\n",Get_String_Arg("inputs",i));
          j = wch-curchan;
          fwrite(&j,sizeof(int),1,output);

#ifdef PROGRESS
          printf("\n  Eliminating channel %d from %s\n",j+1,Get_String_Arg("inputs",i));
          fflush(stdout);
#endif

	  {
	    // Section to write out the reference channel
	    printf("\n Considering reference channel output, channelsInCurrentFile=%d\n", channelsInCurrentFile);
	    fflush(stdout);
	    if (channelsInCurrentFile>2) { // should work with both lsm pair with channels=3, or raw file with channels=4
	      sprintf(NameBuf,"%s/Reference.tif",RezFolder,CoreName,i);
	      Write_Image(NameBuf,Images[wch],LZW_PRESS);
	    }

	  }

          Free_Array(Images[wch]);
          NumChans -= 1;
          for (j = wch; j < NumChans; j++)
            Images[j] = Images[j+1];
        }

        { int        j, ceil;
          Indx_Type  p;
          uint16    *val;

          for (j = curchan; j < NumChans; j++)
            {
              Histagain_Array(hist,Images[j],0);

              ceil = Percentile2Bin(hist,1e-5);

	      if (ceil==0) {
		fprintf(stderr, "Channel must have non-zero values for this program to function\n");
		exit(1);
	      } 

#ifdef PROGRESS
              printf("  Clipping channel %d at ceil = %d\n",j,ceil); fflush(stdout);
              fflush(stdout);
#endif
    
              val  = AUINT16(Images[j]);
              for (p = 0; p < Images[j]->size; p++)
                { 
		  if (val[p] > ceil)
		    val[p] = ceil;
		  val[p] = (val[p]*4095)/ceil;
		  }
	      //              Convert_Array_Inplace(Images[j],PLAIN_KIND,UINT8_TYPE,8,0);
            }
    
        }
      }

    Free_Histogram(hist);

    printf("Starting ConsolidatedSignal.tif section\n");
    fflush(stdout);

    // NA addition: write tif with re-scaled intensities to serve as basis for mask file
    {
      Array *signalStack;
      signalStack = Make_Array(RGB_KIND,UINT8_TYPE,3,Images[0]->dims);
      uint8 *sp=AUINT8(signalStack);
      int m;
      Indx_Type signalIndex;
      signalIndex=0;
      for (m=0;m<NumChans;m++) {
	sprintf(NameBuf, "%s/Signal_%d.tif", RezFolder, m);
	printf("Writing 16-bit channel file %s...", NameBuf);
	Write_Image(NameBuf, Images[m], LZW_PRESS);
	printf("done\n");
	uint16 *ip=AUINT16(Images[m]);
	Indx_Type  channelIndex;
	for (channelIndex=0;channelIndex<Images[m]->size;channelIndex++) {
	  int value=ip[channelIndex]/16;
	  if (value>255) {
	    value=255;
	  }
	  sp[signalIndex++]=value; // convert 12-bit to 8-bit
	}
      }
      sprintf(NameBuf,"%s/ConsolidatedSignal.tif", RezFolder);
      printf("Writing 8-bit consolidated signal file %s...", NameBuf);
      Write_Image(NameBuf,signalStack,LZW_PRESS);
      printf("done");
      //Free_Array(signalStack); - this is causing a bug
    }

    printf("Finished ConsolidatedSignal.tif section\n");
    fflush(stdout);

  }

  { int           i;
    Segmentation *segs;
    Overlaps     *ovl;
    Clusters     *clust;
    int           numneur;
    Region      **neurons;

    segs = (Segmentation *) Guarded_Malloc(sizeof(Segmentation)*NumChans,Program_Name());

    for (i = 0; i < NumChans; i++)
      { Segment_Channel(Images[i],segs+i);
        if (i == 0)
          segs[i].base = 0;
        else
          segs[i].base = segs[i-1].base + segs[i-1].nsegs;
	printf("channel=%d segmentBase=%d\n", i, segs[i].base);
      }

    ovl     = Find_Overlaps(segs);
    clust   = Merge_Segments(segs,ovl);
    neurons = Segment_Clusters(segs,ovl,clust,&numneur);

    if (Is_Arg_Matched("-gp"))
      Output_Clusters(segs,ovl,clust);
    if (Is_Arg_Matched("-nr"))
      Output_Neurons(numneur,neurons,1);

    // Added for NA
    Output_Consolidated_Mask(numneur,neurons,1);

    fwrite(&numneur,sizeof(int),1,output);
    for (i = 0; i < numneur; i++)
      Write_Region(neurons[i],output);

#ifdef PROGRESS
    printf("\nProduced %d neurons/fragments in %s.neu\n",numneur,CoreName);
    fflush(stdout);
#endif

    printf("DEBUG: starting cleanup\n");
    fflush(stdout);

    for (i = 0; i < numneur; i++) {
      printf("DEBUG: calling Kill_Region on neuron=%d\n", i);
      fflush(stdout);
      Kill_Region(neurons[i]);
    }
    printf("DEBUG: calling Kill_Clusters\n");
    fflush(stdout);
    Kill_Clusters(clust);
    printf("DEBUG: calling Kill_Overlaps\n");
    fflush(stdout);
    //Kill_Overlaps(ovl); - causing a bug
    printf("DEBUG: starting Kill_Segmentation loop\n");
    fflush(stdout);
    for (i = 0; i < NumChans; i++) {
      printf("DEBUG: Kill_Segmentation on index=%d\n", i);
      fflush(stdout);
      Kill_Segmentation(segs+i);
    }
    printf("DEBUG: calling free() on segs\n");
    fflush(stdout);
    free(segs);
  }

  printf("DEBUG: starting filestream cleanup\n");
  fflush(stdout);

  { int i;

    fclose(output);
    free(CoreName);
    free(RezFolder);
    for (i = 0; i < NumChans; i++)
      Kill_Array(Images[i]);
    free(Images);
  }

#ifdef VERBOSE
  printf("\nDid I free all arrays?:\n"); 
  Print_Inuse_List(stdout,4);
#endif

  exit (0);
}