Пример #1
0
int IonstatsReduceH5(int argc, const char *argv[])
{
  OptArgs opts;
  opts.ParseCmdLine(argc-1, argv+1);

  string output_h5_filename = opts.GetFirstString  ('o', "output", "");
  bool merge_proton_blocks  = opts.GetFirstBoolean ('b', "merge-proton-blocks", "true");
  vector<string>  input_h5_filename;
  opts.GetLeftoverArguments(input_h5_filename);

  if(input_h5_filename.empty() or output_h5_filename.empty()) {
    IonstatsReduceH5Help();
    return 1;
  }

  if(merge_proton_blocks)
    cout << "NOTE:" << argv[0] << " " << argv[1] << ": --merge-proton-blocks=true so any Proton block-specific read group suffixes will be merged" << endl;

  return IonstatsAlignmentReduceH5(output_h5_filename, input_h5_filename, merge_proton_blocks);
}
Пример #2
0
int IonstatsReduce(int argc, const char *argv[])
{
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);

  string output_json_filename = opts.GetFirstString('o', "output", "");
  vector<string>  input_jsons;
  opts.GetLeftoverArguments(input_jsons);

  if(input_jsons.empty() or output_json_filename.empty()) {
    IonstatsReduceHelp();
    return 1;
  }

  ifstream in(input_jsons[0].c_str(), ifstream::in);
  if (!in.good()) {
    fprintf(stderr, "[ionstats] ERROR: cannot open %s\n", input_jsons[0].c_str());
    return 1;
  }
  Json::Value first_input_json;
  in >> first_input_json;
  in.close();

  if (!first_input_json.isMember("meta")) {
    fprintf(stderr, "[ionstats] ERROR: %s is not a valid input file for ionstats reduce\n", input_jsons[0].c_str());
    return 1;
  }
  string format_name = first_input_json["meta"].get("format_name","").asString();

  if (format_name == "ionstats_basecaller")
    return IonstatsBasecallerReduce(output_json_filename, input_jsons);
  if (format_name == "ionstats_tf")
    return IonstatsTestFragmentsReduce(output_json_filename, input_jsons);
  if (format_name == "ionstats_alignment")
    return IonstatsAlignmentReduce(output_json_filename, input_jsons);

  fprintf(stderr, "[ionstats] ERROR: %s is not a valid input file for ionstats reduce\n", input_jsons[0].c_str());
  return 1;
}
Пример #3
0
int main(int argc, const char *argv[]) {
  OptArgs opts;
  string position_file;
  string h5file_in;
  string source;
  string h5file_out;
  string destination;
  string positions_file;
  bool help;
  string flowlimit_arg;
  unsigned int flowlimit;
  vector<string>otherArgs;

  DumpStartingStateOfExtractWells (argc,argv);

  opts.ParseCmdLine(argc, argv);
  opts.GetOption(h5file_in, "", 'i', "input");
  opts.GetOption(source, "", 's', "source");
  opts.GetOption(h5file_out, "", 'o', "output");
  opts.GetOption(destination, "", 'd', "destination");
  opts.GetOption(flowlimit_arg, "", 'f', "flowlimit");
  opts.GetOption(positions_file, "", 'p', "positions");
  opts.GetOption(help, "false", 'h', "help");
  opts.GetLeftoverArguments(otherArgs);

  // input data processing
  string line;
  vector<size_t> row_val;
  vector<size_t> col_val;
  ifstream filestream;
  if ( ! positions_file.empty() )
    filestream.open(&positions_file.At(0));
  istream &input = ( filestream.is_open() ) ? filestream : cin;


		      
  while ( getline(input, line) )
  {
    int num = -1;
    vector<size_t> ints;
    istringstream ss(line);
    while ( ss >> num && ints.size() < 2 ) {
      if (num < 0) {
	fprintf(stderr, "Found negative integer %d\n", num);
	exit(-1);
      }
      else
	ints.push_back((size_t)num);
    }
    if (ints.size() != 2) {
      fprintf(stderr, "Found %d integers in %s, expected 2\n", (int)ints.size(), &line[0]);
      continue;
    }
    row_val.push_back(ints.at(0));
    col_val.push_back(ints.at(1));
  }
  if (row_val.size() == 0 ) {
      fprintf(stdout, "No positions to extract, check input\n");
      exit(0);
  }    
  vector<size_t>input_positions(row_val.size(), 0);

  int numCPU = (int)sysconf( _SC_NPROCESSORS_ONLN );
  int numThreads = MAXTHREADS < numCPU ? MAXTHREADS : numCPU;
  fprintf(stdout, "Using %d threads of %d cores\n", numThreads, numCPU);

  if (source.empty())
    source = source + SIGNAL_IN;
  H5ReplayReader reader = H5ReplayReader(h5file_in, &source[0]);
  if ( h5file_out.empty() )
    h5file_out = h5file_out + H5FILE_OUT;
  if ( destination.empty() )
    destination = destination + SIGNAL_OUT;

  reader.Open();
  int rank = reader.GetRank();
  vector<hsize_t>dims(rank);
  vector<hsize_t>chunks(rank);
  reader.GetDims(dims);
  reader.GetChunkSize(chunks);
  reader.Close();

  // convert input row, col positions to indices
  for (hsize_t i=0; i<input_positions.size(); i++)
    input_positions.At(i) = RowColToIndex(row_val.At(i), col_val.At(i), dims.At(0), dims.At(1));
  sort(input_positions.begin(), input_positions.end());

  fprintf(stdout, "Opened for read %s:%s with rank %d, row x col x flow dims=[ ", &h5file_in[0], &source[0], rank);
  for (int i=0; i<rank; i++)
    fprintf(stdout, "%d ", (int)dims.At(i));
  fprintf(stdout, "], chunksize=[ ");
  for (int i=0; i<rank; i++)
    fprintf(stdout, "%d ", (int)chunks.At(i));
  fprintf(stdout, "]\n");

  
  H5ReplayRecorder recorder = H5ReplayRecorder(h5file_out, &destination[0],reader.GetType(),2);
  recorder.CreateFile();


  {
    vector<hsize_t> dims_pos(1, input_positions.size());
    string pos_name = "position";
    H5ReplayRecorder recorder_pos = H5ReplayRecorder(h5file_out, &pos_name[0],H5T_NATIVE_ULONG,1);
    recorder_pos.CreateDataset(dims_pos);
  }

  {
    string chip_dims = "chip_dims";
    H5ReplayRecorder recorder_chip_dims = H5ReplayRecorder(h5file_out, &chip_dims[0],H5T_NATIVE_ULLONG,1);
    vector<hsize_t> offset_dims(1,0);
    vector<hsize_t> count_dims(1,3);
    recorder_chip_dims.CreateDataset(count_dims);
    recorder_chip_dims.Write(offset_dims, count_dims, offset_dims, count_dims, &dims[0]);
  }
  if (flowlimit_arg.empty())
    flowlimit = dims.At(2);
  else
    flowlimit = atoi(flowlimit_arg.c_str());

  flowlimit = (flowlimit < dims.At(2)) ? flowlimit : dims.At(2);
  fprintf(stdout, "Using %u flows\n", flowlimit);

  // chunks no bigger than 100000
  vector<hsize_t>chunks_out(2);
  chunks_out.At(0) = (input_positions.size() < 10000) ? input_positions.size() : 100000;
  chunks_out.At(1) = chunks.At(2);

  recorder.CreateDataset(chunks_out);
  vector<hsize_t> extension(2);
  extension.At(0) = input_positions.size();
  extension.At(1) = dims.At(2);
  recorder.ExtendDataSet(extension); // extend if necessary

  fprintf(stdout, "Opening for write %s:%s with rank %d, position x flow chunks=[ ", &h5file_out[0], &destination[0], (int)chunks_out.size());
  for (int i=0; i<(int)chunks_out.size(); i++)
    fprintf(stdout, "%d ", (int)chunks_out.At(i));
  fprintf(stdout, "]\n");

  int max_threads_ever = (dims.At(0)/chunks.At(0) +1)*(dims.At(1)/chunks.At(1) +1);
  thread_flags.resize (max_threads_ever, 0);
  // fprintf(stdout, "max_threads_ever = %d\n", max_threads_ever);
  unsigned int thread_id = 0;
  vector<thread_args> my_args( max_threads_ever );

  size_t runningCount = 0;

  // layout is rows x cols x flows
  for (size_t row=0; row<dims.At(0); ) {
    for (size_t col=0; col<dims.At(1); ) {

      size_t ix = 0;
      hsize_t offset_out = 0;
      hsize_t count_out = 0;

      vector<size_t> limit(2);
      limit.At(0) = ( row+chunks.At(0) < dims.At(0) ) ? row+chunks.At(0) : dims.At(0);
      limit.At(1) = ( col+chunks.At(1) < dims.At(1) ) ? col+chunks.At(1) : dims.At(1);
      // fprintf(stdout, "Block row=%lu, col=%lu, count=[%lu %lu]\n", row, col, limit.At(0), limit.At(1));
      // bool first_time=true;
      for (size_t rr=row; rr<limit.At(0) && ix < input_positions.size(); rr++) {
	for (size_t cc=col; cc<limit.At(1) && ix < input_positions.size(); cc++) {
	  size_t pos = input_positions.At(ix);
	  size_t chp_indx = RowColToIndex(rr,cc, dims.At(0), dims.At(1));
	  // if (first_time)
	  //   fprintf(stdout, "Entering loop with pos=%lu, ix=%lu, chp_indx=%lu\n", pos, ix, chp_indx);
	  // first_time = false;

	  if ( chp_indx < pos)
	    continue;

	  while ( chp_indx > pos){
	    // fprintf(stdout, "chp_indx=%lu > pos=%lu, incrementing ix=%lu\n", chp_indx, pos, ix);
	    ix++;
	    if (ix == input_positions.size()){
	      break;
	    }
	    pos = input_positions.At(ix);
	    // first_time = true;
	  }

	  if( chp_indx == pos){
	    if ( count_out == 0)
	      offset_out = runningCount;
	    count_out++;
	    runningCount++;
	    // fprintf(stdout, "found: rr=%d, cc=%d, pos=%d, index=%d, ix=%lu, runningCount=%lu\n", (int)rr, (int)cc, (int)pos, (int)chp_indx, ix, runningCount);
	    ix++;
	    continue;
	  }
	  
	}
      }

      assert (ix <= input_positions.size() );
      assert (runningCount <= input_positions.size() );
      
      if (count_out > 0) {
	pthread_t thread;
	int thread_status = 0;

	assert( thread_id < thread_flags.size() );
	my_args.at(thread_id).row = row;
	my_args.at(thread_id).col = col;
	my_args.at(thread_id).chunks = &chunks;
	my_args.at(thread_id).chunks_out = &chunks_out;
	my_args.at(thread_id).dims = &dims;
	my_args.at(thread_id).h5file_in = &h5file_in;
	my_args.at(thread_id).source = &source;
	my_args.at(thread_id).h5file_out = &h5file_out;
	my_args.at(thread_id).destination = &destination;
	my_args.at(thread_id).offset_out = offset_out;
	my_args.at(thread_id).count_out = count_out;
	my_args.at(thread_id).input_positions = &input_positions;
	my_args.at(thread_id).thread_id = thread_id;
	my_args.at(thread_id).flowlimit = flowlimit;

	// fprintf(stdout, "creating thread %d from row=%d (max %d), column=%d (max %d), offset_out=%llu, count_out=%llu\n", thread_id, (int)row, (int)dims.At(0), (int)col, (int)dims.At(1), offset_out, count_out);
	while (accumulate(thread_flags.begin(), thread_flags.end(), 0) > numThreads) {
	  // only have to be approximate, don't worry about races
	  fprintf(stdout, "Sleeping before creating thread %d from row=%d (max %d), column=%d (max %d), offset_out=%llu, count_out=%llu ...\n", thread_id, (int)row, (int)dims.At(0), (int)col, (int)dims.At(1), offset_out, count_out);
	  sleep(1);
	}
	thread_flags.At(thread_id) = 1;
	thread_status = pthread_create(&thread, NULL, do_subset, (void *)&my_args[thread_id]);
	// do_subset((void *)&my_args[thread_id]);
	assert (thread_status >= 0);
	thread_id++;
      }
      col += chunks.At(1);
      //fflush(stdout);
    }
    row += chunks.At(0);
  }
  while (accumulate(thread_flags.begin(), thread_flags.end(), 0) > 0) {
    // wait for the threads to finish
    // fprintf(stdout, "Waiting ...\n");
    sleep(1);
  }

  assert (runningCount == input_positions.size() );
  cout << "Done." << endl;
  pthread_exit(NULL);
}
Пример #4
0
int main(int argc, const char *argv[])
{
    OptArgs opts;
    opts.ParseCmdLine(argc, argv);
    bool help, combineSffs;
    string sffFile;
    string bamFile;
    vector<string> infiles;
    opts.GetOption(help,"false", 'h', "help");
    opts.GetOption(combineSffs,"false", 'c', "combine-sffs");
    opts.GetOption(bamFile,"",'o',"out-filename");
    opts.GetLeftoverArguments(infiles);

    if(help || infiles.empty())
    {
        usage();
    }

	if((!combineSffs) && infiles.size() > 1)
	{
        cerr << "sff2bam ERROR: if you want to combine all sff files into a single bam file, please use option -c true." << endl;
        usage();
	}

    sffFile= infiles.front();

    if(bamFile.length() < 1)
    {
        bamFile = sffFile.substr(0, sffFile.length() - 3);
        bamFile += "bam";
    }

    sff_file_t* sff_file = sff_fopen(sffFile.c_str(), "r", NULL, NULL);
    if(!sff_file)
    {
        cerr << "sff2bam ERROR: fail to open " << sffFile << endl;
        exit(1);
    }

	// All sff files must have the same flow and key
	if(combineSffs && infiles.size() > 1)
	{
        for(size_t n = 1; n < infiles.size(); ++n)
		{
			sff_file_t* sff_file2 = sff_fopen(infiles[n].c_str(), "r", NULL, NULL);
			if(!sff_file2)
			{
				sff_fclose(sff_file);
				cerr << "sff2bam ERROR: fail to open " << infiles[n] << endl;
				exit(1);
			}

			if(strcmp(sff_file2->header->flow->s, sff_file->header->flow->s) != 0 ||
				strcmp(sff_file2->header->key->s, sff_file->header->key->s) != 0)
			{
				sff_fclose(sff_file);
				sff_fclose(sff_file2);
				cerr << "sff2bam ERROR: " << sffFile << " and " << infiles[n] << " have different flows or keys." << endl;
				exit(1);
			}

			sff_fclose(sff_file2);
		}
	}

    sff_t* sff = NULL;
    // Open 1st read for read group name
    sff = sff_read(sff_file);
    if(!sff)
    {
        sff_fclose(sff_file);
        cerr << "sff2bam ERROR: fail to read " << sffFile << endl;
        exit(1);
    }

    // Set up BAM header
    SamHeader sam_header;
    sam_header.Version = "1.4";
    sam_header.SortOrder = "unsorted";

    SamProgram sam_program("sff2bam");
    sam_program.Name = "sff2bam";
    sam_program.Version = SFF2BAM_VERSION;
    sam_program.CommandLine = "sff2bam";
    sam_header.Programs.Add(sam_program);

    string rgname = sff->rheader->name->s;
    int index = rgname.find(":");
    rgname = rgname.substr(0, index);

    SamReadGroup read_group(rgname);
    read_group.FlowOrder = sff->gheader->flow->s;
    read_group.KeySequence = sff->gheader->key->s;

    sam_header.ReadGroups.Add(read_group);

    RefVector refvec;
    BamWriter bamWriter;
    bamWriter.SetCompressionMode(BamWriter::Compressed);

    if(!bamWriter.Open(bamFile, sam_header, refvec))
    {
        sff_fclose(sff_file);
        cerr << "sff2bam ERROR: failed to open " << bamFile << endl;
        exit(1);
    }

    // Save 1st read
    BamAlignment bam_alignment0;
    bam_alignment0.SetIsMapped(false);
    bam_alignment0.Name = sff->rheader->name->s;
    size_t nBases = sff->rheader->n_bases + 1 - sff->rheader->clip_qual_left;
    if(sff->rheader->clip_qual_right > 0)
    {
        nBases = sff->rheader->clip_qual_right - sff->rheader->clip_qual_left;
    }
    if(nBases > 0)
    {
        bam_alignment0.QueryBases.reserve(nBases);
        bam_alignment0.Qualities.reserve(nBases);
        for (int base = sff->rheader->clip_qual_left - 1; base < sff->rheader->clip_qual_right - 1; ++base)
        {
            bam_alignment0.QueryBases.push_back(sff->read->bases->s[base]);
            bam_alignment0.Qualities.push_back(sff->read->quality->s[base] + 33);
        }
    }

    int clip_flow = 0;
    for (unsigned int base = 0; base < sff->rheader->clip_qual_left && base < sff->rheader->n_bases; ++base)
    {
        clip_flow += sff->read->flow_index[base];
    }
    if (clip_flow > 0)
    {
        clip_flow--;
    }

    bam_alignment0.AddTag("RG","Z", rgname);
    bam_alignment0.AddTag("PG","Z", string("sff2bam"));
    bam_alignment0.AddTag("ZF","i", clip_flow); // TODO: trim flow
    vector<uint16_t> flowgram0(sff->gheader->flow_length);
    copy(sff->read->flowgram, sff->read->flowgram + sff->gheader->flow_length, flowgram0.begin());
    bam_alignment0.AddTag("FZ", flowgram0);
    sff_destroy(sff);
    sff = NULL;

    bamWriter.SaveAlignment(bam_alignment0);

    // Save rest reads
    while(NULL != (sff = sff_read(sff_file)))
    {
        BamAlignment bam_alignment;
        bam_alignment.SetIsMapped(false);
        bam_alignment.Name = sff->rheader->name->s;   
        nBases = sff->rheader->n_bases + 1 - sff->rheader->clip_qual_left;
        if(sff->rheader->clip_qual_right > 0)
        {
            nBases = sff->rheader->clip_qual_right - sff->rheader->clip_qual_left;
        }
        if(nBases > 0)
        {
            bam_alignment.QueryBases.reserve(nBases);
            bam_alignment.Qualities.reserve(nBases);
            for (int base = sff->rheader->clip_qual_left - 1; base < sff->rheader->clip_qual_right - 1; ++base)
            {
                bam_alignment.QueryBases.push_back(sff->read->bases->s[base]);
                bam_alignment.Qualities.push_back(sff->read->quality->s[base] + 33);
            }
        }

        clip_flow = 0;
        for (unsigned int base = 0; base <= sff->rheader->clip_qual_left && base < sff->rheader->n_bases; ++base)
        {
            clip_flow += sff->read->flow_index[base];
        }
        if (clip_flow > 0)
        {
            clip_flow--;
        }

        bam_alignment.AddTag("RG","Z", rgname);
        bam_alignment.AddTag("PG","Z", string("sff2bam"));
        bam_alignment.AddTag("ZF","i", clip_flow); // TODO: trim flow
        vector<uint16_t> flowgram(sff->gheader->flow_length);
        copy(sff->read->flowgram, sff->read->flowgram + sff->gheader->flow_length, flowgram.begin());
        bam_alignment.AddTag("FZ", flowgram);
        sff_destroy(sff);
        sff = NULL;

        bamWriter.SaveAlignment(bam_alignment);
    }

	sff_fclose(sff_file);

	if(combineSffs && infiles.size() > 1)
	{
        for(size_t n = 1; n < infiles.size(); ++n)
		{
			sff_file_t* sff_file2 = sff_fopen(infiles[n].c_str(), "r", NULL, NULL);

			while(NULL != (sff = sff_read(sff_file2)))
			{
				BamAlignment bam_alignment;
				bam_alignment.SetIsMapped(false);
				bam_alignment.Name = sff->rheader->name->s;   
				nBases = sff->rheader->n_bases + 1 - sff->rheader->clip_qual_left;
				if(sff->rheader->clip_qual_right > 0)
				{
					nBases = sff->rheader->clip_qual_right - sff->rheader->clip_qual_left;
				}
				if(nBases > 0)
				{
					bam_alignment.QueryBases.reserve(nBases);
					bam_alignment.Qualities.reserve(nBases);
					for (int base = sff->rheader->clip_qual_left - 1; base < sff->rheader->clip_qual_right - 1; ++base)
					{
						bam_alignment.QueryBases.push_back(sff->read->bases->s[base]);
						bam_alignment.Qualities.push_back(sff->read->quality->s[base] + 33);
					}
				}

				clip_flow = 0;
				for (unsigned int base = 0; base <= sff->rheader->clip_qual_left && base < sff->rheader->n_bases; ++base)
				{
					clip_flow += sff->read->flow_index[base];
				}
				if (clip_flow > 0)
				{
					clip_flow--;
				}

				bam_alignment.AddTag("RG","Z", rgname);
				bam_alignment.AddTag("PG","Z", string("sff2bam"));
				bam_alignment.AddTag("ZF","i", clip_flow); // TODO: trim flow
				vector<uint16_t> flowgram(sff->gheader->flow_length);
				copy(sff->read->flowgram, sff->read->flowgram + sff->gheader->flow_length, flowgram.begin());
				bam_alignment.AddTag("FZ", flowgram);
				sff_destroy(sff);
				sff = NULL;

				bamWriter.SaveAlignment(bam_alignment);
			}

			sff_fclose(sff_file2);
		}
	}

    bamWriter.Close();    

    return 0;
}
Пример #5
0
int main(int argc, const char *argv[]) {
  OptArgs opts;  
  string h5file;
  string source;
  string destination;
  vector<string> infiles;
  bool help;
  string flowlimit_arg;
  unsigned int flowlimit;

  DumpStartingStateOfNormWells (argc,argv);

  opts.ParseCmdLine(argc, argv);
  opts.GetOption(h5file, "", '-', "h5file");
  opts.GetOption(source, "", 's', "source");
  opts.GetOption(destination, "", 'd', "destination");
  opts.GetOption(flowlimit_arg, "", 'f', "flowlimit");
  opts.GetOption(help, "false", 'h', "help");
  opts.GetLeftoverArguments(infiles);
  if(help || infiles.empty() || (infiles.size() > 1) ) {
    usage();
  }
  h5file = infiles.front();

  int numCPU = (int)sysconf( _SC_NPROCESSORS_ONLN );
  int numThreads = MAXTHREADS < numCPU ? MAXTHREADS : numCPU;
  fprintf(stdout, "Using %d threads of %d cores\n", numThreads, numCPU);

  if (source.empty())
    source = source + SIGNAL_IN;
  H5ReplayReader reader = H5ReplayReader(h5file, &source[0]);
  if ( destination.empty() )
    destination = destination + SIGNAL_OUT;

  H5ReplayRecorder recorder = (source.compare(destination)==0)
    ? H5ReplayRecorder(h5file, &destination[0])
    : H5ReplayRecorder(h5file, &destination[0],reader.GetType(),reader.GetRank());

  reader.Open();
  int rank = reader.GetRank();
  vector<hsize_t>dims(rank,0);
  vector<hsize_t>chunks(rank,0);
  reader.GetDims(dims);
  reader.GetChunkSize(chunks);
  reader.Close();

  fprintf(stdout, "Opening for read %s:%s with rank %d, row x col x flow dims=[ ", &h5file[0], &source[0], rank);
  for (int i=0; i<rank; i++)
    fprintf(stdout, "%d ", (int)dims[i]);
  fprintf(stdout, "], chunksize=[ ");
  for (int i=0; i<rank; i++)
    fprintf(stdout, "%d ", (int)chunks[i]);
  fprintf(stdout, "]\n");

  if (flowlimit_arg.empty())
    flowlimit = dims[2];
  else
    flowlimit = atoi(flowlimit_arg.c_str());

  flowlimit = (flowlimit < dims[2]) ? flowlimit : dims[2];
  fprintf(stdout, "Using %u flows\n", flowlimit);

  // hard code region size to be at least 100x100
  chunks[0] = (chunks[0] < 100) ? 100 : chunks[0];
  chunks[1] = (chunks[1] < 100) ? 100 : chunks[1];

  recorder.CreateDataset(chunks);
  
  int max_threads_ever = (dims[0]/chunks[0] +1)*(dims[1]/chunks[1] +1);
  thread_flags.resize (max_threads_ever, 0);
  // fprintf(stdout, "max_threads_ever = %d\n", max_threads_ever);
  unsigned int thread_id = 0;
  vector<compute_norm_args> my_args( max_threads_ever );
  
  // layout is rows x cols x flows
  for (hsize_t row=0; row<dims[0]; ) {
    for (hsize_t col=0; col<dims[1]; ) {
      pthread_t thread;
      int thread_status;

      assert( thread_id < thread_flags.size() );
      my_args.at(thread_id).row = row;
      my_args.at(thread_id).col = col;
      my_args.at(thread_id).chunks = &chunks;
      my_args.at(thread_id).dims = &dims;
      my_args.at(thread_id).h5file = &h5file;
      my_args.at(thread_id).source = &source;
      my_args.at(thread_id).destination = &destination;
      my_args.at(thread_id).thread_id = thread_id;
      my_args.at(thread_id).flowlimit = flowlimit;

      fprintf(stdout, "creating thread %d from row=%d (max %d), column=%d (max %d)\n", thread_id, (int)row, (int)dims[0], (int)col, (int)dims[1]);
      while (accumulate(thread_flags.begin(), thread_flags.end(), 0) > numThreads) {
	// only have to be approximate, don't worry about races
	// fprintf(stdout, "Sleeping ...\n");
	sleep(1);
      }
      thread_flags[thread_id] = 1;
      thread_status = pthread_create(&thread, NULL, compute_norm, (void *)&my_args[thread_id]);
      // compute_norm((void *)&my_args[thread_id]);
      assert (thread_status >= 0);
      thread_id++;

      col += chunks[1];
      //fflush(stdout);
    }
    row += chunks[0];
  }
  while (accumulate(thread_flags.begin(), thread_flags.end(), 0) > 0) {
    // wait for the threads to finish
    // fprintf(stdout, "Waiting ...\n");
    sleep(1);
  }

  cout << "Done." << endl;
  pthread_exit(NULL);
}