Exemplo n.º 1
0
   bool corpus_gen::generate(const char* pCmd_line)
   {
      static const command_line_params::param_desc param_desc_array[] = 
      {
         { "corpus_gen", 0, false },
         { "in", 1, true },
         { "deep", 0, false },
         { "blockpercentage", 1, false },
         { "width", 1, false },
         { "height", 1, false },
         { "alpha", 0, false },
      };

      command_line_params params;
      if (!params.parse(pCmd_line, CRNLIB_ARRAY_SIZE(param_desc_array), param_desc_array, true))
         return false;

      if (!params.has_key("in"))
      {
         console::error("Must specify one or more input files using the /in option!");
         return false;
      }

      uint num_dst_blocks_x = params.get_value_as_int("width", 0, 4096, 128, 4096);
      num_dst_blocks_x = (num_dst_blocks_x + 3) / 4;
      uint num_dst_blocks_y = params.get_value_as_int("height", 0, 4096, 128, 4096);
      num_dst_blocks_y = (num_dst_blocks_y + 3) / 4;

      const uint total_dst_blocks = num_dst_blocks_x * num_dst_blocks_y;
      image_u8 dst_img(num_dst_blocks_x * 4, num_dst_blocks_y * 4);
      uint next_dst_block = 0;
      uint total_dst_images = 0;

      random rm;

      block_hash_map block_hash;
      block_hash.reserve(total_dst_blocks);

      uint total_images_loaded = 0;
      uint total_blocks_written = 0;

      command_line_params::param_map_const_iterator it = params.begin();
      for ( ; it != params.end(); ++it)
      {
         if (it->first != "in")
            continue;
         if (it->second.m_values.empty())
         {
            console::error("Must follow /in parameter with a filename!\n");
            return false;
         }
         
         for (uint in_value_index = 0; in_value_index < it->second.m_values.size(); in_value_index++)
         {
            const dynamic_string& filespec = it->second.m_values[in_value_index];

            find_files file_finder;
            if (!file_finder.find(filespec.get_ptr(), find_files::cFlagAllowFiles | (params.has_key("deep") ? find_files::cFlagRecursive : 0)))
            {
               console::warning("Failed finding files: %s", filespec.get_ptr());
               continue;
            }

            if (file_finder.get_files().empty())
            {
               console::warning("No files found: %s", filespec.get_ptr());
               return false;
            }

            const find_files::file_desc_vec& files = file_finder.get_files();

            for (uint file_index = 0; file_index < files.size(); file_index++)
            {
               const find_files::file_desc& file_desc = files[file_index];

               console::printf("Loading image: %s", file_desc.m_fullname.get_ptr());

               image_u8 img;
               if (!image_utils::read_from_file(img, file_desc.m_fullname.get_ptr(), 0))
               {
                  console::warning("Failed loading image file: %s", file_desc.m_fullname.get_ptr());
                  continue;
               }

               if (!params.has_key("alpha"))
               {
                  for (uint y = 0; y < img.get_height(); y++)
                     for (uint x = 0; x < img.get_width(); x++)
                        img(x, y).a = 255;
               }

               total_images_loaded++;

               uint width = img.get_width();
               uint height = img.get_height();

               uint num_blocks_x = (width + 3) / 4;
               uint num_blocks_y = (height + 3) / 4;
               uint total_blocks = num_blocks_x * num_blocks_y;

               float percentage = params.get_value_as_float("blockpercentage", 0, .1f, .001f, 1.0f);
               uint total_rand_blocks = math::maximum<uint>(1U, (uint)(total_blocks * percentage));

               crnlib::vector<uint> remaining_blocks(total_blocks);
               for (uint i = 0; i < total_blocks; i++)
                  remaining_blocks[i] = i;

               uint num_blocks_remaining = total_rand_blocks;
               while (num_blocks_remaining)
               {
                  if (remaining_blocks.empty())
                     break;

                  uint rand_block_index = rm.irand(0, remaining_blocks.size());
                  uint block_index = remaining_blocks[rand_block_index];
                  remaining_blocks.erase_unordered(rand_block_index);

                  uint block_y = block_index / num_blocks_x;
                  uint block_x = block_index % num_blocks_x;

                  block b;

                  for (uint y = 0; y < 4; y++)
                  {
                     for (uint x = 0; x < 4; x++)
                     {
                        b.m_c[x+y*4] = img.get_clamped(block_x*4+x, block_y*4+y);
                     }
                  }

                  if (!block_hash.insert(b).second)
                     continue;
                  if (block_hash.size() == total_dst_blocks)
                  {
                     block_hash.clear();
                     block_hash.reserve(total_dst_blocks);
                  }

                  uint dst_block_x = next_dst_block % num_dst_blocks_x;
                  uint dst_block_y = next_dst_block / num_dst_blocks_x;
                  for (uint y = 0; y < 4; y++)
                  {
                     for (uint x = 0; x < 4; x++)
                     {
                        dst_img(dst_block_x * 4 + x, dst_block_y * 4 + y) = b.m_c[x + y*4];
                     }
                  }        

                  next_dst_block++;
                  if (total_dst_blocks == next_dst_block)
                  {
                     sort_blocks(dst_img);

                     dynamic_string dst_filename(cVarArg, "test_%u.tga", total_dst_images);
                     console::printf("Writing image: %s", dst_filename.get_ptr());
                     image_utils::write_to_file(dst_filename.get_ptr(), dst_img, 0);

                     dst_img.set_all(color_quad_u8::make_black());

                     next_dst_block = 0;

                     total_dst_images++;
                  }

                  total_blocks_written++;

                  num_blocks_remaining--;
               }

            }  // file_index

         } // in_value_index

      }         

      if (next_dst_block)
      {
         sort_blocks(dst_img);

         dynamic_string dst_filename(cVarArg, "test_%u.tga", total_dst_images);
         console::printf("Writing image: %s", dst_filename.get_ptr());
         image_utils::write_to_file(dst_filename.get_ptr(), dst_img, 0);

         next_dst_block = 0;

         total_dst_images++;
      }

      console::printf("Found %u input images, %u output images, %u total blocks", total_images_loaded, total_dst_images, total_blocks_written);

      return true;   
   }
Exemplo n.º 2
0
/*!
  @abstract Sort an unsorted BAM file based on the chromosome order
  and the leftmost position of an alignment

  @param  is_by_qname whether to sort by query name
  @param  fn       name of the file to be sorted
  @param  prefix   prefix of the output and the temporary files; upon
	                   sucessess, prefix.bam will be written.
  @param  max_mem  approxiate maximum memory (very inaccurate)

  @discussion It may create multiple temporary subalignment files
  and then merge them by calling bam_merge_core(). This function is
  NOT thread safe.
 */
void bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size_t _max_mem, int is_stdout, int n_threads, int level, int sort_type)
{
	int ret, i, n_files = 0;
	size_t mem, max_k, k, max_mem;
	bam_header_t *header;
	bamFile fp;
	bam1_t *b, **buf;
	char *fnout = 0;

	if (n_threads < 2) n_threads = 1;
	g_is_by_qname = is_by_qname;
	max_k = k = 0; mem = 0;
	max_mem = _max_mem * n_threads;
	buf = 0;
	fp = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");
	if (fp == 0) {
		fprintf(stderr, "[bam_sort_core] fail to open file %s\n", fn);
		return;
	}
	header = bam_header_read(fp);
	if (is_by_qname) change_SO(header, "queryname");
	else change_SO(header, "coordinate");
	// write sub files
	for (;;) {
		if (k == max_k) {
			size_t old_max = max_k;
			max_k = max_k? max_k<<1 : 0x10000;
			buf = realloc(buf, max_k * sizeof(void*));
			memset(buf + old_max, 0, sizeof(void*) * (max_k - old_max));
		}
		if (buf[k] == 0) buf[k] = (bam1_t*)calloc(1, sizeof(bam1_t));
		b = buf[k];
		if ((ret = bam_read1(fp, b)) < 0) break;
		if (b->data_len < b->m_data>>2) { // shrink
			b->m_data = b->data_len;
			kroundup32(b->m_data);
			b->data = realloc(b->data, b->m_data);
		}
		mem += sizeof(bam1_t) + b->m_data + sizeof(void*) + sizeof(void*); // two sizeof(void*) for the data allocated to pointer arrays
		++k;
		if (mem >= max_mem) {
			n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads, sort_type);
			mem = k = 0;
		}
	}
	if (ret != -1)
		fprintf(stderr, "[bam_sort_core] truncated file. Continue anyway.\n");
	// output file name
	fnout = calloc(strlen(prefix) + 20, 1);
	if (is_stdout) sprintf(fnout, "-");
	else sprintf(fnout, "%s.bam", prefix);
	// write the final output
	if (n_files == 0) { // a single block
		char mode[8];
		strcpy(mode, "w");
		if (level >= 0) sprintf(mode + 1, "%d", level < 9? level : 9);
                sort_aux_core(k, buf, sort_type);
#ifndef _PBGZF_USE 
		write_buffer(fnout, mode, k, buf, header, n_threads);
#else
		write_buffer(fnout, mode, k, buf, header);
#endif
	} else { // then merge
		char **fns;
		n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads, sort_type);
		fprintf(stderr, "[bam_sort_core] merging from %d files...\n", n_files);
		fns = (char**)calloc(n_files, sizeof(char*));
		for (i = 0; i < n_files; ++i) {
			fns[i] = (char*)calloc(strlen(prefix) + 20, 1);
			sprintf(fns[i], "%s.%.4d.bam", prefix, i);
		}
#ifndef _PBGZF_USE 
		bam_merge_core2(is_by_qname, fnout, 0, n_files, fns, 0, 0, n_threads, level);
#else
		bam_merge_core2(is_by_qname, fnout, 0, n_files, fns, 0, 0, level);
#endif
		for (i = 0; i < n_files; ++i) {
			unlink(fns[i]);
			free(fns[i]);
		}
		free(fns);
	}
	free(fnout);
	// free
	for (k = 0; k < max_k; ++k) {
		if (!buf[k]) continue;
		free(buf[k]->data);
		free(buf[k]);
	}
	free(buf);
	bam_header_destroy(header);
	bam_close(fp);
}
Exemplo n.º 3
0
/*!
  @abstract Sort an unsorted BAM file based on the chromosome order
  and the leftmost position of an alignment

  @param  is_by_qname whether to sort by query name
  @param  fn       name of the file to be sorted
  @param  prefix   prefix of the output and the temporary files; upon
	                   sucessess, prefix.bam will be written.
  @param  max_mem  approxiate maximum memory (very inaccurate)

  @discussion It may create multiple temporary subalignment files
  and then merge them by calling bam_merge_core(). This function is
  NOT thread safe.
 */
void bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size_t max_mem, int is_stdout)
{
	int n, ret, k, i;
	size_t mem;
	bam_header_t *header;
	bamFile fp;
	bam1_t *b, **buf;

	g_is_by_qname = is_by_qname;
	n = k = 0; mem = 0;
	fp = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");
	if (fp == 0) {
		fprintf(stderr, "[bam_sort_core] fail to open file %s\n", fn);
		return;
	}
	header = bam_header_read(fp);
    if (is_by_qname) change_SO(header, "queryname");
    else change_SO(header, "coordinate");
	buf = (bam1_t**)calloc(max_mem / BAM_CORE_SIZE, sizeof(bam1_t*));
	// write sub files
	for (;;) {
		if (buf[k] == 0) buf[k] = (bam1_t*)calloc(1, sizeof(bam1_t));
		b = buf[k];
		if ((ret = bam_read1(fp, b)) < 0) break;
		mem += ret;
		++k;
		if (mem >= max_mem) {
			sort_blocks(n++, k, buf, prefix, header, 0);
			mem = 0; k = 0;
		}
	}
	if (ret != -1)
		fprintf(stderr, "[bam_sort_core] truncated file. Continue anyway.\n");
	if (n == 0) sort_blocks(-1, k, buf, prefix, header, is_stdout);
	else { // then merge
		char **fns, *fnout;
		fprintf(stderr, "[bam_sort_core] merging from %d files...\n", n+1);
		sort_blocks(n++, k, buf, prefix, header, 0);
		fnout = (char*)calloc(strlen(prefix) + 20, 1);
		if (is_stdout) sprintf(fnout, "-");
		else sprintf(fnout, "%s.bam", prefix);
		fns = (char**)calloc(n, sizeof(char*));
		for (i = 0; i < n; ++i) {
			fns[i] = (char*)calloc(strlen(prefix) + 20, 1);
			sprintf(fns[i], "%s.%.4d.bam", prefix, i);
		}
		bam_merge_core(is_by_qname, fnout, 0, n, fns, 0, 0);
		free(fnout);
		for (i = 0; i < n; ++i) {
			unlink(fns[i]);
			free(fns[i]);
		}
		free(fns);
	}
        for (k = 0; (size_t)k < max_mem / BAM_CORE_SIZE; ++k) {
		if (buf[k]) {
			free(buf[k]->data);
			free(buf[k]);
		}
	}
	free(buf);
	bam_header_destroy(header);
	bam_close(fp);
}
Exemplo n.º 4
0
/*!
  @abstract Sort an unsorted BAM file based on the chromosome order
  and the leftmost position of an alignment

  @param  is_by_qname whether to sort by query name
  @param  fn       name of the file to be sorted
  @param  prefix   prefix of the temporary files (prefix.NNNN.bam are written)
  @param  fnout    name of the final output file to be written
  @param  modeout  sam_open() mode to be used to create the final output file
  @param  max_mem  approxiate maximum memory (very inaccurate)
  @return 0 for successful sorting, negative on errors

  @discussion It may create multiple temporary subalignment files
  and then merge them by calling bam_merge_core(). This function is
  NOT thread safe.
 */
int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, const char *fnout, const char *modeout, size_t _max_mem, int n_threads)
{
    int ret, i, n_files = 0;
    size_t mem, max_k, k, max_mem;
    bam_hdr_t *header;
    samFile *fp;
    bam1_t *b, **buf;

    if (n_threads < 2) n_threads = 1;
    g_is_by_qname = is_by_qname;
    max_k = k = 0; mem = 0;
    max_mem = _max_mem * n_threads;
    buf = NULL;
    fp = sam_open(fn, "r");
    if (fp == NULL) {
        fprintf(pysamerr, "[bam_sort_core] fail to open file %s\n", fn);
        return -1;
    }
    header = sam_hdr_read(fp);
    if (is_by_qname) change_SO(header, "queryname");
    else change_SO(header, "coordinate");
    // write sub files
    for (;;) {
        if (k == max_k) {
            size_t kk, old_max = max_k;
            max_k = max_k? max_k<<1 : 0x10000;
            buf = (bam1_t**)realloc(buf, max_k * sizeof(bam1_t*));
            for (kk = old_max; kk < max_k; ++kk) buf[kk] = NULL;
        }
        if (buf[k] == NULL) buf[k] = bam_init1();
        b = buf[k];
        if ((ret = sam_read1(fp, header, b)) < 0) break;
        if (b->l_data < b->m_data>>2) { // shrink
            b->m_data = b->l_data;
            kroundup32(b->m_data);
            b->data = (uint8_t*)realloc(b->data, b->m_data);
        }
        mem += sizeof(bam1_t) + b->m_data + sizeof(void*) + sizeof(void*); // two sizeof(void*) for the data allocated to pointer arrays
        ++k;
        if (mem >= max_mem) {
            n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads);
            mem = k = 0;
        }
    }
    if (ret != -1)
        fprintf(pysamerr, "[bam_sort_core] truncated file. Continue anyway.\n");
    // write the final output
    if (n_files == 0) { // a single block
        ks_mergesort(sort, k, buf, 0);
        write_buffer(fnout, modeout, k, buf, header, n_threads);
    } else { // then merge
        char **fns;
        n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads);
        fprintf(pysamerr, "[bam_sort_core] merging from %d files...\n", n_files);
        fns = (char**)calloc(n_files, sizeof(char*));
        for (i = 0; i < n_files; ++i) {
            fns[i] = (char*)calloc(strlen(prefix) + 20, 1);
            sprintf(fns[i], "%s.%.4d.bam", prefix, i);
        }
        if (bam_merge_core2(is_by_qname, fnout, modeout, NULL, n_files, fns, MERGE_COMBINE_RG|MERGE_COMBINE_PG, NULL, n_threads) < 0) {
            // Propagate bam_merge_core2() failure; it has already emitted a
            // message explaining the failure, so no further message is needed.
            return -1;
        }
        for (i = 0; i < n_files; ++i) {
            unlink(fns[i]);
            free(fns[i]);
        }
        free(fns);
    }
    // free
    for (k = 0; k < max_k; ++k) bam_destroy1(buf[k]);
    free(buf);
    bam_hdr_destroy(header);
    sam_close(fp);
    return 0;
}
/*===========================================================================*
 *				rw_scattered				     *
 *===========================================================================*/
static void rw_scattered(
  dev_t dev,			/* major-minor device number */
  struct buf **bufq,		/* pointer to array of buffers */
  unsigned int bufqsize,	/* number of buffers */
  int rw_flag			/* READING or WRITING */
)
{
/* Read or write scattered data from a device. */

  register struct buf *bp;
  register iovec_t *iop;
  static iovec_t iovec[NR_IOREQS];
  off_t pos;
  unsigned int i, iov_per_block;
#if !defined(NDEBUG)
  unsigned int start_in_use = bufs_in_use, start_bufqsize = bufqsize;
#endif /* !defined(NDEBUG) */

  if(bufqsize == 0) return;

#if !defined(NDEBUG)
  /* for READING, check all buffers on the list are obtained and held
   * (count > 0)
   */
  if (rw_flag == READING) {
	assert(bufqsize <= LMFS_MAX_PREFETCH);

	for(i = 0; i < bufqsize; i++) {
		assert(bufq[i] != NULL);
		assert(bufq[i]->lmfs_count > 0);
  	}

  	/* therefore they are all 'in use' and must be at least this many */
	assert(start_in_use >= start_bufqsize);
  }

  assert(dev != NO_DEV);
  assert(fs_block_size > 0);
  assert(howmany(fs_block_size, PAGE_SIZE) <= NR_IOREQS);
#endif /* !defined(NDEBUG) */

  /* For WRITING, (Shell) sort buffers on lmfs_blocknr.
   * For READING, the buffers are already sorted.
   */
  if (rw_flag == WRITING)
	sort_blocks(bufq, bufqsize);

  /* Set up I/O vector and do I/O.  The result of bdev I/O is OK if everything
   * went fine, otherwise the error code for the first failed transfer.
   */
  while (bufqsize > 0) {
	unsigned int p, nblocks = 0, niovecs = 0;
	int r;
	for (iop = iovec; nblocks < bufqsize; nblocks++) {
		vir_bytes vdata, blockrem;
		bp = bufq[nblocks];
		if (bp->lmfs_blocknr != bufq[0]->lmfs_blocknr + nblocks)
			break;
		blockrem = bp->lmfs_bytes;
		iov_per_block = howmany(blockrem, PAGE_SIZE);
		if (niovecs > NR_IOREQS - iov_per_block) break;
		vdata = (vir_bytes) bp->data;
		for(p = 0; p < iov_per_block; p++) {
			vir_bytes chunk =
			    blockrem < PAGE_SIZE ? blockrem : PAGE_SIZE;
			iop->iov_addr = vdata;
			iop->iov_size = chunk;
			vdata += PAGE_SIZE;
			blockrem -= chunk;
			iop++;
			niovecs++;
		}
		assert(p == iov_per_block);
		assert(blockrem == 0);
	}

	assert(nblocks > 0);
	assert(niovecs > 0 && niovecs <= NR_IOREQS);

	pos = (off_t)bufq[0]->lmfs_blocknr * fs_block_size;
	if (rw_flag == READING)
		r = bdev_gather(dev, pos, iovec, niovecs, BDEV_NOFLAGS);
	else
		r = bdev_scatter(dev, pos, iovec, niovecs, BDEV_NOFLAGS);

	/* Harvest the results.  The driver may have returned an error, or it
	 * may have done less than what we asked for.
	 */
	if (r < 0) {
		printf("fs cache: I/O error %d on device %d/%d, "
		    "block %"PRIu64"\n",
		    r, major(dev), minor(dev), bufq[0]->lmfs_blocknr);
	}
	for (i = 0; i < nblocks; i++) {
		bp = bufq[i];
		if (r < (ssize_t)bp->lmfs_bytes) {
			/* Transfer failed. */
			if (i == 0) {
				bp->lmfs_dev = NO_DEV;	/* Invalidate block */
			}
			break;
		}
		if (rw_flag == READING) {
			lmfs_put_block(bp);
		} else {
			MARKCLEAN(bp);
		}
		r -= bp->lmfs_bytes;
	}

	bufq += i;
	bufqsize -= i;

	if (rw_flag == READING) {
		/* Don't bother reading more than the device is willing to
		 * give at this time.  Don't forget to release those extras.
		 */
		while (bufqsize > 0) {
			bp = *bufq++;
			bp->lmfs_dev = NO_DEV;	/* invalidate block */
			lmfs_put_block(bp);
			bufqsize--;
		}
	}
	if (rw_flag == WRITING && i == 0) {
		/* We're not making progress, this means we might keep
		 * looping. Buffers remain dirty if un-written. Buffers are
		 * lost if invalidate()d or LRU-removed while dirty. This
		 * is better than keeping unwritable blocks around forever..
		 */
		break;
	}
  }

#if !defined(NDEBUG)
  if(rw_flag == READING) {
  	assert(start_in_use >= start_bufqsize);

	/* READING callers assume all bufs are released. */
	assert(start_in_use - start_bufqsize == bufs_in_use);
  }
#endif /* !defined(NDEBUG) */
}