Пример #1
0
/**
 * Allocates a SeedTable for seeds of length seed_len and returns it.
 *
 * The number of table entries is whatever kmer_num_kmers(seed_len)
 * reports. The per-seed arrays (n_match, match, cur) are obtained with
 * my_new0 so that counts start at 0 and match pointers start at NULL.
 * No match buffer is attached yet (match_buf is NULL, total_match is 0).
 */
SeedTable *seed_table_new(int seed_len) {
  SeedTable *tab = my_new(SeedTable, 1);
  int k;

  tab->seed_len = seed_len;
  tab->n_seed = kmer_num_kmers(seed_len);

  /* nothing has been counted or buffered yet */
  tab->total_match = 0;
  tab->match_buf = NULL;

  /* per-seed bookkeeping: expected match count, match array, and
   * how many matches have actually been stored so far
   */
  tab->n_match = my_new0(unsigned int, tab->n_seed);
  tab->match = my_new0(unsigned int *, tab->n_seed);
  tab->cur = my_new0(unsigned int, tab->n_seed);

  /* scratch space: SEED_TABLE_MAX_UNAMBIG nucleotide arrays,
   * each seed_len bytes long
   */
  tab->unambig_nucs = my_new(unsigned char *, SEED_TABLE_MAX_UNAMBIG);
  k = 0;
  while(k < SEED_TABLE_MAX_UNAMBIG) {
    tab->unambig_nucs[k] = my_new(unsigned char, tab->seed_len);
    k++;
  }

  return tab;
}
Пример #2
0
/*
 * Initializes a list: allocates a new Lista with my_new, marks it empty
 * (size 0, no first/last element) and stores it through pLista.
 */
void Initializare(Lista** pLista)
{
	Lista* lista;

	/* allocate one list header */
	lista = my_new(Lista);
	*pLista = lista;

	/* an empty list has no elements and no head/tail */
	lista->size = 0;
	lista->pointerUltim = NULL;
	lista->pointerPrim = NULL;
}
Пример #3
0
/**
 * Creates a new SeedFinder that refers to the provided SeedTable.
 *
 * seed_tab  - seed table stored (not copied) in the new SeedFinder
 * read_len  - length of the reads that will be searched
 * seed_len  - length of each seed
 * n_seed    - number of non-overlapping seeds; must be at least 1
 *
 * Problems (n_seed == 0, or read_len too short to hold n_seed
 * non-overlapping seeds) are reported through my_err.
 * NOTE(review): my_err is presumably fatal; confirm against its definition.
 *
 * Returns the newly allocated SeedFinder.
 */
SeedFinder *seed_finder_new(SeedTable *seed_tab, unsigned int read_len,
			    unsigned int seed_len, unsigned int n_seed) {
  SeedFinder *sf;
  /* unsigned, to match the parameters they are compared against */
  unsigned int i, j, min_read_len;

  if(n_seed < 1) {
    my_err("%s:%d: invalid number of seeds (%u)\n", 
	   __FILE__, __LINE__, n_seed);
  }
  
  sf = my_new(SeedFinder, 1);

  sf->seed_tab = seed_tab;
  sf->read_len = read_len;
  sf->seed_len = seed_len;
  sf->n_seed = n_seed;
  sf->match = my_new(SeedMatch, read_len);

  /* the read must be long enough to fit all seeds without overlap */
  min_read_len = seed_len * n_seed;
  if(read_len < min_read_len) {
    my_err("%s:%d min read len is %u for %u non-overlapping "
	   "seeds of length %u\n", __FILE__, __LINE__, min_read_len, 
	   n_seed, seed_len);
  }

  /* make arrays with dimensions n_seed x read_len, every cell set to -1 */
  sf->lowest_match = my_new(long *, n_seed);
  sf->seed_start = my_new(int *, n_seed);
  for(i = 0; i < n_seed; i++) {
    sf->lowest_match[i] = my_new(long, read_len);
    sf->seed_start[i] = my_new(int, read_len);

    for(j = 0; j < read_len; j++) {
      sf->lowest_match[i][j] = -1;
      sf->seed_start[i][j] = -1;
    }
  }

  /* one slot per seed for the best match and its offset in the read */
  sf->best_seeds = my_new(SeedMatch *, n_seed);
  sf->best_read_offsets = my_new0(int, n_seed);

  return sf;
}
Пример #4
0
/**
 * Listens on TCP port PORT and serves one connection at a time.
 *
 * For every accepted connection a handler object is created with
 * my_new(fd, port); received bytes are passed to its ws->on_recv
 * callback until the callback returns non-zero, the peer closes the
 * connection, or recv fails. The connection is then closed and the
 * handler released with my_free.
 *
 * Returns 0 on a clean exit, -1 if the listening socket cannot be set
 * up, accept fails, or a handler cannot be created.
 */
int main(int argc, char** argv) {
  (void)argc;
  (void)argv;

  int port = PORT;

  int sfd = socket(AF_INET, SOCK_STREAM, 0);
  if (sfd < 0) {
    perror("Unable to create socket");
    return -1;
  }

  /* zero the whole address so padding fields are not left as garbage */
  struct sockaddr_in local = {0};
  local.sin_family = AF_INET;
  local.sin_addr.s_addr = INADDR_ANY;
  local.sin_port = htons(port);
  int on = 1;
  if (setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, (char *)&on,sizeof(on)) < 0 ||
      bind(sfd, (struct sockaddr*)&local, sizeof(local)) < 0 ||
      listen(sfd, 1)) {
    perror("Unable to bind");
    close(sfd);
    return -1;
  }

  int ret = 0;
  while (1) {
    int fd = accept(sfd, NULL, NULL);
    if (fd < 0) {
      perror("Accept failed");
      ret = -1;
      break;
    }

    my_t my = my_new(fd, port);
    if (!my) {
      /* do not leak the accepted connection on the error path */
      close(fd);
      ret = -1;
      break;
    }
    ws_t ws = my->ws;

    char buf[BUF_LEN];
    while (1) {
      ssize_t read_bytes = recv(fd, buf, BUF_LEN, 0);
      if (read_bytes < 0) {
        perror("Receive failed");
        break;
      }
      if (read_bytes == 0) {
        /* orderly shutdown by the peer: nothing more will arrive */
        break;
      }
      if (ws->on_recv(ws, buf, read_bytes)) {
        break;
      }
    }
    close(fd);
    my_free(my);
  }

  close(sfd);
  return ret;
}
Пример #5
0
/**
 * Reads an array of chromosomes from a text file. Each line must hold a
 * chromosome name followed by its length, separated by whitespace.
 *
 * The number of chromosomes (the line count reported by
 * util_fcount_lines) is stored in *n_chr. Each chromosome gets an id
 * equal to its 0-based line index and a NULL assembly.
 *
 * Gzipped input, an empty file, a short read, a malformed line or a
 * length below 1 are all reported through my_err.
 */
Chromosome *chr_read_file(const char *filename, int *n_chr) {
  char line[LINE_MAX], name[LINE_MAX];
  Chromosome *chr_array, *cur;
  FILE *fp;
  int n_tok, idx;

  if(util_has_gz_ext(filename)) {
    my_err("%s:%d: gzipped chr files not currently supported\n",
	   __FILE__, __LINE__);
  }

  fp = util_must_fopen(filename, "r");

  *n_chr = util_fcount_lines(fp);
  if(*n_chr < 1) {
    my_err("%s:%d: chromosome file '%s' is empty\n", 
	   __FILE__, __LINE__, filename);
  }

  chr_array = my_new(Chromosome, *n_chr);

  idx = 0;
  while(idx < *n_chr) {
    cur = &chr_array[idx];

    if(fgets(line, sizeof(line), fp) == NULL) {
      my_err("%s:%d: expected %d lines in file, but only read %d\n", 
	     __FILE__, __LINE__, *n_chr, idx);
    }

    /* first token is the name, second is the length */
    n_tok = sscanf(line, "%s %ld", name, &cur->len);
    if(n_tok < 2) {
      my_err("%s:%d: line did not have at least 2 tokens:\n'%s'",
	     __FILE__, __LINE__, line);
    }

    cur->name = util_str_dup(name);
    cur->assembly = NULL;
    cur->id = idx;

    if(cur->len < 1) {
      my_err("%s:%d: chr length (%ld) should be >= 1",
	     __FILE__, __LINE__, cur->len);
    }

    idx++;
  }

  fclose(fp);

  return chr_array;
}
Пример #6
0
/**
 * Reads an array of chromosomes from a file opened through
 * util_must_gzopen. Each line must hold a chromosome name followed by
 * its length, separated by whitespace.
 *
 * The number of chromosomes (the line count reported by
 * util_gzcount_lines) is stored in *n_chrom. Each chromosome gets an id
 * equal to its 0-based line index and a NULL assembly.
 *
 * An empty file, a short read, a malformed line or a length below 1
 * are all reported through my_err.
 */
Chromosome *chrom_read_gzfile(const char *filename, int *n_chrom) {
  char line[LINE_MAX], name[LINE_MAX];
  Chromosome *chrom_array, *cur;
  gzFile in;
  int n_tok, idx;

  in = util_must_gzopen(filename, "r");

  *n_chrom = util_gzcount_lines(in);
  if(*n_chrom < 1) {
    my_err("%s:%d: chromosome file '%s' is empty\n", 
	   __FILE__, __LINE__, filename);
  }

  chrom_array = my_new(Chromosome, *n_chrom);

  for(idx = 0; idx < *n_chrom; idx++) {
    cur = &chrom_array[idx];

    if(gzgets(in, line, sizeof(line)) == NULL) {
      my_err("%s:%d: expected %d lines in file, but only read %d\n", 
	     __FILE__, __LINE__, *n_chrom, idx);
    }

    /* first token is the name, second is the length */
    n_tok = sscanf(line, "%s %ld", name, &cur->len);
    if(n_tok < 2) {
      my_err("%s:%d: line did not have at least 2 tokens:\n'%s'",
	     __FILE__, __LINE__, line);
    }

    cur->name = util_str_dup(name);
    cur->assembly = NULL;
    cur->id = idx;

    if(cur->len < 1) {
      my_err("%s:%d: chrom length (%ld) should be >= 1",
	     __FILE__, __LINE__, cur->len);
    }
  }

  gzclose(in);

  return chrom_array;
}
Пример #7
0
/**
 * Opens one output file per chromosome in output_dir and returns the
 * array of opened files (indexed like chr_tab->chr_array).
 *
 * Per-chromosome files are named "<chr name>.mapped.txt.gz". Two extra
 * files, "unmapped.txt.gz" and "multi_mapped.txt.gz", are opened in the
 * same directory and handed back through unmapped_out_file and
 * multi_out_file.
 */
gzFile *open_multi_out_files(const char *output_dir,
			     ChrTable *chr_tab, gzFile *unmapped_out_file, 
			     gzFile *multi_out_file) {
  gzFile *files;
  char *prefix, *path;
  int c;

  files = my_new(gzFile, chr_tab->n_chr);

  /* directory prefix always ends in '/' so names can simply be appended */
  prefix = util_str_ends_with(output_dir, "/")
    ? util_str_dup(output_dir)
    : util_str_concat(output_dir, "/", NULL);

  /* uniquely-mapped reads: one file per chromosome */
  c = 0;
  while(c < chr_tab->n_chr) {
    path = util_str_concat(prefix, chr_tab->chr_array[c].name,
                           ".mapped.txt.gz", NULL);
    files[c] = util_check_gzopen(path);
    my_free(path);
    c++;
  }

  /* reads that could not be mapped at all */
  path = util_str_concat(prefix, "unmapped.txt.gz", NULL);
  *unmapped_out_file = util_check_gzopen(path);
  my_free(path);

  /* reads that map to more than one location */
  path = util_str_concat(prefix, "multi_mapped.txt.gz", NULL);
  *multi_out_file = util_check_gzopen(path);
  my_free(path);

  my_free(prefix);

  return files;
}
Пример #8
0
/**
 * Creates a deep copy of chr: id and len are copied by value, while the
 * name and assembly strings are duplicated with util_str_dup (a NULL
 * string in the source stays NULL in the copy).
 *
 * Returns the newly allocated chromosome.
 */
Chromosome *chr_copy(const Chromosome *chr) {
  Chromosome *dup = my_new(Chromosome, 1);

  /* plain value fields */
  dup->id = chr->id;
  dup->len = chr->len;

  /* string fields are optional; only duplicate the ones that are set */
  dup->name = chr->name ? util_str_dup(chr->name) : NULL;
  dup->assembly = chr->assembly ? util_str_dup(chr->assembly) : NULL;

  return dup;
}
Пример #9
0
/**
 * Reads a UCSC chromInfo.txt file and creates a ChrTable data structure
 * from it.
 *
 * Chromosome names and lengths come from chr_read_file. offset[i] is set
 * to the summed length of all chromosomes before chromosome i, and
 * total_chr_len to the summed length of all chromosomes, so a genomic
 * coordinate can be represented as a single unsigned integer.
 * NOTE(review): an earlier comment mentioned offsets for both forward and
 * reverse strands; only one offset per chromosome is set here.
 *
 * If the running total wraps around, the genome does not fit in an
 * unsigned int and the problem is reported through my_err.
 */
ChrTable *chr_table_read(const char *filename) {
  ChrTable *chr_tab = my_new(ChrTable, 1);
  int i;
  unsigned int len, prev_len;
  
  /* read chromosome names and lengths from file */
  chr_tab->chr_array = chr_read_file(filename, &chr_tab->n_chr);
  
  /* set offsets, total length */
  prev_len = chr_tab->total_chr_len = 0;
  chr_tab->offset = my_new(unsigned int, chr_tab->n_chr);
  for(i = 0; i < chr_tab->n_chr; i++) {
    len = chr_tab->chr_array[i].len;
    chr_tab->offset[i] = chr_tab->total_chr_len;
    chr_tab->total_chr_len += len;

    /* unsigned addition wraps on overflow, so a total smaller than the
     * previous one means the genome exceeded UINT_MAX
     */
    if(chr_tab->total_chr_len < prev_len) {
      /* UINT_MAX is an unsigned int, so it must be printed with %u */
      my_err("genome length exceeds max %u\n", UINT_MAX);
    }
    prev_len = chr_tab->total_chr_len;
  }
  
  return chr_tab;
}
Пример #10
0
// Replacement global operator new: forwards every allocation request of
// nSize bytes to my_new and returns whatever it yields.
// NOTE(review): nothing here handles a null result from my_new; confirm
// that my_new never returns null or that callers tolerate it.
void* __cdecl operator new(size_t nSize)
{
	void* pBlock = my_new(nSize);
	return pBlock;
}