Esempio n. 1
0
File: tree.c Progetto: fanf2/exim
/* Write the subtree rooted at p to f, one node per line, visiting the node
first, then its left subtree, then its right subtree. Each line carries two
flag characters ('Y' when the left/right child exists, 'N' when it does not)
followed by a space and the node's name. p must not be NULL. */
static void
write_tree(tree_node *p, FILE *f)
{
char left_flag = (p->left != NULL)? 'Y' : 'N';
char right_flag = (p->right != NULL)? 'Y' : 'N';

fprintf(f, "%c%c %s\n", left_flag, right_flag, p->name);

if (p->left != NULL)
  write_tree(p->left, f);
if (p->right != NULL)
  write_tree(p->right, f);
}
Esempio n. 2
0
File: huffman.c Progetto: cifkao/bis
/* Serialise the Huffman tree rooted at node to f, depth first.
 * A node without a left child is treated as a leaf: a 1 bit followed by its
 * symbol (SYMBOL_LENGTH wide). Any other node emits a 0 bit and then its
 * left and right subtrees, in that order. */
void write_tree(file *f, huf_node *node){
  if(!node->left){ // leaf node
    bitfile_put_bit(f, 1);
    bitfile_put_symbol(f, node->symbol, SYMBOL_LENGTH);
    return;
  }

  // internal node
  bitfile_put_bit(f, 0);
  write_tree(f, node->left);
  write_tree(f, node->right);
}
Esempio n. 3
0
/*
 * Run the three commit steps in order: write_tree(), commit_tree() and
 * update_ref(). The repository is reloaded only when all three succeed.
 *
 * Returns TRUE on success. Returns FALSE as soon as any step fails; the
 * failing helper receives `error` and is presumably the one that fills it.
 */
gboolean
gitg_commit_commit(GitgCommit *commit, gchar const *comment, gboolean signoff, GError **error)
{
	g_return_val_if_fail(GITG_IS_COMMIT(commit), FALSE);

	gchar *tree;
	gchar *ref;
	gchar *subject;
	gboolean ok;

	if (!write_tree(commit, &tree, error))
		return FALSE;

	/* The tree string is only needed to create the commit object. */
	ok = commit_tree(commit, tree, comment, signoff, &ref, error);
	g_free(tree);
	if (!ok)
		return FALSE;

	/* NOTE(review): ref is never released in this function - confirm who owns it. */
	subject = comment_parse_subject(comment);
	ok = update_ref(commit, ref, subject, error);
	g_free(subject);
	if (!ok)
		return FALSE;

	gitg_repository_reload(commit->priv->repository);
	return TRUE;
}
Esempio n. 4
0
/*
 * Serialise the tree cache into out: recount the entries, then write the
 * tree. Returns -1 when the buffer reports an out-of-memory condition
 * afterwards, 0 otherwise.
 */
int git_tree_cache_write(git_buf *out, git_tree_cache *tree)
{
	recount_entries(tree);
	write_tree(out, tree);

	if (git_buf_oom(out))
		return -1;

	return 0;
}
Esempio n. 5
0
/*
 * Save the HIKM tree below the directory strSavePath.
 *
 * The header (M, K, depth, max_niters, verb) is written to
 * "<strSavePath>\cluster.txt"; the root node is then handed to write_tree()
 * with the index string "0".
 *
 * Returns TRUE on success. Returns FALSE when strSavePath is NULL, when the
 * header path does not fit in MAX_PATH, when the header file cannot be
 * opened, written or closed, or when write_tree() fails.
 */
vl_bool CHIKMTree::vl_hikm_write_tree(const char* strSavePath)
{
	static const char headerName[] = "\\cluster.txt";
	char strFileName[MAX_PATH];

	if (strSavePath == NULL) {
		return FALSE;
	}

	/* Reject paths that would overflow the fixed-size buffer. */
	if (strlen(strSavePath) + strlen(headerName) >= sizeof strFileName) {
		return FALSE;
	}
	strcpy( strFileName, strSavePath );
	strcat( strFileName, headerName );

	FILE * file = fopen( strFileName,"wb");
	if (!file) {
		return FALSE;
	}

	int written = fprintf(file,"%u %u %u %d %d\n", m_VlHIKMTree->M, m_VlHIKMTree->K, m_VlHIKMTree->depth,
		m_VlHIKMTree->max_niters, m_VlHIKMTree->verb);

	/* Close on every path so a failed header write does not leak the handle;
	   a failing fclose means the buffered header never reached the file. */
	int closed = fclose(file);
	if (written < 0 || closed != 0) {
		return FALSE;
	}

	if(!write_tree(m_VlHIKMTree->root, m_VlHIKMTree->depth, strSavePath, "0")){
		return FALSE;
	}

	return TRUE;
}
Esempio n. 6
0
void BenchTree()
{
        
        printf("\n************ ROOT/TTree I/O  ************ \n");       
        const char *fname2 = "testio.root";
        Double_t wbytes;        
        wbytes = write_tree(fname2,100000000,0); 
        read_tree(fname2,wbytes); 
}
Esempio n. 7
0
File: main.c Progetto: Rubusch/c
/*
 * Menu action: call write_tree() when the node counter is non-zero and
 * report "done."; otherwise report that the tree is empty.
 */
void select_print(unsigned int* number_of_nodes)
{
  if(0 == *number_of_nodes){
    puts("failed - tree empty!");
    return;
  }

  write_tree();
  puts("done.");
}
Esempio n. 8
0
File: tree.c Progetto: fanf2/exim
/* Write the tree rooted at p to f. An empty tree (p == NULL) is written as
the single line "XX"; anything else is handed to write_tree(). */
void
tree_write(tree_node *p, FILE *f)
{
if (p != NULL)
  {
  write_tree(p, f);
  }
else
  {
  fprintf(f, "XX\n");
  }
}
Esempio n. 9
0
/*
 * Append the cache node `tree` and all of its descendants to `out`.
 *
 * Each node is written as: name, a NUL byte, entry_count, a space,
 * children_count and a newline. The raw OID (GIT_OID_RAWSZ bytes) follows
 * unless entry_count is -1. Children are then written in array order.
 */
static void write_tree(git_buf *out, git_tree_cache *tree)
{
	size_t child = 0;

	git_buf_printf(out, "%s%c%"PRIdZ" %"PRIuZ"\n", tree->name, 0, tree->entry_count, tree->children_count);

	if (tree->entry_count != -1) {
		git_buf_put(out, (const char *) &tree->oid, GIT_OID_RAWSZ);
	}

	while (child < tree->children_count) {
		write_tree(out, tree->children[child]);
		child++;
	}
}
Esempio n. 10
0
/*
 * Render page p into "<conf.output_dir>/<p->file_path>".
 *
 * Builds the environment for the page, builds and writes its tree to the
 * output file, then releases the environment and the temporary strings.
 * When verbosity is above 0 the rendered path is printed. Does nothing when
 * p is NULL. An output file that cannot be opened is reported through
 * fatal().
 */
void 
render(struct page *p)
{
    int depth;
    FILE *out;
    struct lacy_env env;
    struct page_stack p_stack;
    struct ut_str outfile;

    /* Check the argument first so curtok is never initialised without the
       matching str_free() at the end of this function. */
    if (NULL == p)
        return;

    str_init(&curtok);

    str_init(&outfile);
    str_append_str(&outfile, conf.output_dir.s);
    str_append(&outfile, '/');
    str_append_str(&outfile, p->file_path);

    /* depth - 1 since we added output dir to path */
    depth = build_depth(outfile.s) - 1;
    /* NOTE(review): assumes fatal() takes a printf-style format and does not
       return - confirm. */
    if (NULL == (out = fopen(outfile.s, "w"))) 
        fatal("Unable to open: %s: ", outfile.s);

    p_stack.size = 0;
    p_stack.pos = 0;
    /* Build Environment */
    env.depth = depth;
    env.p_stack = &p_stack;
    env.sym_tbl = NULL;

    env_build(p, &env);
    /* set stack back to top */
    p_stack.pos = 0;

    /* do it already */
    build_tree(&env);
    write_tree(out, &env);

    env_free(&env);
    fclose(out);

    str_free(&curtok);

    if (verbosity > 0) {
        printf("Rendered %s\n", outfile.s);
    }
    str_free(&outfile);
}
Esempio n. 11
0
/*
 * Produce the tree OID for the given index.
 *
 * Fails with GIT_EBAREINDEX when the index has no repository. When the
 * index's tree cache has a non-negative entry count, its OID is copied
 * into `oid` directly; otherwise the tree is built with write_tree()
 * starting from the empty directory name at position 0.
 *
 * Returns GIT_SUCCESS, or a (rethrown) error code below GIT_SUCCESS.
 */
int git_tree_create_fromindex(git_oid *oid, git_index *index)
{
	int written;

	if (index->repository == NULL)
		return git__throw(GIT_EBAREINDEX, "Failed to create tree. The index file is not backed up by an existing repository");

	if (index->tree != NULL && index->tree->entries >= 0) {
		git_oid_cpy(oid, &index->tree->oid);
		return GIT_SUCCESS;
	}

	/* The tree cache didn't help us */
	written = write_tree(oid, index, "", 0);
	if (written < GIT_SUCCESS)
		return git__rethrow(written, "Failed to create tree");

	return GIT_SUCCESS;
}
Esempio n. 12
0
/*
 * Open `filename` for reading, parse a tree from it with read_tree() and
 * return that tree. die() is called when the file cannot be opened. At
 * HIGH_VERBOSE or above the tree is also echoed to stderr.
 */
TREE_T* read_tree_from_file
  (char* filename) 
{
    FILE* in = NULL;
    TREE_T* parsed;

    if (!open_file(filename, "r", 1, "tree", "tree", &in)) {
      die("Couldn't open the file %s.\n", filename);
    }

    read_tree(in, &parsed);
    (void) fclose(in);

    if (verbosity < HIGH_VERBOSE) {
      return parsed;
    }

    fprintf(stderr, "Read tree: ");
    write_tree(parsed, stderr);
    return parsed;
}
Esempio n. 13
0
/*
 * Recursively save the cluster data of `node` and of its descendants.
 *
 * The node's filter is written to "<strSavePath>\<strInd>.txt". When height
 * is greater than 1, every child of the node is saved in turn, using an
 * index string made of strInd followed by the child number printed in a
 * field of at least four characters, with every blank replaced by '0'.
 *
 * Returns TRUE on success. Returns FALSE when a file name or child index
 * would not fit in MAX_PATH, or when any write fails.
 */
static vl_bool write_tree(VlHIKMNode* node, vl_uint32 height, const char* strSavePath, const char* strInd)
{	
	char strFileClusterName[MAX_PATH];

	/* "<dir>" + "\" + "<index>" + ".txt" plus the terminator must fit. */
	if (strlen(strSavePath) + 1 + strlen(strInd) + 4 >= sizeof strFileClusterName) {
		return FALSE;
	}
	strcpy( strFileClusterName, strSavePath );
	strcat( strFileClusterName, "\\" );
	strcat( strFileClusterName, strInd );
	strcat( strFileClusterName, ".txt" );
	if(!node->filter->vl_ikm_write_tree(strFileClusterName)){
		return FALSE;
	}

	if (height>1) {
		char strIndChild[MAX_PATH];

		/* "%4u" emits between 4 and 10 characters for a 32-bit value. */
		if (strlen(strInd) + 10 >= sizeof strIndChild) {
			return FALSE;
		}

		vl_uint32 k;
		for (k=0; k<node->filter->vl_ikm_get_k(); k++) {
			sprintf( strIndChild, "%s%4u", strInd, (unsigned int) k );

			/* Turn the blank padding produced by the field width into zeros. */
			for ( char *c = strIndChild; *c != '\0'; c++ )
			{
				if ( *c == ' ')
				{
					*c = '0';
				}
			}

			if(!write_tree(node->children[k], height-1, strSavePath, strIndChild)){
				return FALSE;
			}
		}
	}
	return TRUE;
}
Esempio n. 14
0
/*************************************************************************
 * Entry point for pmp_bf
 *
 * Usage: pmp [options] <tree file> <MEME file>
 *
 * Reads a phylogenetic tree and a set of motifs, and for every selected
 * motif prints, for each score value, the false positive rate (p-value
 * under the background model) and the false negative rate (1 - p-value
 * under the position-dependent motif model).
 *
 * Exits with EXIT_FAILURE after printing the usage text when the two
 * positional arguments are missing; returns 0 otherwise.
 *************************************************************************/
int main(int argc, char *argv[]) {

  char* bg_filename = NULL;
  char* motif_name = "motif"; // Use this motif name in the output.
  STRING_LIST_T* selected_motifs = NULL;
  double fg_rate = 1.0;
  double bg_rate = 1.0;
  double purine_pyrimidine = 1.0; // r
  double transition_transversion = 0.5; // R
  double pseudocount = 0.1;
  GAP_SUPPORT_T gap_support = SKIP_GAPS;
  MODEL_TYPE_T model_type = F81_MODEL;
  BOOLEAN_T use_halpern_bruno = FALSE;
  char* ustar_label = NULL;	// TLB; create uniform star tree
  int i;

  program_name = "pmp_bf";

  /**********************************************
   * COMMAND LINE PROCESSING
   **********************************************/

  // Define command line options. (FIXME: Repeated code)
  // FIXME: Note that if you add or remove options you
  // must change n_options.
  int n_options = 12;
  cmdoption const pmp_options[] = {
    {"hb", NO_VALUE},
    {"ustar", REQUIRED_VALUE},
    {"model", REQUIRED_VALUE},
    {"pur-pyr", REQUIRED_VALUE},
    {"transition-transversion", REQUIRED_VALUE},
    {"bg", REQUIRED_VALUE},
    {"fg", REQUIRED_VALUE},
    {"motif", REQUIRED_VALUE},
    {"motif-name", REQUIRED_VALUE},
    {"bgfile", REQUIRED_VALUE},
    {"pseudocount", REQUIRED_VALUE},
    {"verbosity", REQUIRED_VALUE}
  };

  int option_index = 0;

  // Define the usage message.
  char      usage[1000] = "";
  strcat(usage, "USAGE: pmp [options] <tree file> <MEME file>\n");
  strcat(usage, "\n");
  strcat(usage, "   Options:\n");

  // Evolutionary model parameters.
  strcat(usage, "     --hb\n");
  strcat(usage, "     --model single|average|jc|k2|f81|f84|hky|tn");
  strcat(usage, " (default=f81)\n");
  strcat(usage, "     --pur-pyr <float> (default=1.0)\n");
  strcat(usage, "     --transition-transversion <float> (default=0.5)\n");
  strcat(usage, "     --bg <float> (default=1.0)\n");
  strcat(usage, "     --fg <float> (default=1.0)\n");

  // Motif parameters.
  strcat(usage, "     --motif <id> (default=all)\n");
  strcat(usage, "     --motif-name <string> (default from motif file)\n");

  // Miscellaneous parameters
  strcat(usage, "     --bgfile <background> (default from motif file)\n");
  strcat(usage, "     --pseudocount <float> (default=0.1)\n");
  strcat(usage, "     --ustar <label>\n");	// TLB; create uniform star tree
  strcat(usage, "     --verbosity [1|2|3|4] (default 2)\n");
  strcat(usage, "\n    Prints the FP and FN rate at each of 10000 score values.\n");
  // The columns named here must match the printf() in the output loop below,
  // which reports FPR and FNR.
  strcat(usage, "\n    Output format: [<motif_id> score <score> FPR <fpr> FNR <fnr>]+\n");

  // Parse the command line.
  if (simple_setopt(argc, argv, n_options, pmp_options) != NO_ERROR) {
    die("Error processing command line options: option name too long.\n");
  }

  while (TRUE) { 
    int c = 0;
    char* option_name = NULL;
    char* option_value = NULL;
    const char * message = NULL;

    // Read the next option, and break if we're done.
    c = simple_getopt(&option_name, &option_value, &option_index);
    if (c == 0) {
      break;
    } else if (c < 0) {
      (void) simple_getopterror(&message);
      die("Error processing command line options (%s)\n", message);
    }
    
    if (strcmp(option_name, "model") == 0) {
      if (strcmp(option_value, "jc") == 0) {
        model_type = JC_MODEL;
      } else if (strcmp(option_value, "k2") == 0) {
        model_type = K2_MODEL;
      } else if (strcmp(option_value, "f81") == 0) {
        model_type = F81_MODEL;
      } else if (strcmp(option_value, "f84") == 0) {
        model_type = F84_MODEL;
      } else if (strcmp(option_value, "hky") == 0) {
        model_type = HKY_MODEL;
      } else if (strcmp(option_value, "tn") == 0) {
        model_type = TAMURA_NEI_MODEL;
      } else if (strcmp(option_value, "single") == 0) {
        model_type = SINGLE_MODEL;
      } else if (strcmp(option_value, "average") == 0) {
        model_type = AVERAGE_MODEL;
      } else {
        die("Unknown model: %s\n", option_value);
      }
    } else if (strcmp(option_name, "hb") == 0){
        use_halpern_bruno = TRUE;
    } else if (strcmp(option_name, "ustar") == 0){	// TLB; create uniform star tree
        ustar_label = option_value;
    } else if (strcmp(option_name, "pur-pyr") == 0){
        purine_pyrimidine = atof(option_value);
    } else if (strcmp(option_name, "transition-transversion") == 0){
        transition_transversion = atof(option_value);
    } else if (strcmp(option_name, "bg") == 0){
      bg_rate = atof(option_value);
    } else if (strcmp(option_name, "fg") == 0){
      fg_rate = atof(option_value);
    } else if (strcmp(option_name, "motif") == 0){
        if (selected_motifs == NULL) {
          selected_motifs = new_string_list();
        }
       add_string(option_value, selected_motifs);
    } else if (strcmp(option_name, "motif-name") == 0){
        motif_name = option_value;
    } else if (strcmp(option_name, "bgfile") == 0){
      bg_filename = option_value;
    } else if (strcmp(option_name, "pseudocount") == 0){
        pseudocount = atof(option_value);
    } else if (strcmp(option_name, "verbosity") == 0){
        verbosity = atoi(option_value);
    }
  }

  // Must have tree and motif file names
  if (argc != option_index + 2) {
    fprintf(stderr, "%s", usage);
    exit(EXIT_FAILURE);
  } 

  /**********************************************
   * Read the phylogenetic tree.
   **********************************************/
  char* tree_filename = NULL;
  TREE_T* tree = NULL;
  tree_filename = argv[option_index];
  option_index++;
  tree = read_tree_from_file(tree_filename);

  // get the species names
  STRING_LIST_T* alignment_species = make_leaf_list(tree);
  char *root_label = get_label(tree);	// in case target in center
  if (strlen(root_label)>0) add_string(root_label, alignment_species);
  //write_string_list(" ", alignment_species, stderr);

  // TLB; Convert the tree to a uniform star tree with
  // the target sequence at its center.
  if (ustar_label != NULL) {
    tree = convert_to_uniform_star_tree(tree, ustar_label);
    if (tree == NULL) 
      die("Tree or alignment missing target %s\n", ustar_label);
    if (verbosity >= NORMAL_VERBOSE) {
      fprintf(stderr, 
	"Target %s placed at center of uniform (d=%.3f) star tree:\n", 
          ustar_label, get_total_length(tree) / get_num_children(tree) 
      );
      write_tree(tree, stderr);
    }
  }

  /**********************************************
   * Read the motifs.
   **********************************************/
  char* meme_filename = argv[option_index];
  option_index++;
  int num_motifs = 0; 

  MREAD_T *mread;
  ALPH_T alph;
  ARRAYLST_T *motifs;
  ARRAY_T *bg_freqs;

  mread = mread_create(meme_filename, OPEN_MFILE);
  mread_set_bg_source(mread, bg_filename);
  mread_set_pseudocount(mread, pseudocount);
  // read motifs
  motifs = mread_load(mread, NULL);
  alph = mread_get_alphabet(mread);
  bg_freqs = mread_get_background(mread);
  // check
  if (arraylst_size(motifs) == 0) die("No motifs in %s.", meme_filename);

  

  // TLB; need to resize bg_freqs array to ALPH_SIZE items
  // or copy array breaks in HB mode.  This throws away
  // the freqs for the ambiguous characters;
  int asize = alph_size(alph, ALPH_SIZE);
  resize_array(bg_freqs, asize);

  /**************************************************************
  * Compute probability distributions for each of the selected motifs.
  **************************************************************/
  int motif_index;
  for (motif_index = 0; motif_index < arraylst_size(motifs); motif_index++) {

    MOTIF_T* motif = (MOTIF_T*)arraylst_get(motif_index, motifs);
    char* motif_id = get_motif_id(motif);
    char* bare_motif_id = motif_id;

    // We may have specified on the command line that
    // only certain motifs were to be used.
    if (selected_motifs != NULL) {
      if (*bare_motif_id == '+' || *bare_motif_id == '-') {
        // The selected  motif id won't included a strand indicator.
        bare_motif_id++;
      }
      if (have_string(bare_motif_id, selected_motifs) == FALSE) {
        continue;
      }
    }

    if (verbosity >= NORMAL_VERBOSE) {
      fprintf(
        stderr, 
        "Using motif %s of width %d.\n",
        motif_id, get_motif_length(motif)
      );
    }

    // Build an array of evolutionary models for each position in the motif.
    EVOMODEL_T** models = make_motif_models(
      motif, 
      bg_freqs,
      model_type,
      fg_rate, 
      bg_rate, 
      purine_pyrimidine, 
      transition_transversion, 
      use_halpern_bruno
    );

    // Get the frequencies under the background model (row 0) 
    // and position-dependent scores (rows 1..w)
    // for each possible alignment column.
    MATRIX_T* pssm_matrix = build_alignment_pssm_matrix(
      alph,
      alignment_species,
      get_motif_length(motif) + 1, 
      models, 
      tree, 
      gap_support
    );
    ARRAY_T* alignment_col_freqs = allocate_array(get_num_cols(pssm_matrix)); 
    copy_array(get_matrix_row(0, pssm_matrix), alignment_col_freqs);
    remove_matrix_row(0, pssm_matrix);		// throw away first row
    //print_col_frequencies(alph, alignment_col_freqs);

    //
    // Get the position-dependent null model alignment column frequencies
    //
    // NOTE(review): pos_dep_bkg, alignment_col_freqs and pssm_matrix are not
    // released at the end of this iteration - confirm whether the PSSM
    // builders take ownership of them.
    int w = get_motif_length(motif);
    int ncols = get_num_cols(pssm_matrix); 
    MATRIX_T* pos_dep_bkg = allocate_matrix(w, ncols);
    for (i=0; i<w; i++) {
      // get the evo model corresponding to this column of the motif
      // and store it as the first evolutionary model.
      myfree(models[0]);
      // Use motif PSFM for equilibrium freqs. for model.
      // NOTE(review): site_specific_freqs is never freed here - presumably
      // make_model() keeps the array; verify.
      ARRAY_T* site_specific_freqs = allocate_array(asize);
      int j = 0;
      for(j = 0; j < asize; j++) {
	double value = get_matrix_cell(i, j, get_motif_freqs(motif));
	set_array_item(j, value, site_specific_freqs);
      }
      if (use_halpern_bruno == FALSE) {
	models[0] = make_model(
	  model_type,
	  fg_rate,
	  transition_transversion,
	  purine_pyrimidine,
	  site_specific_freqs,
          NULL
	);
      } else {
        models[0] = make_model(
	  model_type,
	  fg_rate,
	  transition_transversion,
	  purine_pyrimidine,
	  bg_freqs,
	  site_specific_freqs
	);
      }
      // get the alignment column frequencies using this model
      MATRIX_T* tmp_pssm_matrix = build_alignment_pssm_matrix(
        alph,
	alignment_species,
	2,				// only interested in freqs under bkg
	models, 
	tree, 
	gap_support
      );
      // assemble the position-dependent background alignment column freqs.
      set_matrix_row(i, get_matrix_row(0, tmp_pssm_matrix), pos_dep_bkg);
      // chuck the pssm (not his real name)
      free_matrix(tmp_pssm_matrix);
    }

    //
    // Compute and print the score distribution under the background model
    // and under the (position-dependent) motif model.
    //
    int range = 10000;	// 10^4 gives same result as 10^5, but 10^3 differs

    // under background model
    PSSM_T* pssm = build_matrix_pssm(alph, pssm_matrix, alignment_col_freqs, range);

    // under position-dependent background (motif) model
    PSSM_T* pssm_pos_dep = build_matrix_pssm(alph, pssm_matrix, alignment_col_freqs, range);
    get_pv_lookup_pos_dep(
      pssm_pos_dep, 
      pos_dep_bkg, 
      NULL // no priors used
    );

    // print FP and FN distributions
    int num_items = get_pssm_pv_length(pssm_pos_dep);
    for (i=0; i<num_items; i++) {
      double pvf = get_pssm_pv(i, pssm);
      double pvt = get_pssm_pv(i, pssm_pos_dep);
      double fpr = pvf;
      double fnr = 1 - pvt;
      // Skip scores where (almost) everything is a false positive or
      // nothing is a false negative.
      if (fpr >= 0.99999 || fnr == 0) continue;
      printf("%s score %d FPR %.3g FNR %.3g\n", motif_id, i, fpr, fnr);
    }

    // free stuff
    free_pssm(pssm);
    free_pssm(pssm_pos_dep);
    if (models != NULL) {
      int model_index;
      int num_models = get_motif_length(motif) + 1;
      for (model_index = 0; model_index < num_models; model_index++) {
        free_model(models[model_index]);
      }
      myfree(models);
    }

  } // motif

  arraylst_destroy(destroy_motif, motifs);

  /**********************************************
   * Clean up.
   **********************************************/
  // TLB may have encountered a memory corruption bug here
  // CEG has not been able to reproduce it. valgrind says all is well.
  free_array(bg_freqs);
  free_tree(TRUE, tree);
  free_string_list(selected_motifs);

  return(0);
} // main
Esempio n. 15
0
File: huffman.c Progetto: cifkao/bis
/* Write the whole Huffman tree to f by serialising it from its root. */
void huf_write_tree(file *f, huf_tree tree){
  huf_node *root = tree.root;

  write_tree(f, root);
}
Esempio n. 16
0
/*
 * Build and store the tree object for directory `dirname` from the index
 * entries beginning at position `start`, writing its id into `oid`.
 *
 * When the tree cache already holds a usable entry for `dirname`
 * (entries >= 0) the cached id is copied and the result of
 * find_next_dir() is returned. Otherwise the entries below `dirname` are
 * walked: plain files are appended directly, and each sub-directory is
 * written by a recursive call and appended under its last path component.
 *
 * Returns the index position where the walk stopped on success, or an
 * error code below GIT_SUCCESS. The first failure aborts the walk, so no
 * tree is written from partially collected or uninitialised entries.
 */
static int write_tree(git_oid *oid, git_index *index, const char *dirname, unsigned int start)
{
	git_treebuilder *bld = NULL;
	unsigned int i, entries = git_index_entrycount(index);
	int error;
	size_t dirname_len = strlen(dirname);
	const git_tree_cache *cache;

	cache = git_tree_cache_get(index->tree, dirname);
	if (cache != NULL && cache->entries >= 0){
		git_oid_cpy(oid, &cache->oid);
		return find_next_dir(dirname, index, start);
	}

	error = git_treebuilder_create(&bld, NULL);
	if (bld == NULL) {
		return GIT_ENOMEM;
	}

	/*
	 * This loop is unfortunate, but necessary. The index doesn't have
	 * any directories, so we need to handle that manually, and we
	 * need to keep track of the current position.
	 */
	for (i = start; i < entries; ++i) {
		git_index_entry *entry = git_index_get(index, i);
		char *filename, *next_slash;

		/*
		 * If we've left our (sub)tree, exit the loop and return. The
		 * first check is an early out (and security for the
		 * third). The second check is a simple prefix comparison. The
		 * third check catches situations where there is a directory
		 * win32/sys and a file win32mmap.c. Without it, the following
		 * code believes there is a file win32/mmap.c
		 */
		if (strlen(entry->path) < dirname_len ||
		    memcmp(entry->path, dirname, dirname_len) ||
		    (dirname_len > 0 && entry->path[dirname_len] != '/')) {
			break;
		}

		filename = entry->path + dirname_len;
		if (*filename == '/')
			filename++;
		next_slash = strchr(filename, '/');
		if (next_slash) {
			git_oid sub_oid;
			int written;
			char *subdir, *last_comp;

			subdir = git__strndup(entry->path, next_slash - entry->path);
			if (subdir == NULL) {
				error = GIT_ENOMEM;
				goto cleanup;
			}

			/* Write out the subtree */
			written = write_tree(&sub_oid, index, subdir, i);
			if (written < 0) {
				/*
				 * sub_oid was never filled in: stop here instead of
				 * appending it and losing the error code.
				 */
				error = git__rethrow(written, "Failed to write subtree %s", subdir);
				free(subdir);
				goto cleanup;
			}
			i = written - 1; /* -1 because of the loop increment */

			/*
			 * We need to figure out what we want to insert
			 * into this tree. If we're traversing
			 * deps/zlib/, then we only want to write
			 * 'zlib' into the tree.
			 */
			last_comp = strrchr(subdir, '/');
			if (last_comp) {
				last_comp++; /* Get rid of the '/' */
			} else {
				last_comp = subdir;
			}
			error = append_entry(bld, last_comp, &sub_oid, S_IFDIR);
			free(subdir);
			if (error < GIT_SUCCESS) {
				error = git__rethrow(error, "Failed to insert dir");
				goto cleanup;
			}
		} else {
			error = append_entry(bld, filename, &entry->oid, entry->mode);
			if (error < GIT_SUCCESS) {
				/* Abort rather than let a later call overwrite the error. */
				error = git__rethrow(error, "Failed to insert file");
				goto cleanup;
			}
		}
	}

	error = git_treebuilder_write(oid, index->repository, bld);
	if (error < GIT_SUCCESS)
		error = git__rethrow(error, "Failed to write tree to db");

 cleanup:
	git_treebuilder_free(bld);

	if (error < GIT_SUCCESS)
		return error;
	else
		return i;
}
Esempio n. 17
0
File: lncky.c Progetto: mjpost/cky
/*
 * CKY parser driver.
 *
 * Usage: prog yieldfile [maxsentlen [grammarfile [sentfrom sentto]]]
 *
 * Reads a grammar (from grammarfile, or stdin when none is given) and then
 * parses each sentence of yieldfile. Sentences longer than maxsentlen (when
 * non-zero) are reported as "too_long."; sentences outside the optional
 * [sentfrom, sentto] range are skipped. For every parsed sentence the best
 * tree is written to stdout, otherwise "parse_failure." is written. A
 * summary is printed to stderr at the end.
 *
 * Exits with EXIT_FAILURE on a bad command line or an unreadable file.
 */
int      
main(int argc, char **argv)
{
  si_t          si = make_si(1024);
  FILE          *grammarfp = stdin, *yieldfp;
  FILE		*tracefp = NULL;  	/* trace output */
  FILE		*summaryfp = stderr;	/* end of parse stats output */
  FILE		*parsefp = stdout;      /* parse trees */
  FILE		*probfp = NULL;         /* max_neglog_prob */

  chart_cell	root_cell;
  grammar	g;
  chart		c;
  vindex 	terms;
  int		maxsentlen = 0;
  int           sentenceno = 0, parsed_sentences = 0, failed_sentences = 0;
  double	sum_neglog_prob = 0;
  int           sentfrom = 0;
  int           sentto = 0;

  srand(RAND_SEED);	/* seed random number generator */

  if (argc<2 || argc>6) {
    fprintf(stderr, "%s yieldfile [maxsentlen [grammarfile [sentfrom sentto]]]\n", argv[0]);
    exit(EXIT_FAILURE);
  }

  if ((yieldfp = fopen(argv[1], "r")) == NULL) {
    fprintf(stderr, "%s: Couldn't open yieldfile %s\n", argv[0], argv[1]);
    exit(EXIT_FAILURE);
  }

  /* sscanf returns EOF (not 0) for an empty argument, so require exactly
     one successful conversion instead of testing for zero. */
  if (argc >= 3)
    if (sscanf(argv[2], "%d", &maxsentlen) != 1) {
      fprintf(stderr, "%s: Couldn't parse maxsentlen %s\n", argv[0], argv[2]);
      exit(EXIT_FAILURE);
    }

  if (argc >= 4)
    if ((grammarfp = fopen(argv[3], "r")) == NULL) {
      fprintf(stderr, "%s: Couldn't open grammarfile %s\n", argv[0], argv[3]);
      exit(EXIT_FAILURE);
    }

  /* The sentence range is only honoured when both bounds are given. */
  if (argc >= 6) {
    if (sscanf(argv[4], "%d", &sentfrom) != 1) {
      fprintf(stderr, "%s: Couldn't parse sentfrom %s\n", argv[0], argv[4]);
      exit(EXIT_FAILURE);
    }
    if (sscanf(argv[5], "%d", &sentto) != 1) {
      fprintf(stderr, "%s: Couldn't parse sentto %s\n", argv[0], argv[5]);
      exit(EXIT_FAILURE);
    }
  }

  g = read_grammar(grammarfp, si);
  /* write_grammar(tracefp, g, si); */

  while ((terms = read_terms(yieldfp, si))) {
    sentenceno++;

    if (sentfrom && sentenceno < sentfrom) {
      vindex_free(terms);
      continue;
    }
    if (sentto && sentenceno > sentto) {
      vindex_free(terms);
      break;
    }

    /* skip if sentence is too long */
    if (!maxsentlen || (int) terms->n <= maxsentlen) { 
      size_t	i;

      if (tracefp) {
	fprintf(tracefp, "\nSentence %d:\n", sentenceno);
	for (i=0; i<terms->n; i++)
	  fprintf(tracefp, " %s", si_index_string(si, terms->e[i]));
	fprintf(tracefp, "\n");
      }
     
      c = cky(*terms, g, si);

      /* fetch best root node */

      root_cell = sihashcc_ref(CHART_ENTRY(c, 0, terms->n), g.root_label);

      if (root_cell) {
	tree parse_tree = bintree_tree(&root_cell->tree, si);
	double prob = (double) root_cell->prob;

	parsed_sentences++;
	assert(prob > 0.0);
	sum_neglog_prob -= log(prob);

	if (probfp)
	  fprintf(probfp, "max_neglog_prob(%d, %g).\n", 
		  sentenceno, -log(prob)); 

	if (tracefp) 
	  fprintf(tracefp, " Prob = %g\n", prob);

	if (parsefp) {
	  write_tree(parsefp, parse_tree, si);
	  fprintf(parsefp, "\n");
	  /* write_prolog_tree(parsefp, parse_tree, si); */
	}

	free_tree(parse_tree);
      }

      else {
	failed_sentences++;
	if (tracefp)
	  fprintf(tracefp, "Failed to parse\n");
	if (parsefp)
	  fprintf(parsefp, "parse_failure.\n");
      }

      chart_free(c, terms->n);			/* free the chart */
    }
    else { 					/* sentence too long */
      if (parsefp)
	fprintf(parsefp, "too_long.\n");
    }

    vindex_free(terms);				/*  free the terms */
    assert(trees_allocated == 0);
    assert(bintrees_allocated == 0);
  }
  free_grammar(g);
  si_free(si);

  /* Release the input streams opened above; stdin is left alone. */
  fclose(yieldfp);
  if (grammarfp != stdin)
    fclose(grammarfp);

  if (summaryfp) {
    fprintf(summaryfp, "\n%d/%d = %g%% test sentences met the length criteron,"
	    " of which %d/%d = %g%% were parsed\n", 
	    parsed_sentences+failed_sentences, sentenceno,
	    (double) (100.0 * (parsed_sentences+failed_sentences)) / 
	                       sentenceno,
	    parsed_sentences, parsed_sentences+failed_sentences, 
	    (double) (100.0 * parsed_sentences) / 
                              (parsed_sentences + failed_sentences));
    fprintf(summaryfp, "Sum(-log prob) = %g\n", sum_neglog_prob);
  }

  /* check that everything has been deallocated */
  /* printf("mmm_blocks_allocated = %ld\n", (long) mmm_blocks_allocated); */
  assert(mmm_blocks_allocated == 0);		
  exit(EXIT_SUCCESS);
}
Esempio n. 18
0
int main(int argc, char **argv)
{
  init_rand();
  setup_gsl_dgen();

  dgen_parse_cmdline(argc, argv);

  int i, j, h, p, k, c;
  
  int cons_cap, num_cons = -1, root_cap, num_root = -1;
  char **cons_seqs, **root_seqs;

  int internal_node_index = 0, leaf_node_index = M;
  int max_internal_node_index = 2000000;
  int max_leaf_node_index = 2000000;
  int sum_seen_or_unseen = 0;

  int ploidy, codon_sequence_length = -1, n_genes, total_n_HLA;
  Decimal mu;

  if(json_parameters_path == NULL) die("No .json file passed.");

  load_lengths_for_simulation_from_json(json_parameters_path, &kappa, &mu,
                                        &codon_sequence_length, &total_n_HLA, &ploidy, &n_genes);

  if(ploidy < 1) die("Ploidy is less than 1.");
  if(n_genes < 1) die("Number of genes is less than 1.");
  
  if(cons_path != NULL) {
    printf("Loading sequences to obtain consensus...\n");
    num_cons = load_seqs(cons_path, &cons_seqs, &cons_cap);
    assert(num_cons > 0);
    printf("Loaded %i sequences to determine consensus.\n", num_cons);
    
    codon_sequence_length = strlen(cons_seqs[0]);
    printf("Codon_sequence_length: %i\n",codon_sequence_length/3);
    
    if(codon_sequence_length % 3 != 0)
      die("Sequences contain partial codons [%i mod 3 != 0].", codon_sequence_length);

    for(c = 0; c < num_cons; c++) {
      if((int) strlen(cons_seqs[c]) != codon_sequence_length) {
        die("Sequences from which to derive the consensus sequence aren't all "
            "the same length.");
      }
    }
    codon_sequence_length = codon_sequence_length/3;
  }

  if(root_path != NULL) {
    printf("Loading sequences to obtain root...\n");
    num_root = load_seqs(root_path, &root_seqs, &root_cap);
    printf("Loaded %i sequences to determine root.\n", num_root);
    
    if(cons_path == NULL)
      die("Did not pass a file to find the consensus sequence.");
    
    if((int) (strlen(root_seqs[0])/3) != codon_sequence_length)
      die("Sequences used to determine the root are different lengths to those used for the consensus.");

    for(c = 0; c < num_root; c++) {
      if((int) strlen(root_seqs[c]) != 3*codon_sequence_length) {
        die("Sequences from which to derive the root sequence aren't all "
            "the same length.");
      }
    }
  }

  Decimal *internal_node_times = my_malloc(max_internal_node_index * sizeof(Decimal) , __FILE__, __LINE__);
  Decimal *leaf_node_times = my_malloc(max_leaf_node_index * sizeof(Decimal), __FILE__, __LINE__);
  int *seen_or_unseen = my_malloc(max_internal_node_index * sizeof(int), __FILE__, __LINE__);

  birth_death_simulation_backwards(max_internal_node_index, max_leaf_node_index,
                                   internal_node_times, 
                                   leaf_node_times,
                                   &internal_node_index, &leaf_node_index,
                                   seen_or_unseen,
                                   N, M, lambda, mu_tree, past_sampling);

  for(i = 0; i < internal_node_index; i++) sum_seen_or_unseen += seen_or_unseen[i];
  
  int total_nodes = (2 * leaf_node_index) - 1 + internal_node_index - sum_seen_or_unseen;
  Tree *tree = my_malloc((total_nodes+1) * sizeof(Tree), __FILE__, __LINE__);
  // Now malloc the memory that this points to.
  int *HLAs_in_tree = my_malloc((total_nodes+1) * ploidy * n_genes * sizeof(int), __FILE__, __LINE__);

  for(i = 0; i < total_nodes; i++) 
    tree[i].HLAs = &HLAs_in_tree[i * ploidy * n_genes];

  construct_birth_death_tree(leaf_node_index, internal_node_index,
                             leaf_node_times, internal_node_times,
                             M, seen_or_unseen,
                             tree);

  // Reverse the direction that time is measured in the tree.
  // DEV: Don't need to do this, waste of computation - sort.
  // DEV: The parent times are wrong when there are unseen nodes.
  for(i = 0; i < total_nodes; i++)
    tree[i].node_time = tree[total_nodes-1].node_time - tree[i].node_time;
   
  int root_node = tree[total_nodes-1].node;
  
  if(write_newick_tree_to_file == true) {
    write_newick_tree(newick_tree_data_file, tree, root_node, 1);
    fclose(newick_tree_data_file);
  }

  Decimal S_portion[NUM_CODONS];
  Decimal NS_portion[NUM_CODONS];

  for(c = 0; c < NUM_CODONS; c++) {
    S_portion[c] = kappa * beta_S[c] + beta_V[c];
    NS_portion[c] = kappa * alpha_S[c] + alpha_V[c];
  }
  
  int n_HLA[n_genes];

  printf("Total number of HLA types: %i.\n", total_n_HLA);

  Decimal HLA_prevalences[total_n_HLA];
  int wildtype_sequence[codon_sequence_length];
  Decimal *R = my_malloc(codon_sequence_length * sizeof(Decimal), __FILE__, __LINE__);
  Decimal *omega = my_malloc(codon_sequence_length * sizeof(Decimal), __FILE__, __LINE__);
  Decimal *reversion_selection = my_malloc(codon_sequence_length * sizeof(Decimal), __FILE__, __LINE__);

  memory_allocation(num_cons, num_root, codon_sequence_length,
                    max_internal_node_index, max_leaf_node_index, 
                    total_nodes, ploidy, n_genes, total_n_HLA,
                    leaf_node_index);

  int (*codon_sequence_matrix)[codon_sequence_length] = my_malloc(total_nodes *
                                                                  sizeof(int[codon_sequence_length]),
                                                                  __FILE__, __LINE__);
  Decimal (*HLA_selection_profiles)[codon_sequence_length] = my_malloc(total_n_HLA * sizeof(Decimal[codon_sequence_length]),
                                                                       __FILE__, __LINE__);

  load_parameters_for_simulation_from_json(json_parameters_path, codon_sequence_length,
                                           omega, R, reversion_selection, total_n_HLA,
                                           n_genes, n_HLA, HLA_prevalences,
                                           HLA_selection_profiles);
  
  Decimal sum_check;
  for(i = 0, k = 0; i < n_genes; i++) {
    sum_check = 0;
    for(h = 0; h < n_HLA[i]; h++, k++) {
      sum_check += HLA_prevalences[k];
    }
    if(sum_check > 1.00001 || sum_check < 0.9999) die("HLA prevalences for gene %i do not sum to 1\n", i+1);
  }
  
  if(cons_path != NULL) {
    printf("Mapping gaps to consensus...\n");
    // Set the consensus sequence - the consensus of the optional sequence file 
    // that is passed.
    char wildtype_sequence_dummy[3*codon_sequence_length+1];
    generate_consensus(cons_seqs, num_cons, 3*codon_sequence_length, wildtype_sequence_dummy);
    printf("Wildtype sequence:\n%s\n", wildtype_sequence_dummy);
    // By default, set the root as the wildtype sequence.
    for(i = 0; i < codon_sequence_length; i++)
      wildtype_sequence[i] = (int) amino_to_code(wildtype_sequence_dummy+i*3);

    if(root_path == NULL) {
      for(i = 0; i < codon_sequence_length; i++)
        codon_sequence_matrix[root_node][i] = wildtype_sequence[i];
    } else {
      printf("Mapping gaps to root...\n");
      char root_sequence_dummy[3*codon_sequence_length+1];
      generate_consensus(root_seqs, num_root, 3*codon_sequence_length, root_sequence_dummy);
      printf("Root sequence:\n%s\n", root_sequence_dummy);
      
      for(i = 0; i < codon_sequence_length; i++)
        codon_sequence_matrix[root_node][i] = (int) amino_to_code(root_sequence_dummy+i*3);
      printf("Number of root sequences: %i.\n", num_root);
      for(c = 0; c < num_root; c++) free(root_seqs[c]);
      free(root_seqs);
    }
    printf("Number of consensus sequences: %i.\n", num_cons);
    for(c = 0; c < num_cons; c++) free(cons_seqs[c]);
    free(cons_seqs);
  
  } else {
    for(i = 0; i < codon_sequence_length; i++) {
      // Sample the root sequence according to the HIV codon usage information.
      codon_sequence_matrix[root_node][i] = discrete_sampling_dist(NUM_CODONS, prior_C1);
      // As default, set the root node to the consensus sequence.  
      wildtype_sequence[i] = codon_sequence_matrix[root_node][i];
    }
  }
  
  // No matter what is read in, there is no recombination simulated - so make sure it's set to 0.
  for(i = 0; i < codon_sequence_length; i++) R[i] = 0;

  write_summary_json(json_summary_file,
                     mu, codon_sequence_length, ploidy,
                     n_genes, n_HLA, total_n_HLA,
                     HLA_prevalences,
                     omega, R, reversion_selection,
                     HLA_selection_profiles);

  free(R);
  
  fprintf(simulated_root_file, ">root_sequence\n");
  for(i = 0; i < codon_sequence_length; i++)
    fprintf(simulated_root_file, "%s", code_to_char(codon_sequence_matrix[root_node][i]));
  fprintf(simulated_root_file, "\n");

  int root_HLA[ploidy * n_genes];
  int cumulative_n_HLA = 0;

  for(i = 0, k = 0; i < n_genes; i++) {
    for(p = 0; p < ploidy; p++, k++) {
      root_HLA[k] = cumulative_n_HLA + 
                    discrete_sampling_dist(n_HLA[i], &HLA_prevalences[cumulative_n_HLA]);
      tree[root_node].HLAs[k] = root_HLA[k];
    }
    cumulative_n_HLA = cumulative_n_HLA + n_HLA[i];
  }

  printf("Passing HLA information...\n");
  pass_HLA(ploidy, n_genes, root_node,
                           tree, leaf_node_index, total_n_HLA,
                           n_HLA, HLA_prevalences);
  printf("Passed HLA information\n");
  
  // printf("Printing the tree\n");
  // for(i = 0; i < total_nodes; i++) {
  //   printf("%i %i %i "DECPRINT" %i", tree[i].node, tree[i].daughter_nodes[0],
  //          tree[i].daughter_nodes[1], tree[i].node_time,
  //          tree[i].seen_or_unseen);
  //   for(j = 0; j < (ploidy * n_genes); j++) {
  //     printf(" %i", tree[i].HLAs[j]);
  //   }
  //   printf("\n");
  // }

  if(write_tree_to_file == true) {
    write_tree(tree_data_file, tree, root_node, ploidy, n_genes);
    fclose(tree_data_file);
  }

  printf("Passing sequence information...\n");
  
  pass_codon_sequence_change(codon_sequence_length, ploidy,
                             n_genes, total_n_HLA, 
                             root_node, mu,
                             codon_sequence_matrix,
                             tree,
                             leaf_node_index,
                             S_portion, NS_portion,
                             HLA_selection_profiles,
                             wildtype_sequence,
                             omega, reversion_selection);

  printf("Passed sequence information\n"
         "Now generating .fasta files of reference and query sequences, and\n"
         "a .csv file of the HLA information associated to the query sequences.\n");

  if(num_queries < 0) {
    // Set the number of query sequences.
    num_queries = (int) (query_fraction * leaf_node_index);
    printf("Number of queries: %i.\n", num_queries);
  } else {
    printf("Number of queries: %i.\n", num_queries);
  }

  if(num_queries > leaf_node_index) die("Number of query sequences larger than the number of leaves");
  int *all_sequences = my_malloc(leaf_node_index * sizeof(int), __FILE__, __LINE__);
  int num_refs = leaf_node_index - num_queries;

  for(i = 0; i < leaf_node_index; i++) all_sequences[i] = i;

  save_simulated_ref_and_query_fasta(num_queries, num_refs, leaf_node_index,
                                     all_sequences, codon_sequence_length, codon_sequence_matrix,
                                     tree, ploidy, n_genes);

  // Now save the hla types to a .csv file.
  fprintf(hla_query_file, "\"\",");
  for(h = 0; h < total_n_HLA-1; h++) fprintf(hla_query_file, "\"%i\",", h+1);
  fprintf(hla_query_file, "\"%i\"\n", total_n_HLA);
  
  // Write the HLA types of the leaves to a file.
  int (*hla_types)[total_n_HLA] = my_malloc(leaf_node_index * sizeof(int[total_n_HLA]), __FILE__, __LINE__);

  for(i = 0; i < leaf_node_index; i++)
  {
    for(h = 0; h < total_n_HLA; h++)
      hla_types[i][h] = 0;
    for(j = 0; j < (n_genes * ploidy); j++)
      hla_types[i][tree[i].HLAs[j]] = 1;
  }

  // Write the query HLA types to a .csv file.
  for(i = num_refs; i < leaf_node_index; i++)
  {
    fprintf(hla_query_file,"\"simulated_seq_%i_HLA", all_sequences[i]+1);
    for(h = 0; h < (ploidy * n_genes); h++) fprintf(hla_query_file, "_%i", tree[all_sequences[i]].HLAs[h]);
    fprintf(hla_query_file, "\"");
    for(h = 0; h < total_n_HLA; h++) {
      fprintf(hla_query_file, ",%i", hla_types[all_sequences[i]][h]);
    }
    fprintf(hla_query_file, "\n");
  }

  free(hla_types); 
  free(internal_node_times);
  free(leaf_node_times);
  free(seen_or_unseen);
  free(codon_sequence_matrix);
  free(HLA_selection_profiles);
  free(all_sequences);
  free(omega);
  free(reversion_selection);

  free(tree[0].HLAs);
  free(tree);

  fclose(summary_file);
  fclose(json_summary_file);
  fclose(simulated_refs_file);
  fclose(simulated_root_file);
  fclose(simulated_queries_file);
  fclose(hla_query_file);

  clearup_gsl_dgen();
  return EXIT_SUCCESS;
}