Пример #1
0
/*
 * change the device name to a raw devname
 */
char *
getfullrawname(char *cp)
{
	struct stat64	buf;
	char		*dp;
	char		*new_path;
	dev_t		blk_dev;

	if (cp == NULL)
		return (strdup(""));

	/*
	 * Create a fully qualified name.
	 */
	if ((cp = getfullname(cp)) == NULL)
		return (NULL);

	if (*cp == '\0')
		return (cp);

	if (stat64(cp, &buf) != 0) {
		free(cp);
		return (strdup(""));
	}

	if (S_ISCHR(buf.st_mode))
		return (cp);

	if (!S_ISBLK(buf.st_mode)) {
		free(cp);
		return (strdup(""));
	}

	blk_dev = buf.st_rdev;

	if ((dp = getvfsspecial(cp, GET_RAW)) != NULL) {
		free(cp);
		return (strdup(dp));
	}

	/*
	 * We have a block device name, go find the raw name.
	 */
	if ((dp = strstr(cp, "/dsk/")) == NULL &&
	    (dp = strstr(cp, "/" LOFI_BLOCK_NAME "/")) == NULL &&
	    (dp = strstr(cp, "/" RD_BLOCK_NAME "/")) == NULL &&
	    (dp = strstr(cp, "/" SNAP_BLOCK_NAME "/")) == NULL &&
	    (dp = strrchr(cp, '/')) == NULL) {
		/* this is not really possible */
		free(cp);
		return (strdup(""));
	}
	dp++;

	if ((new_path = malloc(strlen(cp)+2)) == NULL) {
		free(cp);
		return (NULL);
	}
	(void) strncpy(new_path, cp, dp - cp);
	/* fill in the rest of the raw name */
	new_path[dp - cp] = 'r';
	(void) strcpy(new_path + (dp - cp) + 1, dp);

	if (test_if_raw(new_path, blk_dev)) {
		free(cp);
		return (new_path);
	}
	free(new_path);

	dp = getrawcomplete(cp, &buf);
	free(cp);
	return (dp);
}
Пример #2
0
/*
 * Return the value that follows the option at argv[*i] and advance *i onto it.
 * Exits with status 1 when the option is the last argument, so callers never
 * receive the NULL pointer that terminates argv.
 */
static char* option_value(int argc, char** argv, int* i) {
    if(*i + 1 >= argc) {
        fprintf(stderr, "Option %s requires an argument, exiting\n", argv[*i]);
        exit(1);
    }
    *i += 1;
    return argv[*i];
}

/*
 * Entry point of the TRIM tool.
 *
 * Steps, in order:
 *   1. Parse the command line.  "-i" reads the configuration file (numeric
 *      parameters plus one "species <name> <weight>" line per input), "-maf"
 *      names a single alignment file to read instead of per-species files,
 *      "-r" builds the restriction subset, "-o" names the output metafile,
 *      "-log" redirects the log, "-verbose" sets verbose to 0 and "-rc"
 *      requests reverse complementing.
 *   2. Validate halfsize/gap and open the output file.
 *   3. Build one dictionary per species, all sharing one masking filter, and
 *      fill them either from per-species files or from the alignment file.
 *   4. Intersect the dictionaries and record the conservation tables before
 *      and after the intersection.
 *   5. Write the header, then each species' dictionary and conservation
 *      table, to the output file.
 *
 * Returns 0 on success; every failure path calls exit(1).
 */
int main(int argc, char** argv) {
    dictionary<ATOMIC>* dict[MAXSPECIES];
    dictionary<ATOMIC>* filter;
    conservation_table* cons[MAXSPECIES];

    int i;
    FILE *configfile, *metacalc;

    wordexp_t exp_result;

    char buff[MAXBUFFLENGTH];
    char tag[MAXBUFFLENGTH];

    char path[MAXBUFFLENGTH]="";
    char suffix[MAXBUFFLENGTH]="";

    char metacalcfilename[MAXBUFFLENGTH]="";
    char maffilename[MAXBUFFLENGTH]="";

    int n_species=0;
    char* file_name[MAXSPECIES];
    double weight[MAXSPECIES];

    double threshold=0;
    double tot_weight=0;
    int halfsize=4;
    int gap=2;
    int min_gc=1;
    int repeat_length=2;
    int length_limit=0;

    subset *restriction=NULL;

    char *pc;
    char *value;
    char *p_file_name;

    int reverse_complement=0;


/***************************************************************************************************************************************/

    if(argc==1) {
        fprintf(stderr, "TRIM - this program takes sequences, creates corresponding hash tables, and intersects them by trimmming\n");
        fprintf(stderr, "Usage: %s -i config_file [-maf maf_file_name] -r restriction_file -o output_metafile\n",argv[0]);
        fprintf(stderr, "Other parameters are:\n -verbose [suppress verbose output]\n -rc [reverse complement sequences before adding]\n");
        exit(1);
    }

    timestamp_set();
    maffilename[0]=0;
    for(i=1;i<argc;i++) {
        pc = argv[i];
        if(*pc == '-') {
            /*
             * pc keeps pointing at the option itself while i moves on to the
             * option's value, so at most one of the tests below matches.
             */
            if(strcmp(pc+1,"i") == 0) {
                configfile = fopen(option_value(argc, argv, &i),"r");
                if(configfile==NULL) {
                    fprintf(logfile,"Input file (%s) cannot be opened, exiting", argv[i]);
                    exit(1);
                }
                n_species = 0;
                tot_weight= 0;
                /*
                 * Each line is "<tag> <value...>".  A token taken from buff
                 * cannot be longer than buff, so the unbounded %s conversions
                 * below fit their MAXBUFFLENGTH-sized destinations.
                 */
                while(fgets(buff,MAXBUFFLENGTH,configfile)) {
                    tag[0]=0;
                    sscanf(buff,"%s %*s",tag);
                    if(strcmp(tag,"path")==0) sscanf(buff,"%*s %s", &path[0]);
                    if(strcmp(tag,"extention")==0)  sscanf(buff,"%*s %s", &suffix[0]);
                    if(strcmp(tag,"halfsize")==0)   sscanf(buff,"%*s %i", &halfsize);
                    if(strcmp(tag,"gap")==0)        sscanf(buff,"%*s %i", &gap);
                    if(strcmp(tag,"repeatmask")==0) sscanf(buff,"%*s %i", &repeat_length);
                    if(strcmp(tag,"gccontent")==0)  sscanf(buff,"%*s %i", &min_gc);
                    if(strcmp(tag,"lowercase")==0)  sscanf(buff,"%*s %i", &lowercase_allowed);
                    if(strcmp(tag,"threshold")==0)  sscanf(buff,"%*s %lf",&threshold);
                    if(strcmp(tag,"samewindow")==0) sscanf(buff,"%*s %i", &SAMEWINDOW);
                    if(strcmp(tag,"limit")==0)      sscanf(buff,"%*s %i", &length_limit);
                    if(strcmp(tag,"species")==0) {
                        /* dict[], cons[], file_name[] and weight[] hold MAXSPECIES entries. */
                        if(n_species >= MAXSPECIES) {
                            fprintf(stderr, "Too many species in the config file (limit is %i), exiting\n", (int)MAXSPECIES);
                            exit(1);
                        }
                        file_name[n_species] = (char*)malloc(sizeof(char)*MAXBUFFLENGTH);
                        if(file_name[n_species]==NULL) {
                            fprintf(stderr, "Out of memory while reading the config file, exiting\n");
                            exit(1);
                        }
                        /*
                         * Give both fields a defined value first: sscanf
                         * leaves them untouched when the line is missing the
                         * name or the weight.
                         */
                        file_name[n_species][0] = 0;
                        weight[n_species] = 0;
                        sscanf(buff,"%*s %s %lf", &file_name[n_species][0], &weight[n_species]);
                        tot_weight+=weight[n_species];
                        n_species++;
                    }
                }
                fclose(configfile);
            }
            if(strcmp(pc+1,"maf") == 0) {
                value = option_value(argc, argv, &i);
                /* Command line values are not length limited; refuse what cannot fit. */
                if(strlen(value) >= sizeof(maffilename)) {
                    fprintf(stderr, "File name given to -maf is too long, exiting\n");
                    exit(1);
                }
                sscanf(value,"%s",&maffilename[0]);
            }
            if(strcmp(pc+1,"r") == 0)   restriction = new subset(option_value(argc, argv, &i));
            if(strcmp(pc+1,"o") == 0) {
                value = option_value(argc, argv, &i);
                if(strlen(value) >= sizeof(metacalcfilename)) {
                    fprintf(stderr, "File name given to -o is too long, exiting\n");
                    exit(1);
                }
                sscanf(value, "%s", &metacalcfilename[0]);
            }
            if(strcmp(pc+1,"verbose") == 0)     verbose = 0;
            if(strcmp(pc+1,"rc") == 0)          reverse_complement = 1;

            if(strcmp(pc+1,"log")==0) {
                logfile = fopen(option_value(argc, argv, &i), "w");
                if(logfile==NULL) logfile = stderr;
            }

        }
    }

    if(halfsize<2 || gap>3) {
        if(verbose) fprintf(logfile,"Word size or gap are incorrect, exiting\n");
        exit(1);
    }

    metacalc = fopen(metacalcfilename,"wb");
    if(metacalc==NULL) {
        if(verbose) fprintf(logfile,"Output metacalc file cannot be opened, exiting\n");
        exit(1);
    }


/***************************************************************************************************************************************/

    /* A threshold of 0 (the initial value) is replaced by 0.5. */
    if(threshold==0) threshold = 0.5;

    if(verbose) fprintf(logfile, "[Numeric parameters are: pattern=%i-%i-%i, minGC=%i, repeat=%i, threshold=%2.1lf%%]\n", halfsize, gap, halfsize, min_gc, repeat_length, threshold*100);
    if(verbose) fprintf(logfile, "[Boolean parameters are: lowercase=%s, reverse_complement=%s]\n",yesno(lowercase_allowed), yesno(reverse_complement));
    if(verbose) fprintf(logfile, "[Extra parameters are: SAMEWINDOW=%i, sequence length limit=%i]\n",SAMEWINDOW,length_limit);

    /*
     * Run the configured path through shell-style word expansion and keep the
     * first resulting word.  The expansion yields no words for an empty path
     * and may fail for malformed input; in both cases path is left as it was
     * instead of reading a word that does not exist.
     */
    if(wordexp(path, &exp_result, 0) == 0) {
        if(exp_result.we_wordc > 0) {
            if(strlen(exp_result.we_wordv[0]) >= sizeof(path)) {
                fprintf(stderr, "Expanded path is too long, exiting\n");
                exit(1);
            }
            snprintf(&path[0], sizeof(path), "%s", exp_result.we_wordv[0]);
        }
        wordfree(&exp_result);
    }


/***************************************************************************************************************************************/

    /* One masking filter is shared by every per-species dictionary. */
    filter = new dictionary<ATOMIC>(halfsize,gap);
    filter->mask_low_complexity(repeat_length);
    if(min_gc>0) filter->mask_low_GCcontent(min_gc);
    for(i=0;i<n_species;i++) {
        dict[i] = new dictionary<ATOMIC>(halfsize,gap);
        dict[i]->filter = filter;
    }

    if(maffilename[0]==0) {
        /* No alignment file given: read each species from its own file. */
        for(i=0;i<n_species;i++) {
            p_file_name = getfullname(path,file_name[i],suffix);
            if(verbose) fprintf(logfile,"[Reading %s: ",p_file_name);
            dict[i]->read_from_suf(p_file_name, restriction, reverse_complement,length_limit);
            dict[i]->check();
            cons[i] = new conservation_table(dict[i]->max_key);
            cons[i]->before = dict[i]->fill_cons();
            if(verbose) fprintf(logfile,", weight=%2.1lf]\n",weight[i]);
        }
    }
    else {
        /* A single alignment file fills all dictionaries in one pass. */
        if(verbose) fprintf(logfile,"[Reading %s: ", maffilename);
        dictionary<ATOMIC>::read_from_muf(maffilename, dict, restriction, file_name, n_species, reverse_complement,length_limit);
        if(verbose) fprintf(logfile,"]\n");
        for(i=0;i<n_species;i++) {
            if(verbose) fprintf(logfile,"[%s",file_name[i]);
            dict[i]->info();
            dict[i]->check();
            cons[i] = new conservation_table(dict[i]->max_key);
            cons[i]->before = dict[i]->fill_cons();
            if(verbose) fprintf(logfile,", weight=%2.1lf]\n",weight[i]);
        }
    }

    /* The cut-off passed on is the threshold fraction of the summed species weights. */
    dictionary<ATOMIC>::intersect_many(dict, n_species, weight, threshold*tot_weight, (char*)"Intersect ");

    if(verbose) fprintf(logfile,"[Compute conservation rates:");
    for(i=0;i<n_species;i++) {
        if(verbose) fprintf(logfile," %s",file_name[i]);
        cons[i]->after = dict[i]->fill_cons();
    }
    if(verbose) fprintf(logfile,"]\n");

    /* Output layout: one header line, then per species a name/weight line followed by its saved data. */
    if(verbose) fprintf(logfile,"[Metacalc: saving data to %s]\n",metacalcfilename);
    fprintf(metacalc,"%i %i %i %lf\n", halfsize, gap, n_species, threshold);
    for(i=0;i<n_species;i++) {
        if(verbose) fprintf(logfile,"[Saving %s",file_name[i]);
        dict[i]->check();
        dict[i]->info();
        fprintf(metacalc,"%s %lf\n", file_name[i], weight[i]);
        dict[i]->save(metacalc);
        cons[i]->save(metacalc);
        if(verbose) fprintf(logfile,"]\n");
    }

    fclose(metacalc);
    timestamp_report();
    return(0);
}
Пример #3
0
/*
 * change the device name to a block device name
 */
char *
getfullblkname(char *cp)
{
	struct stat64	buf;
	char		*dp;
	char		*new_path;
	dev_t		raw_dev;

	if (cp == NULL)
		return (strdup(""));

	/*
	 * Create a fully qualified name.
	 */
	if ((cp = getfullname(cp)) == NULL)
		return (NULL);

	if (*cp == '\0')
		return (cp);

	if (stat64(cp, &buf) != 0) {
		free(cp);
		return (strdup(""));
	}

	if (S_ISBLK(buf.st_mode))
		return (cp);

	if (!S_ISCHR(buf.st_mode)) {
		free(cp);
		return (strdup(""));
	}

	if ((dp = getvfsspecial(cp, GET_BLK)) != NULL) {
		free(cp);
		return (strdup(dp));
	}

	raw_dev = buf.st_rdev;

	/*
	 * We have a raw device name, go find the block name.
	 */
	if ((dp = strstr(cp, "/rdsk/")) == NULL &&
	    (dp = strstr(cp, "/" LOFI_CHAR_NAME "/")) == NULL &&
	    (dp = strstr(cp, "/" RD_CHAR_NAME "/")) == NULL &&
	    (dp = strstr(cp, "/" SNAP_CHAR_NAME "/")) == NULL &&
	    (dp = strrchr(cp, '/')) == NULL) {
		/* this is not really possible */
		free(cp);
		return (strdup(""));
	}
	dp++;
	if (*dp != 'r') {
		dp = getblkcomplete(cp, &buf);
		free(cp);
		return (dp);
	}
	if ((new_path = malloc(strlen(cp))) == NULL) {
		free(cp);
		return (NULL);
	}
	(void) strncpy(new_path, cp, dp - cp);

	/* fill in the rest of the unraw name */
	(void) strcpy(new_path + (dp - cp), dp + 1);

	if (test_if_blk(new_path, raw_dev)) {
		free(cp);
		/* block name was found, return it here */
		return (new_path);
	}
	free(new_path);

	dp = getblkcomplete(cp, &buf);
	free(cp);
	return (dp);
}