Exemplo n.º 1
0
/*
 * Reads a test-case count, then for every case two circles given as
 * "x1 y1 r1 x2 y2 r2", and prints one integer per case:
 *   -1  same centre and same radius,
 *    0, 1 or 2 otherwise, chosen by comparing the centre distance with the
 *       sum / difference of the radii.
 * Assuming calDist() returns the Euclidean distance between the two centres,
 * this is the number of points the two circles share (-1 = infinitely many).
 *
 * Returns 0 on success, 1 when the input is truncated or malformed.
 */
int main(){
	int n;
	/* A failed read would leave n indeterminate and make the loop bound garbage. */
	if (scanf("%d", &n) != 1) return 1;
	int x1, x2, y1, y2, r1, r2, p;
	double distP;
	int shorter, longer;
	for (int i = 0; i < n;i++)
	{
		/* Stop instead of classifying stale or indeterminate values. */
		if (scanf("%d %d %d %d %d %d", &x1, &y1, &r1, &x2, &y2, &r2) != 6) return 1;

		shorter = ((r1 < r2) ? r1 : r2);
		longer = ((r1 < r2) ? r2 : r1);
		/* NOTE(review): argument order is (x1, x2, y1, y2) - confirm against calDist(). */
		distP = calDist(x1, x2, y1, y2);

		// Both circles share the same centre
		if (x1 == x2 && y1 == y2)
		{
			if (r1 == r2) p = -1;
			else p = 0;
		}

		// One centre lies inside (or on) the other circle
		else if (distP <= r1 || distP <= r2)
		{
			/*
			 * NOTE(review): the == tests compare a double with an int sum;
			 * they only hold if calDist() returns an exact value whenever
			 * the distance is an integer - confirm.
			 */
			if (distP + shorter == longer) p = 1;       // touching from the inside
			else if (distP + shorter < longer) p = 0;   // smaller circle strictly inside
			else p = 2;
		}

		// Remaining case: the centres are farther apart than either radius
		else
		{
			if (shorter + longer == distP) p = 1;       // touching from the outside
			else if (shorter + longer < distP) p = 0;   // too far apart to meet
			else p = 2;
		}
		printf("%d\n", p);
	}
	return 0;
}
/*
 * Computes amino-acid residue frequency statistics over the proteins listed
 * in <database>.knownGene.
 *
 * Command line (exactly three arguments, otherwise usage() is called):
 *   argv[1]  protein database name, queried for displayId and protein tables
 *   argv[2]  taxon (stored but not used in this function)
 *   argv[3]  database whose knownGene table supplies the protein IDs
 *
 * Files written in the current directory:
 *   <R>.txt          one "frequency<TAB>accession" line per protein, for each
 *                    of the first 20 letters of aaAlphabet
 *   pbResAvgStd.tab  per-residue mean and standard deviation of the frequency
 *   <R>.srt          sorted, de-duplicated copy of <R>.txt (made with sort/uniq)
 *   pbAnomLimit.tab  per-residue low/high cut-offs read near the 2.5% and
 *                    97.5% positions of <R>.srt
 *   pbAaDist<R>.tab  written by calDist() from the per-protein frequencies
 *
 * A protein is skipped when its accession cannot be resolved, when it has no
 * spXref2 row, when biodatabaseID does not start with '1', or when its
 * sequence is shorter than 100 residues.
 *
 * Relies on file-scope storage declared elsewhere (sumJ, freq, avg, sigma,
 * measure, sum, lenDouble, recordCnt, recordCntDouble) and on MAXRES / MAXN.
 */
int main(int argc, char *argv[])
{
    struct sqlConnection *conn, *conn2;
    char query2[256];
    struct sqlResult *sr2;
    char **row2;
    char cond_str[255];
    char *proteinDatabaseName;
    FILE *o1, *o2, *o3;
    FILE *fh[23];
    char temp_str[1000];
    char *accession;
    char *aaSeq;
    char *chp;
    int i, j, len;
    int ihi, ilow;
    char *answer;
    char *protDisplayId;
    int aaResCnt[30];
    char aaAlphabet[30];
    int aaResFound;
    /* Initialised so a failed sscanf() below can never read garbage. */
    float fvalue1 = 0.0, fvalue2 = 0.0;
    float p1, p2;
    int icnt, jcnt;
    char *taxon;
    char *database;
    int sortedCnt = 0;

    if (argc != 4) usage();

    /* The first 20 letters are the standard residues; X, Z, B are counted but not reported. */
    strcpy(aaAlphabet, "WCMHYNFIDQKRTVPGEASLXZB");

    proteinDatabaseName = argv[1];
    taxon = argv[2];
    database = argv[3];

    o2 = mustOpen("pbResAvgStd.tab", "w");

    /* One output file per standard residue. */
    for (i=0; i<20; i++)
    {
        safef(temp_str, sizeof(temp_str), "%c.txt", aaAlphabet[i]);
        fh[i] = mustOpen(temp_str, "w");
    }

    conn  = hAllocConn(hDefaultDb());
    conn2 = hAllocConn(hDefaultDb());

    safef(query2, sizeof(query2), "select proteinID from %s.knownGene;", database);
    sr2 = sqlMustGetResult(conn2, query2);
    row2 = sqlNextRow(sr2);
    icnt = 0;
    jcnt = 0;

    for (j=0; j<MAXRES; j++)
    {
        sumJ[j] = 0;
    }

    while (row2 != NULL)
    {
        /* Resolve the knownGene protein ID to an accession: first as a display ID, then as an accession. */
        protDisplayId = row2[0];
        safef(cond_str, sizeof(cond_str),  "val='%s'", protDisplayId);
        accession = sqlGetField(proteinDatabaseName, "displayId", "acc", cond_str);

        if (accession == NULL)
        {
            safef(cond_str, sizeof(cond_str),  "acc='%s'", protDisplayId);
            accession = sqlGetField(proteinDatabaseName, "displayId", "acc", cond_str);
            if (accession == NULL)
            {
                verbose(2, "'%s' not found.\n", protDisplayId);
                goto skip;
            }
        }

        /* NOTE(review): the spXref2 database name is hard-coded rather than taken from argv - confirm. */
        safef(cond_str, sizeof(cond_str),  "accession='%s'", accession);
        answer = sqlGetField("proteins040115", "spXref2", "biodatabaseID", cond_str);
        if (answer == NULL)
        {
            /* this protein might be a variant splice protein, and then it won't be in spXref2 */
            goto skip;
        }
        if (answer[0] != '1')
        {
            /* biodatabaseID not starting with '1': presumably not a SWISS-PROT entry */
            goto skip;
        }

        safef(cond_str, sizeof(cond_str),  "acc='%s'", accession);
        aaSeq = sqlGetField(proteinDatabaseName, "protein", "val", cond_str);
        if (aaSeq == NULL)
        {
            printf("Can't find peptide sequence for %s, exiting ...\n", protDisplayId);
            fflush(stdout);
            exit(1);
        }

        len  = strlen(aaSeq);
        if (len < 100) goto skip;

        lenDouble = (double)len;

        for (j=0; j<MAXRES; j++)
        {
            aaResCnt[j] = 0;
        }

        /* Count every residue of the sequence against the alphabet. */
        chp = aaSeq;
        for (i=0; i<len; i++)
        {
            aaResFound = 0;
            for (j=0; j<MAXRES; j++)
            {
                if (*chp == aaAlphabet[j])
                {
                    aaResFound = 1;
                    aaResCnt[j] ++;
                }
            }
            if (!aaResFound)
            {
                fprintf(stderr, "%c %d not a valid AA residue.\n", *chp, *chp);
            }
            chp++;
        }

        /* Relative frequency of each residue in this protein, accumulated for the mean. */
        for (j=0; j<MAXRES; j++)
        {
            freq[icnt][j] = (double)aaResCnt[j]/lenDouble;
            sumJ[j] = sumJ[j] + freq[icnt][j];
        }

        for (j=0; j<20; j++)
        {
            fprintf(fh[j], "%15.7f\t%s\n", freq[icnt][j], accession);
            fflush(fh[j]);
        }
        icnt++;
        if (icnt >= MAXN)
            errAbort("Too many proteins - please set MAXN to be more than %d\n", MAXN);

skip:
        row2 = sqlNextRow(sr2);
    }

    recordCnt = icnt;
    recordCntDouble = (double)recordCnt;

    for (j=0; j<20; j++)
    {
        carefulClose(&(fh[j]));
    }

    sqlFreeResult(&sr2);
    hFreeConn(&conn);
    hFreeConn(&conn2);

    /* NOTE(review): with fewer than two accepted proteins the divisions below are by zero. */
    for (j=0; j<MAXRES; j++)
    {
        avg[j] = sumJ[j]/recordCntDouble;
    }

    /* Sample standard deviation (n-1 denominator) per standard residue. */
    for (j=0; j<20; j++)
    {
        sum = 0.0;
        for (i=0; i<recordCnt; i++)
        {
            sum = sum + (freq[i][j] - avg[j]) * (freq[i][j] - avg[j]);
        }
        sigma[j] = sqrt(sum/(double)(recordCnt-1));
        fprintf(o2, "%c\t%f\t%f\n", aaAlphabet[j], avg[j], sigma[j]);
    }

    carefulClose(&o2);

    o1 = mustOpen("pbAnomLimit.tab", "w");
    for (j=0; j<20; j++)
    {
        safef(temp_str, sizeof(temp_str), "cat %c.txt|sort|uniq > %c.srt",  aaAlphabet[j], aaAlphabet[j]);
        mustSystem(temp_str);

        /* figure out how many unique entries */
        safef(temp_str, sizeof(temp_str), "wc %c.srt > %c.tmp",  aaAlphabet[j], aaAlphabet[j]);
        mustSystem(temp_str);
        safef(temp_str, sizeof(temp_str), "%c.tmp",  aaAlphabet[j]);
        o3 = mustOpen(temp_str, "r");
        mustGetLine(o3, temp_str, sizeof(temp_str));
        /* The line count is the first blank-delimited field of the wc output. */
        chp = temp_str;
        while (*chp == ' ') chp++;
        /* Also stop at the terminator so a line without a trailing blank cannot overrun the buffer. */
        while (*chp != ' ' && *chp != '\0') chp++;
        *chp = '\0';
        sscanf(temp_str, "%d", &sortedCnt);
        /* Close the handle before removing the file; it was previously leaked on every pass. */
        carefulClose(&o3);
        safef(temp_str, sizeof(temp_str), "rm %c.tmp", aaAlphabet[j]);
        mustSystem(temp_str);

        /* cal hi and low cutoff threshold */
        ilow = (int)((float)sortedCnt * 0.025);
        ihi  = (int)((float)sortedCnt * 0.975);

        safef(temp_str, sizeof(temp_str), "%c.srt",  aaAlphabet[j]);
        o2 = mustOpen(temp_str, "r");
        /* Skip to line ilow; the low cut-off is the mean of that line and the next. */
        for (i=0; i<ilow; i++)
        {
            mustGetLine(o2, temp_str, sizeof(temp_str));
        }
        /*
         * NOTE(review): when ilow is 0 no line has been read yet, temp_str still
         * holds the file name and this sscanf() leaves fvalue1 unchanged - confirm
         * the intended cut-off for fewer than 40 sorted entries.
         */
        sscanf(temp_str, "%f", &fvalue1);

        mustGetLine(o2, temp_str, sizeof(temp_str));
        sscanf(temp_str, "%f", &fvalue2);
        p1 = (fvalue1 + fvalue2)/2.0;

        /* Same again around line ihi for the high cut-off. */
        for (i=ilow+1; i<ihi; i++)
        {
            mustGetLine(o2, temp_str, sizeof(temp_str));
        }
        sscanf(temp_str, "%f", &fvalue1);

        mustGetLine(o2, temp_str, sizeof(temp_str));
        sscanf(temp_str, "%f", &fvalue2);
        p2 = (fvalue1 + fvalue2)/2.0;
        carefulClose(&o2);

        fprintf(o1, "%c\t%f\t%f\n", aaAlphabet[j], p1, p2);
        fflush(stdout);

        /* Hand this residue's per-protein frequencies to calDist(). */
        for (i=0; i<recordCnt; i++)
        {
            measure[i] = freq[i][j];
        }
        safef(temp_str, sizeof(temp_str), "pbAaDist%c.tab", aaAlphabet[j]);
        calDist(measure,  recordCnt,    51,     0.0, 0.005, temp_str);
    }

    carefulClose(&o1);

    return(0);
}
Exemplo n.º 3
0
/*
 * Entry point of the ZENA 802.15.4 sniffer.
 *
 * Parses the command line, opens the ZENA device through
 * setup_libusb_access(), optionally selects the 802.15.4 channel, and then
 * loops until exit_flag is set, emitting every received packet on stdout in
 * the selected format:
 *   FORMAT_PCAP    PCAP global header followed by one PCAP record per packet
 *   FORMAT_USBHEX  one text line per 64-byte USB transfer (timestamp, channel, hex bytes)
 *   FORMAT_RANGER  one text line per packet with source/destination byte, LQI,
 *                  RSS and the distance estimate returned by calDist()
 *
 * Options handled here: -b keep packets with bad FCS, -c channel (11..26),
 * -d debug level, -f pcap|usbhex|ranger, -h help, -q quiet, -s scan mode with
 * USB timeout, -t exit time in seconds, -v version, -x write LQI/RSSI in PCAP,
 * -r call rangeDevice().
 *
 * Returns EXIT_SUCCESS after the main loop ends; fatal errors exit directly.
 */
int main( int argc, char **argv) {

	libusb_device_handle *zena;

	int channel = -1;			// no default 802.15.4 channel
	int format = FORMAT_PCAP;	// PCAP is default output format
	int scan_mode = FALSE;
	int drop_bad_packets = TRUE;
	int exit_time = -1;
	int status;

	int c;
	// Setup signal handler. Catching SIGPIPE allows for exit when
	// piping to Wireshark for live packet feed.
	struct sigaction act;
	memset(&act, 0, sizeof(act));
	act.sa_sigaction = signal_handler;
	act.sa_flags = SA_SIGINFO;
	sigaction(SIGPIPE, &act, NULL);


	// Parse command line arguments. See usage() for details.
	// NOTE(review): "x:" makes -x consume an argument that the 'x' case never reads - confirm.
	while ((c = getopt(argc, argv, "bc:d:f:hqs:t:vx:r")) != -1) {
		switch(c) {
			case 'b':
				drop_bad_packets = FALSE;
				break;
			case 'c':
				channel = atoi (optarg);
				if (channel < 11 || channel > 26) {
					fprintf (stderr, "ERROR: Invalid channel. Must be in range 11 to 26. Use -h for help.\n");
					exit(-1);
				}
				break;
			case 'd':
				debug_level = atoi (optarg);
				break;
			case 'f':
				if (strcmp(optarg,"pcap")==0) {
					format = FORMAT_PCAP;
				} else if (strcmp(optarg,"usbhex")==0) {
					format = FORMAT_USBHEX;
				} else if (strcmp(optarg, "ranger")==0) {
					format = FORMAT_RANGER;
				} else {
					fprintf(stderr,"ERROR: unrecognized output format '%s'. Only pcap, usbhex, ranger allowed.\n",optarg);
					exit(-1);
				}
				break;
			case 'h':
				version();
				usage();
				exit(EXIT_SUCCESS);
			case 'q':
				quiet_mode = TRUE;
				break;
			case 's':
				scan_mode = TRUE;
				usb_timeout = atoi (optarg);
				break;
			case 't':
				exit_time = atoi(optarg);
				break;
			case 'v':
				version();
				exit(EXIT_SUCCESS);
			case 'x':
				pcap_lqi_rssi_write = TRUE;
				break;
			case 'r':
				rangeDevice();
				break;
			case '?':	// case when a command line switch argument is missing
				if (optopt == 'c') {
					fprintf (stderr,"ERROR: 802.15.4 channel 11 to 26 must be specified with -c\n");
					exit(-1);
				}
				if (optopt == 'd') {
					fprintf (stderr,"ERROR: debug level 0 .. 9 must be specified with -d\n");
					exit(-1);
				}
				if (optopt == 'f') {
					fprintf (stderr,"ERROR: pcap, usbhex or ranger format must be specified with -f\n");
					exit(-1);
				}
				break;
			default:
				break;
		}
	}

	if (debug_level > 0) {
		fprintf (stderr,"DEBUG: debug level %d\n",debug_level);
	}

	// Locate ZENA on the USB bus and get handle.
	if ((zena = setup_libusb_access()) == NULL) {
		fprintf (stderr, "ERROR: ZENA device not found or not accessible\n");
		exit(EXIT_FAILURE);
	}

	if (channel == -1 && (selected_profile->flags & CHANNEL_SELECTABLE)) {
		fprintf (stderr,"ERROR: 802.15.4 channel is mandatory. Specify with -c. Use -h for help.\n");
		exit(EXIT_FAILURE);
	}

	if (channel != -1) {
		// Set 802.15.4 channel
		status = zena_set_channel (zena,channel);
		if (status < 0) {
			fprintf (stderr, "ERROR: error setting ZENA to 802.15.4 channel %d, errorCode=%d\n",channel,status);
			exit(EXIT_FAILURE);
		}
	}

	// Write PCAP header
	if (format == FORMAT_PCAP) {
		fwrite(&PCAP_MAGIC, sizeof(int), 1, stdout);
		fwrite(&PCAP_VERSION_MAJOR, sizeof(short), 1, stdout);
		fwrite(&PCAP_VERSION_MINOR, sizeof(short), 1, stdout);
		fwrite(&PCAP_TZ, sizeof(int), 1, stdout);				// thiszone: GMT to local correction
		fwrite(&PCAP_SIGFIGS, sizeof(int), 1, stdout);			// sigfigs: accuracy of timestamps
		fwrite(&PCAP_SNAPLEN, sizeof(int), 1, stdout);			// snaplen: max len of packets, in octets
		fwrite(&PCAP_LINKTYPE, sizeof(int), 1, stdout);		// data link type
	}

	int j,packet_len_plus_2;

	// Allocate buffer for usb_interrupt_read requests
	unsigned char usbbuf[64];

	// Get start time of capture. Won't worry about subsecond resolution for this.
	struct timespec tp;
	clock_gettime(CLOCK_REALTIME, &tp);
	int start_sec = tp.tv_sec;

	// Store the number of bytes actually transferred here
	int nbytes;

	// Seitz added: Store corresponding RSS from MRF24J40
	float rss;

	// Seitz added: Store estimated distance returned from calDist()
	float estDist;

	// Packet counter
	int npacket=0;

	zena_packet_t zena_packet;

	// Main loop
	while ( ! exit_flag ) {

		// If scan_mode is TRUE, cycle through all the 802.15.4 channels looking
		// for packets. For some reason it seems to be necessary to close the
		// USB device and libusb library and reopen it for the channel change to
		// work reliably. Why?

		if (scan_mode) {

			channel++;
			// Wrap to the first valid channel; the lower bound also covers a
			// scan that starts from the "no channel" value of -1.
			if (channel > 26 || channel < 11) {
				channel = 11;
			}

			// It seems to be necessary to reset libusb (close library and
			// re-initialize it) for zena_set_channel() to be successful.
			debug(9,"Closing ZENA to facilitate 802.15.4 channel change");
			libusb_close (zena);
			debug(9,"Closing libusb library to facilitate 802.15.4 channel change");
			libusb_exit(NULL);
			debug(9,"Reopening ZENA");
			if ((zena = setup_libusb_access()) == NULL) {
				fprintf (stderr, "ERROR: unable to reopen ZENA during 802.15.4 channel change\n");
				exit(EXIT_FAILURE);
			}

			debug (1,"Setting 802.15.4 channel to %d",channel);
			status = zena_set_channel(zena,channel);
			if (status<0) {
				fprintf (stderr,"ERROR: error setting 802.15.4 channel to %d during scan, errorCode=%d\n",channel, status);
				exit(EXIT_FAILURE);
			}

			// TODO: bug - we can have packet received from the
			// previous 802.15.4 channel in the buffer at this
			// point. When outputted it will be incorrectly
			// tagged with the new channel number. Can we purge
			// the buffer somehow?

		}

		switch (format) {

			// Seitz added: one text line per packet with packet count,
			// source, destination, LQI, RSS and estimated distance.
			case FORMAT_RANGER:

				status = zena_get_packet (zena, &zena_packet);
				if (status == LIBUSB_ERROR_TIMEOUT) {
					// A timeout is a normal event. No action.
					break;
				}
				if (status != 0) {
					fprintf (stderr,"ERROR: retrieving packet, errorCode=%d\n",status);
					break;
				}

				// Ensure that zena_packet.packet_len is a sane value. Occasionally getting crazy
				// values which causes segv when accessing the zena_packet.packet[] buffer.
				//Lu: added case of 0 byte length here which causes tshark to crash
				zena_packet.packet_len &= 0xff;

				if (zena_packet.packet_len > 125 || zena_packet.packet_len == 0) {
					fprintf (stderr,"ERROR: invalid packet length, len=%d\n",zena_packet.packet_len);
					break;
				}

				if (  ( ! zena_packet.fcs_ok) && drop_bad_packets ) {
					warning ("dropping corrupted packet\n");
					break;
				}

				// Field notes kept from the earlier verbose output:
				//   source address      : 2 bytes at packet[7..8] (only packet[7] is printed)
				//   destination address : 2 bytes at packet[5..6] (only packet[5] is printed)
				//   LQI and RSSI        : 0..255 from the MRF24J40, higher is better
				// NOTE(review): packet[7] is read without checking packet_len >= 8 - confirm.
				rss = RSSI_TO_RSS[zena_packet.rssi];
				estDist = calDist(rss);
				fprintf(stdout, "Packet Count:%d,\tSrc ID:%04d,\tDst ID: %04d,\tLQI:%d,\tRSS:%2.2f,\tDist Est:%2.2fm",npacket,zena_packet.packet[7],zena_packet.packet[5],zena_packet.lqi,rss,estDist);
				fprintf(stdout, "\n");
				fflush(stdout);
				npacket++;
				break;
			// End Seitz


			case FORMAT_USBHEX:

				bzero(usbbuf, 64);

				status = selected_profile->transfer(zena, selected_profile->ep_packets, usbbuf, 64, &nbytes, usb_timeout);
				// check for timeout and silently ignore
				if (status == LIBUSB_ERROR_TIMEOUT) {
					debug(9,"libusb_transfer(): timeout");
					continue;
				}

				// get host time of packet reception
				clock_gettime(CLOCK_REALTIME, &tp);
				if ( (exit_time>0) && (tp.tv_sec > (start_sec + exit_time))) {
					debug(1,"Exit time reached. Exiting.");
					exit(EXIT_SUCCESS);
				}

				// a real error (ie not timeout)
				if (status < 0) {
					fprintf (stderr,"ERROR: error retrieving ZENA packet, errorCode=%d\n", status);
					continue;
				}

				// Packet timestamp. The nanosecond part must be zero-padded to
				// nine digits, otherwise e.g. 5 ns would print as ".5".
				fprintf (stdout,"%ld.%09ld ",(long)tp.tv_sec,tp.tv_nsec);

				// 802.15.4 channel
				fprintf (stdout, "%02x ", channel);

				// Echo USB 64 byte packet to screen. Each byte as hex separated by space.
				// One line per packet.
				for (j = 0; j < 64; j++) {
					fprintf (stdout, "%02x ", usbbuf[j] & 0xff);
				}
				fprintf (stdout, "\n");
				fflush (stdout);
				break;

			case FORMAT_PCAP:
				status = zena_get_packet (zena, &zena_packet);
				if (status == LIBUSB_ERROR_TIMEOUT) {
					// A timeout is a normal event. No action.
					break;
				}
				if (status != 0) {
					fprintf (stderr,"ERROR: retrieving packet, errorCode=%d\n",status);
					break;
				}

				// Ensure that zena_packet.packet_len is a sane value. Occasionally getting crazy
				// values which causes segv when accessing the zena_packet.packet[] buffer.
				//Lu: added case of 0 byte length here which causes tshark to crash
				zena_packet.packet_len &= 0xff;

				if (zena_packet.packet_len > 125 || zena_packet.packet_len == 0) {
					fprintf (stderr,"ERROR: invalid packet length, len=%d\n",zena_packet.packet_len);
					break;
				}

				if (  ( ! zena_packet.fcs_ok) && drop_bad_packets ) {
					warning ("dropping corrupted packet\n");
					break;
				}

				npacket++;

				// Write PCAP packet header
				fwrite (&zena_packet.host_ts_sec, sizeof(int), 1, stdout);	// ts_sec: timestamp seconds
				fwrite (&zena_packet.host_ts_usec, sizeof(int), 1, stdout);	// ts_usec: timestamp microseconds

				if (selected_profile->flags & HAS_FCS_FIELD) {
					fwrite (&zena_packet.packet_len, sizeof(int), 1, stdout);
					fwrite (&zena_packet.packet_len, sizeof(int), 1, stdout);
					fwrite (zena_packet.packet, 1, zena_packet.packet_len, stdout);
				} else if (pcap_lqi_rssi_write) {
					// NOTE(review): both length fields announce packet_len + 2
					// bytes but only packet_len bytes are written here; the two
					// LQI/RSSI bytes appear to be missing - confirm.
					packet_len_plus_2 = zena_packet.packet_len + 2;
					fwrite (&packet_len_plus_2, sizeof(int), 1, stdout);
					fwrite (&packet_len_plus_2, sizeof(int), 1, stdout);
					fwrite (zena_packet.packet, 1, zena_packet.packet_len, stdout);
				} else if (zena_packet.fcs_ok) {
					// Small problem re FCS. Old HW ZENA does not provide this information.
					// Solution is in the case of a good packet not to include FCS
					// and Wireshark will ignore it. In the case were the FCS is
					// known to be bad, we'll include a deliberatly wrong FCS. For
					// the moment this will be a fixed value (0x0000), but ideally
					// it should be computed from the packet and the +1 to guarantee
					// it is a bad FCS.
					packet_len_plus_2 = zena_packet.packet_len + 2;

					// write packet excluding FCS
					fwrite (&zena_packet.packet_len, sizeof(int), 1, stdout);
					fwrite (&packet_len_plus_2, sizeof(int), 1, stdout);	// full frame included 2 FCS octets
					fwrite (zena_packet.packet, 1, zena_packet.packet_len, stdout);
				} else {
					packet_len_plus_2 = zena_packet.packet_len + 2;

					// two extra bytes for deliberately wrong FCS
					fwrite (&packet_len_plus_2, sizeof(int), 1, stdout);
					fwrite (&packet_len_plus_2, sizeof(int), 1, stdout);
					zena_packet.packet[zena_packet.packet_len] = 0;
					zena_packet.packet[zena_packet.packet_len+1] = 0;
					fwrite (zena_packet.packet, 1, packet_len_plus_2, stdout);
				}

				fflush(stdout);
				break;

			default:
				break;

		} // end switch


	} // end main loop

	// Release USB interface and close USB connection.
	// This code never reached at the moment -- need to implement signal handler for this.
	// However I've noticed no resource leaks. Process kill seems to take care of this.
	libusb_close (zena);
	libusb_exit(NULL);

	debug (1, "Normal exit");
	return EXIT_SUCCESS;
}
Exemplo n.º 4
0
/*
 * Collects per-protein properties for every accession of one taxon and writes
 * their distributions.
 *
 * Command line (exactly four arguments, otherwise usage() is called):
 *   argv[1]  proteinDatabaseName, example: sp031112
 *   argv[2]  protDbName, example: proteins031112
 *   argv[3]  taxon used to select rows of <proteinDatabaseName>.accToTaxon
 *   argv[4]  genome database holding knownGene, kgXref, kgProtMap2, pepMwAa, pepPi
 *
 * Output files: pepResDist.tab (overall residue frequencies) and, through
 * calDist(), pepMolWtDist.tab, pepPiDist.tab, pepHydroDist.tab,
 * pepCCntDist.tab, pepExonCntDist.tab and pepIPCntDist.tab.
 *
 * Aborts through errAbort() when a protein has no sequence, reports a zero
 * exon block count, or when more than MAX_PROTEINS rows are returned.
 */
int main(int argc, char *argv[])
{
/* Capacity of every per-protein array below. */
enum { MAX_PROTEINS = 100000 };

struct sqlConnection *conn, *conn2;
char query2[256];
struct sqlResult *sr2;
char **row2;
char cond_str[255];
char *proteinDatabaseName;      /* example: sp031112 */
char *protDbName;               /* example: proteins031112 */
FILE *o2;
char *accession;
char *aaSeq;
char *chp;
int i, j, len;
int cCnt;
char *answer, *answer2;
double hydroSum;
char *protDisplayId;
int aaResCnt[30];
double aaResCntDouble[30];
char aaAlphabet[30];
int aaResFound;
int totalResCnt;
int molWtCnt;
int pIcnt;

/* Zero-filled so that a byte without a table entry contributes 0, not garbage. */
double aa_hydro[256] = {0};
int icnt, jExon, ipcnt = 0;
char *taxon;
char *database;
int interProCount = 0;          /* was accumulated without ever being initialised */
char *kgId;

/* Static storage: seven 100000-element double arrays are too large to put on the stack safely. */
static double molWt[MAX_PROTEINS];
static double pI[MAX_PROTEINS];
static double aaLenDouble[MAX_PROTEINS];
static double avgHydro[MAX_PROTEINS];
static double cCountDouble[MAX_PROTEINS];
static double exonCountDouble[MAX_PROTEINS];
static double interProCountDouble[MAX_PROTEINS];

if (argc != 5) usage();

strcpy(aaAlphabet, "WCMHYNFIDQKRTVPGEASLXZB");

/* Ala:  1.800  Arg: -4.500  Asn: -3.500  Asp: -3.500  Cys:  2.500  Gln: -3.500 */
aa_hydro['A'] =  1.800;
aa_hydro['R'] = -4.500;
aa_hydro['N'] = -3.500;
aa_hydro['D'] = -3.500;
aa_hydro['C'] =  2.500;
aa_hydro['Q'] = -3.500;

/* Glu: -3.500  Gly: -0.400  His: -3.200  Ile:  4.500  Leu:  3.800  Lys: -3.900 */
aa_hydro['E'] = -3.500;
aa_hydro['G'] = -0.400;
aa_hydro['H'] = -3.200;
aa_hydro['I'] =  4.500;
aa_hydro['L'] =  3.800;
aa_hydro['K'] = -3.900;

/* Met:  1.900  Phe:  2.800  Pro: -1.600  Ser: -0.800  Thr: -0.700  Trp: -0.900 */
aa_hydro['M'] =  1.900;
aa_hydro['F'] =  2.800;
aa_hydro['P'] = -1.600;
aa_hydro['S'] = -0.800;
aa_hydro['T'] = -0.700;
aa_hydro['W'] = -0.900;

/* Tyr: -1.300  Val:  4.200  Asx: -3.500  Glx: -3.500  Xaa: -0.490 */
aa_hydro['Y'] = -1.300;
aa_hydro['V'] =  4.200;
/* B, Z and X are accepted as residues below; their listed values were never assigned. */
aa_hydro['B'] = -3.500;
aa_hydro['Z'] = -3.500;
aa_hydro['X'] = -0.490;

proteinDatabaseName = argv[1];
protDbName          = argv[2];
taxon               = argv[3];
database            = argv[4];

o2 = mustOpen("pepResDist.tab", "w");

conn  = hAllocConn(database);
conn2 = hAllocConn(database);

for (j=0; j<23; j++)
    {
    aaResCnt[j] = 0;
    }

icnt = jExon = 0;
pIcnt = 0;
molWtCnt = 0;

sqlSafef(query2, sizeof(query2), "select acc from %s.accToTaxon where taxon=%s;", proteinDatabaseName, taxon);
sr2  = sqlMustGetResult(conn2, query2);
row2 = sqlNextRow(sr2);

while (row2 != NULL)
    {
    /* Every counter grows by at most one per row and icnt by exactly one,
     * so this single check keeps all per-protein arrays in bounds. */
    if (icnt >= MAX_PROTEINS)
        {
        errAbort("Too many proteins - please set MAX_PROTEINS to be more than %d\n", MAX_PROTEINS);
        }

    accession = row2[0];

    sqlSafefFrag(cond_str, sizeof(cond_str), "acc='%s'", accession);
    protDisplayId = sqlGetField(proteinDatabaseName, "displayId", "val", cond_str);

    /* NOTE(review): protDisplayId may be NULL here and is still formatted with %s - confirm. */
    sqlSafefFrag(cond_str, sizeof(cond_str), "proteinID='%s'", protDisplayId);
    answer = sqlGetField(database, "knownGene", "name", cond_str);

    /* count InterPro domains */
    if (answer != NULL)
        {
        sqlSafefFrag(cond_str, sizeof(cond_str), "accession='%s'", accession);
        answer2 = sqlGetField(protDbName, "swInterPro", "count(*)", cond_str);
        if (answer2 != NULL)
            {
            interProCount = interProCount + atoi(answer2);
            interProCountDouble[ipcnt] = (double)(atoi(answer2));
            ipcnt++;
            }
        else
            {
            printf("%s is not in  InterPro DB.\n", accession);fflush(stdout);
            }
        }

    /* count exons, using coding exons from kgProtMap2 (KG-III) table */
    sqlSafefFrag(cond_str, sizeof(cond_str), "spID='%s'", accession);
    kgId = sqlGetField(database, "kgXref", "kgID", cond_str);
    sqlSafefFrag(cond_str, sizeof(cond_str), "qName='%s'", kgId);
    answer2 = sqlGetField(database, "kgProtMap2", "blockCount", cond_str);

    if (answer2 != NULL)
        {
        /* Parse in place; the former strdup() copy was never freed. */
        int exonCount = atoi(answer2);
        if (exonCount == 0)
            {
            errAbort("%s %s has 0 block count\n", accession, protDisplayId);
            }
        exonCountDouble[jExon] = (double)exonCount;
        jExon++;
        }

    /* process Mol Wt */
    sqlSafefFrag(cond_str, sizeof(cond_str), "accession='%s'", accession);
    answer2 = sqlGetField(database, "pepMwAa", "molWeight", cond_str);
    if (answer2 != NULL)
        {
        molWt[molWtCnt] = (double)(atof(answer2));
        molWtCnt++;
        }

    /* process pI */
    sqlSafefFrag(cond_str, sizeof(cond_str), "accession='%s'", accession);
    answer2 = sqlGetField(database, "pepPi", "pI", cond_str);
    if (answer2 != NULL)
        {
        pI[pIcnt] = (double)(atof(answer2));
        pIcnt++;
        }

    sqlSafefFrag(cond_str, sizeof(cond_str), "acc='%s'", accession);
    aaSeq = sqlGetField(proteinDatabaseName, "protein", "val", cond_str);
    if (aaSeq == NULL)
        {
        errAbort("%s does not have protein sequence data in %s, aborting ...\n", accession,
                 proteinDatabaseName);
        }

    len  = strlen(aaSeq);

    /* accumulate residue counts over all proteins */
    chp = aaSeq;
    for (i=0; i<len; i++)
        {
        aaResFound = 0;
        for (j=0; j<23; j++)
            {
            if (*chp == aaAlphabet[j])
                {
                aaResFound = 1;
                aaResCnt[j] ++;
                }
            }
        if (!aaResFound)
            {
            warn("%c %d not a valid AA residue in %s:\n%s", *chp, *chp, accession, aaSeq);
            }
        chp++;
        }

    /* calculate hydrophobicity */
    chp  = aaSeq;
    cCnt = 0;
    hydroSum = 0;
    for (i=0; i<len; i++)
        {
        /* unsigned char keeps the index inside 0..255 even when char is signed */
        hydroSum = hydroSum + aa_hydro[(unsigned char)(*chp)];

        /* count Cysteines */
        if ((*chp == 'C') || (*chp == 'c'))
            {
            cCnt ++;
            }
        chp++;
        }

    aaLenDouble[icnt]  = len;
    cCountDouble[icnt] = (double)cCnt;
    /* NOTE(review): an empty sequence makes this 0/0 - confirm sequences are never empty. */
    avgHydro[icnt] = hydroSum/(double)len;
    icnt++;
    row2 = sqlNextRow(sr2);
    }

totalResCnt = 0;
for (i=0; i<23; i++)
    {
    totalResCnt = totalResCnt + aaResCnt[i];
    }

/* write out residue count distribution */
for (i=0; i<20; i++)
    {
    aaResCntDouble[i] = ((double)aaResCnt[i])/((double)totalResCnt);
    fprintf(o2, "%d\t%f\n", i+1, (float)aaResCntDouble[i]);
    }
/* trailing row 21 with a zero value */
fprintf(o2, "%d\t%f\n", i+1, 0.0);
carefulClose(&o2);

/* calculate and write out various distributions */
calDist(molWt,               molWtCnt, 21,     0.0, 10000.0, "pepMolWtDist.tab");
calDist(pI,                  pIcnt,    61,     3.0, 0.2,     "pepPiDist.tab");
calDist(avgHydro,            icnt,     41,    -2.0, 0.1,     "pepHydroDist.tab");
calDist(cCountDouble,        icnt,     51,     0.0, 1.0,     "pepCCntDist.tab");
calDist(exonCountDouble,     jExon,    31,     0.0, 1.0,     "pepExonCntDist.tab");
calDist(interProCountDouble, ipcnt,    16,     0.0, 1.0,     "pepIPCntDist.tab");

sqlFreeResult(&sr2);
hFreeConn(&conn);
hFreeConn(&conn2);
return(0);
}