예제 #1
0
파일: main.c 프로젝트: jromer94/cs214
/*
 * Read customer records from ifp, one per line, and hand each record to
 * add_customer().
 *
 * A record is split on '|' into six fields, in this order: name,
 * customer id, balance, address1, address2, address3.  Fields missing at
 * the end of a line are passed to add_customer() as NULL.  Lines that
 * contain no field at all (for example blank lines) are skipped.
 *
 * The field pointers passed to add_customer() point into a line buffer
 * that is reused for the next line and released before returning.
 * NOTE(review): this assumes add_customer() copies whatever it keeps --
 * confirm against its definition.
 */
void read_customers(FILE *ifp)
{
	char *line = NULL;	/* owned by getline(); must never be reassigned */
	size_t cap = 0;

	/*
	 * Loop on getline()'s own result rather than feof(): feof() only
	 * becomes true after a read has already failed, so testing it first
	 * would process the previous line's leftovers one extra time.
	 */
	while (getline(&line, &cap, ifp) != -1)
	{
		/* Cut the line at its newline without touching the pointer. */
		line[strcspn(line, "\n")] = '\0';

		char *name = strtok(line, "|");
		if (name == NULL)
			continue;	/* nothing to tokenize on this line */

		char *customer_id = strtok(NULL, "|");
		char *balance = strtok(NULL, "|");
		char *address1 = strtok(NULL, "|");
		char *address2 = strtok(NULL, "|");
		char *address3 = strtok(NULL, "|");

		add_customer(name, customer_id, balance, address1, address2, address3);
	}

	free(line);
}
예제 #2
0
// Read the purchases file and record the customer/product pairs to train on.
//
// The first line of fp is skipped as a header. Every following line is
// handed to parseline(); lines for which it returns non-zero are ignored.
// Unknown customers and products are registered on first sight, and each
// accepted line contributes `quantity` entries to the global purchases
// array. The process exits with status 1 if more than INTERACTIONS pairs
// would be stored. A row of '#' marks is printed to stdout as progress,
// followed by the total number of pairs.
void readpurchases(FILE* fp) {
    long long pk = 0;
    long id, company, brand, quantity;
    long customer_loc, product_loc;

    char dump[MAX_STRING+1];
    // Skip the header line. If even that read fails there is no data and
    // the loop below is never entered.
    int have_data = fgets(dump, MAX_STRING, fp) != NULL;

    printf("                              |\r");
    // Drive the loop with fgets() itself: feof() only turns true after a
    // read has failed, so checking it up front would parse the last line
    // a second time and duplicate its pairs.
    while (have_data && fgets(dump, MAX_STRING, fp) != NULL) {
        if ((pk*30)%INTERACTIONS==0) {
            printf("#");
            fflush(stdout);
        }

        if (parseline(dump, &id, &company, &brand, &quantity) != 0) continue;

        customer_loc =  find_customer(id);
        if (customer_loc == -1) customer_loc = add_customer(id);
        product_loc = find_product(company, brand);
        if (product_loc == -1) product_loc = add_product(company, brand);

        for (long i=0; i<quantity; i++) {
            // Check capacity before storing so the slot at index
            // INTERACTIONS is never written.
            // NOTE(review): assumes purchases holds INTERACTIONS entries.
            if (pk >= INTERACTIONS) {
                log_err("pk > INTERACTIONS");
                exit(1);
            }
            purchases[pk].custp = customers[customer_loc];
            purchases[pk].prodp = products[product_loc];
            pk++;
        }
    }
    printf("total pk: %lld\n", pk);
}
예제 #3
0
// Make one training pass over the purchases file, updating the global
// customer_vecs and product_vecs in place.
//
// fp is rewound and its first line skipped as a header. Each following
// line is expected in the format
//   <id,chain,dept,category,company,brand,date,productsize,productmeasure,purchasequantity,purchaseamount>
// Lines that do not parse are reported with log_err() and skipped. For a
// parsed line the observed customer/product pair is updated with target
// 1, and NEGS randomly drawn products and NEGS randomly drawn customers
// are updated with target 0. Progress goes to stdout every 10000 lines,
// and both vector tables are written to CUSTFILE / PRODFILE every
// 10000000 lines. The pass stops at end of file or once LINES lines have
// been read. The process exits with status 1 if the scratch buffers
// cannot be allocated or a NaN shows up in an updated vector.
void run(FILE *fp) {
    clock_t start = clock();
    clock_t now;

    debug("Populating Hashes...");
    long id, company, brand, quantity;
    rewind(fp);
    char dump[MAX_STRING+1];
    // Skip the header line. If even that read fails there is no data and
    // the loop below is never entered.
    int have_data = fgets(dump, MAX_STRING, fp) != NULL;

    // Scratch buffers that accumulate the update for the current
    // customer / product vector before it is applied.
    real *custupdate = calloc(D, sizeof(real));
    real *produpdate = calloc(D, sizeof(real));
    if (custupdate == NULL || produpdate == NULL) {
        log_err("Could not allocate update buffers");
        exit(1);
    }
    real *cv;
    real *pv;
    real *randcv;
    real *randpv;

    real dot, mult;
    long customer_loc, product_loc;

    long linenum = 1;
    // Drive the loop with fgets() itself: feof() only turns true after a
    // read has failed, so checking it up front would train on the last
    // line a second time.
    while (have_data && fgets(dump, MAX_STRING, fp) != NULL) {
        linenum++;
        // Four fields are stored (id, company, brand, quantity); anything
        // else means the line is malformed and the variables may hold
        // stale or uninitialised values, so the line is skipped.
        if (sscanf(dump, "%ld,%*ld,%*ld,%*ld,%ld,%ld,%*25[^,],%*ld,%*30[^,],%ld,%*s", &id, &company, &brand, &quantity) != 4) {
            log_err("Skipping malformed line, linenum=%ld", linenum);
            if (linenum>LINES-1) break;
            continue;
        }
        // The parsed quantity is overridden: every line counts as a
        // single purchase.
        quantity = 1;

        customer_loc =  find_customer(id);
        if (customer_loc == -1) customer_loc = add_customer(id);
        product_loc = find_product(company, brand);
        if (product_loc == -1) product_loc = add_product(company, brand);

        // Locate the D-dimensional vectors of this customer and product.
        cv = customer_vecs + customer_loc*D;
        pv = product_vecs  + product_loc*D;
        // Step size shrinks linearly with the line number, with a floor
        // of ALPHA * 0.0001.
        alpha = ALPHA * (1. - linenum / (real)(LINES + 1.));
        if (alpha < ALPHA * 0.0001) alpha = ALPHA * 0.0001;

        // Update for the observed pair (target 1). The result is staged
        // in the scratch buffers so cv and pv stay unchanged while the
        // random draws below still read them.
        dot = 0.;
        for (int j=0; j<D; j++) dot += cv[j]*pv[j];
        mult = quantity*getmult(1., dot)*alpha;
        for (int j=0; j<D; j++)  custupdate[j] = mult*pv[j];
        for (int j=0; j<D; j++)  produpdate[j] = mult*cv[j];

        // Random products against this customer (target 0). The random
        // product vector is modified immediately; the customer's share
        // is staged.
        for (int i=0; i<quantity*NEGS; i++) {
            long randp = (lqrand()%PRODS);
            randpv = product_vecs + D*randp;
            dot = 0.;
            for (int j=0; j<D; j++) dot += cv[j]*randpv[j];
            mult = getmult(0., dot)*alpha;
            for (int j=0; j<D; j++)  custupdate[j] += mult*randpv[j];
            for (int j=0; j<D; j++)  randpv[j] += mult*cv[j];
        }
        // Random customers against this product (target 0), mirrored.
        for (int i=0; i<quantity*NEGS; i++) {
            long randc = (lqrand()%CUSTS);
            randcv = customer_vecs + D*randc;
            dot = 0.;
            for (int j=0; j<D; j++) dot += randcv[j]*pv[j];
            mult = getmult(0., dot)*alpha;
            for (int j=0; j<D; j++)  produpdate[j] += mult*randcv[j];
            for (int j=0; j<D; j++)  randcv[j] += mult*pv[j];
        }

        // Apply the staged updates.
        for (int j=0; j<D; j++) cv[j] += custupdate[j];
        for (int j=0; j<D; j++) pv[j] += produpdate[j];

        for (int j=0; j<D; j++)
            if (isnan(cv[j]) || isnan(pv[j])) {
                log_err("We've hit a nan!!!!, linenum=%ld, line=%s", linenum, dump);
                exit(1);
            }

        if (linenum%10000 == 0) {
            now = clock();
            // Remaining time = time spent per line so far * lines still
            // to go (not the projected total for the whole pass).
            double elapsed = (now - start)/(CLOCKS_PER_SEC+0.);
            int seconds_remaining = (int)(elapsed*(LINES - (double)linenum)/(linenum+0.));
            int hours = seconds_remaining/(60*60);
            seconds_remaining -= hours*60*60;
            int minutes = seconds_remaining/60;
            seconds_remaining -= minutes*60;
            // Character 13 is a carriage return: the status line is
            // redrawn in place.
            printf("%c%ldK lines processed. %.2f%% done. alpha=%g, num_customers=%ld, num_products=%ld. est time remaining %dh%2dm             ", 
                    13, linenum/1000, linenum/(LINES+0.)*100., alpha, num_customers, num_products, hours, minutes);
            fflush(stdout);
        }

        // Periodic checkpoint of both vector tables. A file that cannot
        // be opened is reported and skipped so training carries on.
        if (linenum%10000000 == 0) {
            FILE *fc = fopen(CUSTFILE,"w");
            if (fc == NULL) {
                log_err("Could not open %s for writing", CUSTFILE);
            } else {
                print_customers(fc);
                fclose(fc);
            }
            // Separate name so the input stream parameter is not shadowed.
            FILE *fprod = fopen(PRODFILE,"w");
            if (fprod == NULL) {
                log_err("Could not open %s for writing", PRODFILE);
            } else {
                print_products(fprod);
                fclose(fprod);
            }
        }

        if (linenum>LINES-1) break;
    }
    printf("\n");

    free(custupdate);
    free(produpdate);
}