/*
 * Compare `next` against the timestamp previously recorded for the calling
 * thread, report a failed comparison, then remember `next` for the next call.
 *
 * The previous value lives in thread-specific storage under `key`, which is
 * created once through pthread_once(&key_init, init_key).  On the first call
 * made by a thread there is nothing to compare against: a slot is allocated
 * and `next` is simply stored.
 *
 * When compare_timestamps(prev, next) returns zero and
 * backwards_timestamp_callback is set, the callback receives both values as
 * NUL-terminated lowercase hex dumps of their raw bytes (previous first).
 *
 * If the per-thread slot cannot be allocated or registered, the call returns
 * without recording anything; a later call will retry.
 *
 * NOTE(review): whether the slot is released at thread exit depends on the
 * destructor (if any) that init_key registers -- not visible here.
 */
void record_and_check_timestamp(TIME_TYPE *next)
{
    pthread_once(&key_init, init_key);

    TIME_TYPE *prev = pthread_getspecific(key);

    if (prev == NULL) {
        /* First timestamp seen on this thread: create its storage slot. */
        prev = malloc(sizeof *prev);
        if (prev == NULL) {
            return;
        }
        if (pthread_setspecific(key, prev) != 0) {
            /* Not registered, so nobody else would ever free it. */
            free(prev);
            return;
        }
    } else if (!compare_timestamps(prev, next)) {
        /* Two hex digits per byte plus the terminating NUL. */
        char time1[sizeof(TIME_TYPE) * 2 + 1];
        char time2[sizeof(TIME_TYPE) * 2 + 1];
        const unsigned char *prev_bytes = (const unsigned char *)prev;
        const unsigned char *next_bytes = (const unsigned char *)next;

        for (size_t i = 0; i < sizeof(TIME_TYPE); ++i) {
            snprintf(time1 + i * 2, 3, "%02x", prev_bytes[i]);
            snprintf(time2 + i * 2, 3, "%02x", next_bytes[i]);
        }

        if (backwards_timestamp_callback) {
            (*backwards_timestamp_callback)(time1, time2);
        }
    }

    /* Remember the newest value regardless of the comparison outcome. */
    *prev = *next;
}
/** * gl_util_timestamp_to_display: * @microsecs: number of microseconds since the Unix epoch in UTC * @now: the time to compare with * @format: clock format (12 or 24 hour) * * Return a human readable time, corresponding to @microsecs, using an * appropriate @format after comparing it to @now and discarding unnecessary * elements (for example, return only time if the date is today). * * Returns: a newly-allocated human readable string which represents @microsecs */ gchar * gl_util_timestamp_to_display (guint64 microsecs, GDateTime *now, GlUtilClockFormat format, gboolean show_second) { GDateTime *datetime; GDateTime *local; gchar *time = NULL; datetime = g_date_time_new_from_unix_utc (microsecs / G_TIME_SPAN_SECOND); if (datetime == NULL) { g_warning ("Error converting timestamp to time value"); goto out; } local = g_date_time_to_local (datetime); switch (format) { case GL_UTIL_CLOCK_FORMAT_12HR: switch (compare_timestamps (local, now)) { case GL_UTIL_TIMESTAMPS_SAME_DAY: if (show_second) { /* Translators: timestamp format for events on the * current day, showing the time with seconds in * 12-hour format. */ time = g_date_time_format (local, _("%l:%M:%S %p")); } else { /* Translators: timestamp format for events on the * current day, showing the time without seconds in * 12-hour format. */ time = g_date_time_format (local, _("%l:%M %p")); } break; case GL_UTIL_TIMESTAMPS_SAME_YEAR: if (show_second) { time = g_date_time_format (local, /* Translators: timestamp format for events in * the current year, showing the abbreviated * month name, day of the month and the time * with seconds in 12-hour format. */ _("%b %e %l:%M:%S %p")); } else { /* Translators: timestamp format for events in the * current year, showing the abbreviated month name, * day of the month and the time without seconds in * 12-hour format. 
*/ time = g_date_time_format (local, _("%b %e %l:%M %p")); } break; case GL_UTIL_TIMESTAMPS_DIFFERENT_YEAR: if (show_second) { time = g_date_time_format (local, /* Translators: timestamp format for events in * a different year, showing the abbreviated * month name, day of the month, year and the * time with seconds in 12-hour format. */ _("%b %e %Y %l:%M:%S %p")); } else { time = g_date_time_format (local, /* Translators: timestamp format for events in * a different year, showing the abbreviated * month name day of the month, year and the * time without seconds in 12-hour format. */ _("%b %e %Y %l:%M %p")); } break; default: g_assert_not_reached (); } break; case GL_UTIL_CLOCK_FORMAT_24HR: switch (compare_timestamps (local, now)) { case GL_UTIL_TIMESTAMPS_SAME_DAY: if (show_second) { /* Translators: timestamp format for events on the * current day, showing the time with seconds in * 24-hour format. */ time = g_date_time_format (local, _("%H:%M:%S")); } else { /* Translators: timestamp format for events on the * current day, showing the time without seconds in * 24-hour format. */ time = g_date_time_format (local, _("%H:%M")); } break; case GL_UTIL_TIMESTAMPS_SAME_YEAR: if (show_second) { /* Translators: timestamp format for events in the * current year, showing the abbreviated month name, * day of the month and the time with seconds in * 24-hour format. */ time = g_date_time_format (local, _("%b %e %H:%M:%S")); } else { /* Translators: timestamp format for events in the * current year, showing the abbreviated month name, * day of the month and the time without seconds in * 24-hour format. */ time = g_date_time_format (local, _("%b %e %H:%M")); } break; case GL_UTIL_TIMESTAMPS_DIFFERENT_YEAR: if (show_second) { time = g_date_time_format (local, /* Translators: timestamp format for events in * a different year, showing the abbreviated * month name, day of the month, year and the * time with seconds in 24-hour format. 
*/ _("%b %e %Y %H:%M:%S")); } else { /* Translators: timestamp format for events in a * different year, showing the abbreviated month name, * day of the month, year and the time without seconds * in 24-hour format. */ time = g_date_time_format (local, _("%b %e %Y %H:%M")); } break; default: g_assert_not_reached (); } break; default: g_assert_not_reached (); } g_date_time_unref (datetime); g_date_time_unref (local); if (time == NULL) { g_warning ("Error converting datetime to string"); } out: return time; }
int main(int argc, char **argv) { //print usage if needed if (argc != 2) { fprintf(stderr, "Usage: %s totalRecordNumber\n", argv[0]); exit(0); } //get total record number from argument int totalRecordNumber = atoi(argv[1]); // time the program struct timeval sysTimeStart, sysTimeEnd; gettimeofday(&sysTimeStart, NULL); // set up directories; mkdir("cities", 0777); mkdir("datestamps", 0777); mkdir("messages", 0777); mkdir("states", 0777); mkdir("timestamps", 0777); mkdir("users", 0777); mkdir("bplus", 0777); // set up some counters, etc. unsigned int recordCount = 0, userCount = 0, cityCount = 0, stateCount = 0, timestampCount = 0, datestampCount = 0, messageCount = 0, i,j; // SET UP HASH TABLES FOR CITIES, STATES, TIMESTAMPS, DATESTAMPS // cities city_node *cityHT[HASH_SIZE]; for (i = 0; i < HASH_SIZE; i++) { cityHT[i] = malloc(sizeof(city_node)); cityHT[i] = NULL; } // states state_node *stateHT[HASH_SIZE]; for (i = 0; i < HASH_SIZE; i++) { stateHT[i] = malloc(sizeof(state_node)); stateHT[i] = NULL; } // timestamps timestamp_node *timestampHT[HASH_SIZE]; for (i = 0; i < HASH_SIZE; i++) { timestampHT[i] = malloc(sizeof(timestamp_node)); timestampHT[i] = NULL; } // datestamps datestamp_node *datestampHT[HASH_SIZE]; for (i = 0; i < HASH_SIZE; i++) { datestampHT[i] = malloc(sizeof(datestamp_node)); datestampHT[i] = NULL; } // LOOP OVER RECORD FILES char filename[1024]; FILE *fp = NULL; for (i = 0; i < totalRecordNumber; i++) { //open the corresponding file sprintf(filename, "record_%06d.dat", i); fp = fopen(filename,"rb"); if (!fp) { fprintf(stderr, "Cannot open %s\n", filename); continue; } record_t *record = read_record(fp); // split location into city and state, as best we can char cityStr[TEXT_SHORT]; char stateStr[TEXT_SHORT]; // there's one record where the location is \0, which strtok breaks on if (record->location[0] == '\0') { strncpy(cityStr, "", TEXT_SHORT); strncpy(stateStr, "", TEXT_SHORT); } else { char loc[TEXT_SHORT]; strncpy(loc, record->location, 
TEXT_SHORT); strncpy(cityStr, strtok(loc, ","), TEXT_SHORT); strncpy(stateStr, strtok(NULL, ","), TEXT_SHORT); } // create state state_t state; strncpy(state.name, stateStr, TEXT_SHORT); // get stateId from hash if we have it already unsigned int stateHash = hash_state(&state) % HASH_SIZE; state_node *s; int stateId = -1; for(s = stateHT[stateHash]; (s != NULL) && (stateId == -1); s = s->next) { if (compare_states(&state, &(s->state)) == 0) { stateId = s->state.stateId; } } // assign stateId, add to hash table, and write file if we don't have it if (stateId == -1) { state.stateId = stateCount; stateId = stateCount; write_state(stateCount, &state); stateCount++; s = malloc(sizeof(state_node)); s->state = state; s->next = stateHT[stateHash]; stateHT[stateHash] = s; } // create city city_t city; city.stateId = stateId; strncpy(city.name, cityStr, TEXT_SHORT); // get cityId from hash if we have it already unsigned int cityHash = hash_city(&city) % HASH_SIZE; city_node *c; int cityId = -1; for(c = cityHT[cityHash]; (c != NULL) && (cityId == -1); c = c->next) { if (compare_cities(&city, &(c->city)) == 0) { cityId = c->city.cityId; } } // assign cityId, add to hash table, and write file if we don't have it if (cityId == -1) { city.cityId = cityCount; cityId = cityCount; write_city(cityCount, &city); cityCount++; c = malloc(sizeof(city_node)); c->city = city; c->next = cityHT[cityHash]; cityHT[cityHash] = c; } // create and write user user_t user; user.userId = record->id; user.cityId = cityId; user.stateId = stateId; strncpy(user.name, record->name, TEXT_SHORT); write_user(userCount, &user); userCount++; // loop over messages for(j = 0; j < record->message_num; j++) { // create timestamp timestamp_t timestamp; timestamp.hour = record->messages[j].hour; timestamp.minute = record->messages[j].minute; // get timestampId from hash if we have it already unsigned int timestampHash = hash_timestamp(×tamp) % HASH_SIZE; timestamp_node *t; int tsId = -1; for(t = 
timestampHT[timestampHash]; (t != NULL) && (tsId == -1); t = t->next) { if (compare_timestamps(×tamp, &(t->timestamp)) == 0) { tsId = t->timestamp.timestampId; } } // assign timestampId, add to hash table, and write file if we don't have it if (tsId == -1) { timestamp.timestampId = timestampCount; tsId = timestampCount; write_timestamp(timestampCount, ×tamp); timestampCount++; t = malloc(sizeof(timestamp_node)); t->timestamp = timestamp; t->next = timestampHT[timestampHash]; timestampHT[timestampHash] = t; } // create datestamp datestamp_t datestamp; datestamp.year = record->messages[j].year; datestamp.month = record->messages[j].month; datestamp.day = record->messages[j].day; // get datestampId from hash if we have it already unsigned int datestampHash = hash_datestamp(&datestamp) % HASH_SIZE; datestamp_node *d; int dsId = -1; for(d = datestampHT[datestampHash]; (d != NULL) && (dsId == -1); d = d->next) { if (compare_datestamps(&datestamp, &(d->datestamp)) == 0) { dsId = d->datestamp.datestampId; } } // assign datestampId, add to hash table, and write file if we don't have it if (dsId == -1) { datestamp.datestampId = datestampCount; dsId = datestampCount; write_datestamp(datestampCount, &datestamp); datestampCount++; d = malloc(sizeof(datestamp_node)); d->datestamp = datestamp; d->next = datestampHT[datestampHash]; datestampHT[datestampHash] = d; } // create and write message message_t message; strncpy(message.text, record->messages[j].text, TEXT_LONG); message.userId = user.userId; message.timestampId = tsId; message.datestampId = dsId; message.messageId = messageCount; write_message(messageCount, &message); messageCount++; } // free and close record free_record(record); fclose(fp); } // free city nodes city_node *cNode; for (i = 0; i < HASH_SIZE; i++) { cNode = cityHT[i]; while (cNode != NULL) { city_node* tmp = cNode; cNode = cNode->next; free (tmp); } } // free state nodes state_node *sNode; for (i = 0; i < HASH_SIZE; i++) { sNode = stateHT[i]; while (sNode != 
NULL) { state_node* tmp = sNode; sNode = sNode->next; free (tmp); } } // free timestamp nodes timestamp_node *tNode; for (i = 0; i < HASH_SIZE; i++) { tNode = timestampHT[i]; while (tNode != NULL) { timestamp_node* tmp = tNode; tNode = tNode->next; free (tmp); } } // free datestamp nodes datestamp_node *dNode; for (i = 0; i < HASH_SIZE; i++) { dNode = datestampHT[i]; while (dNode != NULL) { datestamp_node* tmp = dNode; dNode = dNode->next; free (tmp); } } // create, write, print file count information file file_count_t fc; fc.users = userCount; fc.cities = cityCount; fc.states = stateCount; fc.messages = messageCount; fc.timestamps = timestampCount; fc.datestamps = datestampCount; write_file_count(&fc); print_file_count(&fc); // end timing the program gettimeofday(&sysTimeEnd, NULL); float totaltime2 = (sysTimeEnd.tv_sec - sysTimeStart.tv_sec) + (sysTimeEnd.tv_usec - sysTimeStart.tv_usec) / 1000000.0f; printf("Process time %f seconds\n", totaltime2); return 0; }