/*
 * Feed one byte into the line-oriented record parser kept in `ep`.
 *
 * Bytes other than CR/LF are appended to ep->line_buf (bytes that would
 * overflow the MAX_EXT_LINE_LEN buffer are silently dropped).  A CR or LF
 * ends the current line, which is then interpreted as one of:
 *
 *   ARTIST=<text>   remembered in ep->artist_buf
 *   ALBUM=<text>    remembered in ep->album_buf
 *   TITLE=<text>    remembered in ep->title_buf
 *   .               end of record: the remembered fields are converted
 *                   into `ti`, ti->raw_metadata is rebuilt as
 *                   "artist - title", and the remembered fields are cleared
 *
 * Any other line is ignored.  The line buffer is reset after every
 * terminator.
 *
 * Returns 1 when this byte completed a record (a "." line), 0 otherwise.
 */
static int
parse_external_byte (RIP_MANAGER_INFO* rmi, External_Process* ep,
                     TRACK_INFO* ti, char c)
{
    static const char artist_key[] = "ARTIST=";
    static const char album_key[] = "ALBUM=";
    static const char title_key[] = "TITLE=";
    int record_done = 0;

    /* Ordinary byte: accumulate it and keep the buffer NUL-terminated. */
    if (c != '\r' && c != '\n') {
        if (ep->line_buf_idx < MAX_EXT_LINE_LEN - 1) {
            ep->line_buf[ep->line_buf_idx] = c;
            ep->line_buf_idx += 1;
            ep->line_buf[ep->line_buf_idx] = 0;
        }
        return record_done;
    }

    /* Line terminator: interpret the completed line. */
    if (strcmp (ep->line_buf, ".") == 0) {
        /* Found end of record! */
        mchar composed[MAX_TRACK_LEN];

        gstring_from_string (rmi, ti->artist, MAX_TRACK_LEN,
                             ep->artist_buf, CODESET_METADATA);
        gstring_from_string (rmi, ti->album, MAX_TRACK_LEN,
                             ep->album_buf, CODESET_METADATA);
        gstring_from_string (rmi, ti->title, MAX_TRACK_LEN,
                             ep->title_buf, CODESET_METADATA);
        g_snprintf (composed, MAX_TRACK_LEN, "%s - %s",
                    ti->artist, ti->title);
        string_from_gstring (rmi, ti->raw_metadata, MAX_TRACK_LEN,
                             composed, CODESET_METADATA);
        ti->have_track_info = 1;
        ti->save_track = TRUE;

        /* Start the next record with empty fields. */
        ep->artist_buf[0] = 0;
        ep->album_buf[0] = 0;
        ep->title_buf[0] = 0;
        record_done = 1;
    } else if (strncmp (ep->line_buf, artist_key, sizeof (artist_key) - 1) == 0) {
        strcpy (ep->artist_buf, ep->line_buf + (sizeof (artist_key) - 1));
    } else if (strncmp (ep->line_buf, album_key, sizeof (album_key) - 1) == 0) {
        strcpy (ep->album_buf, ep->line_buf + (sizeof (album_key) - 1));
    } else if (strncmp (ep->line_buf, title_key, sizeof (title_key) - 1) == 0) {
        strcpy (ep->title_buf, ep->line_buf + (sizeof (title_key) - 1));
    }

    /* Whatever the line was, begin a fresh one. */
    ep->line_buf_idx = 0;
    ep->line_buf[0] = 0;

    return record_done;
}
/*
 * Read the rules file and load every rule into rmi->parse_rules.
 *
 * rmi         Receives the rule array in rmi->parse_rules.  The array is
 *             terminated by a sentinel entry whose `cmd` is 0.
 * rules_file  Path of the rules file.  If it is NULL, empty, or cannot be
 *             opened, use_default_rules(rmi) is called instead.
 *
 * Each non-blank line starts (after optional whitespace) with a command
 * character: 'm' for a match rule, 's' for a substitution rule, '#' for a
 * comment.  The command must be followed by '/', then the match string,
 * then (for 's' only) the substitution string, then the flags; the string
 * and flag syntax is defined by parse_escaped_string() and parse_flags().
 * Lines that fail to parse or compile are reported on stdout and skipped.
 */
void
init_metadata_parser(RIP_MANAGER_INFO * rmi, char *rules_file)
{
    FILE *fp;
    int ri;			/* Rule index */
    int rn;			/* Number of rules allocated */

    if (!rules_file || !*rules_file) {
	use_default_rules(rmi);
	return;
    }
    fp = fopen(rules_file, "r");
    if (!fp) {
	use_default_rules(rmi);
	return;
    }

    rmi->parse_rules = 0;
    ri = rn = 0;
    while (1) {
	char rule_buf[MAX_RULE_SIZE];
	char match_buf[MAX_RULE_SIZE];
	char subst_buf[MAX_RULE_SIZE];
	mchar w_match_buf[MAX_RULE_SIZE];
	mchar w_subst_buf[MAX_RULE_SIZE];
	char *rbp;
	char *rp;
	int got_command;
	int rc;

	/* Allocate memory for rule, if necessary. */
	if (ri + 1 != rn) {
	    Parse_Rule *grown = realloc(rmi->parse_rules,
					(size_t) (ri + 1) * sizeof(Parse_Rule));
	    if (!grown) {
		/* realloc left the old array untouched.  It holds `ri`
		   complete rules but no sentinel, so give up the last rule
		   to terminate the list; with no rules at all, fall back
		   to the defaults. */
		printf("Warning: out of memory while loading rules file\n");
		if (ri > 0)
		    rmi->parse_rules[ri - 1].cmd = 0;
		else
		    use_default_rules(rmi);
		break;
	    }
	    rmi->parse_rules = grown;
	    rn = ri + 1;
	}

	/* Always start from a clean slot.  A line rejected part-way through
	   parsing has already written cmd/flags into this slot; without the
	   reset those leftovers would leak into the next rule, or -- if the
	   rejected line was the last one -- into the sentinel, leaving the
	   list unterminated. */
	/* If there are no more rules in the file, */
	/* this rule will become the sentinel null rule */
	memset(&rmi->parse_rules[ri], 0, sizeof(Parse_Rule));

	/* Get next line from file */
	rp = fgets(rule_buf, sizeof(rule_buf), fp);
	if (!rp)
	    break;

	/* Skip leading whitespace */
	rbp = rule_buf;
	while (*rbp && isspace((unsigned char) *rbp))
	    rbp++;
	if (!*rbp)
	    continue;

	/* Get command */
	got_command = 0;
	switch (*rbp++) {
	case 'm':
	    got_command = 1;
	    rmi->parse_rules[ri].cmd = PARSERULE_CMD_MATCH;
	    break;
	case 's':
	    got_command = 1;
	    rmi->parse_rules[ri].cmd = PARSERULE_CMD_SUBST;
	    break;
	case '#':
	    got_command = 0;
	    break;
	default:
	    got_command = 0;
	    printf("Warning: malformed command in rules file:\n%s\n", rule_buf);
	    break;
	}
	if (!got_command)
	    continue;

	/* Skip past fwd slash */
	if (*rbp++ != '/') {
	    printf("Warning: malformed command in rules file:\n%s\n", rule_buf);
	    continue;
	}

	/* Parse match string */
	rbp = parse_escaped_string(match_buf, rbp);
	if (!rbp) {
	    printf("Warning: malformed command in rules file:\n%s\n", rule_buf);
	    continue;
	}
	/* Only print the buffer once we know the parse succeeded. */
	debug_printf("match_buf=%s\n", match_buf);

	/* Parse subst string */
	if (rmi->parse_rules[ri].cmd == PARSERULE_CMD_SUBST) {
	    rbp = parse_escaped_string(subst_buf, rbp);
	    if (!rbp) {
		printf("Warning: malformed command in rules file:\n%s\n", rule_buf);
		continue;
	    }
	    debug_printf("subst_buf=%s\n", subst_buf);
	}

	/* Parse flags */
	rc = parse_flags(&rmi->parse_rules[ri], rbp);
	if (!rc) {
	    printf("Warning: malformed command in rules file:\n%s\n", rule_buf);
	    continue;
	}

	/* Compile the rule */
	debug_printf("Compiling the rule\n");
	gstring_from_string(rmi, w_match_buf, MAX_RULE_SIZE, match_buf, CODESET_UTF8);
	if (!compile_rule(&rmi->parse_rules[ri], w_match_buf)) {
	    /* NOTE(review): assumes compile_rule() releases anything it
	       allocated before reporting failure -- confirm. */
	    printf("Warning: malformed regular expression:\n%s\n", match_buf);
	    continue;
	}

	/* Copy rule strings */
	debug_printf("Copying rule string (1)\n");
	debug_mprintf(m_("String is ") m_S m_("\n"), w_match_buf);
	rmi->parse_rules[ri].match = mstrdup(w_match_buf);
	debug_printf("Copying rule string (2)\n");
	if (rmi->parse_rules[ri].cmd == PARSERULE_CMD_SUBST) {
	    debug_printf("Copying rule string (3)\n");
	    gstring_from_string(rmi, w_subst_buf, MAX_RULE_SIZE, subst_buf, CODESET_UTF8);
	    debug_printf("Copying rule string (4)\n");
	    rmi->parse_rules[ri].subst = mstrdup(w_subst_buf);
	    debug_printf("Copying rule string (5)\n");
	}

	debug_printf("End of loop\n");
	ri++;
    }
    fclose(fp);
}
void parse_metadata(RIP_MANAGER_INFO * rmi, TRACK_INFO * ti) { int i; int eflags; int rc; int matched; mchar query_string[MAX_TRACK_LEN]; Parse_Rule *rulep; /* Has any m/.../s rule matched? */ BOOL save_track_matched = FALSE; /* Has any m/.../x rule matched? */ BOOL exclude_track_matched = FALSE; ti->artist[0] = 0; ti->title[0] = 0; ti->album[0] = 0; ti->composed_metadata[0] = 0; ti->save_track = TRUE; /* Loop through rules, if we find a matching rule, then use it */ /* For now, only default rules supported with ascii regular expressions. */ debug_printf("Converting query string to wide\n"); gstring_from_string(rmi, query_string, MAX_TRACK_LEN, ti->raw_metadata, CODESET_METADATA); for (rulep = rmi->parse_rules; rulep->cmd; rulep++) { #if !defined (USE_GLIB_REGEX) regmatch_t pmatch[MAX_SUBMATCHES + 1]; #endif eflags = 0; if (rulep->cmd == PARSERULE_CMD_MATCH) { debug_mprintf(m_("Testing match rule: ") m_S m_(" vs. ") m_S m_("\n"), query_string, rulep->match); if (rulep->flags & PARSERULE_SKIP) { #if defined (USE_GLIB_REGEX) rc = g_regex_match(rulep->reg, query_string, 0, NULL); matched = rc; #else rc = mregexec(rulep->reg, query_string, 0, NULL, eflags); matched = !rc; #endif if (!matched) { continue; } /* GCS FIX: We need to return to the caller that the metadata should be dropped. 
*/ debug_printf("Skip rule matched\n"); ti->save_track = FALSE; ti->have_track_info = 0; return; } else if (rulep->flags & PARSERULE_SAVE) { #if defined (USE_GLIB_REGEX) rc = g_regex_match(rulep->reg, query_string, 0, NULL); matched = rc; #else rc = mregexec(rulep->reg, query_string, 0, NULL, eflags); matched = !rc; #endif if (!matched) { if (!save_track_matched) ti->save_track = FALSE; continue; } if (!exclude_track_matched) { ti->save_track = TRUE; save_track_matched = TRUE; } } else if (rulep->flags & PARSERULE_EXCLUDE) { #if defined (USE_GLIB_REGEX) rc = g_regex_match(rulep->reg, query_string, 0, NULL); matched = rc; #else rc = mregexec(rulep->reg, query_string, 0, NULL, eflags); matched = !rc; #endif if (matched && !save_track_matched) { /* Rule matched => Exclude track */ ti->save_track = FALSE; exclude_track_matched = TRUE; } } else { #if defined (USE_GLIB_REGEX) GMatchInfo *match_info; gint nmatch; rc = g_regex_match(rulep->reg, query_string, 0, &match_info); if (rc == 0) { /* Didn't match rule. */ continue; } nmatch = g_match_info_get_match_count(match_info); debug_printf("Got %d matches\n", nmatch); for (i = 0; i < nmatch; i++) { gchar *match = g_match_info_fetch(match_info, i); debug_printf("[%d] = %s\n", i, match); g_free(match); } copy_rule_result(ti->artist, match_info, rulep->artist_idx); copy_rule_result(ti->title, match_info, rulep->title_idx); copy_rule_result(ti->album, match_info, rulep->album_idx); copy_rule_result(ti->track_p, match_info, rulep->trackno_idx); copy_rule_result(ti->year, match_info, rulep->year_idx); g_match_info_free(match_info); #else eflags = 0; rc = mregexec(rulep->reg, query_string, MAX_SUBMATCHES + 1, pmatch, eflags); if (rc != 0) { /* Didn't match rule. 
*/ continue; } for (i = 0; i < MAX_SUBMATCHES + 1; i++) { debug_printf("pmatch[%d]: (so,eo) = (%d,%d)\n", i, pmatch[i].rm_so, pmatch[i].rm_eo); } copy_rule_result(ti->artist, query_string, pmatch, rulep->artist_idx); copy_rule_result(ti->title, query_string, pmatch, rulep->title_idx); copy_rule_result(ti->album, query_string, pmatch, rulep->album_idx); copy_rule_result(ti->track_p, query_string, pmatch, rulep->trackno_idx); copy_rule_result(ti->year, query_string, pmatch, rulep->year_idx); #endif ti->have_track_info = 1; compose_metadata(rmi, ti); debug_mprintf(m_("Parsed track info.\n") m_("ARTIST: ") m_S m_("\n") m_("TITLE: ") m_S m_("\n") m_("ALBUM: ") m_S m_("\n") m_("TRACK: ") m_S m_("\n") m_("YEAR: ") m_S m_("\n"), ti->artist, ti->title, ti->album, ti->track_p, ti->year); return; } } else if (rulep->cmd == PARSERULE_CMD_SUBST) { #if defined (USE_GLIB_REGEX) GMatchInfo *match_info; gint start_pos, end_pos; gchar *tmp, *subst_string; debug_mprintf(m_("Testing subst rule: ") m_S m_(" vs. ") m_S m_("\n"), query_string, rulep->match); rc = g_regex_match(rulep->reg, query_string, 0, &match_info); if (rc == 0) { /* Didn't match rule. */ continue; } rc = g_match_info_fetch_pos(match_info, 0, &start_pos, &end_pos); if (!rc) { debug_printf("g_match_info_fetch_pos returned 0\n"); g_match_info_free(match_info); continue; } debug_printf("Matched at (%d,%d)\n", start_pos, end_pos); if (start_pos == -1) { g_match_info_free(match_info); continue; } tmp = g_strndup(query_string, start_pos); tmp[start_pos] = 0; subst_string = g_strconcat(tmp, rulep->subst, &tmp[end_pos], NULL); g_free(tmp); g_match_info_free(match_info); mstrncpy(query_string, subst_string, MAX_TRACK_LEN); #else mchar subst_string[MAX_TRACK_LEN]; int used, left; debug_mprintf(m_("Testing subst rule: ") m_S m_(" vs. ") m_S m_("\n"), query_string, rulep->match); rc = mregexec(rulep->reg, query_string, 1, pmatch, eflags); if (rc != 0) { /* Didn't match rule. */ continue; } /* Update the query string and continue. 
*/ debug_printf("Matched at (%d,%d)\n", pmatch[0].rm_so, pmatch[0].rm_eo); mstrncpy(subst_string, query_string, pmatch[0].rm_so + 1); debug_mprintf(m_("(1) subst_string = ") m_S m_("\n"), subst_string); used = pmatch[0].rm_so; left = MAX_TRACK_LEN - used; mstrncpy(subst_string + used, rulep->subst, left); debug_mprintf(m_("(2) subst_string = ") m_S m_("\n"), subst_string); used += mstrlen(rulep->subst); left = MAX_TRACK_LEN - used; mstrncpy(subst_string + used, query_string + pmatch[0].rm_eo, left); debug_mprintf(m_("(3) subst_string = ") m_S m_("\n"), subst_string); mstrncpy(query_string, subst_string, MAX_TRACK_LEN); debug_mprintf(m_("(4) query_string = ") m_S m_("\n"), query_string); #endif } } debug_printf("Fell through while parsing data...\n"); mstrncpy(ti->title, query_string, MAX_TRACK_LEN); ti->have_track_info = 1; compose_metadata(rmi, ti); }
/*
 * ASCII case-insensitive string equality.  Returns 1 when `field` and
 * `name` are equal ignoring the case of A-Z/a-z, 0 otherwise.
 */
static int
vorbis_field_name_is(const char *field, const char *name)
{
    for (;; field++, name++) {
	unsigned char a = (unsigned char) *field;
	unsigned char b = (unsigned char) *name;

	if (a >= 'A' && a <= 'Z')
	    a = (unsigned char) (a - 'A' + 'a');
	if (b >= 'A' && b <= 'Z')
	    b = (unsigned char) (b - 'A' + 'a');
	if (a != b)
	    return 0;
	if (a == 0)
	    return 1;
    }
}

/*
 * Check that bytes [start, length) of `val` form valid UTF-8.
 *
 * On the first invalid sequence a warning naming comment `comment_idx`
 * of `stream` is emitted and 0 is returned; returns 1 when every
 * sequence is valid.
 */
static int
vorbis_comment_value_is_utf8(stream_processor * stream, int comment_idx,
			     const unsigned char *val, int start, int length)
{
    int j = start;

    while (j < length) {
	const unsigned char *p = &val[j];
	int remaining = length - j;
	int bytes;
	int broken = 0;

	/* Derive the sequence length from the lead byte. */
	if ((p[0] & 0x80) == 0)
	    bytes = 1;
	else if ((p[0] & 0x40) == 0x40) {
	    if ((p[0] & 0x20) == 0)
		bytes = 2;
	    else if ((p[0] & 0x10) == 0)
		bytes = 3;
	    else if ((p[0] & 0x08) == 0)
		bytes = 4;
	    else if ((p[0] & 0x04) == 0)
		bytes = 5;
	    else if ((p[0] & 0x02) == 0)
		bytes = 6;
	    else {
		warn(_("Warning: Illegal UTF-8 sequence in "
		       "comment %d (stream %d): length "
		       "marker wrong\n"), comment_idx, stream->num);
		return 0;
	    }
	} else {
	    /* A continuation byte where a lead byte was expected. */
	    warn(_("Warning: Illegal UTF-8 sequence in comment "
		   "%d (stream %d): length marker wrong\n"), comment_idx, stream->num);
	    return 0;
	}

	if (bytes > remaining) {
	    warn(_("Warning: Illegal UTF-8 sequence in comment "
		   "%d (stream %d): too few bytes\n"), comment_idx, stream->num);
	    return 0;
	}

	switch (bytes) {
	case 1:
	    /* No more checks needed */
	    break;
	case 2:
	    if ((p[1] & 0xC0) != 0x80)
		broken = 1;
	    if ((p[0] & 0xFE) == 0xC0)
		broken = 1;
	    break;
	case 3:
	    if (!((p[0] == 0xE0 && p[1] >= 0xA0 && p[1] <= 0xBF
		   && (p[2] & 0xC0) == 0x80)
		  || (p[0] >= 0xE1 && p[0] <= 0xEC
		      && (p[1] & 0xC0) == 0x80
		      && (p[2] & 0xC0) == 0x80)
		  || (p[0] == 0xED && p[1] >= 0x80 && p[1] <= 0x9F
		      && (p[2] & 0xC0) == 0x80)
		  || (p[0] >= 0xEE && p[0] <= 0xEF
		      && (p[1] & 0xC0) == 0x80
		      && (p[2] & 0xC0) == 0x80)))
		broken = 1;
	    if (p[0] == 0xE0 && (p[1] & 0xE0) == 0x80)
		broken = 1;
	    break;
	case 4:
	    if (!((p[0] == 0xF0 && p[1] >= 0x90 && p[1] <= 0xBF
		   && (p[2] & 0xC0) == 0x80
		   && (p[3] & 0xC0) == 0x80)
		  || (p[0] >= 0xF1 && p[0] <= 0xF3
		      && (p[1] & 0xC0) == 0x80
		      && (p[2] & 0xC0) == 0x80
		      && (p[3] & 0xC0) == 0x80)
		  || (p[0] == 0xF4 && p[1] >= 0x80 && p[1] <= 0x8F
		      && (p[2] & 0xC0) == 0x80
		      && (p[3] & 0xC0) == 0x80)))
		broken = 1;
	    if (p[0] == 0xF0 && (p[1] & 0xF0) == 0x80)
		broken = 1;
	    break;
	    /* 5 and 6 aren't actually allowed at this point */
	case 5:
	    broken = 1;
	    break;
	case 6:
	    broken = 1;
	    break;
	default:
	    break;
	}

	if (broken) {
	    warn(_("Warning: Illegal UTF-8 sequence in comment "
		   "%d (stream %d): invalid sequence\n"), comment_idx, stream->num);
	    return 0;
	}

	j += bytes;
    }
    return 1;
}

/*
 * Feed one Ogg page of a Vorbis stream into the stream state.
 *
 * While fewer than three header packets have been decoded, packets are
 * passed to vorbis_synthesis_headerin().  When the third header packet
 * completes, stream information is logged and the artist, title, album
 * and tracknumber comments are copied into `ti`.  Field names are matched
 * case-insensitively.  If no title was found, a default title is set.
 * For non-header pages the granule position and byte count in the
 * per-stream info are updated.
 *
 * Return 1 if the page is a header page, 0 otherwise.
 */
static int
vorbis_process(RIP_MANAGER_INFO * rmi, stream_processor * stream, ogg_page * page, TRACK_INFO * ti)
{
    ogg_packet packet;
    misc_vorbis_info *inf = stream->data;
    int i, header = 0;
    int k;

    ogg_stream_pagein(&stream->os, page);
    if (inf->doneheaders < 3)
	header = 1;

    while (ogg_stream_packetout(&stream->os, &packet) > 0) {
	if (inf->doneheaders < 3) {
	    if (vorbis_synthesis_headerin(&inf->vi, &inf->vc, &packet) < 0) {
		warn(_("Warning: Could not decode vorbis header "
		       "packet - invalid vorbis stream (%d)\n"), stream->num);
		continue;
	    }
	    inf->doneheaders++;
	    if (inf->doneheaders == 3) {
		if (ogg_page_granulepos(page) != 0 || ogg_stream_packetpeek(&stream->os, NULL) == 1)
		    warn(_("Warning: Vorbis stream %d does not have headers "
			   "correctly framed. Terminal header page contains "
			   "additional packets or has non-zero granulepos\n"), stream->num);
		debug_printf("Vorbis headers parsed for stream %d, "
			     "information follows...\n", stream->num);

		debug_printf("Version: %d\n", inf->vi.version);
		/* Look the vendor string up in the known-releases table. */
		k = 0;
		while (releases[k].vendor_string) {
		    if (!strcmp(inf->vc.vendor, releases[k].vendor_string)) {
			debug_printf("Vendor: %s (%s)\n", inf->vc.vendor, releases[k].desc);
			break;
		    }
		    k++;
		}
		if (!releases[k].vendor_string)
		    debug_printf("Vendor: %s\n", inf->vc.vendor);
		debug_printf("Channels: %d\n", inf->vi.channels);
		debug_printf("Rate: %ld\n\n", inf->vi.rate);

		if (inf->vi.bitrate_nominal > 0)
		    debug_printf("Nominal bitrate: %f kb/s\n", (double)inf->vi.bitrate_nominal / 1000.0);
		else
		    debug_printf("Nominal bitrate not set\n");

		if (inf->vi.bitrate_upper > 0)
		    debug_printf("Upper bitrate: %f kb/s\n", (double)inf->vi.bitrate_upper / 1000.0);
		else
		    debug_printf("Upper bitrate not set\n");

		if (inf->vi.bitrate_lower > 0)
		    debug_printf("Lower bitrate: %f kb/s\n", (double)inf->vi.bitrate_lower / 1000.0);
		else
		    debug_printf("Lower bitrate not set\n");

		if (inf->vc.comments > 0)
		    debug_printf("User comments section follows...\n");

		for (i = 0; i < inf->vc.comments; i++) {
		    char *comment = inf->vc.user_comments[i];
		    char *sep = strchr(comment, '=');
		    char *decoded;
		    int j;
		    int broken = 0;

		    if (sep == NULL) {
			warn(_("Warning: Comment %d in stream %d is invalidly "
			       "formatted, does not contain '=': \"%s\"\n"), i, stream->num, comment);
			continue;
		    }

		    /* The field name (everything before '=') must stay
		       within 0x20..0x7D. */
		    for (j = 0; j < sep - comment; j++) {
			if (comment[j] < 0x20 || comment[j] > 0x7D) {
			    warn(_("Warning: Invalid comment fieldname in "
				   "comment %d (stream %d): \"%s\"\n"), i, stream->num, comment);
			    broken = 1;
			    break;
			}
		    }
		    if (broken)
			continue;

		    /* The value (everything after '=') must be valid UTF-8. */
		    j = sep - comment + 1;
		    if (!vorbis_comment_value_is_utf8(stream, i, (const unsigned char *) comment,
						      j, inf->vc.comment_lengths[i]))
			continue;

		    if (utf8_decode(sep + 1, &decoded) < 0) {
			warn(_("Warning: Failure in utf8 decoder. This "
			       "should be impossible\n"));
			continue;
		    }

		    /* Split the comment in place so `comment` is just the
		       field name.  The '=' is not restored afterwards. */
		    *sep = 0;
		    debug_printf("\t%s=%s\n", comment, decoded);

		    /* GCS FIX: This is a bit funky, maybe I need
		       to get rid of the ogg built-in utf8 decoder */
		    if (vorbis_field_name_is(comment, "artist")) {
			gstring_from_string(rmi, ti->artist, MAX_TRACK_LEN, decoded, CODESET_LOCALE);
		    } else if (vorbis_field_name_is(comment, "title")) {
			gstring_from_string(rmi, ti->title, MAX_TRACK_LEN, decoded, CODESET_LOCALE);
			ti->have_track_info = 1;
		    } else if (vorbis_field_name_is(comment, "album")) {
			gstring_from_string(rmi, ti->album, MAX_TRACK_LEN, decoded, CODESET_LOCALE);
		    } else if (vorbis_field_name_is(comment, "tracknumber")) {
			gstring_from_string(rmi, ti->track_p, MAX_TRACK_LEN, decoded, CODESET_LOCALE);
		    }
		    free(decoded);
		}
		/* Done looping through vorbis comments.  If we didn't find
		   a title, give a default title. */
		if (!ti->have_track_info) {
		    strncpy(ti->title, "Title Unknown", MAX_TRACK_LEN);
		    ti->have_track_info = 1;
		}
	    }
	}
    }

    if (!header) {
	ogg_int64_t gp = ogg_page_granulepos(page);
	if (gp > 0) {
	    if (gp < inf->lastgranulepos) {
#ifdef _WIN32
		warn(_("Warning: granulepos in stream %d decreases from %I64d to %I64d"),
		     stream->num, inf->lastgranulepos, gp);
#else
		warn(_("Warning: granulepos in stream %d decreases from %lld to %lld"),
		     stream->num, inf->lastgranulepos, gp);
#endif
	    }
	    inf->lastgranulepos = gp;
	} else {
	    /* NOTE(review): a granulepos of exactly 0 also lands here and
	       is reported as "Negative". */
	    warn(_
		 ("Negative granulepos on vorbis stream outside of headers. This file was created by a buggy encoder\n"));
	}
	if (inf->firstgranulepos < 0) {	/* Not set yet */
	}
	inf->bytes += page->header_len + page->body_len;
    }

    return header;
}