static int convert_marc(void *info, WRBUF record, WRBUF wr_error) { struct marc_info *mi = info; const char *input_charset = mi->input_charset; int ret = 0; yaz_marc_t mt = yaz_marc_create(); yaz_marc_xml(mt, mi->output_format_mode); if (mi->leader_spec) yaz_marc_leader_spec(mt, mi->leader_spec); if (mi->input_format_mode == YAZ_MARC_ISO2709) { int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record), wrbuf_len(record)); if (sz > 0) { if (yaz_marc_check_marc21_coding(input_charset, wrbuf_buf(record), wrbuf_len(record))) input_charset = "utf-8"; ret = 0; } else ret = -1; } else if (mi->input_format_mode == YAZ_MARC_MARCXML || mi->input_format_mode == YAZ_MARC_TURBOMARC) { xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record), wrbuf_len(record)); if (!doc) { wrbuf_printf(wr_error, "xmlParseMemory failed"); ret = -1; } else { ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc)); if (ret) wrbuf_printf(wr_error, "yaz_marc_read_xml failed"); } xmlFreeDoc(doc); } else { wrbuf_printf(wr_error, "unsupported input format"); ret = -1; } if (ret == 0) { yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, input_charset); if (cd) yaz_marc_iconv(mt, cd); wrbuf_rewind(record); ret = yaz_marc_write_mode(mt, record); if (ret) wrbuf_printf(wr_error, "yaz_marc_write_mode failed"); if (cd) yaz_iconv_close(cd); } yaz_marc_destroy(mt); return ret; }
static void dump(const char *fname, const char *from, const char *to, int input_format, int output_format, int write_using_libxml2, int print_offset, const char *split_fname, int split_chunk, int verbose, FILE *cfile, const char *leader_spec) { yaz_marc_t mt = yaz_marc_create(); yaz_iconv_t cd = 0; if (yaz_marc_leader_spec(mt, leader_spec)) { fprintf(stderr, "bad leader spec: %s\n", leader_spec); yaz_marc_destroy(mt); exit(2); } if (from && to) { cd = yaz_iconv_open(to, from); if (!cd) { fprintf(stderr, "conversion from %s to %s " "unsupported\n", from, to); yaz_marc_destroy(mt); exit(2); } yaz_marc_iconv(mt, cd); } yaz_marc_enable_collection(mt); yaz_marc_xml(mt, output_format); yaz_marc_write_using_libxml2(mt, write_using_libxml2); yaz_marc_debug(mt, verbose); if (input_format == YAZ_MARC_MARCXML || input_format == YAZ_MARC_TURBOMARC || input_format == YAZ_MARC_XCHANGE) { #if YAZ_HAVE_XML2 marcdump_read_xml(mt, fname); #endif } else if (input_format == YAZ_MARC_LINE) { marcdump_read_line(mt, fname); } else if (input_format == YAZ_MARC_ISO2709) { FILE *inf = fopen(fname, "rb"); int num = 1; int marc_no = 0; int split_file_no = -1; if (!inf) { fprintf(stderr, "%s: cannot open %s:%s\n", prog, fname, strerror(errno)); exit(1); } if (cfile) fprintf(cfile, "char *marc_records[] = {\n"); for(;; marc_no++) { const char *result = 0; size_t len; size_t rlen; size_t len_result; size_t r; char buf[100001]; r = fread(buf, 1, 5, inf); if (r < 5) { if (r == 0) /* normal EOF, all good */ break; if (print_offset && verbose) { printf("<!-- Extra %ld bytes at end of file -->\n", (long) r); } break; } while (*buf < '0' || *buf > '9') { int i; long off = ftell(inf) - 5; printf("<!-- Skipping bad byte %d (0x%02X) at offset " "%ld (0x%lx) -->\n", *buf & 0xff, *buf & 0xff, off, off); for (i = 0; i<4; i++) buf[i] = buf[i+1]; r = fread(buf+4, 1, 1, inf); no_errors++; if (r < 1) break; } if (r < 1) { if (verbose || print_offset) printf("<!-- End of file with data -->\n"); break; } if (print_offset) { long off = ftell(inf) - 5; printf("<!-- Record %d offset %ld (0x%lx) -->\n", num, off, off); } len = atoi_n(buf, 5); if (len < 25 || len > 100000) { long off = ftell(inf) - 5; printf("<!-- Bad Length %ld read at offset %ld (%lx) -->\n", (long)len, (long) off, (long) off); no_errors++; break; } rlen = len - 5; r = fread(buf + 5, 1, rlen, inf); if (r < rlen) { long off = ftell(inf); printf("<!-- Premature EOF at offset %ld (%lx) -->\n", (long) off, (long) off); no_errors++; break; } while (buf[len-1] != ISO2709_RS) { if (len > sizeof(buf)-2) { r = 0; break; } r = fread(buf + len, 1, 1, inf); if (r != 1) break; len++; } if (r < 1) { printf("<!-- EOF while searching for RS -->\n"); no_errors++; break; } if (split_fname) { char fname[256]; const char *mode = 0; FILE *sf; if ((marc_no % split_chunk) == 0) { mode = "wb"; split_file_no++; } else mode = "ab"; sprintf(fname, "%.200s%07d", split_fname, split_file_no); sf = fopen(fname, mode); if (!sf) { fprintf(stderr, "Could not open %s\n", fname); split_fname = 0; } else { if (fwrite(buf, 1, len, sf) != len) { fprintf(stderr, "Could write content to %s\n", fname); split_fname = 0; no_errors++; } fclose(sf); } } len_result = rlen; r = yaz_marc_decode_buf(mt, buf, -1, &result, &len_result); if (r == -1) no_errors++; if (r > 0 && result && len_result) { if (fwrite(result, len_result, 1, stdout) != 1) { fprintf(stderr, "Write to stdout failed\n"); no_errors++; break; } } if (r > 0 && cfile) { char *p = buf; size_t i; if (marc_no) fprintf(cfile, ","); fprintf(cfile, "\n"); for (i = 0; i < r; i++) { if ((i & 15) == 0) fprintf(cfile, " \""); fprintf(cfile, "\\x%02X", p[i] & 255); if (i < r - 1 && (i & 15) == 15) fprintf(cfile, "\"\n"); } fprintf(cfile, "\"\n"); } num++; if (verbose) printf("\n"); } if (cfile) fprintf(cfile, "};\n"); fclose(inf); } { WRBUF wrbuf = wrbuf_alloc(); yaz_marc_write_trailer(mt, wrbuf); fputs(wrbuf_cstr(wrbuf), stdout); wrbuf_destroy(wrbuf); } if (cd) yaz_iconv_close(cd); yaz_marc_destroy(mt); }