/*
 * Converts an OPAC record using the rule chain held by `p`.
 *
 * The first rule of `p` must be a MARC rule: its settings (charsets and
 * output format mode) drive the decoding of `input_record` into an
 * intermediate buffer, which is then handed to the rules that follow the
 * MARC rule. The final result is delivered through `output_record`.
 *
 * Returns -1 (with a message in p->wr_error) when the first rule is not a
 * MARC rule; otherwise returns whatever yaz_record_conv_record_rule returns.
 */
int yaz_record_conv_opac_record(yaz_record_conv_t p,
                                Z_OPACRecord *input_record,
                                WRBUF output_record)
{
    struct yaz_record_conv_rule *first = p->rules;
    struct marc_info *mi;
    const char *from_charset;
    yaz_iconv_t cd;
    yaz_marc_t mt;
    WRBUF decoded;
    int ret;

    if (!first || first->type->construct != construct_marc)
    {
        /* no marc rule so we can't do OPAC */
        wrbuf_puts(p->wr_error, "Expecting MARC rule as first rule for OPAC");
        return -1;
    }

    mi = first->info;
    from_charset = mi->input_charset;
    decoded = wrbuf_alloc();
    mt = yaz_marc_create();

    /* the record itself may override the configured input charset */
    if (yaz_opac_check_marc21_coding(from_charset, input_record))
        from_charset = "utf-8";
    cd = yaz_iconv_open(mi->output_charset, from_charset);

    wrbuf_rewind(p->wr_error);
    yaz_marc_xml(mt, mi->output_format_mode);
    yaz_marc_iconv(mt, cd);

    yaz_opac_decode_wrbuf(mt, input_record, decoded);

    /* the MARC rule is consumed here; continue with the remaining rules */
    ret = yaz_record_conv_record_rule(p, first->next,
                                      wrbuf_buf(decoded), wrbuf_len(decoded),
                                      output_record);

    yaz_marc_destroy(mt);
    if (cd)
        yaz_iconv_close(cd);
    wrbuf_destroy(decoded);
    return ret;
}
/*
 * Checks that yaz_xml_to_opac accepts an OPAC XML document whose single
 * holding carries only a shelving location, a call number and an empty
 * <volumes/> element.
 */
static void tst_convert4(void)
{
    const char *xml_input =
        "<opacRecord>\n"
        " <bibliographicRecord>\n"
        "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
        " <leader>00077nam a22000498a 4500</leader>\n"
        " <controlfield tag=\"001\"> 11224466 </controlfield>\n"
        " <datafield tag=\"010\" ind1=\" \" ind2=\" \">\n"
        " <subfield code=\"a\">k" "\xc3" "\xb8" /* oslash in UTF_8 */ "benhavn</subfield>\n"
        " </datafield>\n"
        "</record>\n"
        " </bibliographicRecord>\n"
        " <holdings>\n"
        " <holding>\n"
        " <shelvingLocation>Sprague Library hidden basement</shelvingLocation>\n"
        " <callNumber>E98.L7L44 1976 </callNumber>\n"
        " <volumes/>\n"
        " </holding>\n"
        " </holdings>\n"
        " </opacRecord>\n"
        ;
    NMEM pool = nmem_create();
    yaz_marc_t marc = yaz_marc_create();
    Z_OPACRecord *parsed = 0;
    int ok;

    ok = yaz_xml_to_opac(marc, xml_input, strlen(xml_input), &parsed,
                         0 /* iconv */, pool, 0);
    YAZ_CHECK(ok);
    YAZ_CHECK(parsed);

    yaz_marc_destroy(marc);
    nmem_destroy(pool);
}
/*
 * Converts a MARC record in place according to the settings in `info`.
 *
 * info      points to a struct marc_info (input/output charset, input and
 *           output format modes, optional leader spec).
 * record    holds the input record on entry; on success it is rewound and
 *           overwritten with the converted record.
 * wr_error  receives a short message for every failure path.
 *
 * Returns 0 on success, non-zero on failure.
 */
static int convert_marc(void *info, WRBUF record, WRBUF wr_error)
{
    struct marc_info *mi = info;
    const char *input_charset = mi->input_charset;
    int ret = 0;
    yaz_marc_t mt = yaz_marc_create();

    yaz_marc_xml(mt, mi->output_format_mode);
    if (mi->leader_spec)
        yaz_marc_leader_spec(mt, mi->leader_spec);

    if (mi->input_format_mode == YAZ_MARC_ISO2709)
    {
        int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record),
                                       wrbuf_len(record));
        if (sz > 0)
        {
            /* the record itself may override the configured charset */
            if (yaz_marc_check_marc21_coding(input_charset, wrbuf_buf(record),
                                             wrbuf_len(record)))
                input_charset = "utf-8";
        }
        else
        {
            /* report the failure like every other error path does */
            wrbuf_printf(wr_error, "yaz_marc_read_iso2709 failed");
            ret = -1;
        }
    }
    else if (mi->input_format_mode == YAZ_MARC_MARCXML ||
             mi->input_format_mode == YAZ_MARC_TURBOMARC)
    {
        xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record),
                                       wrbuf_len(record));
        if (!doc)
        {
            wrbuf_printf(wr_error, "xmlParseMemory failed");
            ret = -1;
        }
        else
        {
            ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc));
            if (ret)
                wrbuf_printf(wr_error, "yaz_marc_read_xml failed");
            xmlFreeDoc(doc);
        }
    }
    else
    {
        wrbuf_printf(wr_error, "unsupported input format");
        ret = -1;
    }

    if (ret == 0)
    {
        /* when no converter can be opened the record is written without
           character set conversion */
        yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, input_charset);

        if (cd)
            yaz_marc_iconv(mt, cd);

        wrbuf_rewind(record);
        ret = yaz_marc_write_mode(mt, record);
        if (ret)
            wrbuf_printf(wr_error, "yaz_marc_write_mode failed");
        if (cd)
            yaz_iconv_close(cd);
    }
    yaz_marc_destroy(mt);
    return ret;
}
static void dump(const char *fname, const char *from, const char *to, int input_format, int output_format, int write_using_libxml2, int print_offset, const char *split_fname, int split_chunk, int verbose, FILE *cfile, const char *leader_spec) { yaz_marc_t mt = yaz_marc_create(); yaz_iconv_t cd = 0; if (yaz_marc_leader_spec(mt, leader_spec)) { fprintf(stderr, "bad leader spec: %s\n", leader_spec); yaz_marc_destroy(mt); exit(2); } if (from && to) { cd = yaz_iconv_open(to, from); if (!cd) { fprintf(stderr, "conversion from %s to %s " "unsupported\n", from, to); yaz_marc_destroy(mt); exit(2); } yaz_marc_iconv(mt, cd); } yaz_marc_enable_collection(mt); yaz_marc_xml(mt, output_format); yaz_marc_write_using_libxml2(mt, write_using_libxml2); yaz_marc_debug(mt, verbose); if (input_format == YAZ_MARC_MARCXML || input_format == YAZ_MARC_TURBOMARC || input_format == YAZ_MARC_XCHANGE) { #if YAZ_HAVE_XML2 marcdump_read_xml(mt, fname); #endif } else if (input_format == YAZ_MARC_LINE) { marcdump_read_line(mt, fname); } else if (input_format == YAZ_MARC_ISO2709) { FILE *inf = fopen(fname, "rb"); int num = 1; int marc_no = 0; int split_file_no = -1; if (!inf) { fprintf(stderr, "%s: cannot open %s:%s\n", prog, fname, strerror(errno)); exit(1); } if (cfile) fprintf(cfile, "char *marc_records[] = {\n"); for(;; marc_no++) { const char *result = 0; size_t len; size_t rlen; size_t len_result; size_t r; char buf[100001]; r = fread(buf, 1, 5, inf); if (r < 5) { if (r == 0) /* normal EOF, all good */ break; if (print_offset && verbose) { printf("<!-- Extra %ld bytes at end of file -->\n", (long) r); } break; } while (*buf < '0' || *buf > '9') { int i; long off = ftell(inf) - 5; printf("<!-- Skipping bad byte %d (0x%02X) at offset " "%ld (0x%lx) -->\n", *buf & 0xff, *buf & 0xff, off, off); for (i = 0; i<4; i++) buf[i] = buf[i+1]; r = fread(buf+4, 1, 1, inf); no_errors++; if (r < 1) break; } if (r < 1) { if (verbose || print_offset) printf("<!-- End of file with data -->\n"); break; } if 
(print_offset) { long off = ftell(inf) - 5; printf("<!-- Record %d offset %ld (0x%lx) -->\n", num, off, off); } len = atoi_n(buf, 5); if (len < 25 || len > 100000) { long off = ftell(inf) - 5; printf("<!-- Bad Length %ld read at offset %ld (%lx) -->\n", (long)len, (long) off, (long) off); no_errors++; break; } rlen = len - 5; r = fread(buf + 5, 1, rlen, inf); if (r < rlen) { long off = ftell(inf); printf("<!-- Premature EOF at offset %ld (%lx) -->\n", (long) off, (long) off); no_errors++; break; } while (buf[len-1] != ISO2709_RS) { if (len > sizeof(buf)-2) { r = 0; break; } r = fread(buf + len, 1, 1, inf); if (r != 1) break; len++; } if (r < 1) { printf("<!-- EOF while searching for RS -->\n"); no_errors++; break; } if (split_fname) { char fname[256]; const char *mode = 0; FILE *sf; if ((marc_no % split_chunk) == 0) { mode = "wb"; split_file_no++; } else mode = "ab"; sprintf(fname, "%.200s%07d", split_fname, split_file_no); sf = fopen(fname, mode); if (!sf) { fprintf(stderr, "Could not open %s\n", fname); split_fname = 0; } else { if (fwrite(buf, 1, len, sf) != len) { fprintf(stderr, "Could write content to %s\n", fname); split_fname = 0; no_errors++; } fclose(sf); } } len_result = rlen; r = yaz_marc_decode_buf(mt, buf, -1, &result, &len_result); if (r == -1) no_errors++; if (r > 0 && result && len_result) { if (fwrite(result, len_result, 1, stdout) != 1) { fprintf(stderr, "Write to stdout failed\n"); no_errors++; break; } } if (r > 0 && cfile) { char *p = buf; size_t i; if (marc_no) fprintf(cfile, ","); fprintf(cfile, "\n"); for (i = 0; i < r; i++) { if ((i & 15) == 0) fprintf(cfile, " \""); fprintf(cfile, "\\x%02X", p[i] & 255); if (i < r - 1 && (i & 15) == 15) fprintf(cfile, "\"\n"); } fprintf(cfile, "\"\n"); } num++; if (verbose) printf("\n"); } if (cfile) fprintf(cfile, "};\n"); fclose(inf); } { WRBUF wrbuf = wrbuf_alloc(); yaz_marc_write_trailer(mt, wrbuf); fputs(wrbuf_cstr(wrbuf), stdout); wrbuf_destroy(wrbuf); } if (cd) yaz_iconv_close(cd); 
yaz_marc_destroy(mt); }
static void tst_convert3(void) { NMEM nmem = nmem_create(); int ret; yaz_record_conv_t p = 0; const char *iso2709_rec = "\x30\x30\x30\x37\x37\x6E\x61\x6D\x20\x20\x32\x32\x30\x30\x30\x34" "\x39\x38\x61\x20\x34\x35\x30\x30\x30\x30\x31\x30\x30\x31\x33\x30" "\x30\x30\x30\x30\x30\x31\x30\x30\x30\x31\x34\x30\x30\x30\x31\x33" "\x1E\x20\x20\x20\x31\x31\x32\x32\x34\x34\x36\x36\x20\x1E\x20\x20" "\x1F\x61\x6b\xb2\x62\x65\x6e\x68\x61\x76\x6e\x1E\x1D"; const char *opacxml_rec = "<opacRecord>\n" " <bibliographicRecord>\n" "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n" " <leader>00077nam a22000498a 4500</leader>\n" " <controlfield tag=\"001\"> 11224466 </controlfield>\n" " <datafield tag=\"010\" ind1=\" \" ind2=\" \">\n" " <subfield code=\"a\">k" "\xc3" "\xb8" /* oslash in UTF_8 */ "benhavn</subfield>\n" " </datafield>\n" "</record>\n" " </bibliographicRecord>\n" "<holdings>\n" " <holding>\n" " <typeOfRecord>u</typeOfRecord>\n" " <encodingLevel>U</encodingLevel>\n" " <receiptAcqStatus>0</receiptAcqStatus>\n" " <dateOfReport>000000</dateOfReport>\n" " <nucCode>s-FM/GC</nucCode>\n" " <localLocation>Main or Science/Business Reading Rms - STORED OFFSITE</localLocation>\n" " <callNumber>MLCM 89/00602 (N)</callNumber>\n" " <shelvingData>FT MEADE</shelvingData>\n" " <copyNumber>Copy 1</copyNumber>\n" " <volumes>\n" " <volume>\n" " <enumeration>1</enumeration>\n" " <chronology>2</chronology>\n" " <enumAndChron>3</enumAndChron>\n" " </volume>\n" " <volume>\n" " <enumeration>1</enumeration>\n" " <chronology>2</chronology>\n" " <enumAndChron>3</enumAndChron>\n" " </volume>\n" " </volumes>\n" " <circulations>\n" " <circulation>\n" " <availableNow value=\"1\"/>\n" " <availabilityDate>20130129</availabilityDate>\n" " <itemId>1226176</itemId>\n" " <renewable value=\"0\"/>\n" " <onHold value=\"0\"/>\n" " </circulation>\n" " </circulations>\n" " </holding>\n" "</holdings>\n" "</opacRecord>\n"; Z_OPACRecord *z_opac = nmem_malloc(nmem, sizeof(*z_opac)); Z_HoldingsAndCircData *h; Z_CircRecord 
*circ; z_opac->bibliographicRecord = z_ext_record_oid_nmem(nmem, yaz_oid_recsyn_usmarc, iso2709_rec, strlen(iso2709_rec)); z_opac->num_holdingsData = 1; z_opac->holdingsData = (Z_HoldingsRecord **) nmem_malloc(nmem, sizeof(Z_HoldingsRecord *) * 1); z_opac->holdingsData[0] = (Z_HoldingsRecord *) nmem_malloc(nmem, sizeof(Z_HoldingsRecord)); z_opac->holdingsData[0]->which = Z_HoldingsRecord_holdingsAndCirc; h = z_opac->holdingsData[0]->u.holdingsAndCirc = (Z_HoldingsAndCircData *) nmem_malloc(nmem, sizeof(*h)); h->typeOfRecord = nmem_strdup(nmem, "u"); h->encodingLevel = nmem_strdup(nmem, "U"); h->format = 0; h->receiptAcqStatus = nmem_strdup(nmem, "0"); h->generalRetention = 0; h->completeness = 0; h->dateOfReport = nmem_strdup(nmem, "000000"); h->nucCode = nmem_strdup(nmem, "s-FM/GC"); h->localLocation = nmem_strdup(nmem, "Main or Science/Business Reading " "Rms - STORED OFFSITE"); h->shelvingLocation = 0; h->callNumber = nmem_strdup(nmem, "MLCM 89/00602 (N)"); h->shelvingData = nmem_strdup(nmem, "FT MEADE"); h->copyNumber = nmem_strdup(nmem, "Copy 1"); h->publicNote = 0; h->reproductionNote = 0; h->termsUseRepro = 0; h->enumAndChron = 0; h->num_volumes = 2; h->volumes = 0; h->volumes = (Z_Volume **) nmem_malloc(nmem, 2 * sizeof(Z_Volume *)); h->volumes[0] = (Z_Volume *) nmem_malloc(nmem, sizeof(Z_Volume)); h->volumes[1] = h->volumes[0]; h->volumes[0]->enumeration = nmem_strdup(nmem, "1"); h->volumes[0]->chronology = nmem_strdup(nmem, "2"); h->volumes[0]->enumAndChron = nmem_strdup(nmem, "3"); h->num_circulationData = 1; h->circulationData = (Z_CircRecord **) nmem_malloc(nmem, 1 * sizeof(Z_CircRecord *)); circ = h->circulationData[0] = (Z_CircRecord *) nmem_malloc(nmem, sizeof(Z_CircRecord)); circ->availableNow = nmem_booldup(nmem, 1); circ->availablityDate = nmem_strdup(nmem, "20130129"); circ->availableThru = 0; circ->restrictions = 0; circ->itemId = nmem_strdup(nmem, "1226176"); circ->renewable = nmem_booldup(nmem, 0); circ->onHold = nmem_booldup(nmem, 0); 
circ->enumAndChron = 0; circ->midspine = 0; circ->temporaryLocation = 0; YAZ_CHECK(conv_configure_test("<backend>" "<marc" " inputcharset=\"marc-8\"" " outputcharset=\"utf-8\"" " inputformat=\"marc\"" " outputformat=\"marcxml\"" "/>" "</backend>", 0, &p)); if (p) { WRBUF output_record = wrbuf_alloc(); ret = yaz_record_conv_opac_record(p, z_opac, output_record); YAZ_CHECK(ret == 0); if (ret == 0) { ret = strcmp(wrbuf_cstr(output_record), opacxml_rec); YAZ_CHECK(ret == 0); if (ret) { printf("got-output_record len=%ld: %s\n", (long) wrbuf_len(output_record), wrbuf_cstr(output_record)); printf("output_expect_record len=%ld %s\n", (long) strlen(opacxml_rec), opacxml_rec); } } yaz_record_conv_destroy(p); wrbuf_destroy(output_record); } { Z_OPACRecord *opac = 0; yaz_marc_t mt = yaz_marc_create(); ret = yaz_xml_to_opac(mt, opacxml_rec, strlen(opacxml_rec), &opac, 0 /* iconv */, nmem, 0); YAZ_CHECK(ret); YAZ_CHECK(opac); if (opac) { WRBUF output_record = wrbuf_alloc(); char *p; yaz_marc_xml(mt, YAZ_MARC_MARCXML); yaz_opac_decode_wrbuf(mt, opac, output_record); /* change MARC size to 00077 from 00078, due to encoding of the aring (two bytes in UTF-8) */ p = strstr(wrbuf_buf(output_record), "00078"); YAZ_CHECK(p); if (p) p[4] = '7'; ret = strcmp(wrbuf_cstr(output_record), opacxml_rec); YAZ_CHECK(ret == 0); if (ret) { printf("got-output_record len=%ld: %s\n", (long) wrbuf_len(output_record), wrbuf_cstr(output_record)); printf("output_expect_record len=%ld %s\n", (long) strlen(opacxml_rec), opacxml_rec); } wrbuf_destroy(output_record); } yaz_marc_destroy(mt); } nmem_destroy(nmem); }