/**
 * \brief writes only the comment nodes of a record, one per line
 * \param mt handle holding the node list
 * \param wr WRBUF that receives the output
 * \retval 0 OK
 * \retval -1 no leader node, or the digit at leader offset 11 cannot be parsed
 *
 * Each comment is passed through the handle's iconv descriptor and followed
 * by a newline. All other node kinds are skipped.
 */
int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
{
    struct yaz_marc_node *node;
    const char *leader;
    int identifier_length;

    /* locate the first leader node; its presence is a precondition */
    node = mt->nodes;
    while (node && node->which != YAZ_MARC_LEADER)
        node = node->next;
    leader = node ? node->u.leader : 0;
    if (!leader)
        return -1;

    /* the value itself is not used below, but a bad leader is an error */
    if (!atoi_n_check(leader + 11, 1, &identifier_length))
        return -1;

    for (node = mt->nodes; node; node = node->next)
    {
        if (node->which != YAZ_MARC_COMMENT)
            continue;
        wrbuf_iconv_write(wr, mt->iconv_cd,
                          node->u.comment, strlen(node->u.comment));
        wrbuf_puts(wr, "\n");
    }
    return 0;
}
/**
 * \brief decodes "#NN;" escapes in a record buffer, in place
 * \param info unused
 * \param record buffer to decode; replaced with the decoded content
 * \param wr_error unused
 * \retval 0 always
 *
 * A '#' followed by two characters accepted by atoi_n_check() and a ';'
 * is replaced by the single byte whose value atoi_n_check() returned.
 * Every other byte is copied through unchanged.
 */
static int convert_solrmarc(void *info, WRBUF record, WRBUF wr_error)
{
    WRBUF w = wrbuf_alloc();
    const char *buf = wrbuf_buf(record);
    size_t i, sz = wrbuf_len(record);

    (void) info;
    (void) wr_error;

    for (i = 0; i < sz; i++)
    {
        int ch;
        /* "i + 3 < sz" rather than "i < sz - 3": sz is unsigned, so the
           subtraction wraps for buffers shorter than 3 bytes and would let
           buf[i+3] be read past the end of the buffer */
        if (buf[i] == '#' && i + 3 < sz && buf[i+3] == ';'
            && atoi_n_check(buf+i+1, 2, &ch))
            i += 3;
        else
            ch = buf[i];
        wrbuf_putc(w, ch);
    }
    /* copy the decoded text back over the original record */
    wrbuf_rewind(record);
    wrbuf_write(record, wrbuf_buf(w), wrbuf_len(w));
    wrbuf_destroy(w);
    return 0;
}
/**
 * \brief builds a libxml2 tree for a record using short element names
 * \param mt handle holding the node list
 * \param root_ptr receives the newly created root element ("r")
 * \param ns namespace URI set on the root element
 * \param format optional value for the "format" attribute (may be NULL)
 * \param type optional value for the "type" attribute (may be NULL)
 * \retval 0 OK
 * \retval -1 no leader node, or the digit at leader offset 11 cannot be parsed
 *
 * Control fields become elements named 'c' followed by up to three
 * characters of the tag; the leader becomes element "l"; data fields are
 * delegated to add_marc_datafield_turbo_xml().
 */
static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
                                        const char *ns,
                                        const char *format,
                                        const char *type)
{
    struct yaz_marc_node *node;
    const char *leader;
    int identifier_length;
    xmlNode *record;
    xmlNsPtr record_ns;
    WRBUF text;

    node = mt->nodes;
    while (node && node->which != YAZ_MARC_LEADER)
        node = node->next;
    leader = node ? node->u.leader : 0;
    if (!leader)
        return -1;
    if (!atoi_n_check(leader + 11, 1, &identifier_length))
        return -1;

    text = wrbuf_alloc();

    record = xmlNewNode(0, BAD_CAST "r");
    *root_ptr = record;
    record_ns = xmlNewNs(record, BAD_CAST ns, 0);
    xmlSetNs(record, record_ns);

    if (format)
        xmlNewProp(record, BAD_CAST "format", BAD_CAST format);
    if (type)
        xmlNewProp(record, BAD_CAST "type", BAD_CAST type);

    for (node = mt->nodes; node; node = node->next)
    {
        if (node->which == YAZ_MARC_DATAFIELD)
        {
            add_marc_datafield_turbo_xml(mt, node, record, record_ns,
                                         text, identifier_length);
        }
        else if (node->which == YAZ_MARC_CONTROLFIELD)
        {
            /* element name is 'c' + tag; strncpy fills exactly 3 bytes
               (zero padded), and byte 4 terminates the name */
            char element[10];

            element[0] = 'c';
            element[4] = '\0';

            wrbuf_rewind(text);
            wrbuf_iconv_puts(text, mt->iconv_cd, node->u.controlfield.data);
            marc_iconv_reset(mt, text);

            strncpy(element + 1, node->u.controlfield.tag, 3);
            xmlNewTextChild(record, record_ns, BAD_CAST element,
                            BAD_CAST wrbuf_cstr(text));
        }
        else if (node->which == YAZ_MARC_COMMENT)
        {
            xmlNode *comment = xmlNewComment(BAD_CAST node->u.comment);
            xmlAddChild(record, comment);
        }
        else if (node->which == YAZ_MARC_LEADER)
        {
            xmlNewTextChild(record, record_ns, BAD_CAST "l",
                            BAD_CAST node->u.leader);
        }
    }
    wrbuf_destroy(text);
    return 0;
}
/** \brief shared serializer for MARCXML, MarcXchange and turbomarc
    \param mt handle holding the node list
    \param wr WRBUF that receives the XML text
    \param ns namespace URI written as xmlns
    \param format optional record format attribute (e.g. "MARC21")
    \param type optional record type attribute (e.g. "Bibliographic")
    \param turbo index into the element-name tables; 1 selects turbomarc
    \retval 0 OK
    \retval -1 no leader node, or leader offset 11 is not a digit

    When collection mode is enabled on the handle, the first record also
    emits the opening collection element and the xmlns declaration is put
    there instead of on the record element.
*/
static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr,
                                        const char *ns,
                                        const char *format,
                                        const char *type,
                                        int turbo)
{
    struct yaz_marc_node *node;
    const char *leader;
    int identifier_length;

    node = mt->nodes;
    while (node && node->which != YAZ_MARC_LEADER)
        node = node->next;
    leader = node ? node->u.leader : 0;
    if (!leader)
        return -1;
    if (!atoi_n_check(leader + 11, 1, &identifier_length))
        return -1;

    /* opening tag of the record (and of the collection, if first) */
    if (mt->enable_collection == no_collection)
    {
        wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
    }
    else
    {
        if (mt->enable_collection == collection_first)
        {
            wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
            mt->enable_collection = collection_second;
        }
        wrbuf_printf(wr, "<%s", record_name[turbo]);
    }
    if (format)
        wrbuf_printf(wr, " format=\"%.80s\"", format);
    if (type)
        wrbuf_printf(wr, " type=\"%.80s\"", type);
    wrbuf_printf(wr, ">\n");

    for (node = mt->nodes; node; node = node->next)
    {
        if (node->which == YAZ_MARC_DATAFIELD)
        {
            const char *tag = node->u.datafield.tag;
            const char *ind = node->u.datafield.indicator;
            struct yaz_marc_subfield *sub;

            /* in turbo mode the tag is appended to the element name,
               otherwise it goes into a tag="" attribute */
            wrbuf_printf(wr, " <%s", datafield_name[turbo]);
            if (!turbo)
                wrbuf_printf(wr, " tag=\"");
            wrbuf_iconv_write_cdata(wr, mt->iconv_cd, tag, strlen(tag));
            if (!turbo)
                wrbuf_printf(wr, "\"");

            if (ind)
            {
                int pos;
                for (pos = 0; ind[pos]; pos++)
                {
                    wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo],
                                 pos + 1);
                    wrbuf_iconv_write_cdata(wr, mt->iconv_cd, ind + pos, 1);
                    wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
                }
            }
            wrbuf_printf(wr, ">\n");

            for (sub = node->u.datafield.subfields; sub; sub = sub->next)
            {
                const char *code = sub->code_data;
                size_t code_len =
                    get_subfield_len(mt, code, identifier_length);
                const char *value = code + code_len;

                wrbuf_printf(wr, " <%s", subfield_name[turbo]);
                if (turbo)
                {
                    element_name_append_attribute_value(mt, wr, "code",
                                                        code, code_len);
                    wrbuf_puts(wr, ">");
                }
                else
                {
                    wrbuf_printf(wr, " code=\"");
                    wrbuf_iconv_write_cdata(wr, mt->iconv_cd, code, code_len);
                    wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
                }

                wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
                                        value, strlen(value));
                marc_iconv_reset(mt, wr);

                wrbuf_printf(wr, "</%s", subfield_name[turbo]);
                if (turbo)
                    element_name_append_attribute_value(mt, wr, 0,
                                                        code, code_len);
                wrbuf_puts(wr, ">\n");
            }

            wrbuf_printf(wr, " </%s", datafield_name[turbo]);
            /* TODO Not CDATA */
            if (turbo)
                wrbuf_iconv_write_cdata(wr, mt->iconv_cd, tag, strlen(tag));
            wrbuf_printf(wr, ">\n");
        }
        else if (node->which == YAZ_MARC_CONTROLFIELD)
        {
            const char *tag = node->u.controlfield.tag;
            const char *data = node->u.controlfield.data;

            wrbuf_printf(wr, " <%s", controlfield_name[turbo]);
            if (!turbo)
                wrbuf_printf(wr, " tag=\"");
            /* TODO convert special (turbo case) */
            wrbuf_iconv_write_cdata(wr, mt->iconv_cd, tag, strlen(tag));
            wrbuf_iconv_puts(wr, mt->iconv_cd, turbo ? ">" : "\">");

            wrbuf_iconv_write_cdata(wr, mt->iconv_cd, data, strlen(data));
            marc_iconv_reset(mt, wr);

            wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
            /* TODO convert special */
            if (turbo)
                wrbuf_iconv_write_cdata(wr, mt->iconv_cd, tag, strlen(tag));
            wrbuf_puts(wr, ">\n");
        }
        else if (node->which == YAZ_MARC_COMMENT)
        {
            wrbuf_printf(wr, "<!-- ");
            wrbuf_puts(wr, node->u.comment);
            wrbuf_printf(wr, " -->\n");
        }
        else if (node->which == YAZ_MARC_LEADER)
        {
            wrbuf_printf(wr, " <%s>", leader_name[turbo]);
            /* the leader is written without charset conversion */
            wrbuf_iconv_write_cdata(wr, 0,
                                    node->u.leader, strlen(node->u.leader));
            wrbuf_printf(wr, "</%s>\n", leader_name[turbo]);
        }
    }
    wrbuf_printf(wr, "</%s>\n", record_name[turbo]);
    return 0;
}
/**
 * \brief writes a record in line format
 * \param mt handle holding the node list
 * \param wr WRBUF that receives the output
 * \retval 0 OK
 * \retval -1 no leader node, or the digit at leader offset 11 cannot be parsed
 *
 * Output per node kind: the leader on its own line; control fields as
 * "tag data"; data fields as "tag indicators" followed by each subfield
 * (handle's subfield string, code, blank, value); comments in parentheses.
 * Fields end with the handle's end-of-line string, and the record ends
 * with an empty line.
 */
int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
{
    struct yaz_marc_node *node;
    const char *leader;
    int identifier_length;

    node = mt->nodes;
    while (node && node->which != YAZ_MARC_LEADER)
        node = node->next;
    leader = node ? node->u.leader : 0;
    if (!leader)
        return -1;
    if (!atoi_n_check(leader + 11, 1, &identifier_length))
        return -1;

    for (node = mt->nodes; node; node = node->next)
    {
        if (node->which == YAZ_MARC_DATAFIELD)
        {
            struct yaz_marc_subfield *sub = node->u.datafield.subfields;

            wrbuf_printf(wr, "%s %s", node->u.datafield.tag,
                         node->u.datafield.indicator);
            while (sub)
            {
                /* code_data holds the subfield code followed by its value */
                const char *code = sub->code_data;
                size_t code_len =
                    get_subfield_len(mt, code, identifier_length);

                wrbuf_puts(wr, mt->subfield_str);
                wrbuf_iconv_write(wr, mt->iconv_cd, code, code_len);
                wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
                wrbuf_iconv_puts(wr, mt->iconv_cd, code + code_len);
                marc_iconv_reset(mt, wr);
                sub = sub->next;
            }
            wrbuf_puts(wr, mt->endline_str);
        }
        else if (node->which == YAZ_MARC_CONTROLFIELD)
        {
            wrbuf_printf(wr, "%s", node->u.controlfield.tag);
            wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
            wrbuf_iconv_puts(wr, mt->iconv_cd, node->u.controlfield.data);
            marc_iconv_reset(mt, wr);
            wrbuf_puts(wr, mt->endline_str);
        }
        else if (node->which == YAZ_MARC_COMMENT)
        {
            wrbuf_puts(wr, "(");
            wrbuf_iconv_write(wr, mt->iconv_cd,
                              node->u.comment, strlen(node->u.comment));
            marc_iconv_reset(mt, wr);
            wrbuf_puts(wr, ")\n");
        }
        else if (node->which == YAZ_MARC_LEADER)
        {
            wrbuf_printf(wr, "%s\n", node->u.leader);
        }
    }
    /* blank line terminates the record */
    wrbuf_puts(wr, "\n");
    return 0;
}
/**
 * \brief validates a 24-byte leader, extracts its numeric fields and
 *        stores the (possibly repaired) leader on the handle
 * \param mt handle
 * \param leader_c leader to parse; 24 bytes are read from it
 * \param indicator_length receives digit at offset 10 (2 if missing/zero)
 * \param identifier_length receives digit at offset 11 (2 if missing/zero)
 * \param base_address receives number at offsets 12..16 (0 if invalid)
 * \param length_data_entry receives digit at offset 20 (4 if invalid or < 3)
 * \param length_starting receives digit at offset 21 (5 if invalid or < 4)
 * \param length_implementation receives digit at offset 22 (0 if invalid)
 *
 * Works on a private copy of the leader. Whenever a default is assumed a
 * diagnostic is emitted via yaz_marc_cprintf(); for every field except the
 * base address the copy is also patched with the assumed digit. The
 * remaining positions are handed to check_ascii(). The copy is finally
 * added to the handle with yaz_marc_add_leader().
 */
void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
                         int *indicator_length,
                         int *identifier_length,
                         int *base_address,
                         int *length_data_entry,
                         int *length_starting,
                         int *length_implementation)
{
    char ldr[24];
    int pos;

    memcpy(ldr, leader_c, sizeof ldr);

    for (pos = 5; pos <= 7; pos++)
        check_ascii(mt, ldr, pos, 'a');
    for (pos = 8; pos <= 9; pos++)
        check_ascii(mt, ldr, pos, '#');

    if (!(atoi_n_check(ldr + 10, 1, indicator_length)
          && *indicator_length != 0))
    {
        yaz_marc_cprintf(mt, "Indicator length at offset 10 should"
                         " hold a number 1-9. Assuming 2");
        *indicator_length = 2;
        ldr[10] = '2';
    }

    if (!(atoi_n_check(ldr + 11, 1, identifier_length)
          && *identifier_length != 0))
    {
        yaz_marc_cprintf(mt, "Identifier length at offset 11 should "
                         " hold a number 1-9. Assuming 2");
        *identifier_length = 2;
        ldr[11] = '2';
    }

    /* the base address is reset but the leader copy is left untouched */
    if (!atoi_n_check(ldr + 12, 5, base_address))
    {
        yaz_marc_cprintf(mt, "Base address at offsets 12..16 should"
                         " hold a number. Assuming 0");
        *base_address = 0;
    }

    for (pos = 17; pos <= 19; pos++)
        check_ascii(mt, ldr, pos, '#');

    if (!(atoi_n_check(ldr + 20, 1, length_data_entry)
          && *length_data_entry >= 3))
    {
        yaz_marc_cprintf(mt, "Length data entry at offset 20 should"
                         " hold a number 3-9. Assuming 4");
        ldr[20] = '4';
        *length_data_entry = 4;
    }

    if (!(atoi_n_check(ldr + 21, 1, length_starting)
          && *length_starting >= 4))
    {
        yaz_marc_cprintf(mt, "Length starting at offset 21 should"
                         " hold a number 4-9. Assuming 5");
        ldr[21] = '5';
        *length_starting = 5;
    }

    if (!atoi_n_check(ldr + 22, 1, length_implementation))
    {
        yaz_marc_cprintf(mt, "Length implementation at offset 22 should"
                         " hold a number. Assuming 0");
        ldr[22] = '0';
        *length_implementation = 0;
    }

    check_ascii(mt, ldr, 23, '0');

    if (mt->debug)
    {
        yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
        yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
        yaz_marc_cprintf(mt, "Base address %5d", *base_address);
        yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
        yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
        yaz_marc_cprintf(mt, "Length implementation %5d",
                         *length_implementation);
    }
    yaz_marc_add_leader(mt, ldr, sizeof ldr);
}
/**
 * \brief writes a record as a JSON object
 * \param mt handle holding the node list
 * \param w WRBUF that receives the JSON text
 * \retval 0 OK
 * \retval -1 no leader node, or the digit at leader offset 11 cannot be
 *            parsed; nothing is written to w in that case
 *
 * The object has a "leader" string and a "fields" array. Control fields
 * are emitted as {"tag":"data"}; data fields as
 * {"tag":{"subfields":[{"code":"value"},...],"ind1":"x",...}}.
 * Leader and comment nodes inside the list produce no array entries.
 */
int yaz_marc_write_json(yaz_marc_t mt, WRBUF w)
{
    int identifier_length;
    struct yaz_marc_node *n;
    const char *leader = 0;
    int first = 1;

    /* no break here: when several leader nodes exist the last one is used */
    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
            leader = n->u.leader;

    /* validate before emitting anything so a failed call does not leave a
       dangling "{" in the caller's buffer */
    if (!leader)
        return -1;
    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;

    wrbuf_puts(w, "{\n");
    wrbuf_puts(w, "\t\"leader\":\"");
    wrbuf_json_puts(w, leader);
    wrbuf_puts(w, "\",\n");
    wrbuf_puts(w, "\t\"fields\":\n\t[\n");

    for (n = mt->nodes; n; n = n->next)
    {
        struct yaz_marc_subfield *s;
        const char *sep = "";
        switch (n->which)
        {
        case YAZ_MARC_LEADER:
        case YAZ_MARC_COMMENT:
            break;
        case YAZ_MARC_CONTROLFIELD:
            /* comma goes between entries, never before the first */
            if (first)
                first = 0;
            else
                wrbuf_puts(w, ",\n");
            wrbuf_puts(w, "\t\t{\n\t\t\t\"");
            wrbuf_iconv_json_puts(w, mt->iconv_cd, n->u.controlfield.tag);
            wrbuf_puts(w, "\":\"");
            wrbuf_iconv_json_puts(w, mt->iconv_cd, n->u.controlfield.data);
            wrbuf_puts(w, "\"\n\t\t}");
            break;
        case YAZ_MARC_DATAFIELD:
            if (first)
                first = 0;
            else
                wrbuf_puts(w, ",\n");

            wrbuf_puts(w, "\t\t{\n\t\t\t\"");
            wrbuf_json_puts(w, n->u.datafield.tag);
            wrbuf_puts(w, "\":\n\t\t\t{\n\t\t\t\t\"subfields\":\n\t\t\t\t[\n");
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                size_t using_code_len =
                    get_subfield_len(mt, s->code_data, identifier_length);

                wrbuf_puts(w, sep);
                sep = ",\n";
                wrbuf_puts(w, "\t\t\t\t\t{\n\t\t\t\t\t\t\"");
                wrbuf_iconv_json_write(w, mt->iconv_cd,
                                       s->code_data, using_code_len);
                wrbuf_puts(w, "\":\"");
                wrbuf_iconv_json_puts(w, mt->iconv_cd,
                                      s->code_data + using_code_len);
                wrbuf_puts(w, "\"\n\t\t\t\t\t}");
            }
            wrbuf_puts(w, "\n\t\t\t\t]");
            if (n->u.datafield.indicator)
            {
                int i;
                for (i = 0; n->u.datafield.indicator[i]; i++)
                {
                    /* route the indicator through the same JSON string
                       writer used for the leader and tag instead of a raw
                       %c, so a quote or backslash indicator cannot break
                       the surrounding string literal */
                    char ind[2];

                    ind[0] = n->u.datafield.indicator[i];
                    ind[1] = '\0';
                    wrbuf_printf(w, ",\n\t\t\t\t\"ind%d\":\"", i + 1);
                    wrbuf_json_puts(w, ind);
                    wrbuf_puts(w, "\"");
                }
            }
            wrbuf_puts(w, "\n\t\t\t}\n");
            wrbuf_puts(w, "\n\t\t}");
            break;
        }
    }
    wrbuf_puts(w, "\n\t]\n");
    wrbuf_puts(w, "}\n");
    return 0;
}
/**
 * \brief writes a record in ISO2709 (binary MARC) form
 * \param mt handle holding the node list
 * \param wr WRBUF that receives the record
 * \retval 0 OK
 * \retval -1 no leader node, or one of the leader digits at offsets
 *            10, 11, 20, 21, 22 cannot be parsed
 *
 * Works in two passes over the node list. The first pass measures every
 * field (by converting it into a scratch buffer) and builds the directory;
 * the second pass writes the field data. The output leader is rebuilt from
 * the computed record length and base address plus bytes 5..11 and 17..23
 * of the stored leader. Comment and leader nodes contribute no field data.
 */
int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
{
    struct yaz_marc_node *n;
    int indicator_length;
    int identifier_length;
    int length_data_entry;
    int length_starting;
    int length_implementation;
    int data_offset = 0;
    const char *leader = 0;
    WRBUF wr_dir, wr_head, wr_data_tmp;
    int base_address;

    /* no break here: when several leader nodes exist the last one is used */
    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
            leader = n->u.leader;

    if (!leader)
        return -1;
    if (!atoi_n_check(leader+10, 1, &indicator_length))
        return -1;
    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;
    if (!atoi_n_check(leader+20, 1, &length_data_entry))
        return -1;
    if (!atoi_n_check(leader+21, 1, &length_starting))
        return -1;
    if (!atoi_n_check(leader+22, 1, &length_implementation))
        return -1;

    wr_data_tmp = wrbuf_alloc();
    wr_dir = wrbuf_alloc();

    /* pass 1: compute field lengths and build the directory */
    for (n = mt->nodes; n; n = n->next)
    {
        int data_length = 0;
        struct yaz_marc_subfield *s;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
            data_length += indicator_length;
            wrbuf_rewind(wr_data_tmp);
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                /* write dummy IDFS + content */
                wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
                wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
                marc_iconv_reset(mt, wr_data_tmp);
            }
            /* write dummy FS (makes MARC-8 to become ASCII) */
            wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
            marc_iconv_reset(mt, wr_data_tmp);
            data_length += wrbuf_len(wr_data_tmp);
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);

            wrbuf_rewind(wr_data_tmp);
            wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
                             n->u.controlfield.data);
            marc_iconv_reset(mt, wr_data_tmp);
            wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
            marc_iconv_reset(mt, wr_data_tmp);
            data_length += wrbuf_len(wr_data_tmp);
            break;
        case YAZ_MARC_COMMENT:
            break;
        case YAZ_MARC_LEADER:
            break;
        }
        if (data_length)
        {
            wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
            wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
            data_offset += data_length;
        }
    }
    /* mark end of directory */
    wrbuf_putc(wr_dir, ISO2709_FS);

    /* base address of data (comes after leader+directory) */
    base_address = 24 + wrbuf_len(wr_dir);

    wr_head = wrbuf_alloc();

    /* write record length */
    wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
    /* from "original" leader */
    wrbuf_write(wr_head, leader+5, 7);
    /* base address of data */
    wrbuf_printf(wr_head, "%05d", base_address);
    /* from "original" leader */
    wrbuf_write(wr_head, leader+17, 7);

    wrbuf_write(wr, wrbuf_buf(wr_head), 24);
    wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
    wrbuf_destroy(wr_head);
    wrbuf_destroy(wr_dir);
    wrbuf_destroy(wr_data_tmp);

    /* pass 2: write the field data in directory order */
    for (n = mt->nodes; n; n = n->next)
    {
        struct yaz_marc_subfield *s;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
        {
            /* The directory reserved exactly indicator_length bytes. The
               stored indicator is a NUL-terminated string that may be
               shorter than that (or absent), so copy only what exists and
               pad with blanks instead of reading past its terminator. */
            const char *ind = n->u.datafield.indicator;
            size_t ind_len = ind ? strlen(ind) : 0;
            int k;

            if (indicator_length >= 0 && ind_len > (size_t) indicator_length)
                ind_len = (size_t) indicator_length;
            if (ind_len)
                wrbuf_write(wr, ind, ind_len);
            for (k = (int) ind_len; k < indicator_length; k++)
                wrbuf_putc(wr, ' ');

            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                wrbuf_putc(wr, ISO2709_IDFS);
                wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
                marc_iconv_reset(mt, wr);
            }
            wrbuf_putc(wr, ISO2709_FS);
            break;
        }
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
            marc_iconv_reset(mt, wr);
            wrbuf_putc(wr, ISO2709_FS);
            break;
        case YAZ_MARC_COMMENT:
            break;
        case YAZ_MARC_LEADER:
            break;
        }
    }
    /* record terminator */
    wrbuf_printf(wr, "%c", ISO2709_RS);
    return 0;
}
/**
 * \brief builds a libxml2 tree for a record in MARCXML layout
 * \param mt handle holding the node list
 * \param root_ptr receives the newly created root element ("record")
 * \param ns namespace URI set on the root element
 * \param format optional value for the "format" attribute (may be NULL)
 * \param type optional value for the "type" attribute (may be NULL)
 * \retval 0 OK
 * \retval -1 no leader node, or the digit at leader offset 11 cannot be parsed
 *
 * Data fields become "datafield" elements with tag/indN attributes and
 * "subfield" children; control fields become "controlfield" elements;
 * comments become XML comments; the leader becomes a "leader" element.
 * Field and subfield text is passed through the handle's iconv descriptor.
 */
int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
                       const char *ns,
                       const char *format,
                       const char *type)
{
    struct yaz_marc_node *n;
    int identifier_length;
    const char *leader = 0;
    xmlNode *record_ptr;
    xmlNsPtr ns_record;
    WRBUF wr_cdata = 0;

    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
        {
            leader = n->u.leader;
            break;
        }

    if (!leader)
        return -1;
    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;

    /* scratch buffer reused for every converted text value */
    wr_cdata = wrbuf_alloc();

    record_ptr = xmlNewNode(0, BAD_CAST "record");
    *root_ptr = record_ptr;

    ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
    xmlSetNs(record_ptr, ns_record);

    if (format)
        xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
    if (type)
        xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);

    for (n = mt->nodes; n; n = n->next)
    {
        struct yaz_marc_subfield *s;
        xmlNode *ptr;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
            xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
            if (n->u.datafield.indicator)
            {
                int i;
                for (i = 0; n->u.datafield.indicator[i]; i++)
                {
                    /* "ind" + any int + NUL: 3 decimal digits per byte is
                       an upper bound, so this cannot overflow no matter
                       how long the indicator string is (the previous
                       6-byte buffer overflowed at position 100) */
                    char ind_str[sizeof("ind") + 3 * sizeof(int) + 1];
                    char ind_val[2];

                    sprintf(ind_str, "ind%d", i+1);
                    ind_val[0] = n->u.datafield.indicator[i];
                    ind_val[1] = '\0';
                    xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
                }
            }
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                xmlNode *ptr_subfield;
                size_t using_code_len =
                    get_subfield_len(mt, s->code_data, identifier_length);

                /* value first: it is the element's text content */
                wrbuf_rewind(wr_cdata);
                wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
                                 s->code_data + using_code_len);
                marc_iconv_reset(mt, wr_cdata);
                ptr_subfield = xmlNewTextChild(
                    ptr, ns_record,
                    BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));

                /* then the code, as an attribute on that element */
                wrbuf_rewind(wr_cdata);
                wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
                                  s->code_data, using_code_len);
                xmlNewProp(ptr_subfield, BAD_CAST "code",
                           BAD_CAST wrbuf_cstr(wr_cdata));
            }
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_rewind(wr_cdata);
            wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
            marc_iconv_reset(mt, wr_cdata);

            ptr = xmlNewTextChild(record_ptr, ns_record,
                                  BAD_CAST "controlfield",
                                  BAD_CAST wrbuf_cstr(wr_cdata));

            xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
            break;
        case YAZ_MARC_COMMENT:
            ptr = xmlNewComment(BAD_CAST n->u.comment);
            xmlAddChild(record_ptr, ptr);
            break;
        case YAZ_MARC_LEADER:
            xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
                            BAD_CAST n->u.leader);
            break;
        }
    }
    wrbuf_destroy(wr_cdata);
    return 0;
}
/**
 * \brief reads one record in line format into the handle
 * \param mt handle; reset before reading
 * \param getbyte callback returning the next input byte
 * \param ungetbyte callback pushing one byte back
 * \param client_data opaque pointer passed to both callbacks
 * \retval 0 a record was read
 * \retval -1 input ended before any leader was established
 *
 * Recognized lines:
 *  - empty line or line starting with '$': ends the record once a leader
 *    has been seen, otherwise ignored
 *  - line starting with '(': annotation, skipped
 *  - 24-character line starting with 5 digits: leader; a second leader
 *    ends the current record
 *  - "TAG ..." (three non-blank characters then a blank): control or data
 *    field. It is a data field when the text after the indicators starts
 *    with one of the subfield markers '$', '_' or '*'.
 * Anything else is reported with "Ignoring line". If a field arrives
 * before any leader line, a default leader is installed.
 */
int yaz_marc_read_line(yaz_marc_t mt,
                       int (*getbyte)(void *client_data),
                       void (*ungetbyte)(int b, void *client_data),
                       void *client_data)
{
    int indicator_length;
    int identifier_length;
    int base_address;
    int length_data_entry;
    int length_starting;
    int length_implementation;
    int marker_ch = 0;
    int marker_skip = 0;
    int header_created = 0;
    WRBUF wrbuf_line = wrbuf_alloc();

    yaz_marc_reset(mt);

    while (yaz_marc_line_gets(getbyte, ungetbyte, client_data, wrbuf_line))
    {
        const char *line = wrbuf_cstr(wrbuf_line);
        int val;
        size_t line_len = strlen(line);

        if (line_len == 0)       /* empty line indicates end of record */
        {
            if (header_created)
                break;
        }
        else if (line[0] == '$') /* indicates beginning/end of record */
        {
            if (header_created)
                break;
        }
        else if (line[0] == '(') /* annotation, skip it */
            ;
        else if (line_len == 24 && atoi_n_check(line, 5, &val))
        {
            /* deal with header lines:  00366nam  22001698a 4500 */
            if (header_created)
                break;
            yaz_marc_set_leader(mt, line,
                                &indicator_length,
                                &identifier_length,
                                &base_address,
                                &length_data_entry,
                                &length_starting,
                                &length_implementation);
            header_created = 1;
        }
        else if (line_len > 4
                 && line[0] != ' ' && line[1] != ' ' && line[2] != ' '
                 && line[3] == ' ' )
        {
            /* deal with data/control lines: 245 12 ........ */
            char tag[4];
            const char *datafield_start = line+6;
            marker_ch = 0;
            marker_skip = 0;

            memcpy(tag, line, 3);
            tag[3] = '\0';
            if (line_len >= 8) /* control - or datafield ? */
            {
                if (*datafield_start == ' ')
                    datafield_start++;  /* skip blank after indicator */

                if (strchr("$_*", *datafield_start))
                {
                    marker_ch = *datafield_start;
                    /* check [1] first so [2] is never read beyond the
                       terminator of an 8-character line */
                    if (datafield_start[1] != '\0'
                        && datafield_start[2] == ' ')
                        marker_skip = 1; /* subfields has blank before data */
                }
            }
            if (!header_created)
            {
                /* Must be a full 24-character leader: the leader parser
                   copies 24 bytes and reads fixed offsets (10, 11, 12..16,
                   20..23). Positions 8 and 9 are both blank. */
                const char *leader = "01000cam  2200265 i 4500";

                yaz_marc_set_leader(mt, leader,
                                    &indicator_length,
                                    &identifier_length,
                                    &base_address,
                                    &length_data_entry,
                                    &length_starting,
                                    &length_implementation);
                header_created = 1;
            }
            if (marker_ch == 0)
            {   /* control field */
                yaz_marc_add_controlfield(mt, tag, line+4, strlen(line+4));
            }
            else
            {   /* data field */
                const char *indicator = line+4;
                int indicator_len = 2;
                const char *cp = datafield_start;

                yaz_marc_add_datafield(mt, tag, indicator, indicator_len);
                for (;;)
                {
                    const char *next;
                    size_t len;

                    assert(cp[0] == marker_ch);
                    cp++;
                    next = cp;
                    /* find the next marker that really starts a subfield:
                       it must be followed by an alphanumeric code and, in
                       blank-separated style, by a blank after the code */
                    while ((next = strchr(next, marker_ch)))
                    {
                        if ((next[1] >= 'A' && next[1] <= 'Z')
                            ||(next[1] >= 'a' && next[1] <= 'z')
                            ||(next[1] >= '0' && next[1] <= '9'))
                        {
                            if (!marker_skip)
                                break;
                            else if (next[2] == ' ')
                                break;
                        }
                        next++;
                    }
                    len = strlen(cp);
                    if (next)
                        len = next - cp - marker_skip;

                    if (marker_skip)
                    {
                        /* remove ' ' after subfield marker: shift the
                           code right over the blank, in the line buffer */
                        char *cp_blank = strchr(cp, ' ');
                        if (cp_blank)
                        {
                            len--;
                            while (cp_blank != cp)
                            {
                                cp_blank[0] = cp_blank[-1];
                                cp_blank--;
                            }
                            cp++;
                        }
                    }
                    yaz_marc_add_subfield(mt, cp, len);
                    if (!next)
                        break;
                    cp = next;
                }
            }
        }
        else
        {
            yaz_marc_cprintf(mt, "Ignoring line: %s", line);
        }
    }
    wrbuf_destroy(wrbuf_line);
    if (!header_created)
        return -1;
    return 0;
}
/**
 * \brief decodes one ISO2709 record from a buffer into the handle
 * \param mt handle; reset before decoding
 * \param buf buffer holding the record
 * \param bsize size of buf, or -1 when the size is not known
 * \retval -1 the record length is not numeric or is below 25, the record
 *            is larger than the buffer, or the directory runs to the end
 *            of the record without a field separator
 * \retval >0 record length as given in the first 5 bytes of the leader
 *
 * Problems found in individual directory entries or fields are reported
 * with yaz_marc_cprintf() and end the scan early; the function still
 * returns the record length in those cases.
 */
int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
{
    int entry_p;
    int record_length;
    int indicator_length;
    int identifier_length;
    int end_of_directory;
    int base_address;
    int length_data_entry;
    int length_starting;
    int length_implementation;

    yaz_marc_reset(mt);

    if (!atoi_n_check(buf, 5, &record_length))
    {
        yaz_marc_cprintf(mt, "Bad leader");
        return -1;
    }
    /* NOTE(review): the test rejects lengths below 25 while the message
       says "< 24" - confirm which one is intended */
    if (record_length < 25)
    {
        yaz_marc_cprintf(mt, "Record length %d < 24", record_length);
        return -1;
    }
    /* bail out if bsize is known and record_length is greater than that */
    if (bsize != -1 && record_length > bsize)
    {
        yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d",
                         record_length, bsize);
        return -1;
    }
    if (yaz_marc_get_debug(mt))
        yaz_marc_cprintf(mt, "Record length         %5d", record_length);

    yaz_marc_set_leader(mt, buf,
                        &indicator_length,
                        &identifier_length,
                        &base_address,
                        &length_data_entry,
                        &length_starting,
                        &length_implementation);

    /* First pass. determine length of directory & base of data */
    for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
    {
        /* length of directory entry */
        int l = 3 + length_data_entry + length_starting;
        if (entry_p + l >= record_length)
        {
            yaz_marc_cprintf(mt, "Directory offset %d: end of record."
                             " Missing FS char", entry_p);
            return -1;
        }
        if (yaz_marc_get_debug(mt))
        {
            WRBUF hex = wrbuf_alloc();

            wrbuf_puts(hex, "Tag ");
            wrbuf_write_escaped(hex, buf + entry_p, 3);
            wrbuf_puts(hex, ", length ");
            wrbuf_write_escaped(hex, buf + entry_p + 3,
                                length_data_entry);
            wrbuf_puts(hex, ", starting ");
            wrbuf_write_escaped(hex, buf + entry_p + 3 + length_data_entry,
                                length_starting);
            yaz_marc_cprintf(mt, "Directory offset %d: %s",
                             entry_p, wrbuf_cstr(hex));
            wrbuf_destroy(hex);
        }
        /* Check for digits in length+starting info; scans backwards and
           leaves l >= 3 when a non-digit was hit */
        while (--l >= 3)
            if (!yaz_isdigit(buf[entry_p + l]))
                break;
        if (l >= 3)
        {
            WRBUF hex = wrbuf_alloc();
            /* Not all digits, so stop directory scan */
            wrbuf_write_escaped(hex, buf + entry_p,
                                length_data_entry + length_starting + 3);
            yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data"
                             " length and/or length starting (%s)", entry_p,
                             wrbuf_cstr(hex));
            wrbuf_destroy(hex);
            break;
        }
        entry_p += 3 + length_data_entry + length_starting;
    }
    /* entries from offset 24 up to here are the ones parsed below */
    end_of_directory = entry_p;
    if (base_address != entry_p+1)
    {
        /* only reported; the leader's base address is still used */
        yaz_marc_cprintf(mt, "Base address not at end of directory,"
                         " base %d, end %d", base_address, entry_p+1);
    }

    /* Second pass. parse control - and datafields */
    for (entry_p = 24; entry_p != end_of_directory; )
    {
        int data_length;
        int data_offset;
        int end_offset;
        int i;
        char tag[4];
        int identifier_flag = 0;
        int entry_p0 = entry_p; /* start of this entry, for diagnostics */

        memcpy (tag, buf+entry_p, 3);
        entry_p += 3;
        tag[3] = '\0';
        data_length = atoi_n(buf+entry_p, length_data_entry);
        entry_p += length_data_entry;
        data_offset = atoi_n(buf+entry_p, length_starting);
        entry_p += length_starting;
        /* i: first byte of the field; end_offset: its last byte */
        i = data_offset + base_address;
        end_offset = i+data_length-1;

        if (data_length <= 0 || data_offset < 0)
            break;

        if (yaz_marc_get_debug(mt))
        {
            yaz_marc_cprintf(mt, "Tag: %s. Directory offset %d: data-length %d,"
                             " data-offset %d",
                             tag, entry_p0, data_length, data_offset);
        }
        if (end_offset >= record_length)
        {
            yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d",
                             entry_p0, end_offset, record_length);
            break;
        }

        /* identifier_flag: 0 = control field, 1 = data field, 2 = data
           field whose content starts one byte later */
        if (memcmp (tag, "00", 2))
            identifier_flag = 1;  /* if not 00X assume subfields */
        else if (indicator_length < 4 && indicator_length > 0)
        {
            /* Danmarc 00X have subfields */
            /* NOTE(review): these two look-ahead reads are not checked
               against end_offset / record_length - confirm that is safe */
            if (buf[i + indicator_length] == ISO2709_IDFS)
                identifier_flag = 1;
            else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
                identifier_flag = 2;
        }

        if (identifier_flag)
        {
            /* datafield */
            i += identifier_flag-1;
            if (indicator_length)
            {
                /* skip RS/FS bytes in indicator. They are not allowed there */
                /* NOTE(review): buf is plain char, so where char is signed
                   bytes >= 0x80 also compare below ' ' here. The skip also
                   moves end_offset forward without re-checking it against
                   record_length - confirm both are intended. */
                int j;
                for (j = indicator_length; --j >= 0; )
                    if (buf[j+i] < ' ')
                    {
                        j++;
                        i += j;
                        end_offset += j;
                        yaz_marc_cprintf(mt, "Bad indicator data. "
                                         "Skipping %d bytes", j);
                        break;
                    }
                yaz_marc_add_datafield(mt, tag, buf+i, indicator_length);
                i += indicator_length;
            }
            /* each round consumes one delimiter byte plus the code and
               data that follow it, up to the next delimiter */
            while (i < end_offset &&
                    buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
            {
                int code_offset = i+1;

                i ++;
                while (i < end_offset &&
                        buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
                        buf[i] != ISO2709_FS)
                    i++;
                /* empty subfields are dropped */
                if (i > code_offset)
                    yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset);
            }
        }
        else
        {
            /* controlfield */
            int i0 = i;
            while (i < end_offset &&
                buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
                i++;
            yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0);
        }
        /* the two checks below only report; parsing continues */
        if (i < end_offset)
        {
            yaz_marc_cprintf(mt, "Separator but not at end of field length=%d",
                    data_length);
        }
        if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
        {
            yaz_marc_cprintf(mt, "No separator at end of field length=%d",
                             data_length);
        }
    }
    return record_length;
}