Example #1
0
int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
{
    struct yaz_marc_node *n;
    int identifier_length;
    const char *leader = 0;

    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
        {
            leader = n->u.leader;
            break;
        }

    if (!leader)
        return -1;
    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;

    for (n = mt->nodes; n; n = n->next)
    {
        switch(n->which)
        {
        case YAZ_MARC_COMMENT:
            wrbuf_iconv_write(wr, mt->iconv_cd,
                              n->u.comment, strlen(n->u.comment));
            wrbuf_puts(wr, "\n");
            break;
        default:
            break;
        }
    }
    return 0;
}
Example #2
0
static int convert_solrmarc(void *info, WRBUF record, WRBUF wr_error)
{
    WRBUF w = wrbuf_alloc();
    const char *buf = wrbuf_buf(record);
    size_t i, sz = wrbuf_len(record);
    for (i = 0; i < sz; i++)
    {
        int ch;
        if (buf[i] == '#' && i < sz - 3 && buf[i+3] == ';'
            && atoi_n_check(buf+i+1, 2, &ch))
            i += 3;
        else
            ch = buf[i];
        wrbuf_putc(w, ch);
    }
    wrbuf_rewind(record);
    wrbuf_write(record, wrbuf_buf(w), wrbuf_len(w));
    wrbuf_destroy(w);
    return 0;
}
Example #3
0
static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
                                        const char *ns,
                                        const char *format,
                                        const char *type)
{
    struct yaz_marc_node *n;
    int identifier_length;
    const char *leader = 0;
    xmlNode *record_ptr;
    xmlNsPtr ns_record;
    WRBUF wr_cdata = 0;

    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
        {
            leader = n->u.leader;
            break;
        }

    if (!leader)
        return -1;
    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;

    wr_cdata = wrbuf_alloc();

    record_ptr = xmlNewNode(0, BAD_CAST "r");
    *root_ptr = record_ptr;

    ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
    xmlSetNs(record_ptr, ns_record);

    if (format)
        xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
    if (type)
        xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
    for (n = mt->nodes; n; n = n->next)
    {
        xmlNode *ptr;

        char field[10];
        field[0] = 'c';
        field[4] = '\0';

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_rewind(wr_cdata);
            wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
            marc_iconv_reset(mt, wr_cdata);

            strncpy(field + 1, n->u.controlfield.tag, 3);
            ptr = xmlNewTextChild(record_ptr, ns_record,
                                  BAD_CAST field,
                                  BAD_CAST wrbuf_cstr(wr_cdata));
            break;
        case YAZ_MARC_COMMENT:
            ptr = xmlNewComment(BAD_CAST n->u.comment);
            xmlAddChild(record_ptr, ptr);
            break;
        case YAZ_MARC_LEADER:
            xmlNewTextChild(record_ptr, ns_record, BAD_CAST "l",
                            BAD_CAST n->u.leader);
            break;
        }
    }
    wrbuf_destroy(wr_cdata);
    return 0;
}
Example #4
0
/** \brief common MARC XML/Xchange/turbomarc writer
    \param mt handle
    \param wr WRBUF output
    \param ns XMLNS for the elements
    \param format record format (e.g. "MARC21")
    \param type record type (e.g. "Bibliographic")
    \param turbo =1 for turbomarc
    \retval 0 OK
    \retval -1 failure
*/
static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr,
                                        const char *ns,
                                        const char *format,
                                        const char *type,
                                        int turbo)
{
    struct yaz_marc_node *n;
    int identifier_length;
    const char *leader = 0;

    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
        {
            leader = n->u.leader;
            break;
        }

    if (!leader)
        return -1;
    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;

    if (mt->enable_collection != no_collection)
    {
        if (mt->enable_collection == collection_first)
        {
            wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
            mt->enable_collection = collection_second;
        }
        wrbuf_printf(wr, "<%s", record_name[turbo]);
    }
    else
    {
        wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
    }
    if (format)
        wrbuf_printf(wr, " format=\"%.80s\"", format);
    if (type)
        wrbuf_printf(wr, " type=\"%.80s\"", type);
    wrbuf_printf(wr, ">\n");
    for (n = mt->nodes; n; n = n->next)
    {
        struct yaz_marc_subfield *s;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:

            wrbuf_printf(wr, "  <%s", datafield_name[turbo]);
            if (!turbo)
            	wrbuf_printf(wr, " tag=\"");
            wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
                                    strlen(n->u.datafield.tag));
            if (!turbo)
                wrbuf_printf(wr, "\"");
    	    if (n->u.datafield.indicator)
    	    {
    	    	int i;
    	    	for (i = 0; n->u.datafield.indicator[i]; i++)
    	    	{
                    wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1);
                    wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
                                            n->u.datafield.indicator+i, 1);
                    wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
                }
            }
            wrbuf_printf(wr, ">\n");
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                size_t using_code_len = get_subfield_len(mt, s->code_data,
                                                         identifier_length);
                wrbuf_printf(wr, "    <%s", subfield_name[turbo]);
                if (!turbo)
                {
                    wrbuf_printf(wr, " code=\"");
                    wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
                                            s->code_data, using_code_len);
                    wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
                }
                else
                {
                    element_name_append_attribute_value(mt, wr, "code", s->code_data, using_code_len);
                    wrbuf_puts(wr, ">");
                }
                wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
                                        s->code_data + using_code_len,
                                        strlen(s->code_data + using_code_len));
                marc_iconv_reset(mt, wr);
                wrbuf_printf(wr, "</%s", subfield_name[turbo]);
                if (turbo)
                    element_name_append_attribute_value(mt, wr, 0, s->code_data, using_code_len);
                wrbuf_puts(wr, ">\n");
            }
            wrbuf_printf(wr, "  </%s", datafield_name[turbo]);
            /* TODO Not CDATA */
            if (turbo)
            	wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
                                        strlen(n->u.datafield.tag));
            wrbuf_printf(wr, ">\n");
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_printf(wr, "  <%s", controlfield_name[turbo]);
            if (!turbo)
            {
            	wrbuf_printf(wr, " tag=\"");
                wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
        				strlen(n->u.controlfield.tag));
                wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
            }
            else
            {
                /* TODO convert special */
                wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
        				strlen(n->u.controlfield.tag));
                wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
            }
            wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
                                    n->u.controlfield.data,
                                    strlen(n->u.controlfield.data));
            marc_iconv_reset(mt, wr);
            wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
            /* TODO convert special */
            if (turbo)
                wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
    					strlen(n->u.controlfield.tag));
            wrbuf_puts(wr, ">\n");
            break;
        case YAZ_MARC_COMMENT:
            wrbuf_printf(wr, "<!-- ");
            wrbuf_puts(wr, n->u.comment);
            wrbuf_printf(wr, " -->\n");
            break;
        case YAZ_MARC_LEADER:
            wrbuf_printf(wr, "  <%s>", leader_name[turbo]);
            wrbuf_iconv_write_cdata(wr,
                                    0 , /* no charset conversion for leader */
                                    n->u.leader, strlen(n->u.leader));
            wrbuf_printf(wr, "</%s>\n", leader_name[turbo]);
        }
    }
    wrbuf_printf(wr, "</%s>\n", record_name[turbo]);
    return 0;
}
Example #5
0
int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
{
    struct yaz_marc_node *n;
    int identifier_length;
    const char *leader = 0;

    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
        {
            leader = n->u.leader;
            break;
        }

    if (!leader)
        return -1;
    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;

    for (n = mt->nodes; n; n = n->next)
    {
        struct yaz_marc_subfield *s;
        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
                         n->u.datafield.indicator);
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                size_t using_code_len = get_subfield_len(mt, s->code_data,
                                                         identifier_length);

                wrbuf_puts (wr, mt->subfield_str);
                wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
                                  using_code_len);
                wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
                wrbuf_iconv_puts(wr, mt->iconv_cd,
                                 s->code_data + using_code_len);
                marc_iconv_reset(mt, wr);
            }
            wrbuf_puts (wr, mt->endline_str);
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_printf(wr, "%s", n->u.controlfield.tag);
            wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
            wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
            marc_iconv_reset(mt, wr);
            wrbuf_puts (wr, mt->endline_str);
            break;
        case YAZ_MARC_COMMENT:
            wrbuf_puts(wr, "(");
            wrbuf_iconv_write(wr, mt->iconv_cd,
                              n->u.comment, strlen(n->u.comment));
            marc_iconv_reset(mt, wr);
            wrbuf_puts(wr, ")\n");
            break;
        case YAZ_MARC_LEADER:
            wrbuf_printf(wr, "%s\n", n->u.leader);
        }
    }
    wrbuf_puts(wr, "\n");
    return 0;
}
Example #6
0
void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
                         int *indicator_length,
                         int *identifier_length,
                         int *base_address,
                         int *length_data_entry,
                         int *length_starting,
                         int *length_implementation)
{
    char leader[24];

    memcpy(leader, leader_c, 24);

    check_ascii(mt, leader, 5, 'a');
    check_ascii(mt, leader, 6, 'a');
    check_ascii(mt, leader, 7, 'a');
    check_ascii(mt, leader, 8, '#');
    check_ascii(mt, leader, 9, '#');
    if (!atoi_n_check(leader+10, 1, indicator_length) || *indicator_length == 0)
    {
        yaz_marc_cprintf(mt, "Indicator length at offset 10 should"
                         " hold a number 1-9. Assuming 2");
        leader[10] = '2';
        *indicator_length = 2;
    }
    if (!atoi_n_check(leader+11, 1, identifier_length) || *identifier_length == 0)
    {
        yaz_marc_cprintf(mt, "Identifier length at offset 11 should "
                         " hold a number 1-9. Assuming 2");
        leader[11] = '2';
        *identifier_length = 2;
    }
    if (!atoi_n_check(leader+12, 5, base_address))
    {
        yaz_marc_cprintf(mt, "Base address at offsets 12..16 should"
                         " hold a number. Assuming 0");
        *base_address = 0;
    }
    check_ascii(mt, leader, 17, '#');
    check_ascii(mt, leader, 18, '#');
    check_ascii(mt, leader, 19, '#');
    if (!atoi_n_check(leader+20, 1, length_data_entry) ||
        *length_data_entry < 3)
    {
        yaz_marc_cprintf(mt, "Length data entry at offset 20 should"
                         " hold a number 3-9. Assuming 4");
        *length_data_entry = 4;
        leader[20] = '4';
    }
    if (!atoi_n_check(leader+21, 1, length_starting) || *length_starting < 4)
    {
        yaz_marc_cprintf(mt, "Length starting at offset 21 should"
                         " hold a number 4-9. Assuming 5");
        *length_starting = 5;
        leader[21] = '5';
    }
    if (!atoi_n_check(leader+22, 1, length_implementation))
    {
        yaz_marc_cprintf(mt, "Length implementation at offset 22 should"
                         " hold a number. Assuming 0");
        *length_implementation = 0;
        leader[22] = '0';
    }
    check_ascii(mt, leader, 23, '0');

    if (mt->debug)
    {
        yaz_marc_cprintf(mt, "Indicator length      %5d", *indicator_length);
        yaz_marc_cprintf(mt, "Identifier length     %5d", *identifier_length);
        yaz_marc_cprintf(mt, "Base address          %5d", *base_address);
        yaz_marc_cprintf(mt, "Length data entry     %5d", *length_data_entry);
        yaz_marc_cprintf(mt, "Length starting       %5d", *length_starting);
        yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
    }
    yaz_marc_add_leader(mt, leader, 24);
}
Example #7
0
int yaz_marc_write_json(yaz_marc_t mt, WRBUF w)
{
    int identifier_length;
    struct yaz_marc_node *n;
    const char *leader = 0;
    int first = 1;

    wrbuf_puts(w, "{\n");
    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
            leader = n->u.leader;

    if (!leader)
        return -1;

    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;

    wrbuf_puts(w, "\t\"leader\":\"");
    wrbuf_json_puts(w, leader);
    wrbuf_puts(w, "\",\n");
    wrbuf_puts(w, "\t\"fields\":\n\t[\n");

    for (n = mt->nodes; n; n = n->next)
    {
        struct yaz_marc_subfield *s;
        const char *sep = "";
        switch (n->which)
        {
        case YAZ_MARC_LEADER:
        case YAZ_MARC_COMMENT:
            break;
        case YAZ_MARC_CONTROLFIELD:
            if (first)
                first = 0;
            else
                wrbuf_puts(w, ",\n");
            wrbuf_puts(w, "\t\t{\n\t\t\t\"");
            wrbuf_iconv_json_puts(w, mt->iconv_cd, n->u.controlfield.tag);
            wrbuf_puts(w, "\":\"");
            wrbuf_iconv_json_puts(w, mt->iconv_cd, n->u.controlfield.data);
            wrbuf_puts(w, "\"\n\t\t}");
            break;
        case YAZ_MARC_DATAFIELD:
            if (first)
                first = 0;
            else
                wrbuf_puts(w, ",\n");

            wrbuf_puts(w, "\t\t{\n\t\t\t\"");
            wrbuf_json_puts(w, n->u.datafield.tag);
            wrbuf_puts(w, "\":\n\t\t\t{\n\t\t\t\t\"subfields\":\n\t\t\t\t[\n");
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                size_t using_code_len = get_subfield_len(mt, s->code_data,
                                                         identifier_length);
                wrbuf_puts(w, sep);
                sep = ",\n";
                wrbuf_puts(w, "\t\t\t\t\t{\n\t\t\t\t\t\t\"");
                wrbuf_iconv_json_write(w, mt->iconv_cd,
                                       s->code_data, using_code_len);
                wrbuf_puts(w, "\":\"");
                wrbuf_iconv_json_puts(w, mt->iconv_cd,
                                      s->code_data + using_code_len);
                wrbuf_puts(w, "\"\n\t\t\t\t\t}");
            }
            wrbuf_puts(w, "\n\t\t\t\t]");
            if (n->u.datafield.indicator[0])
            {
                int i;
                for (i = 0; n->u.datafield.indicator[i]; i++)
                {
                    wrbuf_printf(w, ",\n\t\t\t\t\"ind%d\":\"%c\"", i + 1,
                                 n->u.datafield.indicator[i]);
                }
            }
            wrbuf_puts(w, "\n\t\t\t}\n");
            wrbuf_puts(w, "\n\t\t}");
            break;
        }
    }
    wrbuf_puts(w, "\n\t]\n");
    wrbuf_puts(w, "}\n");
    return 0;
}
Example #8
0
int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
{
    struct yaz_marc_node *n;
    int indicator_length;
    int identifier_length;
    int length_data_entry;
    int length_starting;
    int length_implementation;
    int data_offset = 0;
    const char *leader = 0;
    WRBUF wr_dir, wr_head, wr_data_tmp;
    int base_address;

    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
            leader = n->u.leader;

    if (!leader)
        return -1;
    if (!atoi_n_check(leader+10, 1, &indicator_length))
        return -1;
    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;
    if (!atoi_n_check(leader+20, 1, &length_data_entry))
        return -1;
    if (!atoi_n_check(leader+21, 1, &length_starting))
        return -1;
    if (!atoi_n_check(leader+22, 1, &length_implementation))
        return -1;

    wr_data_tmp = wrbuf_alloc();
    wr_dir = wrbuf_alloc();
    for (n = mt->nodes; n; n = n->next)
    {
        int data_length = 0;
        struct yaz_marc_subfield *s;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
            data_length += indicator_length;
            wrbuf_rewind(wr_data_tmp);
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                /* write dummy IDFS + content */
                wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
                wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
                marc_iconv_reset(mt, wr_data_tmp);
            }
            /* write dummy FS (makes MARC-8 to become ASCII) */
            wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
            marc_iconv_reset(mt, wr_data_tmp);
            data_length += wrbuf_len(wr_data_tmp);
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);

            wrbuf_rewind(wr_data_tmp);
            wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
                             n->u.controlfield.data);
            marc_iconv_reset(mt, wr_data_tmp);
            wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
            marc_iconv_reset(mt, wr_data_tmp);
            data_length += wrbuf_len(wr_data_tmp);
            break;
        case YAZ_MARC_COMMENT:
            break;
        case YAZ_MARC_LEADER:
            break;
        }
        if (data_length)
        {
            wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
            wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
            data_offset += data_length;
        }
    }
    /* mark end of directory */
    wrbuf_putc(wr_dir, ISO2709_FS);

    /* base address of data (comes after leader+directory) */
    base_address = 24 + wrbuf_len(wr_dir);

    wr_head = wrbuf_alloc();

    /* write record length */
    wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
    /* from "original" leader */
    wrbuf_write(wr_head, leader+5, 7);
    /* base address of data */
    wrbuf_printf(wr_head, "%05d", base_address);
    /* from "original" leader */
    wrbuf_write(wr_head, leader+17, 7);

    wrbuf_write(wr, wrbuf_buf(wr_head), 24);
    wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
    wrbuf_destroy(wr_head);
    wrbuf_destroy(wr_dir);
    wrbuf_destroy(wr_data_tmp);

    for (n = mt->nodes; n; n = n->next)
    {
        struct yaz_marc_subfield *s;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            wrbuf_write(wr, n->u.datafield.indicator, indicator_length);
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                wrbuf_putc(wr, ISO2709_IDFS);
                wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
                marc_iconv_reset(mt, wr);
            }
            wrbuf_putc(wr, ISO2709_FS);
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
            marc_iconv_reset(mt, wr);
            wrbuf_putc(wr, ISO2709_FS);
            break;
        case YAZ_MARC_COMMENT:
            break;
        case YAZ_MARC_LEADER:
            break;
        }
    }
    wrbuf_printf(wr, "%c", ISO2709_RS);
    return 0;
}
Example #9
0
int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
                       const char *ns,
                       const char *format,
                       const char *type)
{
    struct yaz_marc_node *n;
    int identifier_length;
    const char *leader = 0;
    xmlNode *record_ptr;
    xmlNsPtr ns_record;
    WRBUF wr_cdata = 0;

    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
        {
            leader = n->u.leader;
            break;
        }

    if (!leader)
        return -1;
    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;

    wr_cdata = wrbuf_alloc();

    record_ptr = xmlNewNode(0, BAD_CAST "record");
    *root_ptr = record_ptr;

    ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
    xmlSetNs(record_ptr, ns_record);

    if (format)
        xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
    if (type)
        xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
    for (n = mt->nodes; n; n = n->next)
    {
        struct yaz_marc_subfield *s;
        xmlNode *ptr;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
            xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
            if (n->u.datafield.indicator)
            {
                int i;
                for (i = 0; n->u.datafield.indicator[i]; i++)
                {
                    char ind_str[6];
                    char ind_val[2];

                    sprintf(ind_str, "ind%d", i+1);
                    ind_val[0] = n->u.datafield.indicator[i];
                    ind_val[1] = '\0';
                    xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
                }
            }
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                xmlNode *ptr_subfield;
                size_t using_code_len = get_subfield_len(mt, s->code_data,
                                                         identifier_length);
                wrbuf_rewind(wr_cdata);
                wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
                                 s->code_data + using_code_len);
                marc_iconv_reset(mt, wr_cdata);
                ptr_subfield = xmlNewTextChild(
                    ptr, ns_record,
                    BAD_CAST "subfield",  BAD_CAST wrbuf_cstr(wr_cdata));

                wrbuf_rewind(wr_cdata);
                wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
                                  s->code_data, using_code_len);
                xmlNewProp(ptr_subfield, BAD_CAST "code",
                           BAD_CAST wrbuf_cstr(wr_cdata));
            }
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_rewind(wr_cdata);
            wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
            marc_iconv_reset(mt, wr_cdata);

            ptr = xmlNewTextChild(record_ptr, ns_record,
                                  BAD_CAST "controlfield",
                                  BAD_CAST wrbuf_cstr(wr_cdata));

            xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
            break;
        case YAZ_MARC_COMMENT:
            ptr = xmlNewComment(BAD_CAST n->u.comment);
            xmlAddChild(record_ptr, ptr);
            break;
        case YAZ_MARC_LEADER:
            xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
                            BAD_CAST n->u.leader);
            break;
        }
    }
    wrbuf_destroy(wr_cdata);
    return 0;
}
Example #10
0
int yaz_marc_read_line(yaz_marc_t mt,
                       int (*getbyte)(void *client_data),
                       void (*ungetbyte)(int b, void *client_data),
                       void *client_data)
{
    int indicator_length;
    int identifier_length;
    int base_address;
    int length_data_entry;
    int length_starting;
    int length_implementation;
    int marker_ch = 0;
    int marker_skip = 0;
    int header_created = 0;
    WRBUF wrbuf_line = wrbuf_alloc();

    yaz_marc_reset(mt);

    while (yaz_marc_line_gets(getbyte, ungetbyte, client_data, wrbuf_line))
    {
        const char *line = wrbuf_cstr(wrbuf_line);
        int val;
        size_t line_len = strlen(line);
        if (line_len == 0)       /* empty line indicates end of record */
        {
            if (header_created)
                break;
        }
        else if (line[0] == '$') /* indicates beginning/end of record */
        {
            if (header_created)
                break;
        }
        else if (line[0] == '(') /* annotation, skip it */
            ;
        else if (line_len == 24 && atoi_n_check(line, 5, &val))
        {
            /* deal with header lines:  00366nam  22001698a 4500
            */

            if (header_created)
                break;
            yaz_marc_set_leader(mt, line,
                                &indicator_length,
                                &identifier_length,
                                &base_address,
                                &length_data_entry,
                                &length_starting,
                                &length_implementation);
            header_created = 1;
        }
        else if (line_len > 4 && line[0] != ' ' && line[1] != ' '
                 && line[2] != ' ' && line[3] == ' ' )
        {
            /* deal with data/control lines: 245 12 ........ */
            char tag[4];
            const char *datafield_start = line+6;
            marker_ch = 0;
            marker_skip = 0;

            memcpy(tag, line, 3);
            tag[3] = '\0';
            if (line_len >= 8) /* control - or datafield ? */
            {
                if (*datafield_start == ' ')
                    datafield_start++;  /* skip blank after indicator */

                if (strchr("$_*", *datafield_start))
                {
                    marker_ch = *datafield_start;
                    if (datafield_start[2] == ' ')
                        marker_skip = 1; /* subfields has blank before data */
                }
            }
            if (!header_created)
            {
                const char *leader = "01000cam  2200265 i 4500";

                yaz_marc_set_leader(mt, leader,
                                    &indicator_length,
                                    &identifier_length,
                                    &base_address,
                                    &length_data_entry,
                                    &length_starting,
                                    &length_implementation);
                header_created = 1;
            }

            if (marker_ch == 0)
            {   /* control field */
                yaz_marc_add_controlfield(mt, tag, line+4, strlen(line+4));
            }
            else
            {   /* data field */
                const char *indicator = line+4;
                int indicator_len = 2;
                const char *cp = datafield_start;

                yaz_marc_add_datafield(mt, tag, indicator, indicator_len);
                for (;;)
                {
                    const char *next;
                    size_t len;

                    assert(cp[0] == marker_ch);
                    cp++;
                    next = cp;
                    while ((next = strchr(next, marker_ch)))
                    {
                        if ((next[1] >= 'A' && next[1] <= 'Z')
                            ||(next[1] >= 'a' && next[1] <= 'z')
                            ||(next[1] >= '0' && next[1] <= '9'))
                        {
                            if (!marker_skip)
                                break;
                            else if (next[2] == ' ')
                                break;
                        }
                        next++;
                    }
                    len = strlen(cp);
                    if (next)
                        len = next - cp - marker_skip;

                    if (marker_skip)
                    {
                        /* remove ' ' after subfield marker */
                        char *cp_blank = strchr(cp, ' ');
                        if (cp_blank)
                        {
                            len--;
                            while (cp_blank != cp)
                            {
                                cp_blank[0] = cp_blank[-1];
                                cp_blank--;
                            }
                            cp++;
                        }
                    }
                    yaz_marc_add_subfield(mt, cp, len);
                    if (!next)
                        break;
                    cp = next;
                }
            }
        }
        else
        {
            yaz_marc_cprintf(mt, "Ignoring line: %s", line);
        }
    }
    wrbuf_destroy(wrbuf_line);
    if (!header_created)
        return -1;
    return 0;
}
Example #11
0
int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
{
    int entry_p;
    int record_length;
    int indicator_length;
    int identifier_length;
    int end_of_directory;
    int base_address;
    int length_data_entry;
    int length_starting;
    int length_implementation;

    yaz_marc_reset(mt);

    if (!atoi_n_check(buf, 5, &record_length))
    {
        yaz_marc_cprintf(mt, "Bad leader");
        return -1;
    }
    if (record_length < 25)
    {
        yaz_marc_cprintf(mt, "Record length %d < 24", record_length);
        return -1;
    }
    /* ballout if bsize is known and record_length is less than that */
    if (bsize != -1 && record_length > bsize)
    {
        yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d",
                         record_length, bsize);
        return -1;
    }
    if (yaz_marc_get_debug(mt))
        yaz_marc_cprintf(mt, "Record length         %5d", record_length);

    yaz_marc_set_leader(mt, buf,
                        &indicator_length,
                        &identifier_length,
                        &base_address,
                        &length_data_entry,
                        &length_starting,
                        &length_implementation);

    /* First pass. determine length of directory & base of data */
    for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
    {
        /* length of directory entry */
        int l = 3 + length_data_entry + length_starting;
        if (entry_p + l >= record_length)
        {
            yaz_marc_cprintf(mt, "Directory offset %d: end of record."
                             " Missing FS char", entry_p);
            return -1;
        }
        if (yaz_marc_get_debug(mt))
        {
            WRBUF hex = wrbuf_alloc();

            wrbuf_puts(hex, "Tag ");
            wrbuf_write_escaped(hex, buf + entry_p, 3);
            wrbuf_puts(hex, ", length ");
            wrbuf_write_escaped(hex, buf + entry_p + 3,
                                length_data_entry);
            wrbuf_puts(hex, ", starting ");
            wrbuf_write_escaped(hex, buf + entry_p + 3 + length_data_entry,
                                length_starting);
            yaz_marc_cprintf(mt, "Directory offset %d: %s",
                             entry_p, wrbuf_cstr(hex));
            wrbuf_destroy(hex);
        }
        /* Check for digits in length+starting info */
        while (--l >= 3)
            if (!yaz_isdigit(buf[entry_p + l]))
                break;
        if (l >= 3)
        {
            WRBUF hex = wrbuf_alloc();
            /* Not all digits, so stop directory scan */
            wrbuf_write_escaped(hex, buf + entry_p,
                                length_data_entry + length_starting + 3);
            yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data"
                             " length and/or length starting (%s)", entry_p,
                             wrbuf_cstr(hex));
            wrbuf_destroy(hex);
            break;
        }
        entry_p += 3 + length_data_entry + length_starting;
    }
    end_of_directory = entry_p;
    if (base_address != entry_p+1)
    {
        yaz_marc_cprintf(mt, "Base address not at end of directory,"
                         " base %d, end %d", base_address, entry_p+1);
    }

    /* Second pass. parse control - and datafields */
    for (entry_p = 24; entry_p != end_of_directory; )
    {
        int data_length;
        int data_offset;
        int end_offset;
        int i;
        char tag[4];
        int identifier_flag = 0;
        int entry_p0 = entry_p;

        memcpy (tag, buf+entry_p, 3);
        entry_p += 3;
        tag[3] = '\0';
        data_length = atoi_n(buf+entry_p, length_data_entry);
        entry_p += length_data_entry;
        data_offset = atoi_n(buf+entry_p, length_starting);
        entry_p += length_starting;
        i = data_offset + base_address;
        end_offset = i+data_length-1;

        if (data_length <= 0 || data_offset < 0)
            break;

        if (yaz_marc_get_debug(mt))
        {
            yaz_marc_cprintf(mt, "Tag: %s. Directory offset %d: data-length %d,"
                             " data-offset %d",
                             tag, entry_p0, data_length, data_offset);
        }
        if (end_offset >= record_length)
        {
            yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d",
                             entry_p0, end_offset, record_length);
            break;
        }

        if (memcmp (tag, "00", 2))
            identifier_flag = 1;  /* if not 00X assume subfields */
        else if (indicator_length < 4 && indicator_length > 0)
        {
            /* Danmarc 00X have subfields */
            if (buf[i + indicator_length] == ISO2709_IDFS)
                identifier_flag = 1;
            else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
                identifier_flag = 2;
        }

        if (identifier_flag)
        {
            /* datafield */
            i += identifier_flag-1;
            if (indicator_length)
            {
                /* skip RS/FS bytes in indicator. They are not allowed there */
                int j;
                for (j = indicator_length; --j >= 0; )
                    if (buf[j+i] < ' ')
                    {
                        j++;
                        i += j;
                        end_offset += j;
                        yaz_marc_cprintf(mt, "Bad indicator data. "
                                         "Skipping %d bytes", j);
                        break;
                    }
                yaz_marc_add_datafield(mt, tag, buf+i, indicator_length);
                i += indicator_length;
            }

            while (i < end_offset &&
                    buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
            {
                int code_offset = i+1;

                i ++;
                while (i < end_offset &&
                        buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
                       buf[i] != ISO2709_FS)
                    i++;
                if (i > code_offset)
                    yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset);
            }
        }
        else
        {
            /* controlfield */
            int i0 = i;
            while (i < end_offset &&
                buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
                i++;
            yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0);
        }
        if (i < end_offset)
        {
            yaz_marc_cprintf(mt, "Separator but not at end of field length=%d",
                    data_length);
        }
        if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
        {
            yaz_marc_cprintf(mt, "No separator at end of field length=%d",
                             data_length);
        }
    }
    return record_length;
}