diff options
author | yzrh <yzrh@noema.org> | 2020-12-31 22:36:28 +0000 |
---|---|---|
committer | yzrh <yzrh@noema.org> | 2021-01-03 03:01:28 +0000 |
commit | 1994f122cc29504862944cca1da1c5203c7e41eb (patch) | |
tree | d89e37c5f3443156116bd8476e0efc2d380acd55 /src/cnki_hn.c | |
parent | b20c6ad3ed930977990f3812b25b80d2ce282d79 (diff) | |
download | melon-1994f122cc29504862944cca1da1c5203c7e41eb.tar.gz melon-1994f122cc29504862944cca1da1c5203c7e41eb.tar.zst |
Decode JBIG and JPEG during HN conversion.
Diffstat (limited to 'src/cnki_hn.c')
-rw-r--r-- | src/cnki_hn.c | 228 |
1 files changed, 6 insertions, 222 deletions
diff --git a/src/cnki_hn.c b/src/cnki_hn.c index f6a4c24..0e16a76 100644 --- a/src/cnki_hn.c +++ b/src/cnki_hn.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, yzrh <yzrh@noema.org> + * Copyright (c) 2020-2021, yzrh <yzrh@noema.org> * * SPDX-License-Identifier: Apache-2.0 */ @@ -9,6 +9,8 @@ #include "cnki.h" #include "iconv.h" +#include "zlib.h" +#include "jpeg.h" #include "pdf.h" #include "pdf_cnki.h" @@ -131,231 +133,13 @@ cnki_hn(cnki_t **param) ptr = ptr->next; } - if ((*param)->stat > 1) - printf("Loaded %d page(s)\n", (*param)->file_stat->page); - - if ((*param)->stat > 1) - printf("Generating PDF object(s)\n"); - - pdf_object_t *pdf = NULL; - - if (pdf_obj_create(&pdf) != 0) - return 1; - - int buf_size; - char *buf; - - int str_size; - char *str; - - int conv_size; - char *conv_dst; - char conv_src[2]; - char conv_hex[3]; - - ptr = (*param)->object_hn; - while (ptr != NULL) { - if (strncmp(ptr->text + 8, "COMPRESSTEXT", 12) == 0) { - cnki_zlib(&buf, &buf_size, ptr->text, ptr->text_size); - - str_size = buf_size / 8 + 7; - str = malloc(str_size); - - if (str == NULL) - return 1; - - memset(str, 0, str_size); - - strcat(str, "<feff"); - - for (int i = 0; i < buf_size; i += 16) { - conv_src[0] = buf[i + 7]; - conv_src[1] = buf[i + 6]; - - conv_size = 6; - - if (strconv(&conv_dst, "UTF-16BE", - conv_src, "GB18030", &conv_size) == 0) { - for (int j = 0; j < conv_size - 2; j++) { - snprintf(conv_hex, 3, - "%02x", (unsigned char) conv_dst[j]); - strcat(str, conv_hex); - } - free(conv_dst); - } - } - free(buf); - - strcat(str, ">"); - } else { - str_size = ptr->text_size; - str = malloc(str_size); - - if (str == NULL) - return 1; - - memset(str, 0, str_size); - - strcat(str, "<feff"); - - for (int i = 0; i < ptr->text_size; i += 4) { - conv_src[0] = ptr->text[i + 3]; - conv_src[1] = ptr->text[i + 2]; - - conv_size = 6; - - if (strconv(&conv_dst, "UTF-16BE", - conv_src, "GB18030", &conv_size) == 0) { - for (int j = 0; j < conv_size - 2; j++) { - snprintf(conv_hex, 3, - "%02x", (unsigned char) conv_dst[j]); - strcat(str, conv_hex); - } - free(conv_dst); - } - } - - strcat(str, ">"); - } - - pdf_obj_append(&pdf, 0, str, NULL, NULL); - - free(str); - - ptr = ptr->next; - } - - if ((*param)->stat > 1) { - printf("\t%8s\t%12s\t%12s\t%12s\n", - "id", - "object", - "dictionary", - "stream"); - - pdf_object_t *ptr = pdf->next; - while (ptr != NULL) { - printf("\t%8d\t%12d\t%12d\t%12d\n", - ptr->id, - ptr->object_size, - ptr->dictionary_size, - ptr->stream_size); - ptr = ptr->next; - } - } - if ((*param)->stat > 0) - printf("Generated %d object(s)\n", - pdf_get_count(&pdf)); - - int *ids = NULL; - - if ((*param)->file_stat->outline > 0) { - if ((*param)->stat > 1) - printf("Generating outline object(s)\n\t%8s\n", "id"); - - pdf_get_free_ids(&pdf, &ids, (*param)->file_stat->outline + 1); - int outline = pdf_cnki_outline(&pdf, &(*param)->object_outline, &ids); - - if ((*param)->stat > 1) - for (int i = 0; i < (*param)->file_stat->outline + 1; i++) - printf("\t%8d\n", ids[i]); - - if ((*param)->stat > 0) { - if (outline != 0) - printf("No outline information\n"); - else - printf("Generated %d outline object(s)\n", - (*param)->file_stat->outline + 1); - } - } - - if ((*param)->stat > 1) - printf("Writing header\n"); - - long cur = 0; - - if ((*param)->stat > 0) - cur = ftell((*param)->fp_o); - - if (pdf_dump_header(&pdf, &(*param)->fp_o) != 0) { - fprintf(stderr, "Header not written\n"); - return 1; - } else { - if ((*param)->stat > 0) - printf("Header %ld byte(s) written\n", - ftell((*param)->fp_o) - cur); - } - - if ((*param)->stat > 1) - printf("Writing object(s)\n"); - - pdf_dump_obj(&pdf, &(*param)->fp_o); - - if ((*param)->stat > 1) { - printf("\t%8s\t%8s\t%8s\t%12s\t%12s\t%12s\n", - "address", - "size", - "id", - "object", - "dictionary", - "stream"); - - pdf_object_t *ptr = pdf->next; - while (ptr != NULL) { - printf("\t%08x\t%8d\t%8d\t%12d\t%12d\t%12d\n", - ptr->address, - ptr->size, - ptr->id, - ptr->object_size, - ptr->dictionary_size, - ptr->stream_size); - ptr = ptr->next; - } - } - - if ((*param)->stat > 0) - printf("%d object(s) %ld byte(s) written\n", - pdf_get_count(&pdf), - ftell((*param)->fp_o)); - - long xref = ftell((*param)->fp_o); - - if ((*param)->stat > 1) - printf("Writing cross-reference table\n"); - - if (pdf_dump_xref(&pdf, &(*param)->fp_o) != 0) { - if ((*param)->stat > 0) - printf("Cross-reference table not written\n"); - } else { - if ((*param)->stat > 0) - printf("Cross-reference table %ld byte(s) written\n", - ftell((*param)->fp_o) - xref); - } - - if ((*param)->stat > 1) - printf("Writing trailer\n"); - - if ((*param)->stat > 0) - cur = ftell((*param)->fp_o); - - if (pdf_dump_trailer(&pdf, &(*param)->fp_o, xref) != 0) { - if ((*param)->stat > 0) - printf("Trailer not written\n"); - } else { - if ((*param)->stat > 0) - printf("Trailer %ld byte(s) written\n", - ftell((*param)->fp_o) - cur); - } - - if ((*param)->stat > 0) - printf("Total %ld byte(s) written\n", - ftell((*param)->fp_o)); + printf("Loaded %d page(s)\n", (*param)->file_stat->page); - pdf_obj_destroy(&pdf); + cnki_pdf_hn(param); if ((*param)->stat > 0) - printf("Conversion ended (partial)\n"); + printf("Conversion ended\n"); - /* TODO: Finish me please :) */ return 0; } |