aboutsummaryrefslogtreecommitdiffstats
path: root/src/cnki_hn.c
diff options
context:
space:
mode:
authoryzrh <yzrh@noema.org>2020-12-31 22:36:28 +0000
committeryzrh <yzrh@noema.org>2021-01-03 03:01:28 +0000
commit1994f122cc29504862944cca1da1c5203c7e41eb (patch)
treed89e37c5f3443156116bd8476e0efc2d380acd55 /src/cnki_hn.c
parentb20c6ad3ed930977990f3812b25b80d2ce282d79 (diff)
downloadmelon-1994f122cc29504862944cca1da1c5203c7e41eb.tar.gz
melon-1994f122cc29504862944cca1da1c5203c7e41eb.tar.zst
Decode JBIG and JPEG during HN conversion.
Diffstat (limited to 'src/cnki_hn.c')
-rw-r--r--src/cnki_hn.c228
1 files changed, 6 insertions, 222 deletions
diff --git a/src/cnki_hn.c b/src/cnki_hn.c
index f6a4c24..0e16a76 100644
--- a/src/cnki_hn.c
+++ b/src/cnki_hn.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@@ -9,6 +9,8 @@
#include "cnki.h"
#include "iconv.h"
+#include "zlib.h"
+#include "jpeg.h"
#include "pdf.h"
#include "pdf_cnki.h"
@@ -131,231 +133,13 @@ cnki_hn(cnki_t **param)
ptr = ptr->next;
}
- if ((*param)->stat > 1)
- printf("Loaded %d page(s)\n", (*param)->file_stat->page);
-
- if ((*param)->stat > 1)
- printf("Generating PDF object(s)\n");
-
- pdf_object_t *pdf = NULL;
-
- if (pdf_obj_create(&pdf) != 0)
- return 1;
-
- int buf_size;
- char *buf;
-
- int str_size;
- char *str;
-
- int conv_size;
- char *conv_dst;
- char conv_src[2];
- char conv_hex[3];
-
- ptr = (*param)->object_hn;
- while (ptr != NULL) {
- if (strncmp(ptr->text + 8, "COMPRESSTEXT", 12) == 0) {
- cnki_zlib(&buf, &buf_size, ptr->text, ptr->text_size);
-
- str_size = buf_size / 8 + 7;
- str = malloc(str_size);
-
- if (str == NULL)
- return 1;
-
- memset(str, 0, str_size);
-
- strcat(str, "<feff");
-
- for (int i = 0; i < buf_size; i += 16) {
- conv_src[0] = buf[i + 7];
- conv_src[1] = buf[i + 6];
-
- conv_size = 6;
-
- if (strconv(&conv_dst, "UTF-16BE",
- conv_src, "GB18030", &conv_size) == 0) {
- for (int j = 0; j < conv_size - 2; j++) {
- snprintf(conv_hex, 3,
- "%02x", (unsigned char) conv_dst[j]);
- strcat(str, conv_hex);
- }
- free(conv_dst);
- }
- }
- free(buf);
-
- strcat(str, ">");
- } else {
- str_size = ptr->text_size;
- str = malloc(str_size);
-
- if (str == NULL)
- return 1;
-
- memset(str, 0, str_size);
-
- strcat(str, "<feff");
-
- for (int i = 0; i < ptr->text_size; i += 4) {
- conv_src[0] = ptr->text[i + 3];
- conv_src[1] = ptr->text[i + 2];
-
- conv_size = 6;
-
- if (strconv(&conv_dst, "UTF-16BE",
- conv_src, "GB18030", &conv_size) == 0) {
- for (int j = 0; j < conv_size - 2; j++) {
- snprintf(conv_hex, 3,
- "%02x", (unsigned char) conv_dst[j]);
- strcat(str, conv_hex);
- }
- free(conv_dst);
- }
- }
-
- strcat(str, ">");
- }
-
- pdf_obj_append(&pdf, 0, str, NULL, NULL);
-
- free(str);
-
- ptr = ptr->next;
- }
-
- if ((*param)->stat > 1) {
- printf("\t%8s\t%12s\t%12s\t%12s\n",
- "id",
- "object",
- "dictionary",
- "stream");
-
- pdf_object_t *ptr = pdf->next;
- while (ptr != NULL) {
- printf("\t%8d\t%12d\t%12d\t%12d\n",
- ptr->id,
- ptr->object_size,
- ptr->dictionary_size,
- ptr->stream_size);
- ptr = ptr->next;
- }
- }
-
if ((*param)->stat > 0)
- printf("Generated %d object(s)\n",
- pdf_get_count(&pdf));
-
- int *ids = NULL;
-
- if ((*param)->file_stat->outline > 0) {
- if ((*param)->stat > 1)
- printf("Generating outline object(s)\n\t%8s\n", "id");
-
- pdf_get_free_ids(&pdf, &ids, (*param)->file_stat->outline + 1);
- int outline = pdf_cnki_outline(&pdf, &(*param)->object_outline, &ids);
-
- if ((*param)->stat > 1)
- for (int i = 0; i < (*param)->file_stat->outline + 1; i++)
- printf("\t%8d\n", ids[i]);
-
- if ((*param)->stat > 0) {
- if (outline != 0)
- printf("No outline information\n");
- else
- printf("Generated %d outline object(s)\n",
- (*param)->file_stat->outline + 1);
- }
- }
-
- if ((*param)->stat > 1)
- printf("Writing header\n");
-
- long cur = 0;
-
- if ((*param)->stat > 0)
- cur = ftell((*param)->fp_o);
-
- if (pdf_dump_header(&pdf, &(*param)->fp_o) != 0) {
- fprintf(stderr, "Header not written\n");
- return 1;
- } else {
- if ((*param)->stat > 0)
- printf("Header %ld byte(s) written\n",
- ftell((*param)->fp_o) - cur);
- }
-
- if ((*param)->stat > 1)
- printf("Writing object(s)\n");
-
- pdf_dump_obj(&pdf, &(*param)->fp_o);
-
- if ((*param)->stat > 1) {
- printf("\t%8s\t%8s\t%8s\t%12s\t%12s\t%12s\n",
- "address",
- "size",
- "id",
- "object",
- "dictionary",
- "stream");
-
- pdf_object_t *ptr = pdf->next;
- while (ptr != NULL) {
- printf("\t%08x\t%8d\t%8d\t%12d\t%12d\t%12d\n",
- ptr->address,
- ptr->size,
- ptr->id,
- ptr->object_size,
- ptr->dictionary_size,
- ptr->stream_size);
- ptr = ptr->next;
- }
- }
-
- if ((*param)->stat > 0)
- printf("%d object(s) %ld byte(s) written\n",
- pdf_get_count(&pdf),
- ftell((*param)->fp_o));
-
- long xref = ftell((*param)->fp_o);
-
- if ((*param)->stat > 1)
- printf("Writing cross-reference table\n");
-
- if (pdf_dump_xref(&pdf, &(*param)->fp_o) != 0) {
- if ((*param)->stat > 0)
- printf("Cross-reference table not written\n");
- } else {
- if ((*param)->stat > 0)
- printf("Cross-reference table %ld byte(s) written\n",
- ftell((*param)->fp_o) - xref);
- }
-
- if ((*param)->stat > 1)
- printf("Writing trailer\n");
-
- if ((*param)->stat > 0)
- cur = ftell((*param)->fp_o);
-
- if (pdf_dump_trailer(&pdf, &(*param)->fp_o, xref) != 0) {
- if ((*param)->stat > 0)
- printf("Trailer not written\n");
- } else {
- if ((*param)->stat > 0)
- printf("Trailer %ld byte(s) written\n",
- ftell((*param)->fp_o) - cur);
- }
-
- if ((*param)->stat > 0)
- printf("Total %ld byte(s) written\n",
- ftell((*param)->fp_o));
+ printf("Loaded %d page(s)\n", (*param)->file_stat->page);
- pdf_obj_destroy(&pdf);
+ cnki_pdf_hn(param);
if ((*param)->stat > 0)
- printf("Conversion ended (partial)\n");
+ printf("Conversion ended\n");
- /* TODO: Finish me please :) */
return 0;
}