about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: yzrh <yzrh@noema.org> 2022-12-25 18:03:01 +0000
committer: yzrh <yzrh@noema.org> 2022-12-25 23:18:17 +0000
commit: c2ad6549fb337ce707e04aa441c9b492171a3b9d (patch)
tree: 611b43986a0c50d335ba69aafc8ace26cdcaffd3
parent: d2826fa075544ada1fb9f530a375ef85f58c8ea0 (diff)
download: melon-c2ad6549fb337ce707e04aa441c9b492171a3b9d.tar.gz
download: melon-c2ad6549fb337ce707e04aa441c9b492171a3b9d.tar.zst
Handle headless HN and page with no image.
Signed-off-by: yzrh <yzrh@noema.org>
-rw-r--r-- src/cnki.c 29
-rw-r--r-- src/cnki.h 5
-rw-r--r-- src/cnki_hn.c 110
-rw-r--r-- src/cnki_pdf.c 265
-rw-r--r-- src/cnki_zlib.c 9
-rw-r--r-- src/melon.c 3
6 files changed, 239 insertions, 182 deletions
diff --git a/src/cnki.c b/src/cnki.c
index 5f120d0..cc49d73 100644
--- a/src/cnki.c
+++ b/src/cnki.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@@ -54,6 +54,11 @@ cnki_destroy(cnki_t **param)
object_hn_t *ptr_hn;
while ((ptr_hn = (*param)->object_hn) != NULL) {
(*param)->object_hn = (*param)->object_hn->next;
+ free(ptr_hn->text);
+ if (ptr_hn->image_data != NULL)
+ for (int i = 0; i < ptr_hn->image_length; i++)
+ free(ptr_hn->image_data[i].image);
+ free(ptr_hn->image_data);
free(ptr_hn);
}
@@ -71,12 +76,19 @@ cnki_info(cnki_t **param)
printf("Reading file header at 0x%x\n", ADDRESS_HEAD);
int addr[2];
+ unsigned char str[2];
fseek((*param)->fp_i, ADDRESS_HEAD, SEEK_SET);
fread((*param)->file_stat->type, 4, 1, (*param)->fp_i);
- if ((*param)->stat > 0)
- printf("File type is '%s'\n", (*param)->file_stat->type);
+ fread(str, 2, 1, (*param)->fp_i);
+
+ if ((*param)->stat > 0) {
+ if ((unsigned char) (*param)->file_stat->type[0] > 0x7f)
+ printf("File type is '%02x'\n", (unsigned char) (*param)->file_stat->type[0]);
+ else
+ printf("File type is '%s'\n", (*param)->file_stat->type);
+ }
if (strncmp((*param)->file_stat->type, "%PDF", 4) == 0) {
return 0;
@@ -86,6 +98,9 @@ cnki_info(cnki_t **param)
} else if (strncmp((*param)->file_stat->type, "HN", 2) == 0) {
addr[0] = ADDRESS_HN_PAGE;
addr[1] = ADDRESS_HN_OUTLINE;
+ } else if ((unsigned char) (*param)->file_stat->type[0] == 0xc8) {
+ addr[0] = ADDRESS_C8_PAGE;
+ addr[1] = ADDRESS_HN_OUTLINE;
} else if (strncmp((*param)->file_stat->type, "KDH ", 4) == 0) {
return 0;
} else {
@@ -102,6 +117,14 @@ cnki_info(cnki_t **param)
printf("Advised %d page(s)\n",
(*param)->file_stat->page);
+ if (strncmp((*param)->file_stat->type, "HN", 2) == 0 && str[0] == 0xc8 && str[1] == 0x00) {
+ fseek((*param)->fp_i, 0xd8, SEEK_SET);
+ return 0;
+ } else if ((unsigned char) (*param)->file_stat->type[0] == 0xc8) {
+ fseek((*param)->fp_i, 0x50, SEEK_SET);
+ return 0;
+ }
+
if ((*param)->stat > 1)
printf("Reading outline count at 0x%x\n", addr[1]);
diff --git a/src/cnki.h b/src/cnki.h
index 237a2c1..193e69b 100644
--- a/src/cnki.h
+++ b/src/cnki.h
@@ -16,6 +16,8 @@
#define ADDRESS_HN_PAGE 0x0090
#define ADDRESS_HN_OUTLINE 0x0158
+#define ADDRESS_C8_PAGE 0x0008
+
#define ADDRESS_KDH_BODY 0x00fe
#define KEY_KDH "FZHMEI"
@@ -64,7 +66,8 @@ typedef struct _object_hn_t {
int32_t text_size;
int16_t image_length;
int16_t page;
- int32_t unknown[2]; /* TODO: what is it? */
+ int32_t unknown; /* TODO: what is it? */
+ int32_t address_next;
char *text;
struct _hn_image_t *image_data;
struct _object_hn_t *next;
diff --git a/src/cnki_hn.c b/src/cnki_hn.c
index feabb48..4d32092 100644
--- a/src/cnki_hn.c
+++ b/src/cnki_hn.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@@ -27,12 +27,13 @@ cnki_hn(cnki_t **param)
if ((*param)->stat > 1) {
printf("Loading page(s)\n");
- printf("\t%8s\t%8s\t%6s\t%4s\t%16s\t%4s\t%8s\t%8s\n",
+ printf("\t%8s\t%8s\t%6s\t%4s\t%8s\t%8s\t%4s\t%8s\t%8s\n",
"address",
"text",
"length",
"page",
"unknown",
+ "next",
"code",
"address",
"image");
@@ -44,7 +45,8 @@ cnki_hn(cnki_t **param)
fread(&ptr->text_size, 4, 1, (*param)->fp_i);
fread(&ptr->image_length, 2, 1, (*param)->fp_i);
fread(&ptr->page, 2, 1, (*param)->fp_i);
- fread(&ptr->unknown, 8, 1, (*param)->fp_i);
+ fread(&ptr->unknown, 4, 1, (*param)->fp_i);
+ fread(&ptr->address_next, 4, 1, (*param)->fp_i);
ptr->text = NULL;
ptr->image_data = NULL;
@@ -62,66 +64,76 @@ cnki_hn(cnki_t **param)
ptr = (*param)->object_hn;
while (ptr != NULL) {
- ptr->text = malloc(ptr->text_size);
+ if (ptr->text_size > 0) {
+ ptr->text = malloc(ptr->text_size);
- if (ptr->text == NULL)
- return 1;
+ if (ptr->text == NULL)
+ return 1;
- fseek((*param)->fp_i, ptr->address, SEEK_SET);
- fread(ptr->text, ptr->text_size, 1, (*param)->fp_i);
+ fseek((*param)->fp_i, ptr->address, SEEK_SET);
+ fread(ptr->text, ptr->text_size, 1, (*param)->fp_i);
+ }
if ((*param)->stat > 1)
- printf("\t%08x\t%8d\t%6d\t%4d\t{%4d, %8d}",
+ printf("\t%08x\t%8d\t%6d\t%4d\t%8d\t%08x",
ptr->address,
ptr->text_size,
ptr->image_length,
ptr->page,
- ptr->unknown[0],
- ptr->unknown[1]);
-
- ptr->image_data = malloc(ptr->image_length * sizeof(hn_image_t));
-
- if (ptr->image_data == NULL)
- return 1;
+ ptr->unknown,
+ ptr->address_next);
- for (int i = 0; i < ptr->image_length; i++) {
- fread(&ptr->image_data[i].format, 4, 1, (*param)->fp_i);
- fread(&ptr->image_data[i].address, 4, 1, (*param)->fp_i);
- fread(&ptr->image_data[i].size, 4, 1, (*param)->fp_i);
- fseek((*param)->fp_i,
- ptr->image_data[i].address + ptr->image_data[i].size,
- SEEK_SET);
- }
-
- for (int i = 0; i < ptr->image_length; i++) {
- ptr->image_data[i].image = malloc(ptr->image_data[i].size);
+ if (ptr->image_length > 0) {
+ ptr->image_data = malloc(ptr->image_length * sizeof(hn_image_t));
- if (ptr->image_data[i].image == NULL)
+ if (ptr->image_data == NULL)
return 1;
- fseek((*param)->fp_i, ptr->image_data[i].address, SEEK_SET);
- fread(ptr->image_data[i].image,
- ptr->image_data[i].size, 1,
- (*param)->fp_i);
-
- if ((*param)->stat > 1) {
- if (i == 0) {
- printf("\t%4d\t%08x\t%8d\n",
- ptr->image_data[i].format,
- ptr->image_data[i].address,
- ptr->image_data[i].size);
- } else {
- printf("\t%8s\t%8s\t%6s\t%4s\t%16s\t%4d\t%08x\t%8d\n",
- "",
- "",
- "",
- "",
- "",
- ptr->image_data[i].format,
- ptr->image_data[i].address,
- ptr->image_data[i].size);
+ for (int i = 0; i < ptr->image_length; i++) {
+ fread(&ptr->image_data[i].format, 4, 1, (*param)->fp_i);
+ fread(&ptr->image_data[i].address, 4, 1, (*param)->fp_i);
+ fread(&ptr->image_data[i].size, 4, 1, (*param)->fp_i);
+ fseek((*param)->fp_i,
+ ptr->image_data[i].address + ptr->image_data[i].size,
+ SEEK_SET);
+ }
+
+ for (int i = 0; i < ptr->image_length; i++) {
+ ptr->image_data[i].image = malloc(ptr->image_data[i].size);
+
+ if (ptr->image_data[i].image == NULL)
+ return 1;
+
+ fseek((*param)->fp_i, ptr->image_data[i].address, SEEK_SET);
+ fread(ptr->image_data[i].image,
+ ptr->image_data[i].size, 1,
+ (*param)->fp_i);
+
+ if ((*param)->stat > 1) {
+ if (i == 0) {
+ printf("\t%4d\t%08x\t%8d\n",
+ ptr->image_data[i].format,
+ ptr->image_data[i].address,
+ ptr->image_data[i].size);
+ } else {
+ printf("\t%8s\t%8s\t%6s\t%4s\t%8s\t%8s\t%4d\t%08x\t%8d\n",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ ptr->image_data[i].format,
+ ptr->image_data[i].address,
+ ptr->image_data[i].size);
+ }
}
}
+ } else if ((*param)->stat > 1) {
+ printf("\t%4s\t%8s\t%8s\n",
+ "",
+ "",
+ "");
}
ptr = ptr->next;
diff --git a/src/cnki_pdf.c b/src/cnki_pdf.c
index c56a45f..603ce01 100644
--- a/src/cnki_pdf.c
+++ b/src/cnki_pdf.c
@@ -481,7 +481,10 @@ cnki_pdf_hn(cnki_t **param)
* page object
*/
int *ids = NULL;
- pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3);
+ if (ptr->image_length > 0)
+ pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3);
+ else
+ pdf_get_free_ids(&pdf, &ids, 2);
int bitmap_size;
char *bitmap;
@@ -489,27 +492,31 @@ cnki_pdf_hn(cnki_t **param)
int stream_size;
char *stream;
- int *dim = malloc(2 * ptr->image_length * sizeof(int));
+ int *dim;
- int ret;
- int info[3];
+ if (ptr->image_length > 0) {
+ dim = malloc(2 * ptr->image_length * sizeof(int));
- if (dim == NULL) {
- free(root_kid);
- free(ids);
- return 1;
- }
+ if (dim == NULL) {
+ free(root_kid);
+ free(ids);
+ return 1;
+ }
- dictionary_size = 256;
- dictionary = malloc(dictionary_size);
+ dictionary_size = 256;
+ dictionary = malloc(dictionary_size);
- if (dictionary == NULL) {
- free(root_kid);
- free(ids);
- free(dim);
- return 1;
+ if (dictionary == NULL) {
+ free(root_kid);
+ free(ids);
+ free(dim);
+ return 1;
+ }
}
+ int ret;
+ int info[3];
+
for (int i = 0; i < ptr->image_length; i++) {
memset(dictionary, 0, dictionary_size);
@@ -684,66 +691,42 @@ cnki_pdf_hn(cnki_t **param)
}
}
- memset(dictionary, 0, dictionary_size);
+ if (ptr->image_length > 0) {
+ memset(dictionary, 0, dictionary_size);
- strcat(dictionary, "<<\n/XObject <<");
+ strcat(dictionary, "<<\n/XObject <<");
- for (int i = 0; i < ptr->image_length; i++) {
- snprintf(buf, 64, "/Im%d %d 0 R", i, ids[i]);
- strcat(dictionary, buf);
+ for (int i = 0; i < ptr->image_length; i++) {
+ snprintf(buf, 64, "/Im%d %d 0 R", i, ids[i]);
+ strcat(dictionary, buf);
- if (i + 1 < ptr->image_length)
- strcat(dictionary, " ");
- }
+ if (i + 1 < ptr->image_length)
+ strcat(dictionary, " ");
+ }
- strcat(dictionary, ">>\n>>");
+ strcat(dictionary, ">>\n>>");
- pdf_obj_append(&pdf, ids[ptr->image_length], NULL, dictionary, NULL, 0);
+ pdf_obj_append(&pdf, ids[ptr->image_length], NULL, dictionary, NULL, 0);
- free(dictionary);
+ free(dictionary);
+ }
int conv_size;
char *conv_dst;
char conv_src[2];
char conv_hex[3];
- if (strncmp(ptr->text + 8, "COMPRESSTEXT", 12) == 0) {
- cnki_zlib(&stream, &stream_size, ptr->text, ptr->text_size);
-
- dictionary_size = 64 + 2 * stream_size;
- dictionary = malloc(dictionary_size);
+ if (ptr->text_size > 0) {
+ if (strncmp(ptr->text + 8, "COMPRESSTEXT", 12) == 0 ||
+ strncmp(ptr->text, "COMPRESSTEXT", 12) == 0) {
+ cnki_zlib(&stream, &stream_size, ptr->text, ptr->text_size);
- if (dictionary == NULL) {
- free(root_kid);
- free(ids);
- free(dim);
- return 1;
- }
+ free(ptr->text);
- memset(dictionary, 0, dictionary_size);
-
- strcat(dictionary, "<feff");
-
- for (int i = 0; i < stream_size; i += 16) {
- conv_src[0] = stream[i + 7];
- conv_src[1] = stream[i + 6];
-
- conv_size = 6;
-
- if (strconv(&conv_dst, "UTF-16BE",
- conv_src, "GB18030", &conv_size) == 0) {
- for (int j = 0; j < conv_size - 2; j++) {
- snprintf(conv_hex, 3,
- "%02x", (unsigned char) conv_dst[j]);
- strcat(dictionary, conv_hex);
- }
- free(conv_dst);
- }
+ ptr->text_size = stream_size;
+ ptr->text = stream;
}
- free(stream);
- strcat(dictionary, ">");
- } else {
dictionary_size = 64 + 2 * ptr->text_size;
dictionary = malloc(dictionary_size);
@@ -758,9 +741,26 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, "<feff");
- for (int i = 0; i < ptr->text_size; i += 4) {
- conv_src[0] = ptr->text[i + 3];
- conv_src[1] = ptr->text[i + 2];
+ for (int i = 0; i < ptr->text_size; i += 6) {
+ if (i + 5 >= ptr->text_size)
+ break;
+
+ conv_src[0] = ptr->text[i + 5];
+ conv_src[1] = ptr->text[i + 4];
+
+ if ((conv_src[0] << 8 | conv_src[1]) == 0xa389) {
+ strcat(dictionary, "a389");
+ continue;
+ } else if ((conv_src[0] << 8 | conv_src[1]) == 0xa38a) {
+ strcat(dictionary, "a38a");
+ continue;
+ } else if ((conv_src[0] << 8 | conv_src[1]) == 0xa38d) {
+ strcat(dictionary, "a38d");
+ continue;
+ } else if ((conv_src[0] << 8 | conv_src[1]) == 0xa3a0) {
+ strcat(dictionary, "a3a0");
+ continue;
+ }
conv_size = 6;
@@ -776,12 +776,12 @@ cnki_pdf_hn(cnki_t **param)
}
strcat(dictionary, ">");
- }
- /* FIXME: Use the text somehow? */
- free(dictionary);
+ /* FIXME: Use the text somehow? */
+ free(dictionary);
+ }
- dictionary_size = 64 + 64 * ptr->image_length;
+ dictionary_size = 64 + 128 * ptr->image_length;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
@@ -791,96 +791,109 @@ cnki_pdf_hn(cnki_t **param)
return 1;
}
- memset(dictionary, 0, dictionary_size);
-
- strcat(dictionary, "q\n");
+ if (ptr->image_length > 0) {
+ memset(dictionary, 0, dictionary_size);
- strcat(dictionary, "0.25 0 0 0.25 0 0 cm\n");
+ strcat(dictionary, "q\n");
- double resize_x;
- double resize_y;
+ strcat(dictionary, "0.25 0 0 0.25 0 0 cm\n");
- for (int i = 0; i < ptr->image_length; i++) {
- if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0)
- continue;
+ double resize_x;
+ double resize_y;
- /* Scale within bound of A4 paper */
- resize_x = 595.276 * 4 / dim[i * 2];
- resize_y = 841.89 * 4 / dim[i * 2 + 1];
+ for (int i = 0; i < ptr->image_length; i++) {
+ if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0)
+ continue;
- if (resize_y < resize_x)
- snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n",
- resize_y, resize_y);
- else
- snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n",
- resize_x, resize_x);
- strcat(dictionary, buf);
+ /* Scale within bound of A4 paper */
+ resize_x = 595.276 * 4 / dim[i * 2];
+ resize_y = 841.89 * 4 / dim[i * 2 + 1];
- /* Apply transformation matrix */
- if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1) {
- snprintf(buf, 64, "1 0 0 1 0 %d cm\n",
- dim[i * 2 + 1]);
+ if (resize_y < resize_x)
+ snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n",
+ resize_y, resize_y);
+ else
+ snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n",
+ resize_x, resize_x);
strcat(dictionary, buf);
- strcat(dictionary, "1 0 0 -1 0 0 cm\n");
- }
+ /* Apply transformation matrix */
+ if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1) {
+ snprintf(buf, 64, "1 0 0 1 0 %d cm\n",
+ dim[i * 2 + 1]);
+ strcat(dictionary, buf);
- snprintf(buf, 64, "%d 0 0 %d 0 0 cm\n",
- dim[i * 2], dim[i * 2 + 1]);
- strcat(dictionary, buf);
+ strcat(dictionary, "1 0 0 -1 0 0 cm\n");
+ }
- snprintf(buf, 64, "/Im%d Do\n", i);
- strcat(dictionary, buf);
- }
+ snprintf(buf, 64, "%d 0 0 %d 0 0 cm\n",
+ dim[i * 2], dim[i * 2 + 1]);
+ strcat(dictionary, buf);
- strcat(dictionary, "Q");
+ snprintf(buf, 64, "/Im%d Do\n", i);
+ strcat(dictionary, buf);
+ }
- if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0) {
- free(root_kid);
- free(ids);
- free(dim);
- free(dictionary);
- return 1;
- }
+ strcat(dictionary, "Q");
- memset(dictionary, 0, dictionary_size);
+ if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0) {
+ free(root_kid);
+ free(ids);
+ free(dim);
+ free(dictionary);
+ return 1;
+ }
- strcat(dictionary, "<<\n");
+ memset(dictionary, 0, dictionary_size);
- snprintf(buf, 64, "/Length %d\n", stream_size);
- strcat(dictionary, buf);
+ strcat(dictionary, "<<\n");
- strcat(dictionary, "/Filter /FlateDecode\n");
+ snprintf(buf, 64, "/Length %d\n", stream_size);
+ strcat(dictionary, buf);
- strcat(dictionary, ">>");
+ strcat(dictionary, "/Filter /FlateDecode\n");
+
+ strcat(dictionary, ">>");
- pdf_obj_append(&pdf, ids[ptr->image_length + 1],
- NULL, dictionary, stream, stream_size);
+ pdf_obj_append(&pdf, ids[ptr->image_length + 1],
+ NULL, dictionary, stream, stream_size);
- free(stream);
+ free(stream);
+ }
memset(dictionary, 0, dictionary_size);
strcat(dictionary, "<<\n/Type /Page\n");
- snprintf(buf, 64, "/Resources %d 0 R\n", ids[ptr->image_length]);
- strcat(dictionary, buf);
-
- snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length + 1]);
- strcat(dictionary, buf);
-
/* A4 paper */
strcat(dictionary, "/MediaBox [0 0 595.276 841.89]\n");
- /* Add /Parent when we know root */
- pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0);
+ if (ptr->image_length > 0) {
+ free(dim);
- free(dictionary);
+ snprintf(buf, 64, "/Resources %d 0 R\n", ids[ptr->image_length]);
+ strcat(dictionary, buf);
+
+ snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length + 1]);
+ strcat(dictionary, buf);
+
+ /* Add /Parent when we know root */
+ pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0);
- root_kid[cnt++] = ids[ptr->image_length + 2];
+ root_kid[cnt++] = ids[ptr->image_length + 2];
+ } else {
+ snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length]);
+ strcat(dictionary, buf);
+
+ /* Add /Parent when we know root */
+ pdf_obj_append(&pdf, ids[ptr->image_length + 1], NULL, dictionary, NULL, 0);
+
+ root_kid[cnt++] = ids[ptr->image_length + 1];
+ }
+
+ free(dictionary);
free(ids);
- free(dim);
ptr = ptr->next;
}
diff --git a/src/cnki_zlib.c b/src/cnki_zlib.c
index edff141..075456b 100644
--- a/src/cnki_zlib.c
+++ b/src/cnki_zlib.c
@@ -13,12 +13,17 @@ int
cnki_zlib(char **dst, int *dst_size,
const char * restrict src, int src_size)
{
+ uint8_t padding = 0;
int32_t size;
- memcpy(&size, src + 20, 4);
+
+ if (strncmp(src + 8, "COMPRESSTEXT", 12) == 0)
+ padding = 8;
+
+ memcpy(&size, src + 12 + padding, 4);
*dst_size = size;
- if (strinflate(dst, size, src + 24, src_size - 24) != 0)
+ if (strinflate(dst, size, src + 16 + padding, src_size - 16 - padding) != 0)
return 1;
return 0;
diff --git a/src/melon.c b/src/melon.c
index af6aaf4..f8bb645 100644
--- a/src/melon.c
+++ b/src/melon.c
@@ -98,7 +98,8 @@ main(int argc, char **argv)
strerror(errno));
return EXIT_FAILURE;
}
- } else if (strncmp(param->file_stat->type, "HN", 2) == 0) {
+ } else if (strncmp(param->file_stat->type, "HN", 2) == 0 ||
+ (unsigned char) param->file_stat->type[0] == 0xc8) {
if (cnki_hn(&param) != 0) {
fprintf(stderr, "%s: %s\n", argv[0],
strerror(errno));