about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author yzrh <yzrh@noema.org> 2022-12-26 03:46:01 +0000
committer yzrh <yzrh@noema.org> 2022-12-28 19:29:46 +0000
commit abce2fd2e4f8089779fb9b1dce94133716b0bb39 (patch)
tree 1bbc6005064a70201e2ea1860fc29b21b93a050a
parent 224a09a015de72f56ee86841504575fe474c6260 (diff)
download melon-abce2fd2e4f8089779fb9b1dce94133716b0bb39.tar.gz
melon-abce2fd2e4f8089779fb9b1dce94133716b0bb39.tar.zst
Add preliminary support for HN figure placement.
Signed-off-by: yzrh <yzrh@noema.org>
-rw-r--r-- README.md 4
-rw-r--r-- src/cnki.c 2
-rw-r--r-- src/cnki.h 4
-rw-r--r-- src/cnki_hn.c 4
-rw-r--r-- src/cnki_pdf.c 203
-rw-r--r-- src/version.h 2
6 files changed, 161 insertions, 58 deletions
diff --git a/README.md b/README.md
index 471282e..b94a3e7 100644
--- a/README.md
+++ b/README.md
@@ -39,9 +39,9 @@ Specify output file
Set buffer size (default 512k)
-v, --verbose
-Print more information (twice for even more, three times for HN image decoding information as well)
+Print more information (twice for even more, three times for HN image processing information as well)
Thanks
======
-This project is inspired by [https://github.com/JeziL/caj2pdf](https://github.com/JeziL/caj2pdf)
+This project is inspired by [https://github.com/caj2pdf/caj2pdf](https://github.com/caj2pdf/caj2pdf)
diff --git a/src/cnki.c b/src/cnki.c
index cc49d73..8c2e6e6 100644
--- a/src/cnki.c
+++ b/src/cnki.c
@@ -138,7 +138,7 @@ cnki_info(cnki_t **param)
if ((*param)->file_stat->outline > 0) {
if ((*param)->stat > 1) {
printf("Loading outline(s)\n");
- printf("\t%16s\t%-24s\t%12s\t%12s\t%5s\n",
+ printf("\t%19s\t%-24s\t%12s\t%12s\t%5s\n",
"title",
"hierarchy",
"page",
diff --git a/src/cnki.h b/src/cnki.h
index 193e69b..e9cc5d1 100644
--- a/src/cnki.h
+++ b/src/cnki.h
@@ -58,6 +58,10 @@ typedef struct _hn_image_t {
int32_t format; /* hn_code */
int32_t address;
int32_t size;
+ int16_t x;
+ int16_t y;
+ int16_t w;
+ int16_t h;
char *image;
} hn_image_t;
diff --git a/src/cnki_hn.c b/src/cnki_hn.c
index 4d32092..c2f76ec 100644
--- a/src/cnki_hn.c
+++ b/src/cnki_hn.c
@@ -93,6 +93,10 @@ cnki_hn(cnki_t **param)
fread(&ptr->image_data[i].format, 4, 1, (*param)->fp_i);
fread(&ptr->image_data[i].address, 4, 1, (*param)->fp_i);
fread(&ptr->image_data[i].size, 4, 1, (*param)->fp_i);
+ ptr->image_data[i].x = 0;
+ ptr->image_data[i].y = 0;
+ ptr->image_data[i].w = 0;
+ ptr->image_data[i].h = 0;
fseek((*param)->fp_i,
ptr->image_data[i].address + ptr->image_data[i].size,
SEEK_SET);
diff --git a/src/cnki_pdf.c b/src/cnki_pdf.c
index 603ce01..0cb30ca 100644
--- a/src/cnki_pdf.c
+++ b/src/cnki_pdf.c
@@ -524,7 +524,7 @@ cnki_pdf_hn(cnki_t **param)
"/Subtype /Image\n");
if ((*param)->stat > 2)
- printf("\tDecoding data, page %04d item %02d format %d... ",
+ printf("\tProcessing image, page %04d item %d format %d... ",
ptr->page, i, ptr->image_data[i].format);
switch (ptr->image_data[i].format) {
@@ -700,7 +700,7 @@ cnki_pdf_hn(cnki_t **param)
snprintf(buf, 64, "/Im%d %d 0 R", i, ids[i]);
strcat(dictionary, buf);
- if (i + 1 < ptr->image_length)
+ if (i < ptr->image_length - 1)
strcat(dictionary, " ");
}
@@ -739,43 +739,112 @@ cnki_pdf_hn(cnki_t **param)
memset(dictionary, 0, dictionary_size);
- strcat(dictionary, "<feff");
-
- for (int i = 0; i < ptr->text_size; i += 6) {
- if (i + 5 >= ptr->text_size)
- break;
-
- conv_src[0] = ptr->text[i + 5];
- conv_src[1] = ptr->text[i + 4];
-
- if ((conv_src[0] << 8 | conv_src[1]) == 0xa389) {
- strcat(dictionary, "a389");
- continue;
- } else if ((conv_src[0] << 8 | conv_src[1]) == 0xa38a) {
- strcat(dictionary, "a38a");
- continue;
- } else if ((conv_src[0] << 8 | conv_src[1]) == 0xa38d) {
- strcat(dictionary, "a38d");
- continue;
- } else if ((conv_src[0] << 8 | conv_src[1]) == 0xa3a0) {
- strcat(dictionary, "a3a0");
- continue;
- }
-
- conv_size = 6;
-
- if (strconv(&conv_dst, "UTF-16BE",
- conv_src, "GB18030", &conv_size) == 0) {
- for (int j = 0; j < conv_size - 2; j++) {
- snprintf(conv_hex, 3,
- "%02x", (unsigned char) conv_dst[j]);
- strcat(dictionary, conv_hex);
- }
- free(conv_dst);
+ strcat(dictionary, "BT\n");
+
+ for (int i = 0, j = 0; i < ptr->text_size - 1;) {
+ switch ((uint16_t) (ptr->text[i + 1] << 8 | ptr->text[i])) {
+ case 0x8001:
+ if (ptr->address_next <= ptr->address) {
+ i += 2;
+ break;
+ }
+
+ strcat(dictionary, "T*\n");
+ case 0x8070:
+ if (ptr->address_next > ptr->address) {
+ i += 4;
+
+ for (;;) {
+ if (i + 3 >= ptr->text_size ||
+ (unsigned char) ptr->text[i + 1] == 0x80)
+ break;
+
+ conv_src[0] = ptr->text[i + 3];
+ conv_src[1] = ptr->text[i + 2];
+
+ conv_size = 6;
+
+ if (strconv(&conv_dst, "UTF-16BE",
+ conv_src, "GB18030", &conv_size) == 0) {
+ if (conv_size - 2 > 0) {
+ strcat(dictionary, "<feff");
+ for (int k = 0; k < conv_size - 2; k++) {
+ snprintf(conv_hex, 3,
+ "%02x", (unsigned char) conv_dst[k]);
+ strcat(dictionary, conv_hex);
+ }
+ strcat(dictionary, "> Tj\n");
+ }
+ free(conv_dst);
+ }
+
+ i += 4;
+ }
+
+ break;
+ }
+
+ if (i + 7 >= ptr->text_size) {
+ i += 2;
+ break;
+ }
+
+ conv_src[0] = ptr->text[i + 7];
+ conv_src[1] = ptr->text[i + 6];
+
+ conv_size = 6;
+
+ if (strconv(&conv_dst, "UTF-16BE",
+ conv_src, "GB18030", &conv_size) == 0) {
+ if (conv_size - 2 > 0) {
+ strcat(dictionary, "<feff");
+ for (int k = 0; k < conv_size - 2; k++) {
+ snprintf(conv_hex, 3,
+ "%02x", (unsigned char) conv_dst[k]);
+ strcat(dictionary, conv_hex);
+ }
+ strcat(dictionary, "> Tj\n");
+ }
+ free(conv_dst);
+ }
+
+ i += 8;
+ break;
+ case 0x800a:
+ if (i + 27 >= ptr->text_size || j >= ptr->image_length) {
+ i += 2;
+ break;
+ }
+
+ if (ptr->image_length > 0) {
+ ptr->image_data[j].x =
+ ptr->text[i + 5] << 8 | ptr->text[i + 4];
+ ptr->image_data[j].y =
+ ptr->text[i + 7] << 8 | ptr->text[i + 6];
+ ptr->image_data[j].w =
+ ptr->text[i + 9] << 8 | ptr->text[i + 8];
+ ptr->image_data[j].h =
+ ptr->text[i + 11] << 8 | ptr->text[i + 10];
+
+ if ((*param)->stat > 2)
+ printf("\tItem %d: origin (%4d, %4d), width %4d, height %4d\n",
+ j,
+ ptr->image_data[j].x,
+ ptr->image_data[j].y,
+ ptr->image_data[j].w,
+ ptr->image_data[j].h);
+ }
+
+ i += 28;
+ j++;
+ break;
+ default:
+ i += 4;
+ break;
}
}
- strcat(dictionary, ">");
+ strcat(dictionary, "ET");
/* FIXME: Use the text somehow? */
free(dictionary);
@@ -794,20 +863,14 @@ cnki_pdf_hn(cnki_t **param)
if (ptr->image_length > 0) {
memset(dictionary, 0, dictionary_size);
- strcat(dictionary, "q\n");
-
- strcat(dictionary, "0.25 0 0 0.25 0 0 cm\n");
-
- double resize_x;
- double resize_y;
-
- for (int i = 0; i < ptr->image_length; i++) {
- if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0)
- continue;
+ char resize_str[64] = "0.25 0 0 0.25 0 0 cm\n";
+ double resize_x = 1;
+ double resize_y = 1;
+ if (dim[0] > 0 && dim[1] > 0) {
/* Scale within bound of A4 paper */
- resize_x = 595.276 * 4 / dim[i * 2];
- resize_y = 841.89 * 4 / dim[i * 2 + 1];
+ resize_x = 4 * 595.2756 / dim[0];
+ resize_y = 4 * 841.8898 / dim[1];
if (resize_y < resize_x)
snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n",
@@ -815,9 +878,18 @@ cnki_pdf_hn(cnki_t **param)
else
snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n",
resize_x, resize_x);
- strcat(dictionary, buf);
+ strcat(resize_str, buf);
+ }
+
+ for (int i = 0; i < ptr->image_length; i++) {
+ if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0)
+ continue;
+
+ strcat(dictionary, "q\n");
- /* Apply transformation matrix */
+ strcat(dictionary, resize_str);
+
+ /* Rotate image */
if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1) {
snprintf(buf, 64, "1 0 0 1 0 %d cm\n",
dim[i * 2 + 1]);
@@ -826,15 +898,38 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, "1 0 0 -1 0 0 cm\n");
}
+ /* Translate figure */
+ if (i > 0) {
+ double origin_x = 0.4043745 * ptr->image_data[i].x;
+ double origin_y = 0.4043561 * ptr->image_data[i].y;
+
+ if (origin_x < 0)
+ origin_x += (2381.102 - dim[i * 2]) / 2;
+
+ if (origin_y < 0)
+ origin_y += (3367.559 + dim[i * 2 + 1]) / 2;
+
+ if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1)
+ origin_y = -3367.559 + origin_y + dim[i * 2 + 1];
+ else
+ origin_y = 3367.559 - origin_y - dim[i * 2 + 1];
+
+ snprintf(buf, 64, "1 0 0 1 %f %f cm\n", origin_x, origin_y);
+ strcat(dictionary, buf);
+ }
+
snprintf(buf, 64, "%d 0 0 %d 0 0 cm\n",
dim[i * 2], dim[i * 2 + 1]);
strcat(dictionary, buf);
snprintf(buf, 64, "/Im%d Do\n", i);
strcat(dictionary, buf);
- }
- strcat(dictionary, "Q");
+ strcat(dictionary, "Q");
+
+ if (i < ptr->image_length - 1)
+ strcat(dictionary, "\n");
+ }
if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0) {
free(root_kid);
@@ -866,7 +961,7 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, "<<\n/Type /Page\n");
/* A4 paper */
- strcat(dictionary, "/MediaBox [0 0 595.276 841.89]\n");
+ strcat(dictionary, "/MediaBox [0 0 595.2756 841.8898]\n");
if (ptr->image_length > 0) {
free(dim);
@@ -946,7 +1041,7 @@ cnki_pdf_hn(cnki_t **param)
for (int i = 0; i < (*param)->file_stat->page; i++) {
snprintf(buf, 64, "%d 0 R", root_kid[i]);
strcat(dictionary, buf);
- if (i + 1 < (*param)->file_stat->page)
+ if (i < (*param)->file_stat->page - 1)
strcat(dictionary, " ");
}
diff --git a/src/version.h b/src/version.h
index 7c1ca3b..db25a27 100644
--- a/src/version.h
+++ b/src/version.h
@@ -6,5 +6,5 @@
#define VERSION "0"
#define RELEASE "2"
-#define PATCH "1"
+#define PATCH "2"
#define EXTRA ""