aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author yzrh <yzrh@noema.org> 2022-12-29 21:10:03 +0000
committer yzrh <yzrh@noema.org> 2022-12-29 21:10:03 +0000
commit 1ce3f89574fa7256ab019eefc96a7362165cca52 (patch)
tree 5c208b34f81916b91ec60809a905c53a0a3654f9
parent 5a1afb00567aeacca9ead58f5383c3f502317a69 (diff)
download melon-1ce3f89574fa7256ab019eefc96a7362165cca52.tar.gz
melon-1ce3f89574fa7256ab019eefc96a7362165cca52.tar.zst
Handle combination of text and image in page content.
Signed-off-by: yzrh <yzrh@noema.org>
-rw-r--r-- src/cnki_pdf.c 123
-rw-r--r-- src/version.h 2
2 files changed, 62 insertions, 63 deletions
diff --git a/src/cnki_pdf.c b/src/cnki_pdf.c
index 328a989..3d446e4 100644
--- a/src/cnki_pdf.c
+++ b/src/cnki_pdf.c
@@ -460,6 +460,9 @@ cnki_pdf_hn(cnki_t **param)
if (pdf_obj_create(&pdf) != 0)
return 1;
+ int font = pdf_get_free_id(&pdf);
+ pdf_obj_append(&pdf, font, NULL, "<<\n/Type /Font\n/Subtype /TrueType\n/BaseFont /NotoSansCJKSC\n>>", NULL, 0);
+
if ((*param)->stat > 1)
printf("Generating PDF object(s)\n");
@@ -480,15 +483,12 @@ cnki_pdf_hn(cnki_t **param)
while (ptr != NULL) {
/*
* External object (ptr->image_length) +
- * content object +
* resource object +
+ * content object +
* page object
*/
int *ids = NULL;
- if (ptr->image_length > 0)
- pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3);
- else
- pdf_get_free_ids(&pdf, &ids, 2);
+ pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3);
int bitmap_size;
char *bitmap;
@@ -721,19 +721,39 @@ cnki_pdf_hn(cnki_t **param)
if ((*param)->stat > 2)
printf("Not extracted.\n");
- pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
+ pdf_obj_append(&pdf, ids[i], "null", NULL, NULL, 0);
} else {
if ((*param)->stat > 2)
printf("Unsupported format.\n");
- pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
+ pdf_obj_append(&pdf, ids[i], "null", NULL, NULL, 0);
}
}
- if (ptr->image_length > 0) {
- memset(dictionary, 0, dictionary_size);
+ if (ptr->image_length > 0)
+ free(dictionary);
+
+ dictionary_size = 128 + 2 * ptr->text_size + 128 * ptr->image_length;
+ dictionary = malloc(dictionary_size);
+
+ if (dictionary == NULL) {
+ free(root_kid);
+ free(ids);
+ free(dim);
+ return 1;
+ }
+
+ memset(dictionary, 0, dictionary_size);
- strcat(dictionary, "<<\n/XObject <<");
+ strcat(dictionary, "<<\n");
+
+ if (ptr->text_size > 0) {
+ snprintf(buf, 64, "/Font <</F0 %d 0 R>>\n", font);
+ strcat(dictionary, buf);
+ }
+
+ if (ptr->image_length > 0) {
+ strcat(dictionary, "/XObject <<");
for (int i = 0; i < ptr->image_length; i++) {
snprintf(buf, 64, "/Im%d %d 0 R", i, ids[i]);
@@ -743,12 +763,14 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, " ");
}
- strcat(dictionary, ">>\n>>");
+ strcat(dictionary, ">>\n");
+ }
- pdf_obj_append(&pdf, ids[ptr->image_length], NULL, dictionary, NULL, 0);
+ strcat(dictionary, ">>");
- free(dictionary);
- }
+ pdf_obj_append(&pdf, ids[ptr->image_length], NULL, dictionary, NULL, 0);
+
+ memset(dictionary, 0, dictionary_size);
int conv_size;
char *conv_dst;
@@ -766,20 +788,10 @@ cnki_pdf_hn(cnki_t **param)
ptr->text = stream;
}
- dictionary_size = 64 + 2 * ptr->text_size;
- dictionary = malloc(dictionary_size);
-
- if (dictionary == NULL) {
- free(root_kid);
- free(ids);
- free(dim);
- return 1;
- }
-
- memset(dictionary, 0, dictionary_size);
-
strcat(dictionary, "BT\n");
+ strcat(dictionary, "/F0 10 Tf\n");
+
for (int i = 0, j = 0; i < ptr->text_size - 1;) {
switch ((uint16_t) (ptr->text[i + 1] << 8 | ptr->text[i])) {
case 0x8001:
@@ -801,6 +813,9 @@ cnki_pdf_hn(cnki_t **param)
conv_src[0] = ptr->text[i + 3];
conv_src[1] = ptr->text[i + 2];
+ //snprintf(buf, 64, "%f %f Td\n");
+ //strcat(dictionary, buf);
+
conv_size = 6;
if (strconv(&conv_dst, "UTF-16BE",
@@ -831,6 +846,9 @@ cnki_pdf_hn(cnki_t **param)
conv_src[0] = ptr->text[i + 7];
conv_src[1] = ptr->text[i + 6];
+ //snprintf(buf, 64, "%f %f Td\n");
+ //strcat(dictionary, buf);
+
conv_size = 6;
if (strconv(&conv_dst, "UTF-16BE",
@@ -885,23 +903,11 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, "ET");
- /* FIXME: Use the text somehow? */
- free(dictionary);
- }
-
- dictionary_size = 128 + 128 * ptr->image_length;
- dictionary = malloc(dictionary_size);
-
- if (dictionary == NULL) {
- free(root_kid);
- free(ids);
- free(dim);
- return 1;
+ if (ptr->image_length > 0)
+ strcat(dictionary, "\n");
}
if (ptr->image_length > 0) {
- memset(dictionary, 0, dictionary_size);
-
char resize_str[64] = "0.25 0 0 0.25 0 0 cm\n";
double resize_x = 1;
double resize_y = 1;
@@ -970,10 +976,13 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, "\n");
}
+ free(dim);
+ }
+
+ if (strlen(dictionary) > 0) {
if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0) {
free(root_kid);
free(ids);
- free(dim);
free(dictionary);
return 1;
}
@@ -993,6 +1002,9 @@ cnki_pdf_hn(cnki_t **param)
NULL, dictionary, stream, stream_size);
free(stream);
+ } else {
+ pdf_obj_append(&pdf, ids[ptr->image_length + 1],
+ "null", NULL, NULL, 0);
}
memset(dictionary, 0, dictionary_size);
@@ -1002,32 +1014,19 @@ cnki_pdf_hn(cnki_t **param)
/* A4 paper */
strcat(dictionary, "/MediaBox [0 0 595.2756 841.8898]\n");
- if (ptr->image_length > 0) {
- free(dim);
-
- snprintf(buf, 64, "/Resources %d 0 R\n", ids[ptr->image_length]);
- strcat(dictionary, buf);
-
- snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length + 1]);
- strcat(dictionary, buf);
-
- /* Add /Parent when we know root */
- pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0);
-
- root_kid[cnt++] = ids[ptr->image_length + 2];
- } else {
- snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length]);
- strcat(dictionary, buf);
+ snprintf(buf, 64, "/Resources %d 0 R\n", ids[ptr->image_length]);
+ strcat(dictionary, buf);
- /* Add /Parent when we know root */
- pdf_obj_append(&pdf, ids[ptr->image_length + 1], NULL, dictionary, NULL, 0);
+ snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length + 1]);
+ strcat(dictionary, buf);
- root_kid[cnt++] = ids[ptr->image_length + 1];
- }
+ /* Add /Parent when we know root */
+ pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0);
- free(dictionary);
+ root_kid[cnt++] = ids[ptr->image_length + 2];
free(ids);
+ free(dictionary);
ptr = ptr->next;
}
diff --git a/src/version.h b/src/version.h
index db25a27..08b70ea 100644
--- a/src/version.h
+++ b/src/version.h
@@ -6,5 +6,5 @@
#define VERSION "0"
#define RELEASE "2"
-#define PATCH "2"
+#define PATCH "3"
#define EXTRA ""