about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author yzrh <yzrh@noema.org> 2023-01-02 15:38:45 +0000
committer yzrh <yzrh@noema.org> 2023-01-02 15:38:45 +0000
commit 7d9d658461ed5b0118a1bff8f1df29fb00165a25 (patch)
tree fb8ff38c024d3f7111edcd1604d5a6ca37b88a74 /src
parent 000405693ead8ef950558b15de576e36e184b680 (diff)
download melon-7d9d658461ed5b0118a1bff8f1df29fb00165a25.tar.gz
melon-7d9d658461ed5b0118a1bff8f1df29fb00165a25.tar.zst
Handle duplicated image in HN.
Signed-off-by: yzrh <yzrh@noema.org>
Diffstat (limited to 'src')
-rw-r--r-- src/cnki_pdf.c 26
1 file changed, 24 insertions, 2 deletions
diff --git a/src/cnki_pdf.c b/src/cnki_pdf.c
index 90ba28e..cfcba25 100644
--- a/src/cnki_pdf.c
+++ b/src/cnki_pdf.c
@@ -500,6 +500,8 @@ cnki_pdf_hn(cnki_t **param)
char buf[64];
+ pdf_object_t *tmp;
+
int cnt = 0;
int *root_kid = malloc((*param)->file_stat->page * sizeof(int));
@@ -971,6 +973,28 @@ cnki_pdf_hn(cnki_t **param)
margin_y = (3507.874 - dim[1]) / 2;
}
+ /* Remove duplicated image, ptr->image_length is sometimes squared */
+ for (int i = 1; i < ptr->image_length; i++) {
+ if ((ptr->image_data[i].x > 0 || ptr->image_data[i].y > 0) &&
+ dim[i * 2] < dim[0] && dim[i * 2 + 1] < dim[1])
+ continue;
+
+ for (int j = i; j < ptr->image_length; j++) {
+ pdf_get_obj(&pdf, ids[j], &tmp);
+ pdf_obj_del(&pdf, ids[j]);
+
+ tmp->next = NULL;
+ pdf_obj_destroy(&tmp);
+
+ dim[j * 2] = -1;
+ dim[j * 2 + 1] = -1;
+
+ pdf_obj_append(&pdf, ids[j], NULL, NULL, NULL, 0);
+ }
+
+ break;
+ }
+
for (int i = 0; i < ptr->image_length; i++) {
if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0)
continue;
@@ -1150,8 +1174,6 @@ cnki_pdf_hn(cnki_t **param)
return 1;
}
- pdf_object_t *tmp;
-
/* Add /Parent to page object */
for (int i = 0; i < (*param)->file_stat->page; i++) {
if (pdf_get_obj(&pdf, root_kid[i], &tmp) != 0) {