From 7d9d658461ed5b0118a1bff8f1df29fb00165a25 Mon Sep 17 00:00:00 2001
From: yzrh
Date: Mon, 2 Jan 2023 15:38:45 +0000
Subject: Handle duplicated image in HN.

Signed-off-by: yzrh
---
 src/cnki_pdf.c | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

(limited to 'src/cnki_pdf.c')

diff --git a/src/cnki_pdf.c b/src/cnki_pdf.c
index 90ba28e..cfcba25 100644
--- a/src/cnki_pdf.c
+++ b/src/cnki_pdf.c
@@ -500,6 +500,8 @@ cnki_pdf_hn(cnki_t **param)
 
 	char buf[64];
 
+	pdf_object_t *tmp;
+
 	int cnt = 0;
 
 	int *root_kid = malloc((*param)->file_stat->page * sizeof(int));
@@ -971,6 +973,28 @@ cnki_pdf_hn(cnki_t **param)
 			margin_y = (3507.874 - dim[1]) / 2;
 		}
 
+		/* Remove duplicated image, ptr->image_length is sometimes squared */
+		for (int i = 1; i < ptr->image_length; i++) {
+			if ((ptr->image_data[i].x > 0 || ptr->image_data[i].y > 0) &&
+			    dim[i * 2] < dim[0] && dim[i * 2 + 1] < dim[1])
+				continue;
+
+			for (int j = i; j < ptr->image_length; j++) {
+				pdf_get_obj(&pdf, ids[j], &tmp);
+				pdf_obj_del(&pdf, ids[j]);
+
+				tmp->next = NULL;
+				pdf_obj_destroy(&tmp);
+
+				dim[j * 2] = -1;
+				dim[j * 2 + 1] = -1;
+
+				pdf_obj_append(&pdf, ids[j], NULL, NULL, NULL, 0);
+			}
+
+			break;
+		}
+
 		for (int i = 0; i < ptr->image_length; i++) {
 			if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0)
 				continue;
@@ -1150,8 +1174,6 @@ cnki_pdf_hn(cnki_t **param)
 		return 1;
 	}
 
-	pdf_object_t *tmp;
-
 	/* Add /Parent to page object */
 	for (int i = 0; i < (*param)->file_stat->page; i++) {
 		if (pdf_get_obj(&pdf, root_kid[i], &tmp) != 0) {
-- 
cgit v1.2.3