diff options
author | yzrh <yzrh@noema.org> | 2023-01-01 19:31:33 +0000 |
---|---|---|
committer | yzrh <yzrh@noema.org> | 2023-01-01 19:31:33 +0000 |
commit | 1a1fee1034b7d143a3ad77707ef930f2a8d1e3d8 (patch) | |
tree | de9d58c584ec37ca4243926c81e68573dabcaac9 /src | |
parent | cde014cffbe2e8d94de144008ad00bbccbb3a8ab (diff) | |
download | melon-1a1fee1034b7d143a3ad77707ef930f2a8d1e3d8.tar.gz melon-1a1fee1034b7d143a3ad77707ef930f2a8d1e3d8.tar.zst |
Handle duplicated object in CAJ.
Signed-off-by: yzrh <yzrh@noema.org>
Diffstat (limited to 'src')
-rw-r--r-- | src/cnki_pdf.c | 73 |
1 files changed, 47 insertions, 26 deletions
diff --git a/src/cnki_pdf.c b/src/cnki_pdf.c index 6cb8c9f..90ba28e 100644 --- a/src/cnki_pdf.c +++ b/src/cnki_pdf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, yzrh <yzrh@noema.org> + * Copyright (c) 2020-2023, yzrh <yzrh@noema.org> * * SPDX-License-Identifier: Apache-2.0 */ @@ -145,11 +145,46 @@ _pdf_obj_sort(cnki_t **param, pdf_object_t **pdf) ret = pdf_obj_sort(pdf); + if ((*param)->stat > 0) + printf("Sorted object(s)\n"); + + return ret; +} + +static int +_pdf_obj_dedup(cnki_t **param, pdf_object_t **pdf) +{ + int ret = 0; + + pdf_object_t *tmp; + pdf_object_t *ptr; + + if ((*param)->stat > 1) + printf("Deleting duplicated object\n\t%8s\n", "id"); + + ptr = *pdf; + while (ptr->next != NULL) { + if (ptr->id == ptr->next->id) { + pdf_get_obj(&ptr, ptr->id, &tmp); + pdf_obj_del(&ptr, ptr->id); + + tmp->next = NULL; + pdf_obj_destroy(&tmp); + + ret++; + + if ((*param)->stat > 1) + printf("\t%8d\n", ptr->id); + } + + ptr = ptr->next; + } + if ((*param)->stat > 0) { if (ret == 0) - printf("Sorted object(s)\n"); + printf("No duplicated object\n"); else - printf("Object(s) not sorted\n"); + printf("Deleted %d duplicated object(s)\n", ret); } return ret; @@ -338,12 +373,9 @@ cnki_pdf(cnki_t **param) printf("Generating root object\n"); snprintf(buf, 64, - "<<\n/Type /Pages\n/Kids "); + "<<\n/Type /Pages\n/Kids ["); strcat(dictionary, buf); - if (parent[0] > 1) - strcat(dictionary, "["); - for (int i = 0, j = 0; i < parent[0]; i++) { if (parent_missing[i] == 1) { snprintf(buf, 64, "%d 0 R", parent[i + 1]); @@ -354,12 +386,7 @@ cnki_pdf(cnki_t **param) } } - if (parent[0] > 1) - strcat(dictionary, "]"); - - strcat(dictionary, "\n"); - - snprintf(buf, 64, "/Count %d\n", (*param)->file_stat->page); + snprintf(buf, 64, "]\n/Count %d\n", (*param)->file_stat->page); strcat(dictionary, buf); strcat(dictionary, ">>"); @@ -442,6 +469,8 @@ cnki_pdf(cnki_t **param) _pdf_obj_sort(param, &pdf); + _pdf_obj_dedup(param, &pdf); + _pdf_dump(param, &pdf); pdf_obj_destroy(&pdf); @@ -721,12 +750,12 @@ cnki_pdf_hn(cnki_t **param) if ((*param)->stat > 2) printf("Not extracted.\n"); - pdf_obj_append(&pdf, ids[i], "null", NULL, NULL, 0); + pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0); } else { if ((*param)->stat > 2) printf("Unsupported format.\n"); - pdf_obj_append(&pdf, ids[i], "null", NULL, NULL, 0); + pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0); } } @@ -1028,7 +1057,7 @@ cnki_pdf_hn(cnki_t **param) free(stream); } else { pdf_obj_append(&pdf, ids[ptr->image_length + 1], - "null", NULL, NULL, 0); + NULL, NULL, NULL, 0); } memset(dictionary, 0, dictionary_size); @@ -1094,12 +1123,9 @@ cnki_pdf_hn(cnki_t **param) int root = pdf_get_free_id(&pdf); - snprintf(buf, 64, "<<\n/Type /Pages\n/Kids "); + snprintf(buf, 64, "<<\n/Type /Pages\n/Kids ["); strcat(dictionary, buf); - if ((*param)->file_stat->page > 1) - strcat(dictionary, "["); - for (int i = 0; i < (*param)->file_stat->page; i++) { snprintf(buf, 64, "%d 0 R", root_kid[i]); strcat(dictionary, buf); @@ -1107,12 +1133,7 @@ cnki_pdf_hn(cnki_t **param) strcat(dictionary, " "); } - if ((*param)->file_stat->page > 1) - strcat(dictionary, "]"); - - strcat(dictionary, "\n"); - - snprintf(buf, 64, "/Count %d\n", (*param)->file_stat->page); + snprintf(buf, 64, "]\n/Count %d\n", (*param)->file_stat->page); strcat(dictionary, buf); strcat(dictionary, ">>"); |