diff options
author | yzrh <yzrh@noema.org> | 2023-01-03 15:39:53 +0000 |
---|---|---|
committer | yzrh <yzrh@noema.org> | 2023-01-03 15:39:53 +0000 |
commit | 7ac0971a1711233bc0eaa5e8191590612959867b (patch) | |
tree | 955a3c2d9fc4c32e4b972b558878a2d439c61b93 | |
parent | e0fe937e1a3c61581f80e27ad5d2c510e0901755 (diff) | |
download | melon-7ac0971a1711233bc0eaa5e8191590612959867b.tar.gz melon-7ac0971a1711233bc0eaa5e8191590612959867b.tar.zst |
Handle invalid result from PDF parser.
Signed-off-by: yzrh <yzrh@noema.org>
-rw-r--r-- | src/cnki_pdf.c | 53 | ||||
-rw-r--r-- | src/pdf_parser.c | 3 |
2 files changed, 34 insertions, 22 deletions
diff --git a/src/cnki_pdf.c b/src/cnki_pdf.c index cfcba25..af38aa6 100644 --- a/src/cnki_pdf.c +++ b/src/cnki_pdf.c @@ -160,10 +160,10 @@ _pdf_obj_dedup(cnki_t **param, pdf_object_t **pdf) pdf_object_t *ptr; if ((*param)->stat > 1) - printf("Deleting duplicated object\n\t%8s\n", "id"); + printf("Deleting duplicated object\n"); ptr = *pdf; - while (ptr->next != NULL) { + while (ptr != NULL && ptr->next != NULL) { if (ptr->id == ptr->next->id) { pdf_get_obj(&ptr, ptr->id, &tmp); pdf_obj_del(&ptr, ptr->id); @@ -174,7 +174,7 @@ _pdf_obj_dedup(cnki_t **param, pdf_object_t **pdf) ret++; if ((*param)->stat > 1) - printf("\t%8d\n", ptr->id); + printf("Deleted duplicated object %d.\n", ptr->id); } ptr = ptr->next; @@ -247,18 +247,18 @@ cnki_pdf(cnki_t **param) int *parent = NULL; pdf_get_parent_id(&pdf, &parent); - if (parent[0] == 0) - return 1; - if ((*param)->stat > 0) printf("Discovered %d parent object(s)\n", parent[0]); - int8_t *parent_missing = malloc(parent[0] * sizeof(int8_t)); + int8_t *parent_missing; + int *kid; - if (parent_missing == NULL) - return 1; + if (parent[0] > 0) { + parent_missing = malloc(parent[0] * sizeof(int8_t)); - int *kid; + if (parent_missing == NULL) + return 1; + } for (int i = 1; i <= parent[0]; i++) { if ((*param)->stat > 1) @@ -326,7 +326,7 @@ cnki_pdf(cnki_t **param) if ((*param)->stat > 1) printf("Searching for root object\n"); - dictionary_size = 128; + dictionary_size = 128 + 12 * parent[0]; dictionary = malloc(dictionary_size); if (dictionary == NULL) { @@ -400,8 +400,10 @@ cnki_pdf(cnki_t **param) root); } + if (parent[0] > 0) + free(parent_missing); + free(parent); - free(parent_missing); int outline = _pdf_cnki_outline(param, &pdf); @@ -1166,14 +1168,6 @@ cnki_pdf_hn(cnki_t **param) free(dictionary); - dictionary_size = 256; - dictionary = malloc(dictionary_size); - - if (dictionary == NULL) { - free(root_kid); - return 1; - } - /* Add /Parent to page object */ for (int i = 0; i < (*param)->file_stat->page; i++) { if (pdf_get_obj(&pdf, root_kid[i], &tmp) != 0) { @@ -1182,9 +1176,16 @@ cnki_pdf_hn(cnki_t **param) return 1; } - memset(dictionary, 0, dictionary_size); + dictionary_size = tmp->dictionary_size + 24; + dictionary = malloc(dictionary_size); + + if (dictionary == NULL) { + free(root_kid); + return 1; + } memcpy(dictionary, tmp->dictionary, tmp->dictionary_size); + memset(dictionary + tmp->dictionary_size, 0, 24); snprintf(buf, 64, "/Parent %d 0 R\n>>", root); strcat(dictionary, buf); @@ -1194,10 +1195,20 @@ cnki_pdf_hn(cnki_t **param) free(root_kid); return 1; } + + free(dictionary); } free(root_kid); + dictionary_size = 128; + dictionary = malloc(dictionary_size); + + if (dictionary == NULL) { + free(root_kid); + return 1; + } + memset(dictionary, 0, dictionary_size); if ((*param)->stat > 0) diff --git a/src/pdf_parser.c b/src/pdf_parser.c index 2585e6f..781bafa 100644 --- a/src/pdf_parser.c +++ b/src/pdf_parser.c @@ -119,7 +119,8 @@ _locate(pdf_object_t **pdf, FILE **fp, int size_buf) fseek(*fp, tail + 7, SEEK_SET); head = tail = 0; } else if (head > 0 && tail > 0) { - fseek(*fp, head, SEEK_SET); + if (cur + size_buf < end) + fseek(*fp, head, SEEK_SET); tail = 0; } else { fseek(*fp, -7, SEEK_CUR); |