about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author yzrh <yzrh@noema.org> 2023-01-03 15:39:53 +0000
committer yzrh <yzrh@noema.org> 2023-01-03 15:39:53 +0000
commit 7ac0971a1711233bc0eaa5e8191590612959867b (patch)
tree 955a3c2d9fc4c32e4b972b558878a2d439c61b93
parent e0fe937e1a3c61581f80e27ad5d2c510e0901755 (diff)
download melon-7ac0971a1711233bc0eaa5e8191590612959867b.tar.gz
melon-7ac0971a1711233bc0eaa5e8191590612959867b.tar.zst
Handle invalid result from PDF parser.
Signed-off-by: yzrh <yzrh@noema.org>
-rw-r--r-- src/cnki_pdf.c 53
-rw-r--r-- src/pdf_parser.c 3
2 files changed, 34 insertions, 22 deletions
diff --git a/src/cnki_pdf.c b/src/cnki_pdf.c
index cfcba25..af38aa6 100644
--- a/src/cnki_pdf.c
+++ b/src/cnki_pdf.c
@@ -160,10 +160,10 @@ _pdf_obj_dedup(cnki_t **param, pdf_object_t **pdf)
pdf_object_t *ptr;
if ((*param)->stat > 1)
- printf("Deleting duplicated object\n\t%8s\n", "id");
+ printf("Deleting duplicated object\n");
ptr = *pdf;
- while (ptr->next != NULL) {
+ while (ptr != NULL && ptr->next != NULL) {
if (ptr->id == ptr->next->id) {
pdf_get_obj(&ptr, ptr->id, &tmp);
pdf_obj_del(&ptr, ptr->id);
@@ -174,7 +174,7 @@ _pdf_obj_dedup(cnki_t **param, pdf_object_t **pdf)
ret++;
if ((*param)->stat > 1)
- printf("\t%8d\n", ptr->id);
+ printf("Deleted duplicated object %d.\n", ptr->id);
}
ptr = ptr->next;
@@ -247,18 +247,18 @@ cnki_pdf(cnki_t **param)
int *parent = NULL;
pdf_get_parent_id(&pdf, &parent);
- if (parent[0] == 0)
- return 1;
-
if ((*param)->stat > 0)
printf("Discovered %d parent object(s)\n", parent[0]);
- int8_t *parent_missing = malloc(parent[0] * sizeof(int8_t));
+ int8_t *parent_missing;
+ int *kid;
- if (parent_missing == NULL)
- return 1;
+ if (parent[0] > 0) {
+ parent_missing = malloc(parent[0] * sizeof(int8_t));
- int *kid;
+ if (parent_missing == NULL)
+ return 1;
+ }
for (int i = 1; i <= parent[0]; i++) {
if ((*param)->stat > 1)
@@ -326,7 +326,7 @@ cnki_pdf(cnki_t **param)
if ((*param)->stat > 1)
printf("Searching for root object\n");
- dictionary_size = 128;
+ dictionary_size = 128 + 12 * parent[0];
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
@@ -400,8 +400,10 @@ cnki_pdf(cnki_t **param)
root);
}
+ if (parent[0] > 0)
+ free(parent_missing);
+
free(parent);
- free(parent_missing);
int outline = _pdf_cnki_outline(param, &pdf);
@@ -1166,14 +1168,6 @@ cnki_pdf_hn(cnki_t **param)
free(dictionary);
- dictionary_size = 256;
- dictionary = malloc(dictionary_size);
-
- if (dictionary == NULL) {
- free(root_kid);
- return 1;
- }
-
/* Add /Parent to page object */
for (int i = 0; i < (*param)->file_stat->page; i++) {
if (pdf_get_obj(&pdf, root_kid[i], &tmp) != 0) {
@@ -1182,9 +1176,16 @@ cnki_pdf_hn(cnki_t **param)
return 1;
}
- memset(dictionary, 0, dictionary_size);
+ dictionary_size = tmp->dictionary_size + 24;
+ dictionary = malloc(dictionary_size);
+
+ if (dictionary == NULL) {
+ free(root_kid);
+ return 1;
+ }
memcpy(dictionary, tmp->dictionary, tmp->dictionary_size);
+ memset(dictionary + tmp->dictionary_size, 0, 24);
snprintf(buf, 64, "/Parent %d 0 R\n>>", root);
strcat(dictionary, buf);
@@ -1194,10 +1195,20 @@ cnki_pdf_hn(cnki_t **param)
free(root_kid);
return 1;
}
+
+ free(dictionary);
}
free(root_kid);
+ dictionary_size = 128;
+ dictionary = malloc(dictionary_size);
+
+ if (dictionary == NULL) {
+ free(root_kid);
+ return 1;
+ }
+
memset(dictionary, 0, dictionary_size);
if ((*param)->stat > 0)
diff --git a/src/pdf_parser.c b/src/pdf_parser.c
index 2585e6f..781bafa 100644
--- a/src/pdf_parser.c
+++ b/src/pdf_parser.c
@@ -119,7 +119,8 @@ _locate(pdf_object_t **pdf, FILE **fp, int size_buf)
fseek(*fp, tail + 7, SEEK_SET);
head = tail = 0;
} else if (head > 0 && tail > 0) {
- fseek(*fp, head, SEEK_SET);
+ if (cur + size_buf < end)
+ fseek(*fp, head, SEEK_SET);
tail = 0;
} else {
fseek(*fp, -7, SEEK_CUR);