about | summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author yzrh <yzrh@noema.org> 2023-01-04 17:19:06 +0000
committer yzrh <yzrh@noema.org> 2023-01-04 17:19:06 +0000
commit c2afbb3cbc947dec4d2878c9c3608306039f9c8b (patch)
tree 24735845470795559999b66009718aa4ce45fff1 /src
parent 8cd8a8fbbadaeee6563d6cb5d7c648570d78b2fc (diff)
download melon-c2afbb3cbc947dec4d2878c9c3608306039f9c8b.tar.gz
melon-c2afbb3cbc947dec4d2878c9c3608306039f9c8b.tar.zst
Handle invalid PDF object.
Signed-off-by: yzrh <yzrh@noema.org>
Diffstat (limited to 'src')
-rw-r--r-- src/pdf_parser.c 99
1 files changed, 71 insertions, 28 deletions
diff --git a/src/pdf_parser.c b/src/pdf_parser.c
index ed7bfba..70d72d5 100644
--- a/src/pdf_parser.c
+++ b/src/pdf_parser.c
@@ -79,8 +79,25 @@ _locate(pdf_object_t **pdf, FILE **fp, int size_buf)
memset(buf + end - cur, 0, size_buf - end + cur);
}
- if (head == 0 && (pos = _memmem_whitespace(buf, size_buf, " 0 obj", 6)) != NULL)
- head = cur + (pos - buf) + 7;
+ if (head == 0) {
+ /* Hack needed for invalid object */
+ pos = _memmem_whitespace(buf, size_buf, " 0 obj", 6);
+ tmp = memmem(buf, size_buf, " 0 obj", 6);
+
+ while (tmp != NULL && tmp[6] != 0x3c && tmp[6] != 0x5b)
+ tmp = memmem(tmp + 6, size_buf - (tmp - buf) - 6, " 0 obj", 6);
+
+ if (pos != NULL && tmp != NULL) {
+ if (pos - buf < tmp - buf)
+ head = cur + (pos - buf) + 7;
+ else
+ head = cur + (tmp - buf) + 6;
+ } else if (pos != NULL) {
+ head = cur + (pos - buf) + 7;
+ } else if (tmp != NULL) {
+ head = cur + (tmp - buf) + 6;
+ }
+ }
if (tail == 0 && (pos = _memmem_whitespace(buf, size_buf, "endobj", 6)) != NULL) {
/* We need to check if it is the object stored in stream */
@@ -156,9 +173,46 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
if (buf == NULL)
return 1;
- fseek(*fp, ptr->address - 15, SEEK_SET);
+ fseek(*fp, ptr->address, SEEK_SET);
+ fread(buf, ptr->size, 1, *fp);
+
+ /* Handle incomplete object */
+ head = buf;
+ while ((tmp = _memmem_whitespace(head,
+ ptr->size - (head - buf),
+ " 0 obj", 6)) != NULL)
+ head = tmp + 7;
+
+ /* Hack needed for invalid object */
+ while ((tmp = memmem(head,
+ ptr->size - (head - buf),
+ " 0 obj", 6)) != NULL)
+ head = tmp + 6;
+
+ if (head - buf > 0) {
+ ptr->address += head - buf;
+ ptr->size -= head - buf;
+
+ tmp = realloc(buf, ptr->size);
+
+ if (tmp == NULL)
+ return 1;
+
+ buf = tmp;
+
+ fseek(*fp, ptr->address, SEEK_SET);
+ fread(buf, ptr->size, 1, *fp);
+ }
+
+ /* Hack needed for invalid object */
+ fseek(*fp, ptr->address - 14, SEEK_SET);
fread(str, 8, 1, *fp);
+ if (str[7] < '0' || str[7] > '9') {
+ fseek(*fp, ptr->address - 15, SEEK_SET);
+ fread(str, 8, 1, *fp);
+ }
+
for (int i = 7; i >= 0; i--) {
if (str[i] < '0' || str[i] > '9') {
if (i < 7)
@@ -170,11 +224,10 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
}
}
- fseek(*fp, ptr->address, SEEK_SET);
- fread(buf, ptr->size, 1, *fp);
-
if ((head = memmem(buf, ptr->size, "<<", 2)) != NULL &&
- (tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL) {
+ ((tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL ||
+ /* Hack needed for invalid object */
+ (tail = memmem(buf, ptr->size, ">>", 2)) != NULL)) {
/*
* A dictionary object may have nested dictionary,
* but it should not be in a stream
@@ -187,6 +240,15 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
"stream\r\n", 8) == NULL)
tail = tmp;
+ /* Hack needed for invalid object */
+ while ((tmp = memmem(tail + 2,
+ ptr->size - (tail - buf) - 2,
+ ">>", 2)) != NULL &&
+ memmem(tail + 2,
+ (tmp - tail) - 2,
+ "stream\r\n", 8) == NULL)
+ tail = tmp;
+
ptr->dictionary_size = tail - head + 2;
ptr->dictionary = malloc(ptr->dictionary_size + 1);
@@ -226,27 +288,8 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
free(buf);
} else {
- /* Handle incomplete object */
- head = buf;
- while ((tmp = _memmem_whitespace(head,
- ptr->size - (head - buf),
- " 0 obj", 6)) != NULL)
- head = tmp + 7;
-
- if (head - buf > 0) {
- ptr->object_size = ptr->size - (head - buf);
- ptr->object = malloc(ptr->object_size);
-
- if (ptr->object == NULL)
- return 1;
-
- memcpy(ptr->object, head, ptr->object_size);
-
- free(buf);
- } else {
- ptr->object_size = ptr->size;
- ptr->object = buf;
- }
+ ptr->object_size = ptr->size;
+ ptr->object = buf;
}
ptr = ptr->next;