about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/pdf_parser.c 99
1 files changed, 71 insertions, 28 deletions
diff --git a/src/pdf_parser.c b/src/pdf_parser.c
index ed7bfba..70d72d5 100644
--- a/src/pdf_parser.c
+++ b/src/pdf_parser.c
@@ -79,8 +79,25 @@ _locate(pdf_object_t **pdf, FILE **fp, int size_buf)
memset(buf + end - cur, 0, size_buf - end + cur);
}
- if (head == 0 && (pos = _memmem_whitespace(buf, size_buf, " 0 obj", 6)) != NULL)
- head = cur + (pos - buf) + 7;
+ if (head == 0) {
+ /* Hack needed for invalid object */
+ pos = _memmem_whitespace(buf, size_buf, " 0 obj", 6);
+ tmp = memmem(buf, size_buf, " 0 obj", 6);
+
+ while (tmp != NULL && tmp[6] != 0x3c && tmp[6] != 0x5b)
+ tmp = memmem(tmp + 6, size_buf - (tmp - buf) - 6, " 0 obj", 6);
+
+ if (pos != NULL && tmp != NULL) {
+ if (pos - buf < tmp - buf)
+ head = cur + (pos - buf) + 7;
+ else
+ head = cur + (tmp - buf) + 6;
+ } else if (pos != NULL) {
+ head = cur + (pos - buf) + 7;
+ } else if (tmp != NULL) {
+ head = cur + (tmp - buf) + 6;
+ }
+ }
if (tail == 0 && (pos = _memmem_whitespace(buf, size_buf, "endobj", 6)) != NULL) {
/* We need to check if it is the object stored in stream */
@@ -156,9 +173,46 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
if (buf == NULL)
return 1;
- fseek(*fp, ptr->address - 15, SEEK_SET);
+ fseek(*fp, ptr->address, SEEK_SET);
+ fread(buf, ptr->size, 1, *fp);
+
+ /* Handle incomplete object */
+ head = buf;
+ while ((tmp = _memmem_whitespace(head,
+ ptr->size - (head - buf),
+ " 0 obj", 6)) != NULL)
+ head = tmp + 7;
+
+ /* Hack needed for invalid object */
+ while ((tmp = memmem(head,
+ ptr->size - (head - buf),
+ " 0 obj", 6)) != NULL)
+ head = tmp + 6;
+
+ if (head - buf > 0) {
+ ptr->address += head - buf;
+ ptr->size -= head - buf;
+
+ tmp = realloc(buf, ptr->size);
+
+ if (tmp == NULL)
+ return 1;
+
+ buf = tmp;
+
+ fseek(*fp, ptr->address, SEEK_SET);
+ fread(buf, ptr->size, 1, *fp);
+ }
+
+ /* Hack needed for invalid object */
+ fseek(*fp, ptr->address - 14, SEEK_SET);
fread(str, 8, 1, *fp);
+ if (str[7] < '0' || str[7] > '9') {
+ fseek(*fp, ptr->address - 15, SEEK_SET);
+ fread(str, 8, 1, *fp);
+ }
+
for (int i = 7; i >= 0; i--) {
if (str[i] < '0' || str[i] > '9') {
if (i < 7)
@@ -170,11 +224,10 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
}
}
- fseek(*fp, ptr->address, SEEK_SET);
- fread(buf, ptr->size, 1, *fp);
-
if ((head = memmem(buf, ptr->size, "<<", 2)) != NULL &&
- (tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL) {
+ ((tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL ||
+ /* Hack needed for invalid object */
+ (tail = memmem(buf, ptr->size, ">>", 2)) != NULL)) {
/*
* A dictionary object may have nested dictionary,
* but it should not be in a stream
@@ -187,6 +240,15 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
"stream\r\n", 8) == NULL)
tail = tmp;
+ /* Hack needed for invalid object */
+ while ((tmp = memmem(tail + 2,
+ ptr->size - (tail - buf) - 2,
+ ">>", 2)) != NULL &&
+ memmem(tail + 2,
+ (tmp - tail) - 2,
+ "stream\r\n", 8) == NULL)
+ tail = tmp;
+
ptr->dictionary_size = tail - head + 2;
ptr->dictionary = malloc(ptr->dictionary_size + 1);
@@ -226,27 +288,8 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
free(buf);
} else {
- /* Handle incomplete object */
- head = buf;
- while ((tmp = _memmem_whitespace(head,
- ptr->size - (head - buf),
- " 0 obj", 6)) != NULL)
- head = tmp + 7;
-
- if (head - buf > 0) {
- ptr->object_size = ptr->size - (head - buf);
- ptr->object = malloc(ptr->object_size);
-
- if (ptr->object == NULL)
- return 1;
-
- memcpy(ptr->object, head, ptr->object_size);
-
- free(buf);
- } else {
- ptr->object_size = ptr->size;
- ptr->object = buf;
- }
+ ptr->object_size = ptr->size;
+ ptr->object = buf;
}
ptr = ptr->next;