/* * Copyright (c) 2020-2023, yzrh * * SPDX-License-Identifier: Apache-2.0 */ #ifdef __linux__ #define _GNU_SOURCE #endif /* __linux__ */ #include #include #include "pdf.h" static void * _memmem_whitespace(const void *p0, size_t s0, const void *p1, size_t s1) { const char whitespace[6] = { 0x00, 0x09, 0x0a, 0x0c, 0x0d, 0x20 }; char *ret = NULL; char str[s1 + 1]; memcpy(str, p1, s1); size_t tmp_size = 0; char *tmp; for (int i = 0; i < 6; i++) { str[s1] = whitespace[i]; if ((tmp = memmem(p0, s0, str, s1 + 1)) == NULL) continue; if (tmp_size == 0 || (size_t) (tmp - (char *) p0) < tmp_size) { tmp_size = tmp - (char *) p0; ret = tmp; } } return ret; } static int _locate(pdf_object_t **pdf, FILE **fp, int size_buf) { pdf_object_t *ptr = *pdf; while (ptr->next != NULL) ptr = ptr->next; char buf[size_buf]; long cur = ftell(*fp); long end; fseek(*fp, 0, SEEK_END); end = ftell(*fp); fseek(*fp, cur, SEEK_SET); long head = 0; long tail = 0; char *pos; char *tmp; for (;;) { if (cur + size_buf < end) { fread(buf, size_buf, 1, *fp); } else { fread(buf, end - cur, 1, *fp); memset(buf + end - cur, 0, size_buf - end + cur); } if (head == 0) { /* Hack needed for invalid object */ pos = _memmem_whitespace(buf, size_buf, " 0 obj", 6); tmp = memmem(buf, size_buf, " 0 obj", 6); while (tmp != NULL && tmp[6] != 0x3c && tmp[6] != 0x5b) tmp = memmem(tmp + 6, size_buf - (tmp - buf) - 6, " 0 obj", 6); if (pos != NULL && tmp != NULL) { if (pos - buf < tmp - buf) head = cur + (pos - buf) + 7; else head = cur + (tmp - buf) + 6; } else if (pos != NULL) { head = cur + (pos - buf) + 7; } else if (tmp != NULL) { head = cur + (tmp - buf) + 6; } } if (tail == 0 && (pos = _memmem_whitespace(buf, size_buf, "endobj", 6)) != NULL) { /* We need to check if it is the object stored in stream */ while (memcmp(pos + 7, "\r\nendstream", 11) == 0 && (tmp = _memmem_whitespace(pos + 7, size_buf - (pos - buf) - 7, "endobj", 6)) != NULL) pos = tmp; if (pos - buf < size_buf - 7) tail = cur + (pos - buf); } if (tail > head) { if (ptr->next == NULL) { ptr->next = malloc(sizeof(pdf_object_t)); if (ptr->next == NULL) return 1; ptr->next->id = 0; ptr->next->object_size = 0; ptr->next->object = NULL; ptr->next->dictionary_size = 0; ptr->next->dictionary = NULL; ptr->next->stream_size = 0; ptr->next->stream = NULL; ptr->next->next = NULL; ptr = ptr->next; } ptr->address = head; ptr->size = tail - head; fseek(*fp, tail + 7, SEEK_SET); head = tail = 0; } else if (head > 0 && tail > 0) { if (cur + size_buf < end) fseek(*fp, head, SEEK_SET); tail = 0; } else { fseek(*fp, -7, SEEK_CUR); } if ((cur = ftell(*fp)) + 7 >= end) break; } return 0; } int pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf) { if (*pdf == NULL || *fp == NULL || size_buf < 7) return 1; if (_locate(pdf, fp, size_buf) != 0) return 1; pdf_object_t *ptr = (*pdf)->next; char str[8]; char *buf; char *head; char *tail; char *tmp; while (ptr != NULL) { buf = malloc(ptr->size); if (buf == NULL) return 1; fseek(*fp, ptr->address, SEEK_SET); fread(buf, ptr->size, 1, *fp); /* Handle incomplete object */ head = buf; while ((tmp = _memmem_whitespace(head, ptr->size - (head - buf), " 0 obj", 6)) != NULL) head = tmp + 7; /* Hack needed for invalid object */ while ((tmp = memmem(head, ptr->size - (head - buf), " 0 obj", 6)) != NULL) head = tmp + 6; if (head - buf > 0) { ptr->address += head - buf; ptr->size -= head - buf; tmp = realloc(buf, ptr->size); if (tmp == NULL) return 1; buf = tmp; fseek(*fp, ptr->address, SEEK_SET); fread(buf, ptr->size, 1, *fp); } /* Hack needed for invalid object */ fseek(*fp, ptr->address - 14, SEEK_SET); fread(str, 8, 1, *fp); if (str[7] < '0' || str[7] > '9') { fseek(*fp, ptr->address - 15, SEEK_SET); fread(str, 8, 1, *fp); } for (int i = 7; i >= 0; i--) { if (str[i] < '0' || str[i] > '9') { if (i < 7) ptr->id = atoi(str + i + 1); else ptr->id = 0; break; } } if ((head = memmem(buf, ptr->size, "<<", 2)) != NULL && ((tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL || /* Hack needed for invalid object */ (tail = memmem(buf, ptr->size, ">>", 2)) != NULL)) { if (memmem(buf, tail - buf, "stream\r\n", 8) != NULL) { tail = memmem(buf, ptr->size, ">>", 2); while (ptr->size - (tail - buf) > 2 && (tmp = memmem(tail + 2, ptr->size - (tail - buf) - 2, ">>", 2)) != NULL && memmem(tail + 2, (tmp - tail) - 2, "stream\r\n", 8) == NULL) tail = tmp; } else { /* * A dictionary object may have nested dictionary, * but it should not be in a stream */ while (ptr->size - (tail - buf) > 3 && (tmp = _memmem_whitespace(tail + 3, ptr->size - (tail - buf) - 3, ">>", 2)) != NULL && memmem(tail + 3, (tmp - tail) - 3, "stream\r\n", 8) == NULL) tail = tmp; } ptr->dictionary_size = tail - head + 2; ptr->dictionary = malloc(ptr->dictionary_size + 1); if (ptr->dictionary == NULL) return 1; memcpy(ptr->dictionary, head, ptr->dictionary_size); memset(ptr->dictionary + ptr->dictionary_size, 0, 1); if ((head = memmem(tail, ptr->size - (tail - buf), "stream\r\n", 8)) != NULL && (tail = _memmem_whitespace(head, ptr->size - (head - buf), "endstream", 9)) != NULL) { /* * An object may contain a stream that * contains another object that * contains another stream */ while (_memmem_whitespace(tail + 10, ptr->size - (tail - buf) - 10, "endobj", 6) != NULL && (tmp = _memmem_whitespace(tail + 10, ptr->size - (tail - buf) - 10, "endstream", 9)) != NULL) tail = tmp; ptr->stream_size = (tail - head) - 8; ptr->stream = malloc(ptr->stream_size); if (ptr->stream == NULL) return 1; memcpy(ptr->stream, head + 8, ptr->stream_size); } free(buf); } else { ptr->object_size = ptr->size; ptr->object = buf; } ptr = ptr->next; } return 0; }