From 4a02b8bfc74920291a62f06fff9cf6e6c4f23ace Mon Sep 17 00:00:00 2001
From: yzrh
Date: Mon, 2 Jan 2023 23:40:54 +0000
Subject: Fix inconsistent whitespace detection in PDF parser.

Signed-off-by: yzrh
---
 CHANGE.md        |  1 +
 src/pdf_parser.c | 35 ++++++++++++++++++++++-------------
 2 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/CHANGE.md b/CHANGE.md
index a2bfe9f..063d93a 100644
--- a/CHANGE.md
+++ b/CHANGE.md
@@ -8,6 +8,7 @@
 
 * Improve PDF parser.
 * Handle duplicated object in CAJ.
+* Handle duplicated image in HN.
 * Fix JBIG decoder.
 
 0.2.4 (2022-12-31)
diff --git a/src/pdf_parser.c b/src/pdf_parser.c
index bb45e63..2585e6f 100644
--- a/src/pdf_parser.c
+++ b/src/pdf_parser.c
@@ -19,26 +19,35 @@ static void *
 _memmem_whitespace(const void *p0, size_t s0, const void *p1, size_t s1)
 {
 	const char whitespace[6] = {
-		' ',
-		'\r',
-		'\n',
-		'\f',
-		'\t',
-		'\0'
+		0x00,
+		0x09,
+		0x0a,
+		0x0c,
+		0x0d,
+		0x20
 	};
 
-	char tmp[s1 + 1];
-	memcpy(tmp, p1, s1);
+	char *ret = NULL;
 
-	char *ret;
+	char str[s1 + 1];
+	memcpy(str, p1, s1);
+
+	size_t tmp_size = 0;
+	char *tmp;
 
 	for (int i = 0; i < 6; i++) {
-		tmp[s1] = whitespace[i];
-		if ((ret = memmem(p0, s0, tmp, s1 + 1)) != NULL)
-			return ret;
+		str[s1] = whitespace[i];
+
+		if ((tmp = memmem(p0, s0, str, s1 + 1)) == NULL)
+			continue;
+
+		if (tmp_size == 0 || (size_t) (tmp - (char *) p0) < tmp_size) {
+			tmp_size = tmp - (char *) p0;
+			ret = tmp;
+		}
 	}
 
-	return NULL;
+	return ret;
 }
 
 static int
-- 
cgit v1.2.3