/*
 * Taken from src/pdf_parser.c as of commit
 * 4a02b8bfc74920291a62f06fff9cf6e6c4f23ace (yzrh, 2023-01-02,
 * "Fix inconsistent whitespace detection in PDF parser.").
 */

/*
 * Locate the first occurrence of the s1-byte sequence at p1 inside the
 * s0-byte buffer at p0 that is immediately followed by one of the six
 * whitespace bytes 0x00, 0x09, 0x0a, 0x0c, 0x0d or 0x20.
 *
 * The trailing whitespace byte must itself lie inside the buffer, so a
 * match needs at least s1 + 1 bytes of haystack.
 *
 * Returns a pointer to the start of the match (the first byte of the
 * needle, not the whitespace byte), or NULL if there is no such match.
 * When s1 is 0 the result is the first whitespace byte in the buffer.
 */
static void *
_memmem_whitespace(const void *p0, size_t s0, const void *p1, size_t s1)
{
	static const unsigned char whitespace[6] = {
		0x00,
		0x09,
		0x0a,
		0x0c,
		0x0d,
		0x20
	};

	const unsigned char *hay = p0;

	/* Not enough room for the needle plus one whitespace byte */
	if (s0 <= s1)
		return NULL;

	/*
	 * Scan forward once and return at the first hit: this yields the
	 * earliest match regardless of which whitespace byte follows it,
	 * including a match at offset 0, and needs no scratch copy of the
	 * needle.
	 */
	size_t limit = s0 - s1;

	for (size_t i = 0; i < limit; i++) {
		/* Cheap filter: the byte after the candidate must be whitespace */
		if (memchr(whitespace, hay[i + s1], sizeof(whitespace)) == NULL)
			continue;

		/* Skip memcmp for an empty needle so p1 is never dereferenced */
		if (s1 == 0 || memcmp(hay + i, p1, s1) == 0)
			return (void *) (hay + i);
	}

	return NULL;
}