From c2ad6549fb337ce707e04aa441c9b492171a3b9d Mon Sep 17 00:00:00 2001 From: yzrh Date: Sun, 25 Dec 2022 18:03:01 +0000 Subject: Handle headless HN and page with no image. Signed-off-by: yzrh --- src/cnki_pdf.c | 265 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 139 insertions(+), 126 deletions(-) (limited to 'src/cnki_pdf.c') diff --git a/src/cnki_pdf.c b/src/cnki_pdf.c index c56a45f..603ce01 100644 --- a/src/cnki_pdf.c +++ b/src/cnki_pdf.c @@ -481,7 +481,10 @@ cnki_pdf_hn(cnki_t **param) * page object */ int *ids = NULL; - pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3); + if (ptr->image_length > 0) + pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3); + else + pdf_get_free_ids(&pdf, &ids, 2); int bitmap_size; char *bitmap; @@ -489,27 +492,31 @@ cnki_pdf_hn(cnki_t **param) int stream_size; char *stream; - int *dim = malloc(2 * ptr->image_length * sizeof(int)); + int *dim; - int ret; - int info[3]; + if (ptr->image_length > 0) { + dim = malloc(2 * ptr->image_length * sizeof(int)); - if (dim == NULL) { - free(root_kid); - free(ids); - return 1; - } + if (dim == NULL) { + free(root_kid); + free(ids); + return 1; + } - dictionary_size = 256; - dictionary = malloc(dictionary_size); + dictionary_size = 256; + dictionary = malloc(dictionary_size); - if (dictionary == NULL) { - free(root_kid); - free(ids); - free(dim); - return 1; + if (dictionary == NULL) { + free(root_kid); + free(ids); + free(dim); + return 1; + } } + int ret; + int info[3]; + for (int i = 0; i < ptr->image_length; i++) { memset(dictionary, 0, dictionary_size); @@ -684,66 +691,42 @@ cnki_pdf_hn(cnki_t **param) } } - memset(dictionary, 0, dictionary_size); + if (ptr->image_length > 0) { + memset(dictionary, 0, dictionary_size); - strcat(dictionary, "<<\n/XObject <<"); + strcat(dictionary, "<<\n/XObject <<"); - for (int i = 0; i < ptr->image_length; i++) { - snprintf(buf, 64, "/Im%d %d 0 R", i, ids[i]); - strcat(dictionary, buf); + for (int i = 0; i < ptr->image_length; i++) { + snprintf(buf, 64, "/Im%d %d 0 R", i, ids[i]); + strcat(dictionary, buf); - if (i + 1 < ptr->image_length) - strcat(dictionary, " "); - } + if (i + 1 < ptr->image_length) + strcat(dictionary, " "); + } - strcat(dictionary, ">>\n>>"); + strcat(dictionary, ">>\n>>"); - pdf_obj_append(&pdf, ids[ptr->image_length], NULL, dictionary, NULL, 0); + pdf_obj_append(&pdf, ids[ptr->image_length], NULL, dictionary, NULL, 0); - free(dictionary); + free(dictionary); + } int conv_size; char *conv_dst; char conv_src[2]; char conv_hex[3]; - if (strncmp(ptr->text + 8, "COMPRESSTEXT", 12) == 0) { - cnki_zlib(&stream, &stream_size, ptr->text, ptr->text_size); - - dictionary_size = 64 + 2 * stream_size; - dictionary = malloc(dictionary_size); + if (ptr->text_size > 0) { + if (strncmp(ptr->text + 8, "COMPRESSTEXT", 12) == 0 || + strncmp(ptr->text, "COMPRESSTEXT", 12) == 0) { + cnki_zlib(&stream, &stream_size, ptr->text, ptr->text_size); - if (dictionary == NULL) { - free(root_kid); - free(ids); - free(dim); - return 1; - } + free(ptr->text); - memset(dictionary, 0, dictionary_size); - - strcat(dictionary, "text_size = stream_size; + ptr->text = stream; } - free(stream); - strcat(dictionary, ">"); - } else { dictionary_size = 64 + 2 * ptr->text_size; dictionary = malloc(dictionary_size); @@ -758,9 +741,26 @@ cnki_pdf_hn(cnki_t **param) strcat(dictionary, "text_size; i += 4) { - conv_src[0] = ptr->text[i + 3]; - conv_src[1] = ptr->text[i + 2]; + for (int i = 0; i < ptr->text_size; i += 6) { + if (i + 5 >= ptr->text_size) + break; + + conv_src[0] = ptr->text[i + 5]; + conv_src[1] = ptr->text[i + 4]; + + if ((conv_src[0] << 8 | conv_src[1]) == 0xa389) { + strcat(dictionary, "a389"); + continue; + } else if ((conv_src[0] << 8 | conv_src[1]) == 0xa38a) { + strcat(dictionary, "a38a"); + continue; + } else if ((conv_src[0] << 8 | conv_src[1]) == 0xa38d) { + strcat(dictionary, "a38d"); + continue; + } else if ((conv_src[0] << 8 | conv_src[1]) == 0xa3a0) { + strcat(dictionary, "a3a0"); + continue; + } conv_size = 6; @@ -776,12 +776,12 @@ cnki_pdf_hn(cnki_t **param) } strcat(dictionary, ">"); - } - /* FIXME: Use the text somehow? */ - free(dictionary); + /* FIXME: Use the text somehow? */ + free(dictionary); + } - dictionary_size = 64 + 64 * ptr->image_length; + dictionary_size = 64 + 128 * ptr->image_length; dictionary = malloc(dictionary_size); if (dictionary == NULL) { @@ -791,96 +791,109 @@ cnki_pdf_hn(cnki_t **param) return 1; } - memset(dictionary, 0, dictionary_size); - - strcat(dictionary, "q\n"); + if (ptr->image_length > 0) { + memset(dictionary, 0, dictionary_size); - strcat(dictionary, "0.25 0 0 0.25 0 0 cm\n"); + strcat(dictionary, "q\n"); - double resize_x; - double resize_y; + strcat(dictionary, "0.25 0 0 0.25 0 0 cm\n"); - for (int i = 0; i < ptr->image_length; i++) { - if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0) - continue; + double resize_x; + double resize_y; - /* Scale within bound of A4 paper */ - resize_x = 595.276 * 4 / dim[i * 2]; - resize_y = 841.89 * 4 / dim[i * 2 + 1]; + for (int i = 0; i < ptr->image_length; i++) { + if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0) + continue; - if (resize_y < resize_x) - snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n", - resize_y, resize_y); - else - snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n", - resize_x, resize_x); - strcat(dictionary, buf); + /* Scale within bound of A4 paper */ + resize_x = 595.276 * 4 / dim[i * 2]; + resize_y = 841.89 * 4 / dim[i * 2 + 1]; - /* Apply transformation matrix */ - if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1) { - snprintf(buf, 64, "1 0 0 1 0 %d cm\n", - dim[i * 2 + 1]); + if (resize_y < resize_x) + snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n", + resize_y, resize_y); + else + snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n", + resize_x, resize_x); strcat(dictionary, buf); - strcat(dictionary, "1 0 0 -1 0 0 cm\n"); - } + /* Apply transformation matrix */ + if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1) { + snprintf(buf, 64, "1 0 0 1 0 %d cm\n", + dim[i * 2 + 1]); + strcat(dictionary, buf); - snprintf(buf, 64, "%d 0 0 %d 0 0 cm\n", - dim[i * 2], dim[i * 2 + 1]); - strcat(dictionary, buf); + strcat(dictionary, "1 0 0 -1 0 0 cm\n"); + } - snprintf(buf, 64, "/Im%d Do\n", i); - strcat(dictionary, buf); - } + snprintf(buf, 64, "%d 0 0 %d 0 0 cm\n", + dim[i * 2], dim[i * 2 + 1]); + strcat(dictionary, buf); - strcat(dictionary, "Q"); + snprintf(buf, 64, "/Im%d Do\n", i); + strcat(dictionary, buf); + } - if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0) { - free(root_kid); - free(ids); - free(dim); - free(dictionary); - return 1; - } + strcat(dictionary, "Q"); - memset(dictionary, 0, dictionary_size); + if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0) { + free(root_kid); + free(ids); + free(dim); + free(dictionary); + return 1; + } - strcat(dictionary, "<<\n"); + memset(dictionary, 0, dictionary_size); - snprintf(buf, 64, "/Length %d\n", stream_size); - strcat(dictionary, buf); + strcat(dictionary, "<<\n"); - strcat(dictionary, "/Filter /FlateDecode\n"); + snprintf(buf, 64, "/Length %d\n", stream_size); + strcat(dictionary, buf); - strcat(dictionary, ">>"); + strcat(dictionary, "/Filter /FlateDecode\n"); + + strcat(dictionary, ">>"); - pdf_obj_append(&pdf, ids[ptr->image_length + 1], - NULL, dictionary, stream, stream_size); + pdf_obj_append(&pdf, ids[ptr->image_length + 1], + NULL, dictionary, stream, stream_size); - free(stream); + free(stream); + } memset(dictionary, 0, dictionary_size); strcat(dictionary, "<<\n/Type /Page\n"); - snprintf(buf, 64, "/Resources %d 0 R\n", ids[ptr->image_length]); - strcat(dictionary, buf); - - snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length + 1]); - strcat(dictionary, buf); - /* A4 paper */ strcat(dictionary, "/MediaBox [0 0 595.276 841.89]\n"); - /* Add /Parent when we know root */ - pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0); + if (ptr->image_length > 0) { + free(dim); - free(dictionary); + snprintf(buf, 64, "/Resources %d 0 R\n", ids[ptr->image_length]); + strcat(dictionary, buf); + + snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length + 1]); + strcat(dictionary, buf); + + /* Add /Parent when we know root */ + pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0); - root_kid[cnt++] = ids[ptr->image_length + 2]; + root_kid[cnt++] = ids[ptr->image_length + 2]; + } else { + snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length]); + strcat(dictionary, buf); + + /* Add /Parent when we know root */ + pdf_obj_append(&pdf, ids[ptr->image_length + 1], NULL, dictionary, NULL, 0); + + root_kid[cnt++] = ids[ptr->image_length + 1]; + } + + free(dictionary); free(ids); - free(dim); ptr = ptr->next; } -- cgit v1.2.3