From 9c1f1d0b75de0d2ed299842d3025941f3e681c16 Mon Sep 17 00:00:00 2001 From: yzrh Date: Thu, 22 Dec 2022 19:47:40 +0000 Subject: Fix HN conversion and add JBIG2 support. Signed-off-by: yzrh --- src/cnki_pdf.c | 147 +++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 107 insertions(+), 40 deletions(-) (limited to 'src/cnki_pdf.c') diff --git a/src/cnki_pdf.c b/src/cnki_pdf.c index dcf6d30..b59b7c6 100644 --- a/src/cnki_pdf.c +++ b/src/cnki_pdf.c @@ -238,7 +238,7 @@ cnki_pdf(cnki_t **param) if ((*param)->stat > 1) printf("Generating object\n"); - dictionary_size = 64 + 12 * kid[0]; + dictionary_size = 64 + 16 * kid[0]; dictionary = malloc(dictionary_size); if (dictionary == NULL) { @@ -483,6 +483,9 @@ cnki_pdf_hn(cnki_t **param) int *ids = NULL; pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3); + int bitmap_size; + char *bitmap; + int stream_size; char *stream; @@ -493,32 +496,34 @@ cnki_pdf_hn(cnki_t **param) if (dim == NULL) { free(root_kid); + free(ids); return 1; } - for (int i = 0; i < ptr->image_length; i++) { - dictionary_size = 128; - dictionary = malloc(dictionary_size); + dictionary_size = 256; + dictionary = malloc(dictionary_size); - if (dictionary == NULL) { - free(root_kid); - free(dim); - return 1; - } + if (dictionary == NULL) { + free(root_kid); + free(ids); + free(dim); + return 1; + } + for (int i = 0; i < ptr->image_length; i++) { memset(dictionary, 0, dictionary_size); strcat(dictionary, "<<\n/Type /XObject\n" "/Subtype /Image\n"); if ((*param)->stat > 2) - printf("\tDecoding data, page %04d item %02d... ", - ptr->page, i); + printf("\tDecoding data, page %04d item %02d format %d... ", + ptr->page, i, ptr->image_data[i].format); switch (ptr->image_data[i].format) { case JBIG: - ret = cnki_jbig(&stream, - &stream_size, + ret = cnki_jbig(&bitmap, + &bitmap_size, &wh[0], &wh[1], ptr->image_data[i].image, @@ -530,18 +535,30 @@ cnki_pdf_hn(cnki_t **param) break; } + if (strdeflate(&stream, &stream_size, + bitmap, bitmap_size) != 0) { + free(root_kid); + free(ids); + free(dim); + free(dictionary); + return 1; + } + + free(bitmap); + snprintf(buf, 64, "/Width %d\n/Height %d\n", wh[0], wh[1]); strcat(dictionary, buf); strcat(dictionary, "/ColorSpace /DeviceGray\n" "/BitsPerComponent 1\n"); + strcat(dictionary, "/Decode [1.0 0.0]\n"); snprintf(buf, 64, "/Length %d\n", stream_size); strcat(dictionary, buf); - strcat(dictionary, "/Filter /CCITTFaxDecode\n"); + strcat(dictionary, "/Filter /FlateDecode\n"); dim[i * 2] = wh[0]; dim[i * 2 + 1] = wh[1]; @@ -562,9 +579,10 @@ cnki_pdf_hn(cnki_t **param) stream_size = ptr->image_data[i].size; stream = malloc(stream_size); if (stream == NULL) { - free(dictionary); free(root_kid); + free(ids); free(dim); + free(dictionary); return 1; } memcpy(stream, ptr->image_data[i].image, stream_size); @@ -573,7 +591,7 @@ cnki_pdf_hn(cnki_t **param) wh[0], wh[1]); strcat(dictionary, buf); - strcat(dictionary, "/ColorSpace /DeviceRGB\n" + strcat(dictionary, "/ColorSpace /DeviceGray\n" "/BitsPerComponent 8\n"); snprintf(buf, 64, "/Length %d\n", @@ -586,6 +604,47 @@ cnki_pdf_hn(cnki_t **param) dim[i * 2 + 1] = wh[1]; break; case JBIG2: + ret = cnki_jbig2(&bitmap, + &bitmap_size, + &wh[0], + &wh[1], + ptr->image_data[i].image, + ptr->image_data[i].size); + + if (ret != 0) { + dim[i * 2] = 0; + dim[i * 2 + 1] = 0; + break; + } + + if (strdeflate(&stream, &stream_size, + bitmap, bitmap_size) != 0) { + free(root_kid); + free(ids); + free(dim); + free(dictionary); + return 1; + } + + free(bitmap); + + snprintf(buf, 64, "/Width %d\n/Height %d\n", + wh[0], wh[1]); + strcat(dictionary, buf); + + strcat(dictionary, "/ColorSpace /DeviceGray\n" + "/BitsPerComponent 1\n"); + strcat(dictionary, "/Decode [1.0 0.0]\n"); + + snprintf(buf, 64, "/Length %d\n", + stream_size); + strcat(dictionary, buf); + + strcat(dictionary, "/Filter /FlateDecode\n"); + + dim[i * 2] = wh[0]; + dim[i * 2 + 1] = wh[1]; + break; case JPX: default: ret = -1; @@ -598,37 +657,26 @@ cnki_pdf_hn(cnki_t **param) if (ret == 0) { if ((*param)->stat > 2) - printf("Done\n"); + printf("%6d byte(s), width %4d, height %4d.\n", + stream_size, wh[0], wh[1]); pdf_obj_append(&pdf, ids[i], NULL, dictionary, stream, stream_size); - free(dictionary); free(stream); } else if (ret == 1) { if ((*param)->stat > 2) - printf("Failed\n"); - - free(dictionary); + printf("Not extracted.\n"); pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0); } else { if ((*param)->stat > 2) - printf("Unsupported format\n"); + printf("Unsupported format.\n"); - free(dictionary); + pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0); } } - dictionary_size = 128; - dictionary = malloc(dictionary_size); - - if (dictionary == NULL) { - free(root_kid); - free(dim); - return 1; - } - memset(dictionary, 0, dictionary_size); strcat(dictionary, "<<\n/XObject <<"); @@ -655,11 +703,12 @@ cnki_pdf_hn(cnki_t **param) if (strncmp(ptr->text + 8, "COMPRESSTEXT", 12) == 0) { cnki_zlib(&stream, &stream_size, ptr->text, ptr->text_size); - dictionary_size = stream_size / 8 + 7; + dictionary_size = 64 + 2 * stream_size; dictionary = malloc(dictionary_size); if (dictionary == NULL) { free(root_kid); + free(ids); free(dim); return 1; } @@ -688,11 +737,12 @@ cnki_pdf_hn(cnki_t **param) strcat(dictionary, ">"); } else { - dictionary_size = ptr->text_size; + dictionary_size = 64 + 2 * ptr->text_size; dictionary = malloc(dictionary_size); if (dictionary == NULL) { free(root_kid); + free(ids); free(dim); return 1; } @@ -724,11 +774,12 @@ cnki_pdf_hn(cnki_t **param) /* FIXME: Use the text somehow? */ free(dictionary); - dictionary_size = 64 + 12 * ptr->image_length; + dictionary_size = 64 + 64 * ptr->image_length; dictionary = malloc(dictionary_size); if (dictionary == NULL) { free(root_kid); + free(ids); free(dim); return 1; } @@ -739,12 +790,27 @@ cnki_pdf_hn(cnki_t **param) strcat(dictionary, "0.25 0 0 0.25 0 0 cm\n"); + double resize_x; + double resize_y; + for (int i = 0; i < ptr->image_length; i++) { if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0) continue; + /* Scale within bound of A4 paper */ + resize_x = 595.276 * 4 / dim[i * 2]; + resize_y = 841.89 * 4 / dim[i * 2 + 1]; + + if (resize_y < resize_x) + snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n", + resize_y, resize_y); + else + snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n", + resize_x, resize_x); + strcat(dictionary, buf); + /* Apply transformation matrix */ - if (ptr->image_data[i].format == DCT_1) { + if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1) { snprintf(buf, 64, "1 0 0 1 0 %d cm\n", dim[i * 2 + 1]); strcat(dictionary, buf); @@ -763,9 +829,10 @@ cnki_pdf_hn(cnki_t **param) strcat(dictionary, "Q"); if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0) { - free(dictionary); free(root_kid); + free(ids); free(dim); + free(dictionary); return 1; } @@ -796,7 +863,7 @@ cnki_pdf_hn(cnki_t **param) strcat(dictionary, buf); /* A4 paper */ - strcat(dictionary, "/MediaBox [ 0 0 595.276 841.89 ]\n"); + strcat(dictionary, "/MediaBox [0 0 595.276 841.89]\n"); /* Add /Parent when we know root */ pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0); @@ -838,7 +905,7 @@ cnki_pdf_hn(cnki_t **param) if ((*param)->stat > 1) printf("Generating root object\n"); - dictionary_size = 64 + 12 * (*param)->file_stat->page; + dictionary_size = 64 + 64 * (*param)->file_stat->page; dictionary = malloc(dictionary_size); if (dictionary == NULL) { @@ -877,7 +944,7 @@ cnki_pdf_hn(cnki_t **param) free(dictionary); - dictionary_size = 128; + dictionary_size = 256; dictionary = malloc(dictionary_size); if (dictionary == NULL) { -- cgit v1.2.3