diff options
author | yzrh <yzrh@noema.org> | 2022-12-22 19:47:40 +0000 |
---|---|---|
committer | yzrh <yzrh@noema.org> | 2022-12-24 23:29:56 +0000 |
commit | 9c1f1d0b75de0d2ed299842d3025941f3e681c16 (patch) | |
tree | 302f6d4a2235acfe8872a3c0c1c216fecc323b95 | |
parent | ac3b1dda63944f2cc8caaa52344774255e1956c8 (diff) | |
download | melon-9c1f1d0b75de0d2ed299842d3025941f3e681c16.tar.gz melon-9c1f1d0b75de0d2ed299842d3025941f3e681c16.tar.zst |
Fix HN conversion and add JBIG2 support.
Signed-off-by: yzrh <yzrh@noema.org>
-rw-r--r-- | CHANGE.md | 8 | ||||
-rw-r--r-- | README.md | 8 | ||||
-rw-r--r-- | src/Makefile | 10 | ||||
-rw-r--r-- | src/cnki.h | 8 | ||||
-rw-r--r-- | src/cnki_jbig.c | 62 | ||||
-rw-r--r-- | src/cnki_jbig.h | 39 | ||||
-rw-r--r-- | src/cnki_jbig2.c | 43 | ||||
-rw-r--r-- | src/cnki_pdf.c | 147 | ||||
-rw-r--r-- | src/cnki_xml.c | 14 | ||||
-rw-r--r-- | src/cnki_zlib.c | 4 | ||||
-rw-r--r-- | src/jbig.c | 304 | ||||
-rw-r--r-- | src/jbig.h | 6 | ||||
-rw-r--r-- | src/jbig2.c | 35 | ||||
-rw-r--r-- | src/jbig2.h | 7 | ||||
-rw-r--r-- | src/melon.c | 4 | ||||
-rw-r--r-- | src/pdf.c | 2 | ||||
-rw-r--r-- | src/pdf_cnki.c | 2 | ||||
-rw-r--r-- | src/pdf_get.c | 2 | ||||
-rw-r--r-- | src/version.h | 4 |
19 files changed, 517 insertions, 192 deletions
@@ -1,10 +1,16 @@ -0.2.0 (2021-XX-XX) +0.3.0 (2022-XX-XX) +================== + +* Support JPEG 2000 for HN. + +0.2.0 (2022-12-22) ================== * KDH conversion now produces a valid PDF * Handle binary data in dictionary. * Add preliminary support for HN * Fix root object dictionary generation when root object has more than two children. +* Fix memory leak and data type. 0.1.0 (2020-04-08) ================== @@ -6,10 +6,10 @@ Melon: Converter that produces PDF from CNKI proprietary formats Development ----------- -Currently, CAJ and KDH can be converted. Please report +Currently, CAJ, KDH, and HN can be converted. Please report any failures with a sample that can reproduce the behaviour. -HN support is being worked on. +HN support does not support JPEG 2000 yet. Dependency ---------- @@ -17,7 +17,7 @@ Dependency 1. OpenSSL 2. libiconv 3. zlib -4. JBIG-KIT +4. jbig2dec 5. libjpeg-turbo Usage @@ -39,7 +39,7 @@ Specify output file Set buffer size (default 512k) -v, --verbose -Print more information (twice for even more) +Print more information (twice for even more, three times for HN image decoding information as well) Thanks ====== diff --git a/src/Makefile b/src/Makefile index 7016c6c..6943af3 100644 --- a/src/Makefile +++ b/src/Makefile @@ -4,19 +4,19 @@ # SPDX-License-Identifier: Apache-2.0 # -src = melon.c iconv.c zlib.c jbig.c jpeg.c \ +src = melon.c iconv.c zlib.c jbig.c jbig2.c jpeg.c \ cnki_caj.c cnki_hn.c cnki_kdh.c cnki_outline_tree.c \ - cnki_pdf.c cnki_xml.c cnki_zlib.c cnki_jbig.c cnki.c \ + cnki_pdf.c cnki_zlib.c cnki_jbig.c cnki_jbig2.c cnki.c \ pdf_cnki.c pdf_get.c pdf_parser.c pdf_writer.c pdf.c -inc = extern.h version.h iconv.h zlib.h jbig.h jpeg.h \ +inc = extern.h version.h iconv.h zlib.h jbig.h jbig2.h jpeg.h \ cnki.h pdf_cnki.h cnki_jbig.h pdf.h obj = ${src:.c=.o} PREFIX = /usr/local -CFLAGS = -O2 -pipe -flto -Wall -Wextra -Wno-unused-parameter -LDFLAGS = -Wl,-O2 -lcrypto -liconv -lz -ljbig -ljpeg -Wl,--as-needed +CFLAGS = -O2 -pipe -flto 
-Wall -Wextra +LDFLAGS = -Wl,-O2 -lcrypto -liconv -lz -ljbig2dec -ljpeg -Wl,--as-needed CFLAGS += -I/usr/local/include LDFLAGS += -L/usr/local/lib @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, yzrh <yzrh@noema.org> + * Copyright (c) 2020-2022, yzrh <yzrh@noema.org> * * SPDX-License-Identifier: Apache-2.0 */ @@ -97,5 +97,7 @@ int cnki_jbig(char **bitmap, int *bitmap_size, int *bitmap_width, int *bitmap_height, const char * restrict jbig, int jbig_size); -/* cnki_xml.c */ -int cnki_xml(char **xml, FILE **fp); +/* cnki_jbig2.c */ +int cnki_jbig2(char **bitmap, int *bitmap_size, + int *bitmap_width, int *bitmap_height, + const char * restrict jbig, int jbig_size); diff --git a/src/cnki_jbig.c b/src/cnki_jbig.c index 02040be..f35d1d5 100644 --- a/src/cnki_jbig.c +++ b/src/cnki_jbig.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, yzrh <yzrh@noema.org> + * Copyright (c) 2020-2022, yzrh <yzrh@noema.org> * * SPDX-License-Identifier: Apache-2.0 */ @@ -22,68 +22,22 @@ cnki_jbig(char **bitmap, int *bitmap_size, memcpy(dib, jbig, 40); - bih_t *bih = malloc(sizeof(bih_t)); + int width_padded = (dib->width * dib->depth + 7) / 8; - if (bih == NULL) { - free(dib); - return 1; - } - - memset(bih, 0, sizeof(bih_t)); - - bih->d_l = 0; - bih->d = 0; - - bih->p = 1; - - bih->fill = 0; - - bih->x_d = dib->width; - bih->y_d = dib->height; - bih->l_0 = bih->y_d / 35; - - while (bih->l_0 > 128) - bih->l_0--; - if (bih->l_0 < 2) - bih->l_0 = 2; + *bitmap_size = dib->height * width_padded; + *bitmap = malloc(*bitmap_size); - bih->m_x = 8; - bih->m_y = 0; - - bih->order |= 1 << 1; - bih->order |= 1 << 0; - - bih->options |= 1 << 4; - bih->options |= 1 << 3; - bih->options |= 1 << 2; - - bih->dptable = NULL; - - int bie_size = jbig_size - 28; /* - 40 - 8 + 20 */ - char *bie = malloc(bie_size); - - if (bie == NULL) { + if (*bitmap == NULL) { free(dib); - free(bih); return 1; } - memcpy(bie, bih, 20); - memcpy(bie + 20, jbig + 48, jbig_size - 48); + strdec_jbig(bitmap, dib->width, 
dib->height, jbig + 48, jbig_size - 48); - int ret = strdec_jbig(bitmap, bitmap_size, bie, bie_size); - - if (ret == 0) { - *bitmap_width = bih->x_d; - *bitmap_height = bih->y_d; - } + *bitmap_width = dib->width; + *bitmap_height = dib->height; free(dib); - free(bih); - free(bie); - - if (ret != 0) - return 1; return 0; } diff --git a/src/cnki_jbig.h b/src/cnki_jbig.h index 96e4ea8..701b4df 100644 --- a/src/cnki_jbig.h +++ b/src/cnki_jbig.h @@ -1,48 +1,11 @@ /* - * Copyright (c) 2020-2021, yzrh <yzrh@noema.org> + * Copyright (c) 2020-2022, yzrh <yzrh@noema.org> * * SPDX-License-Identifier: Apache-2.0 */ #include <stdint.h> -/* - * order (MSB first): - * 0 - * 0 - * 0 - * 0 - * HITOLO - * SEQ - * ILEAVE (default) - * SMID (default) - * - * options (MSB first): - * 0 - * LRLTWO - * VLENGTH - * TPDON (default) - * TPBON (default) - * DPON (default) - * DPPRIV - * DPLAST - */ -typedef struct _bih_t { - char d_l; /* Initial resolution layer */ - char d; /* Final resolution layer */ - char p; /* Number of bit-planes, for bi-level image, always 1 */ - char fill; /* Always 0 */ - /* MSB first */ - int32_t x_d; /* Horizontal dimension at highestresolution */ - int32_t y_d; /* Vertical dimension at highest resolution */ - int32_t l_0; /* Number of lines per stripe at lowest resolution */ - char m_x; /* Maximum horizontal offsets (default: 8) */ - char m_y; /* Maximum vertical offsets (default: 0) */ - char order; - char options; - char *dptable; /* 0 or 1728 */ -} bih_t; - typedef enum _dib_compression_code { BI_RGB, BI_RLE8, diff --git a/src/cnki_jbig2.c b/src/cnki_jbig2.c new file mode 100644 index 0000000..69f4a5b --- /dev/null +++ b/src/cnki_jbig2.c @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2022, yzrh <yzrh@noema.org> + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include <stdlib.h> +#include <string.h> + +#include "cnki_jbig.h" +#include "jbig2.h" + +int +cnki_jbig2(char **bitmap, int *bitmap_size, + int *bitmap_width, int *bitmap_height, + const char * restrict 
jbig, int jbig_size) +{ + dib_t *dib = malloc(sizeof(dib_t)); + + if (dib == NULL) + return 1; + + memcpy(dib, jbig, 40); + + int width_padded = (dib->width * dib->depth + 7) / 8; + + *bitmap_size = dib->height * width_padded; + *bitmap = malloc(*bitmap_size); + + if (*bitmap == NULL) { + free(dib); + return 1; + } + + strdec_jbig2(bitmap, jbig + 48, jbig_size - 48); + + *bitmap_width = dib->width; + *bitmap_height = dib->height; + + free(dib); + + return 0; +} diff --git a/src/cnki_pdf.c b/src/cnki_pdf.c index dcf6d30..b59b7c6 100644 --- a/src/cnki_pdf.c +++ b/src/cnki_pdf.c @@ -238,7 +238,7 @@ cnki_pdf(cnki_t **param) if ((*param)->stat > 1) printf("Generating object\n"); - dictionary_size = 64 + 12 * kid[0]; + dictionary_size = 64 + 16 * kid[0]; dictionary = malloc(dictionary_size); if (dictionary == NULL) { @@ -483,6 +483,9 @@ cnki_pdf_hn(cnki_t **param) int *ids = NULL; pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3); + int bitmap_size; + char *bitmap; + int stream_size; char *stream; @@ -493,32 +496,34 @@ cnki_pdf_hn(cnki_t **param) if (dim == NULL) { free(root_kid); + free(ids); return 1; } - for (int i = 0; i < ptr->image_length; i++) { - dictionary_size = 128; - dictionary = malloc(dictionary_size); + dictionary_size = 256; + dictionary = malloc(dictionary_size); - if (dictionary == NULL) { - free(root_kid); - free(dim); - return 1; - } + if (dictionary == NULL) { + free(root_kid); + free(ids); + free(dim); + return 1; + } + for (int i = 0; i < ptr->image_length; i++) { memset(dictionary, 0, dictionary_size); strcat(dictionary, "<<\n/Type /XObject\n" "/Subtype /Image\n"); if ((*param)->stat > 2) - printf("\tDecoding data, page %04d item %02d... ", - ptr->page, i); + printf("\tDecoding data, page %04d item %02d format %d... 
", + ptr->page, i, ptr->image_data[i].format); switch (ptr->image_data[i].format) { case JBIG: - ret = cnki_jbig(&stream, - &stream_size, + ret = cnki_jbig(&bitmap, + &bitmap_size, &wh[0], &wh[1], ptr->image_data[i].image, @@ -530,18 +535,30 @@ cnki_pdf_hn(cnki_t **param) break; } + if (strdeflate(&stream, &stream_size, + bitmap, bitmap_size) != 0) { + free(root_kid); + free(ids); + free(dim); + free(dictionary); + return 1; + } + + free(bitmap); + snprintf(buf, 64, "/Width %d\n/Height %d\n", wh[0], wh[1]); strcat(dictionary, buf); strcat(dictionary, "/ColorSpace /DeviceGray\n" "/BitsPerComponent 1\n"); + strcat(dictionary, "/Decode [1.0 0.0]\n"); snprintf(buf, 64, "/Length %d\n", stream_size); strcat(dictionary, buf); - strcat(dictionary, "/Filter /CCITTFaxDecode\n"); + strcat(dictionary, "/Filter /FlateDecode\n"); dim[i * 2] = wh[0]; dim[i * 2 + 1] = wh[1]; @@ -562,9 +579,10 @@ cnki_pdf_hn(cnki_t **param) stream_size = ptr->image_data[i].size; stream = malloc(stream_size); if (stream == NULL) { - free(dictionary); free(root_kid); + free(ids); free(dim); + free(dictionary); return 1; } memcpy(stream, ptr->image_data[i].image, stream_size); @@ -573,7 +591,7 @@ cnki_pdf_hn(cnki_t **param) wh[0], wh[1]); strcat(dictionary, buf); - strcat(dictionary, "/ColorSpace /DeviceRGB\n" + strcat(dictionary, "/ColorSpace /DeviceGray\n" "/BitsPerComponent 8\n"); snprintf(buf, 64, "/Length %d\n", @@ -586,6 +604,47 @@ cnki_pdf_hn(cnki_t **param) dim[i * 2 + 1] = wh[1]; break; case JBIG2: + ret = cnki_jbig2(&bitmap, + &bitmap_size, + &wh[0], + &wh[1], + ptr->image_data[i].image, + ptr->image_data[i].size); + + if (ret != 0) { + dim[i * 2] = 0; + dim[i * 2 + 1] = 0; + break; + } + + if (strdeflate(&stream, &stream_size, + bitmap, bitmap_size) != 0) { + free(root_kid); + free(ids); + free(dim); + free(dictionary); + return 1; + } + + free(bitmap); + + snprintf(buf, 64, "/Width %d\n/Height %d\n", + wh[0], wh[1]); + strcat(dictionary, buf); + + strcat(dictionary, "/ColorSpace 
/DeviceGray\n" + "/BitsPerComponent 1\n"); + strcat(dictionary, "/Decode [1.0 0.0]\n"); + + snprintf(buf, 64, "/Length %d\n", + stream_size); + strcat(dictionary, buf); + + strcat(dictionary, "/Filter /FlateDecode\n"); + + dim[i * 2] = wh[0]; + dim[i * 2 + 1] = wh[1]; + break; case JPX: default: ret = -1; @@ -598,37 +657,26 @@ cnki_pdf_hn(cnki_t **param) if (ret == 0) { if ((*param)->stat > 2) - printf("Done\n"); + printf("%6d byte(s), width %4d, height %4d.\n", + stream_size, wh[0], wh[1]); pdf_obj_append(&pdf, ids[i], NULL, dictionary, stream, stream_size); - free(dictionary); free(stream); } else if (ret == 1) { if ((*param)->stat > 2) - printf("Failed\n"); - - free(dictionary); + printf("Not extracted.\n"); pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0); } else { if ((*param)->stat > 2) - printf("Unsupported format\n"); + printf("Unsupported format.\n"); - free(dictionary); + pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0); } } - dictionary_size = 128; - dictionary = malloc(dictionary_size); - - if (dictionary == NULL) { - free(root_kid); - free(dim); - return 1; - } - memset(dictionary, 0, dictionary_size); strcat(dictionary, "<<\n/XObject <<"); @@ -655,11 +703,12 @@ cnki_pdf_hn(cnki_t **param) if (strncmp(ptr->text + 8, "COMPRESSTEXT", 12) == 0) { cnki_zlib(&stream, &stream_size, ptr->text, ptr->text_size); - dictionary_size = stream_size / 8 + 7; + dictionary_size = 64 + 2 * stream_size; dictionary = malloc(dictionary_size); if (dictionary == NULL) { free(root_kid); + free(ids); free(dim); return 1; } @@ -688,11 +737,12 @@ cnki_pdf_hn(cnki_t **param) strcat(dictionary, ">"); } else { - dictionary_size = ptr->text_size; + dictionary_size = 64 + 2 * ptr->text_size; dictionary = malloc(dictionary_size); if (dictionary == NULL) { free(root_kid); + free(ids); free(dim); return 1; } @@ -724,11 +774,12 @@ cnki_pdf_hn(cnki_t **param) /* FIXME: Use the text somehow? 
*/ free(dictionary); - dictionary_size = 64 + 12 * ptr->image_length; + dictionary_size = 64 + 64 * ptr->image_length; dictionary = malloc(dictionary_size); if (dictionary == NULL) { free(root_kid); + free(ids); free(dim); return 1; } @@ -739,12 +790,27 @@ cnki_pdf_hn(cnki_t **param) strcat(dictionary, "0.25 0 0 0.25 0 0 cm\n"); + double resize_x; + double resize_y; + for (int i = 0; i < ptr->image_length; i++) { if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0) continue; + /* Scale within bound of A4 paper */ + resize_x = 595.276 * 4 / dim[i * 2]; + resize_y = 841.89 * 4 / dim[i * 2 + 1]; + + if (resize_y < resize_x) + snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n", + resize_y, resize_y); + else + snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n", + resize_x, resize_x); + strcat(dictionary, buf); + /* Apply transformation matrix */ - if (ptr->image_data[i].format == DCT_1) { + if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1) { snprintf(buf, 64, "1 0 0 1 0 %d cm\n", dim[i * 2 + 1]); strcat(dictionary, buf); @@ -763,9 +829,10 @@ cnki_pdf_hn(cnki_t **param) strcat(dictionary, "Q"); if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0) { - free(dictionary); free(root_kid); + free(ids); free(dim); + free(dictionary); return 1; } @@ -796,7 +863,7 @@ cnki_pdf_hn(cnki_t **param) strcat(dictionary, buf); /* A4 paper */ - strcat(dictionary, "/MediaBox [ 0 0 595.276 841.89 ]\n"); + strcat(dictionary, "/MediaBox [0 0 595.276 841.89]\n"); /* Add /Parent when we know root */ pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0); @@ -838,7 +905,7 @@ cnki_pdf_hn(cnki_t **param) if ((*param)->stat > 1) printf("Generating root object\n"); - dictionary_size = 64 + 12 * (*param)->file_stat->page; + dictionary_size = 64 + 64 * (*param)->file_stat->page; dictionary = malloc(dictionary_size); if (dictionary == NULL) { @@ -877,7 +944,7 @@ cnki_pdf_hn(cnki_t **param) free(dictionary); - dictionary_size = 128; + dictionary_size = 256; 
dictionary = malloc(dictionary_size); if (dictionary == NULL) { diff --git a/src/cnki_xml.c b/src/cnki_xml.c deleted file mode 100644 index 7f870d1..0000000 --- a/src/cnki_xml.c +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright (c) 2020-2021, yzrh <yzrh@noema.org> - * - * SPDX-License-Identifier: Apache-2.0 - */ - -#include <stdio.h> - -int -cnki_xml(char **xml, FILE **fp) -{ - /* TODO: Extract XML and embed into `/Metadata' */ - return 1; -} diff --git a/src/cnki_zlib.c b/src/cnki_zlib.c index 7731036..edff141 100644 --- a/src/cnki_zlib.c +++ b/src/cnki_zlib.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, yzrh <yzrh@noema.org> + * Copyright (c) 2020-2022, yzrh <yzrh@noema.org> * * SPDX-License-Identifier: Apache-2.0 */ @@ -18,7 +18,7 @@ cnki_zlib(char **dst, int *dst_size, *dst_size = size; - if (strinflate(dst, size, src + 24, size - 24) != 0) + if (strinflate(dst, size, src + 24, src_size - 24) != 0) return 1; return 0; @@ -1,41 +1,303 @@ /* - * Copyright (c) 2020-2022, yzrh <yzrh@noema.org> + * Copyright (c) 2022, yzrh <yzrh@noema.org> * * SPDX-License-Identifier: Apache-2.0 */ -#include <stdio.h> /* FIXME: test */ +#include <stdbool.h> +#include <stdint.h> #include <stdlib.h> #include <string.h> -#include <jbig.h> +static const uint16_t _LSZ[256] = { + 0x5a1d, + 0x2586, 0x1114, 0x080b, 0x03d8, 0x01da, 0x00e5, 0x006f, 0x0036, + 0x001a, 0x000d, 0x0006, 0x0003, 0x0001, 0x5a7f, 0x3f25, 0x2cf2, + 0x207c, 0x17b9, 0x1182, 0x0cef, 0x09a1, 0x072f, 0x055c, 0x0406, + 0x0303, 0x0240, 0x01b1, 0x0144, 0x00f5, 0x00b7, 0x008a, 0x0068, + 0x004e, 0x003b, 0x002c, 0x5ae1, 0x484c, 0x3a0d, 0x2ef1, 0x261f, + 0x1f33, 0x19a8, 0x1518, 0x1177, 0x0e74, 0x0bfb, 0x09f8, 0x0861, + 0x0706, 0x05cd, 0x04de, 0x040f, 0x0363, 0x02d4, 0x025c, 0x01f8, -int -strdec_jbig(char **bitmap, int *bitmap_size, - const char * restrict data, int data_size) + 0x01a4, 0x0160, 0x0125, 0x00f6, 0x00cb, 0x00ab, 0x008f, 0x5b12, + 0x4d04, 0x412c, 0x37d8, 0x2fe8, 0x293c, 0x2379, 0x1edf, 0x1aa9, + 0x174e, 0x1424, 
0x119c, 0x0f6b, 0x0d51, 0x0bb6, 0x0a40, 0x5832, + 0x4d1c, 0x438e, 0x3bdd, 0x34ee, 0x2eae, 0x299a, 0x2516, 0x5570, + 0x4ca9, 0x44d9, 0x3e22, 0x3824, 0x32b4, 0x2e17, 0x56a8, 0x4f46, + 0x47e5, 0x41cf, 0x3c3d, 0x375e, 0x5231, 0x4c0f, 0x4639, 0x415e, + 0x5627, 0x50e7, 0x4b85, 0x5597, 0x504f, 0x5a10, 0x5522, 0x59eb +}; + +static const uint8_t _NLPS[256] = { + 1, + 14, 16, 18, 20, 23, 25, 28, 30, + 33, 35, 9, 10, 12, 15, 36, 38, + 39, 40, 42, 43, 45, 46, 48, 49, + 51, 52, 54, 56, 57, 59, 60, 62, + 63, 32, 33, 37, 64, 65, 67, 68, + 69, 70, 72, 73, 74, 75, 77, 78, + 79, 48, 50, 50, 51, 52, 53, 54, + + 55, 56, 57, 58, 59, 61, 61, 65, + 80, 81, 82, 83, 84, 86, 87, 87, + 72, 72, 74, 74, 75, 77, 77, 80, + 88, 89, 90, 91, 92, 93, 86, 88, + 95, 96, 97, 99, 99, 93, 95, 101, + 102, 103, 104, 99, 105, 106, 107, 103, + 105, 108, 109, 110, 111, 110, 112, 112 +}; + +static const uint8_t _NMPS[256] = { + 1, + 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 13, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 33, + 34, 35, 9, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, + + 58, 59, 60, 61, 62, 63, 32, 65, + 66, 67, 68, 69, 70, 71, 72, 73, + 74, 75, 76, 77, 78, 79, 48, 81, + 82, 83, 84, 85, 86, 87, 71, 89, + 90, 91, 92, 93, 94, 86, 96, 97, + 98, 99, 100, 93, 102, 103, 104, 99, + 106, 107, 103, 109, 107, 111, 109, 111 +}; + +static const bool _SWTCH[256] = { + 1, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 1, 0, 1 +}; + +static uint8_t _ct; +static uint8_t _pix; + +static uint16_t _reg_a; +static uint32_t _reg_c; +static uint8_t _mps[0x1000]; +static uint8_t _st[0x1000]; + +static int _width; +static int _height; + 
+static int _width_padded; + +static int _ret_pos; +static char *_ret; + +static int _scd_size; +static unsigned char *_scd; + +static void +_bytein(void) { - struct jbg_dec_state sd; + if (_ret_pos < _scd_size) + _reg_c += *(_scd + _ret_pos++) << 8; - jbg_dec_init(&sd); + _ct = 8; +} - unsigned char *data_ptr[1] = {(unsigned char *) data}; +static void +_initdec(void) +{ + memset(_mps, 0, 0x1000); + memset(_st, 0, 0x1000); - /* FIXME: test */ - int ret; - if ((ret = jbg_dec_in(&sd, (unsigned char *) data_ptr, - data_size, NULL)) != JBG_EOK) { - printf("[%s] ", jbg_strerror(ret)); - jbg_dec_free(&sd); - return 1; + _reg_c = 0; + _bytein(); + _reg_c <<= 8; + _bytein(); + _reg_c <<= 8; + _bytein(); + _reg_a = 0x0000; +} + +static void +_exchange_lps(uint16_t cx) +{ + uint8_t st_cx = _st[cx]; + uint16_t lsz_st_cx = _LSZ[_st[cx]]; + + if (_reg_a < lsz_st_cx) { + _pix = _mps[cx]; + _st[cx] = _NMPS[st_cx]; + } else { + _pix = 1 - _mps[cx]; + + if (_SWTCH[st_cx]) + _mps[cx] = _pix; + + _st[cx] = _NLPS[st_cx]; + } + + _reg_c -= _reg_a << 16; + _reg_a = lsz_st_cx; +} + +static void +_exchange_mps(uint16_t cx) +{ + uint8_t st_cx = _st[cx]; + uint16_t lsz_st_cx = _LSZ[_st[cx]]; + + if (_reg_a < lsz_st_cx) { + _pix = 1 - _mps[cx]; + + if (_SWTCH[st_cx]) + _mps[cx] = _pix; + + _st[cx] = _NLPS[st_cx]; + } else { + _pix = _mps[cx]; + _st[cx] = _NMPS[st_cx]; + } +} + +static void +_renormd(void) +{ + do { + if (_ct == 0) + _bytein(); + + _reg_a <<= 1; + _reg_c <<= 1; + _ct--; + } while (_reg_a < 0x8000); + + if (_ct == 0) + _bytein(); +} + +static void +_decode(uint16_t cx) +{ + _reg_a -= _LSZ[_st[cx]]; + + if (_reg_a > _reg_c >> 16) { + if (_reg_a < 0x8000) { + _exchange_mps(cx); + _renormd(); + } else { + _pix = _mps[cx]; + } + } else { + _exchange_lps(cx); + _renormd(); + } +} + +static void +_procline(int line, char *a, char *b, char *c) +{ + /* The encoder must be erroneous */ + uint16_t cx = (*b & 0x01) << 2; + + for (int i = 0; i < _width; i++) { + _decode(cx); + + cx >>= 
1; + + if (_pix == 1) { + *(_ret + _width_padded * (_height - line - 1) + i / 8) |= _pix << (7 - (i & 0x07)); + *(c + i) = 1; + cx |= 0x0200; + } else { + cx &= 0xfdff; + } + + if (i + 2 < _width && *(a + i + 2) == 1) + cx |= 0x0004; + else + cx &= 0xfffb; + + if (i + 3 < _width && *(b + i + 3) == 1) + cx |= 0x0080; + else + cx &= 0xff7f; } +} + +static int +_procstripe(void) +{ + if (_height <= 0 || _width_padded <= 0) + return 1; - *bitmap_size = jbg_dec_getsize(&sd); - *bitmap = malloc(*bitmap_size); + int pix_size = 8 * _width_padded; - if (*bitmap != NULL) - memcpy(*bitmap, jbg_dec_getimage(&sd, 0), *bitmap_size); + char *buf = malloc(3 * pix_size); - jbg_dec_free(&sd); + if (buf == NULL) + return 1; + + memset(buf, 0, 3 * pix_size); + + char *a = buf; + char *b = a + pix_size; + char *c = b + pix_size; + char *z; + + for (int i = 0; i < _height; i++) { + _decode(0x029c); + + if (_pix == 1) { + if (i > 0) + memcpy(_ret + _width_padded * (_height - i - 1), + _ret + _width_padded * (_height - i), + _width_padded); + + memcpy(c, b, pix_size); + } else { + /* line atypical */ + memset(c, 0, pix_size); + _procline(i, a, b, c); + } + + z = a; + a = b; + b = c; + c = z; + } + + free(buf); return 0; } + +int +strdec_jbig(char **bitmap, int width, int height, + const char * restrict jbig, int jbig_size) +{ + _width = width; + _height = height; + + _width_padded = (_width + 7) / 8; + + memset(*bitmap, 0, _height * _width_padded); + + _ret_pos = 0; + _ret = *bitmap; + + _scd_size = jbig_size; + _scd = (unsigned char *) jbig; + + _initdec(); + return _procstripe(); +} @@ -1,8 +1,8 @@ /* - * Copyright (c) 2020-2021, yzrh <yzrh@noema.org> + * Copyright (c) 2022, yzrh <yzrh@noema.org> * * SPDX-License-Identifier: Apache-2.0 */ -int strdec_jbig(char **bitmap, int *bitmap_size, - const char * restrict data, int data_size); +int strdec_jbig(char **bitmap, int width, int height, + const char * restrict jbig, int jbig_size); diff --git a/src/jbig2.c b/src/jbig2.c new file mode 
100644 index 0000000..9b3a9be --- /dev/null +++ b/src/jbig2.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2022, yzrh <yzrh@noema.org> + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include <jbig2.h> + +int +strdec_jbig2(char **bitmap, + const char * restrict jbig2, int jbig2_size) +{ + Jbig2Ctx *ctx = jbig2_ctx_new(NULL, JBIG2_OPTIONS_EMBEDDED, NULL, NULL, NULL); + + jbig2_data_in(ctx, (unsigned char *) jbig2, jbig2_size); + + jbig2_complete_page(ctx); + + Jbig2Image *image = jbig2_page_out(ctx); + + int width_padded = (image->width + 7) / 8; + unsigned char *data = image->data; + + for (unsigned int i = 0; i < image->height; i++) { + memcpy(*bitmap + i * width_padded, data, width_padded); + data += image->stride; + } + + jbig2_release_page(ctx, image); + return 0; +} diff --git a/src/jbig2.h b/src/jbig2.h new file mode 100644 index 0000000..11249cf --- /dev/null +++ b/src/jbig2.h @@ -0,0 +1,7 @@ +/* + * Copyright (c) 2022, yzrh <yzrh@noema.org> + * + * SPDX-License-Identifier: Apache-2.0 + */ + +int strdec_jbig2(char **bitmap, const char * restrict jbig2, int jbig2_size); diff --git a/src/melon.c b/src/melon.c index 8741989..af6aaf4 100644 --- a/src/melon.c +++ b/src/melon.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, yzrh <yzrh@noema.org> + * Copyright (c) 2020-2022, yzrh <yzrh@noema.org> * * SPDX-License-Identifier: Apache-2.0 */ @@ -82,7 +82,7 @@ main(int argc, char **argv) if (param->stat > 0) printf("Melon " VERSION "." RELEASE "." 
PATCH EXTRA "\n" - "Copyright (c) 2020-2021, yzrh <yzrh@noema.org>\n\n"); + "Copyright (c) 2020-2022, yzrh <yzrh@noema.org>\n\n"); cnki_info(&param); @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, yzrh <yzrh@noema.org> + * Copyright (c) 2020-2022, yzrh <yzrh@noema.org> * * SPDX-License-Identifier: Apache-2.0 */ diff --git a/src/pdf_cnki.c b/src/pdf_cnki.c index a1c7a09..e8e2f90 100644 --- a/src/pdf_cnki.c +++ b/src/pdf_cnki.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, yzrh <yzrh@noema.org> + * Copyright (c) 2020-2022, yzrh <yzrh@noema.org> * * SPDX-License-Identifier: Apache-2.0 */ diff --git a/src/pdf_get.c b/src/pdf_get.c index 4e1ec56..a72c68d 100644 --- a/src/pdf_get.c +++ b/src/pdf_get.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, yzrh <yzrh@noema.org> + * Copyright (c) 2020-2022, yzrh <yzrh@noema.org> * * SPDX-License-Identifier: Apache-2.0 */ diff --git a/src/version.h b/src/version.h index 4731e6a..4e5cfa6 100644 --- a/src/version.h +++ b/src/version.h @@ -1,10 +1,10 @@ /* - * Copyright (c) 2020-2021, yzrh <yzrh@noema.org> + * Copyright (c) 2020-2022, yzrh <yzrh@noema.org> * * SPDX-License-Identifier: Apache-2.0 */ #define VERSION "0" -#define RELEASE "1" +#define RELEASE "2" #define PATCH "0" #define EXTRA "" |