about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: yzrh <yzrh@noema.org> 2022-12-22 19:47:40 +0000
committer: yzrh <yzrh@noema.org> 2022-12-24 23:29:56 +0000
commit: 9c1f1d0b75de0d2ed299842d3025941f3e681c16 (patch)
tree: 302f6d4a2235acfe8872a3c0c1c216fecc323b95
parent: ac3b1dda63944f2cc8caaa52344774255e1956c8 (diff)
download: melon-9c1f1d0b75de0d2ed299842d3025941f3e681c16.tar.gz
download: melon-9c1f1d0b75de0d2ed299842d3025941f3e681c16.tar.zst
Fix HN conversion and add JBIG2 support.
Signed-off-by: yzrh <yzrh@noema.org>
-rw-r--r-- CHANGE.md 8
-rw-r--r-- README.md 8
-rw-r--r-- src/Makefile 10
-rw-r--r-- src/cnki.h 8
-rw-r--r-- src/cnki_jbig.c 62
-rw-r--r-- src/cnki_jbig.h 39
-rw-r--r-- src/cnki_jbig2.c 43
-rw-r--r-- src/cnki_pdf.c 147
-rw-r--r-- src/cnki_xml.c 14
-rw-r--r-- src/cnki_zlib.c 4
-rw-r--r-- src/jbig.c 304
-rw-r--r-- src/jbig.h 6
-rw-r--r-- src/jbig2.c 35
-rw-r--r-- src/jbig2.h 7
-rw-r--r-- src/melon.c 4
-rw-r--r-- src/pdf.c 2
-rw-r--r-- src/pdf_cnki.c 2
-rw-r--r-- src/pdf_get.c 2
-rw-r--r-- src/version.h 4
19 files changed, 517 insertions, 192 deletions
diff --git a/CHANGE.md b/CHANGE.md
index b999eec..e4217a5 100644
--- a/CHANGE.md
+++ b/CHANGE.md
@@ -1,10 +1,16 @@
-0.2.0 (2021-XX-XX)
+0.3.0 (2022-XX-XX)
+==================
+
+* Support JPEG 2000 for HN.
+
+0.2.0 (2022-12-22)
==================
* KDH conversion now produces a valid PDF
* Handle binary data in dictionary.
* Add preliminary support for HN
* Fix root object dictionary generation when root object has more than two children.
+* Fix memory leak and data type.
0.1.0 (2020-04-08)
==================
diff --git a/README.md b/README.md
index 78a4107..471282e 100644
--- a/README.md
+++ b/README.md
@@ -6,10 +6,10 @@ Melon: Converter that produces PDF from CNKI proprietary formats
Development
-----------
-Currently, CAJ and KDH can be converted. Please report
+Currently, CAJ, KDH, and HN can be converted. Please report
any failures with a sample that can reproduce the behaviour.
-HN support is being worked on.
+HN support does not support JPEG 2000 yet.
Dependency
----------
@@ -17,7 +17,7 @@ Dependency
1. OpenSSL
2. libiconv
3. zlib
-4. JBIG-KIT
+4. jbig2dec
5. libjpeg-turbo
Usage
@@ -39,7 +39,7 @@ Specify output file
Set buffer size (default 512k)
-v, --verbose
-Print more information (twice for even more)
+Print more information (twice for even more, three times for HN image decoding information as well)
Thanks
======
diff --git a/src/Makefile b/src/Makefile
index 7016c6c..6943af3 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -4,19 +4,19 @@
# SPDX-License-Identifier: Apache-2.0
#
-src = melon.c iconv.c zlib.c jbig.c jpeg.c \
+src = melon.c iconv.c zlib.c jbig.c jbig2.c jpeg.c \
cnki_caj.c cnki_hn.c cnki_kdh.c cnki_outline_tree.c \
- cnki_pdf.c cnki_xml.c cnki_zlib.c cnki_jbig.c cnki.c \
+ cnki_pdf.c cnki_zlib.c cnki_jbig.c cnki_jbig2.c cnki.c \
pdf_cnki.c pdf_get.c pdf_parser.c pdf_writer.c pdf.c
-inc = extern.h version.h iconv.h zlib.h jbig.h jpeg.h \
+inc = extern.h version.h iconv.h zlib.h jbig.h jbig2.h jpeg.h \
cnki.h pdf_cnki.h cnki_jbig.h pdf.h
obj = ${src:.c=.o}
PREFIX = /usr/local
-CFLAGS = -O2 -pipe -flto -Wall -Wextra -Wno-unused-parameter
-LDFLAGS = -Wl,-O2 -lcrypto -liconv -lz -ljbig -ljpeg -Wl,--as-needed
+CFLAGS = -O2 -pipe -flto -Wall -Wextra
+LDFLAGS = -Wl,-O2 -lcrypto -liconv -lz -ljbig2dec -ljpeg -Wl,--as-needed
CFLAGS += -I/usr/local/include
LDFLAGS += -L/usr/local/lib
diff --git a/src/cnki.h b/src/cnki.h
index 569cea7..237a2c1 100644
--- a/src/cnki.h
+++ b/src/cnki.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@@ -97,5 +97,7 @@ int cnki_jbig(char **bitmap, int *bitmap_size,
int *bitmap_width, int *bitmap_height,
const char * restrict jbig, int jbig_size);
-/* cnki_xml.c */
-int cnki_xml(char **xml, FILE **fp);
+/* cnki_jbig2.c */
+int cnki_jbig2(char **bitmap, int *bitmap_size,
+ int *bitmap_width, int *bitmap_height,
+ const char * restrict jbig, int jbig_size);
diff --git a/src/cnki_jbig.c b/src/cnki_jbig.c
index 02040be..f35d1d5 100644
--- a/src/cnki_jbig.c
+++ b/src/cnki_jbig.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@@ -22,68 +22,22 @@ cnki_jbig(char **bitmap, int *bitmap_size,
memcpy(dib, jbig, 40);
- bih_t *bih = malloc(sizeof(bih_t));
+ int width_padded = (dib->width * dib->depth + 7) / 8;
- if (bih == NULL) {
- free(dib);
- return 1;
- }
-
- memset(bih, 0, sizeof(bih_t));
-
- bih->d_l = 0;
- bih->d = 0;
-
- bih->p = 1;
-
- bih->fill = 0;
-
- bih->x_d = dib->width;
- bih->y_d = dib->height;
- bih->l_0 = bih->y_d / 35;
-
- while (bih->l_0 > 128)
- bih->l_0--;
- if (bih->l_0 < 2)
- bih->l_0 = 2;
+ *bitmap_size = dib->height * width_padded;
+ *bitmap = malloc(*bitmap_size);
- bih->m_x = 8;
- bih->m_y = 0;
-
- bih->order |= 1 << 1;
- bih->order |= 1 << 0;
-
- bih->options |= 1 << 4;
- bih->options |= 1 << 3;
- bih->options |= 1 << 2;
-
- bih->dptable = NULL;
-
- int bie_size = jbig_size - 28; /* - 40 - 8 + 20 */
- char *bie = malloc(bie_size);
-
- if (bie == NULL) {
+ if (*bitmap == NULL) {
free(dib);
- free(bih);
return 1;
}
- memcpy(bie, bih, 20);
- memcpy(bie + 20, jbig + 48, jbig_size - 48);
+ strdec_jbig(bitmap, dib->width, dib->height, jbig + 48, jbig_size - 48);
- int ret = strdec_jbig(bitmap, bitmap_size, bie, bie_size);
-
- if (ret == 0) {
- *bitmap_width = bih->x_d;
- *bitmap_height = bih->y_d;
- }
+ *bitmap_width = dib->width;
+ *bitmap_height = dib->height;
free(dib);
- free(bih);
- free(bie);
-
- if (ret != 0)
- return 1;
return 0;
}
diff --git a/src/cnki_jbig.h b/src/cnki_jbig.h
index 96e4ea8..701b4df 100644
--- a/src/cnki_jbig.h
+++ b/src/cnki_jbig.h
@@ -1,48 +1,11 @@
/*
- * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdint.h>
-/*
- * order (MSB first):
- * 0
- * 0
- * 0
- * 0
- * HITOLO
- * SEQ
- * ILEAVE (default)
- * SMID (default)
- *
- * options (MSB first):
- * 0
- * LRLTWO
- * VLENGTH
- * TPDON (default)
- * TPBON (default)
- * DPON (default)
- * DPPRIV
- * DPLAST
- */
-typedef struct _bih_t {
- char d_l; /* Initial resolution layer */
- char d; /* Final resolution layer */
- char p; /* Number of bit-planes, for bi-level image, always 1 */
- char fill; /* Always 0 */
- /* MSB first */
- int32_t x_d; /* Horizontal dimension at highestresolution */
- int32_t y_d; /* Vertical dimension at highest resolution */
- int32_t l_0; /* Number of lines per stripe at lowest resolution */
- char m_x; /* Maximum horizontal offsets (default: 8) */
- char m_y; /* Maximum vertical offsets (default: 0) */
- char order;
- char options;
- char *dptable; /* 0 or 1728 */
-} bih_t;
-
typedef enum _dib_compression_code {
BI_RGB,
BI_RLE8,
diff --git a/src/cnki_jbig2.c b/src/cnki_jbig2.c
new file mode 100644
index 0000000..69f4a5b
--- /dev/null
+++ b/src/cnki_jbig2.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022, yzrh <yzrh@noema.org>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "cnki_jbig.h"
+#include "jbig2.h"
+
+int
+cnki_jbig2(char **bitmap, int *bitmap_size,
+ int *bitmap_width, int *bitmap_height,
+ const char * restrict jbig, int jbig_size)
+{
+ dib_t *dib = malloc(sizeof(dib_t));
+
+ if (dib == NULL)
+ return 1;
+
+ memcpy(dib, jbig, 40);
+
+ int width_padded = (dib->width * dib->depth + 7) / 8;
+
+ *bitmap_size = dib->height * width_padded;
+ *bitmap = malloc(*bitmap_size);
+
+ if (*bitmap == NULL) {
+ free(dib);
+ return 1;
+ }
+
+ strdec_jbig2(bitmap, jbig + 48, jbig_size - 48);
+
+ *bitmap_width = dib->width;
+ *bitmap_height = dib->height;
+
+ free(dib);
+
+ return 0;
+}
diff --git a/src/cnki_pdf.c b/src/cnki_pdf.c
index dcf6d30..b59b7c6 100644
--- a/src/cnki_pdf.c
+++ b/src/cnki_pdf.c
@@ -238,7 +238,7 @@ cnki_pdf(cnki_t **param)
if ((*param)->stat > 1)
printf("Generating object\n");
- dictionary_size = 64 + 12 * kid[0];
+ dictionary_size = 64 + 16 * kid[0];
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
@@ -483,6 +483,9 @@ cnki_pdf_hn(cnki_t **param)
int *ids = NULL;
pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3);
+ int bitmap_size;
+ char *bitmap;
+
int stream_size;
char *stream;
@@ -493,32 +496,34 @@ cnki_pdf_hn(cnki_t **param)
if (dim == NULL) {
free(root_kid);
+ free(ids);
return 1;
}
- for (int i = 0; i < ptr->image_length; i++) {
- dictionary_size = 128;
- dictionary = malloc(dictionary_size);
+ dictionary_size = 256;
+ dictionary = malloc(dictionary_size);
- if (dictionary == NULL) {
- free(root_kid);
- free(dim);
- return 1;
- }
+ if (dictionary == NULL) {
+ free(root_kid);
+ free(ids);
+ free(dim);
+ return 1;
+ }
+ for (int i = 0; i < ptr->image_length; i++) {
memset(dictionary, 0, dictionary_size);
strcat(dictionary, "<<\n/Type /XObject\n"
"/Subtype /Image\n");
if ((*param)->stat > 2)
- printf("\tDecoding data, page %04d item %02d... ",
- ptr->page, i);
+ printf("\tDecoding data, page %04d item %02d format %d... ",
+ ptr->page, i, ptr->image_data[i].format);
switch (ptr->image_data[i].format) {
case JBIG:
- ret = cnki_jbig(&stream,
- &stream_size,
+ ret = cnki_jbig(&bitmap,
+ &bitmap_size,
&wh[0],
&wh[1],
ptr->image_data[i].image,
@@ -530,18 +535,30 @@ cnki_pdf_hn(cnki_t **param)
break;
}
+ if (strdeflate(&stream, &stream_size,
+ bitmap, bitmap_size) != 0) {
+ free(root_kid);
+ free(ids);
+ free(dim);
+ free(dictionary);
+ return 1;
+ }
+
+ free(bitmap);
+
snprintf(buf, 64, "/Width %d\n/Height %d\n",
wh[0], wh[1]);
strcat(dictionary, buf);
strcat(dictionary, "/ColorSpace /DeviceGray\n"
"/BitsPerComponent 1\n");
+ strcat(dictionary, "/Decode [1.0 0.0]\n");
snprintf(buf, 64, "/Length %d\n",
stream_size);
strcat(dictionary, buf);
- strcat(dictionary, "/Filter /CCITTFaxDecode\n");
+ strcat(dictionary, "/Filter /FlateDecode\n");
dim[i * 2] = wh[0];
dim[i * 2 + 1] = wh[1];
@@ -562,9 +579,10 @@ cnki_pdf_hn(cnki_t **param)
stream_size = ptr->image_data[i].size;
stream = malloc(stream_size);
if (stream == NULL) {
- free(dictionary);
free(root_kid);
+ free(ids);
free(dim);
+ free(dictionary);
return 1;
}
memcpy(stream, ptr->image_data[i].image, stream_size);
@@ -573,7 +591,7 @@ cnki_pdf_hn(cnki_t **param)
wh[0], wh[1]);
strcat(dictionary, buf);
- strcat(dictionary, "/ColorSpace /DeviceRGB\n"
+ strcat(dictionary, "/ColorSpace /DeviceGray\n"
"/BitsPerComponent 8\n");
snprintf(buf, 64, "/Length %d\n",
@@ -586,6 +604,47 @@ cnki_pdf_hn(cnki_t **param)
dim[i * 2 + 1] = wh[1];
break;
case JBIG2:
+ ret = cnki_jbig2(&bitmap,
+ &bitmap_size,
+ &wh[0],
+ &wh[1],
+ ptr->image_data[i].image,
+ ptr->image_data[i].size);
+
+ if (ret != 0) {
+ dim[i * 2] = 0;
+ dim[i * 2 + 1] = 0;
+ break;
+ }
+
+ if (strdeflate(&stream, &stream_size,
+ bitmap, bitmap_size) != 0) {
+ free(root_kid);
+ free(ids);
+ free(dim);
+ free(dictionary);
+ return 1;
+ }
+
+ free(bitmap);
+
+ snprintf(buf, 64, "/Width %d\n/Height %d\n",
+ wh[0], wh[1]);
+ strcat(dictionary, buf);
+
+ strcat(dictionary, "/ColorSpace /DeviceGray\n"
+ "/BitsPerComponent 1\n");
+ strcat(dictionary, "/Decode [1.0 0.0]\n");
+
+ snprintf(buf, 64, "/Length %d\n",
+ stream_size);
+ strcat(dictionary, buf);
+
+ strcat(dictionary, "/Filter /FlateDecode\n");
+
+ dim[i * 2] = wh[0];
+ dim[i * 2 + 1] = wh[1];
+ break;
case JPX:
default:
ret = -1;
@@ -598,37 +657,26 @@ cnki_pdf_hn(cnki_t **param)
if (ret == 0) {
if ((*param)->stat > 2)
- printf("Done\n");
+ printf("%6d byte(s), width %4d, height %4d.\n",
+ stream_size, wh[0], wh[1]);
pdf_obj_append(&pdf, ids[i],
NULL, dictionary, stream, stream_size);
- free(dictionary);
free(stream);
} else if (ret == 1) {
if ((*param)->stat > 2)
- printf("Failed\n");
-
- free(dictionary);
+ printf("Not extracted.\n");
pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
} else {
if ((*param)->stat > 2)
- printf("Unsupported format\n");
+ printf("Unsupported format.\n");
- free(dictionary);
+ pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
}
}
- dictionary_size = 128;
- dictionary = malloc(dictionary_size);
-
- if (dictionary == NULL) {
- free(root_kid);
- free(dim);
- return 1;
- }
-
memset(dictionary, 0, dictionary_size);
strcat(dictionary, "<<\n/XObject <<");
@@ -655,11 +703,12 @@ cnki_pdf_hn(cnki_t **param)
if (strncmp(ptr->text + 8, "COMPRESSTEXT", 12) == 0) {
cnki_zlib(&stream, &stream_size, ptr->text, ptr->text_size);
- dictionary_size = stream_size / 8 + 7;
+ dictionary_size = 64 + 2 * stream_size;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
+ free(ids);
free(dim);
return 1;
}
@@ -688,11 +737,12 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, ">");
} else {
- dictionary_size = ptr->text_size;
+ dictionary_size = 64 + 2 * ptr->text_size;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
+ free(ids);
free(dim);
return 1;
}
@@ -724,11 +774,12 @@ cnki_pdf_hn(cnki_t **param)
/* FIXME: Use the text somehow? */
free(dictionary);
- dictionary_size = 64 + 12 * ptr->image_length;
+ dictionary_size = 64 + 64 * ptr->image_length;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
+ free(ids);
free(dim);
return 1;
}
@@ -739,12 +790,27 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, "0.25 0 0 0.25 0 0 cm\n");
+ double resize_x;
+ double resize_y;
+
for (int i = 0; i < ptr->image_length; i++) {
if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0)
continue;
+ /* Scale within bound of A4 paper */
+ resize_x = 595.276 * 4 / dim[i * 2];
+ resize_y = 841.89 * 4 / dim[i * 2 + 1];
+
+ if (resize_y < resize_x)
+ snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n",
+ resize_y, resize_y);
+ else
+ snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n",
+ resize_x, resize_x);
+ strcat(dictionary, buf);
+
/* Apply transformation matrix */
- if (ptr->image_data[i].format == DCT_1) {
+ if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1) {
snprintf(buf, 64, "1 0 0 1 0 %d cm\n",
dim[i * 2 + 1]);
strcat(dictionary, buf);
@@ -763,9 +829,10 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, "Q");
if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0) {
- free(dictionary);
free(root_kid);
+ free(ids);
free(dim);
+ free(dictionary);
return 1;
}
@@ -796,7 +863,7 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, buf);
/* A4 paper */
- strcat(dictionary, "/MediaBox [ 0 0 595.276 841.89 ]\n");
+ strcat(dictionary, "/MediaBox [0 0 595.276 841.89]\n");
/* Add /Parent when we know root */
pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0);
@@ -838,7 +905,7 @@ cnki_pdf_hn(cnki_t **param)
if ((*param)->stat > 1)
printf("Generating root object\n");
- dictionary_size = 64 + 12 * (*param)->file_stat->page;
+ dictionary_size = 64 + 64 * (*param)->file_stat->page;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
@@ -877,7 +944,7 @@ cnki_pdf_hn(cnki_t **param)
free(dictionary);
- dictionary_size = 128;
+ dictionary_size = 256;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
diff --git a/src/cnki_xml.c b/src/cnki_xml.c
deleted file mode 100644
index 7f870d1..0000000
--- a/src/cnki_xml.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
- *
- * SPDX-License-Identifier: Apache-2.0
- */
-
-#include <stdio.h>
-
-int
-cnki_xml(char **xml, FILE **fp)
-{
- /* TODO: Extract XML and embed into `/Metadata' */
- return 1;
-}
diff --git a/src/cnki_zlib.c b/src/cnki_zlib.c
index 7731036..edff141 100644
--- a/src/cnki_zlib.c
+++ b/src/cnki_zlib.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@@ -18,7 +18,7 @@ cnki_zlib(char **dst, int *dst_size,
*dst_size = size;
- if (strinflate(dst, size, src + 24, size - 24) != 0)
+ if (strinflate(dst, size, src + 24, src_size - 24) != 0)
return 1;
return 0;
diff --git a/src/jbig.c b/src/jbig.c
index 8e3dbaf..09a3d92 100644
--- a/src/jbig.c
+++ b/src/jbig.c
@@ -1,41 +1,303 @@
/*
- * Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
+ * Copyright (c) 2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
-#include <stdio.h> /* FIXME: test */
+#include <stdbool.h>
+#include <stdint.h>
#include <stdlib.h>
#include <string.h>
-#include <jbig.h>
+static const uint16_t _LSZ[256] = {
+ 0x5a1d,
+ 0x2586, 0x1114, 0x080b, 0x03d8, 0x01da, 0x00e5, 0x006f, 0x0036,
+ 0x001a, 0x000d, 0x0006, 0x0003, 0x0001, 0x5a7f, 0x3f25, 0x2cf2,
+ 0x207c, 0x17b9, 0x1182, 0x0cef, 0x09a1, 0x072f, 0x055c, 0x0406,
+ 0x0303, 0x0240, 0x01b1, 0x0144, 0x00f5, 0x00b7, 0x008a, 0x0068,
+ 0x004e, 0x003b, 0x002c, 0x5ae1, 0x484c, 0x3a0d, 0x2ef1, 0x261f,
+ 0x1f33, 0x19a8, 0x1518, 0x1177, 0x0e74, 0x0bfb, 0x09f8, 0x0861,
+ 0x0706, 0x05cd, 0x04de, 0x040f, 0x0363, 0x02d4, 0x025c, 0x01f8,
-int
-strdec_jbig(char **bitmap, int *bitmap_size,
- const char * restrict data, int data_size)
+ 0x01a4, 0x0160, 0x0125, 0x00f6, 0x00cb, 0x00ab, 0x008f, 0x5b12,
+ 0x4d04, 0x412c, 0x37d8, 0x2fe8, 0x293c, 0x2379, 0x1edf, 0x1aa9,
+ 0x174e, 0x1424, 0x119c, 0x0f6b, 0x0d51, 0x0bb6, 0x0a40, 0x5832,
+ 0x4d1c, 0x438e, 0x3bdd, 0x34ee, 0x2eae, 0x299a, 0x2516, 0x5570,
+ 0x4ca9, 0x44d9, 0x3e22, 0x3824, 0x32b4, 0x2e17, 0x56a8, 0x4f46,
+ 0x47e5, 0x41cf, 0x3c3d, 0x375e, 0x5231, 0x4c0f, 0x4639, 0x415e,
+ 0x5627, 0x50e7, 0x4b85, 0x5597, 0x504f, 0x5a10, 0x5522, 0x59eb
+};
+
+static const uint8_t _NLPS[256] = {
+ 1,
+ 14, 16, 18, 20, 23, 25, 28, 30,
+ 33, 35, 9, 10, 12, 15, 36, 38,
+ 39, 40, 42, 43, 45, 46, 48, 49,
+ 51, 52, 54, 56, 57, 59, 60, 62,
+ 63, 32, 33, 37, 64, 65, 67, 68,
+ 69, 70, 72, 73, 74, 75, 77, 78,
+ 79, 48, 50, 50, 51, 52, 53, 54,
+
+ 55, 56, 57, 58, 59, 61, 61, 65,
+ 80, 81, 82, 83, 84, 86, 87, 87,
+ 72, 72, 74, 74, 75, 77, 77, 80,
+ 88, 89, 90, 91, 92, 93, 86, 88,
+ 95, 96, 97, 99, 99, 93, 95, 101,
+ 102, 103, 104, 99, 105, 106, 107, 103,
+ 105, 108, 109, 110, 111, 110, 112, 112
+};
+
+static const uint8_t _NMPS[256] = {
+ 1,
+ 2, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 13, 15, 16, 17,
+ 18, 19, 20, 21, 22, 23, 24, 25,
+ 26, 27, 28, 29, 30, 31, 32, 33,
+ 34, 35, 9, 37, 38, 39, 40, 41,
+ 42, 43, 44, 45, 46, 47, 48, 49,
+ 50, 51, 52, 53, 54, 55, 56, 57,
+
+ 58, 59, 60, 61, 62, 63, 32, 65,
+ 66, 67, 68, 69, 70, 71, 72, 73,
+ 74, 75, 76, 77, 78, 79, 48, 81,
+ 82, 83, 84, 85, 86, 87, 71, 89,
+ 90, 91, 92, 93, 94, 86, 96, 97,
+ 98, 99, 100, 93, 102, 103, 104, 99,
+ 106, 107, 103, 109, 107, 111, 109, 111
+};
+
+static const bool _SWTCH[256] = {
+ 1,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 1, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 1, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 1,
+ 0, 0, 0, 0, 0, 0, 0, 1,
+ 0, 0, 0, 0, 0, 0, 1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 0, 0, 0, 0, 1, 0, 1
+};
+
+static uint8_t _ct;
+static uint8_t _pix;
+
+static uint16_t _reg_a;
+static uint32_t _reg_c;
+static uint8_t _mps[0x1000];
+static uint8_t _st[0x1000];
+
+static int _width;
+static int _height;
+
+static int _width_padded;
+
+static int _ret_pos;
+static char *_ret;
+
+static int _scd_size;
+static unsigned char *_scd;
+
+static void
+_bytein(void)
{
- struct jbg_dec_state sd;
+ if (_ret_pos < _scd_size)
+ _reg_c += *(_scd + _ret_pos++) << 8;
- jbg_dec_init(&sd);
+ _ct = 8;
+}
- unsigned char *data_ptr[1] = {(unsigned char *) data};
+static void
+_initdec(void)
+{
+ memset(_mps, 0, 0x1000);
+ memset(_st, 0, 0x1000);
- /* FIXME: test */
- int ret;
- if ((ret = jbg_dec_in(&sd, (unsigned char *) data_ptr,
- data_size, NULL)) != JBG_EOK) {
- printf("[%s] ", jbg_strerror(ret));
- jbg_dec_free(&sd);
- return 1;
+ _reg_c = 0;
+ _bytein();
+ _reg_c <<= 8;
+ _bytein();
+ _reg_c <<= 8;
+ _bytein();
+ _reg_a = 0x0000;
+}
+
+static void
+_exchange_lps(uint16_t cx)
+{
+ uint8_t st_cx = _st[cx];
+ uint16_t lsz_st_cx = _LSZ[_st[cx]];
+
+ if (_reg_a < lsz_st_cx) {
+ _pix = _mps[cx];
+ _st[cx] = _NMPS[st_cx];
+ } else {
+ _pix = 1 - _mps[cx];
+
+ if (_SWTCH[st_cx])
+ _mps[cx] = _pix;
+
+ _st[cx] = _NLPS[st_cx];
+ }
+
+ _reg_c -= _reg_a << 16;
+ _reg_a = lsz_st_cx;
+}
+
+static void
+_exchange_mps(uint16_t cx)
+{
+ uint8_t st_cx = _st[cx];
+ uint16_t lsz_st_cx = _LSZ[_st[cx]];
+
+ if (_reg_a < lsz_st_cx) {
+ _pix = 1 - _mps[cx];
+
+ if (_SWTCH[st_cx])
+ _mps[cx] = _pix;
+
+ _st[cx] = _NLPS[st_cx];
+ } else {
+ _pix = _mps[cx];
+ _st[cx] = _NMPS[st_cx];
+ }
+}
+
+static void
+_renormd(void)
+{
+ do {
+ if (_ct == 0)
+ _bytein();
+
+ _reg_a <<= 1;
+ _reg_c <<= 1;
+ _ct--;
+ } while (_reg_a < 0x8000);
+
+ if (_ct == 0)
+ _bytein();
+}
+
+static void
+_decode(uint16_t cx)
+{
+ _reg_a -= _LSZ[_st[cx]];
+
+ if (_reg_a > _reg_c >> 16) {
+ if (_reg_a < 0x8000) {
+ _exchange_mps(cx);
+ _renormd();
+ } else {
+ _pix = _mps[cx];
+ }
+ } else {
+ _exchange_lps(cx);
+ _renormd();
+ }
+}
+
+static void
+_procline(int line, char *a, char *b, char *c)
+{
+ /* The encoder must be erroneous */
+ uint16_t cx = (*b & 0x01) << 2;
+
+ for (int i = 0; i < _width; i++) {
+ _decode(cx);
+
+ cx >>= 1;
+
+ if (_pix == 1) {
+ *(_ret + _width_padded * (_height - line - 1) + i / 8) |= _pix << (7 - (i & 0x07));
+ *(c + i) = 1;
+ cx |= 0x0200;
+ } else {
+ cx &= 0xfdff;
+ }
+
+ if (i + 2 < _width && *(a + i + 2) == 1)
+ cx |= 0x0004;
+ else
+ cx &= 0xfffb;
+
+ if (i + 3 < _width && *(b + i + 3) == 1)
+ cx |= 0x0080;
+ else
+ cx &= 0xff7f;
}
+}
+
+static int
+_procstripe(void)
+{
+ if (_height <= 0 || _width_padded <= 0)
+ return 1;
- *bitmap_size = jbg_dec_getsize(&sd);
- *bitmap = malloc(*bitmap_size);
+ int pix_size = 8 * _width_padded;
- if (*bitmap != NULL)
- memcpy(*bitmap, jbg_dec_getimage(&sd, 0), *bitmap_size);
+ char *buf = malloc(3 * pix_size);
- jbg_dec_free(&sd);
+ if (buf == NULL)
+ return 1;
+
+ memset(buf, 0, 3 * pix_size);
+
+ char *a = buf;
+ char *b = a + pix_size;
+ char *c = b + pix_size;
+ char *z;
+
+ for (int i = 0; i < _height; i++) {
+ _decode(0x029c);
+
+ if (_pix == 1) {
+ if (i > 0)
+ memcpy(_ret + _width_padded * (_height - i - 1),
+ _ret + _width_padded * (_height - i),
+ _width_padded);
+
+ memcpy(c, b, pix_size);
+ } else {
+ /* line atypical */
+ memset(c, 0, pix_size);
+ _procline(i, a, b, c);
+ }
+
+ z = a;
+ a = b;
+ b = c;
+ c = z;
+ }
+
+ free(buf);
return 0;
}
+
+int
+strdec_jbig(char **bitmap, int width, int height,
+ const char * restrict jbig, int jbig_size)
+{
+ _width = width;
+ _height = height;
+
+ _width_padded = (_width + 7) / 8;
+
+ memset(*bitmap, 0, _height * _width_padded);
+
+ _ret_pos = 0;
+ _ret = *bitmap;
+
+ _scd_size = jbig_size;
+ _scd = (unsigned char *) jbig;
+
+ _initdec();
+ return _procstripe();
+}
diff --git a/src/jbig.h b/src/jbig.h
index 170eda1..1919f3e 100644
--- a/src/jbig.h
+++ b/src/jbig.h
@@ -1,8 +1,8 @@
/*
- * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
+ * Copyright (c) 2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
-int strdec_jbig(char **bitmap, int *bitmap_size,
- const char * restrict data, int data_size);
+int strdec_jbig(char **bitmap, int width, int height,
+ const char * restrict jbig, int jbig_size);
diff --git a/src/jbig2.c b/src/jbig2.c
new file mode 100644
index 0000000..9b3a9be
--- /dev/null
+++ b/src/jbig2.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022, yzrh <yzrh@noema.org>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <jbig2.h>
+
+int
+strdec_jbig2(char **bitmap,
+ const char * restrict jbig2, int jbig2_size)
+{
+ Jbig2Ctx *ctx = jbig2_ctx_new(NULL, JBIG2_OPTIONS_EMBEDDED, NULL, NULL, NULL);
+
+ jbig2_data_in(ctx, (unsigned char *) jbig2, jbig2_size);
+
+ jbig2_complete_page(ctx);
+
+ Jbig2Image *image = jbig2_page_out(ctx);
+
+ int width_padded = (image->width + 7) / 8;
+ unsigned char *data = image->data;
+
+ for (unsigned int i = 0; i < image->height; i++) {
+ memcpy(*bitmap + i * width_padded, data, width_padded);
+ data += image->stride;
+ }
+
+ jbig2_release_page(ctx, image);
+ return 0;
+}
diff --git a/src/jbig2.h b/src/jbig2.h
new file mode 100644
index 0000000..11249cf
--- /dev/null
+++ b/src/jbig2.h
@@ -0,0 +1,7 @@
+/*
+ * Copyright (c) 2022, yzrh <yzrh@noema.org>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+int strdec_jbig2(char **bitmap, const char * restrict jbig2, int jbig2_size);
diff --git a/src/melon.c b/src/melon.c
index 8741989..af6aaf4 100644
--- a/src/melon.c
+++ b/src/melon.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@@ -82,7 +82,7 @@ main(int argc, char **argv)
if (param->stat > 0)
printf("Melon " VERSION "." RELEASE "." PATCH EXTRA "\n"
- "Copyright (c) 2020-2021, yzrh <yzrh@noema.org>\n\n");
+ "Copyright (c) 2020-2022, yzrh <yzrh@noema.org>\n\n");
cnki_info(&param);
diff --git a/src/pdf.c b/src/pdf.c
index de98706..bf9da5c 100644
--- a/src/pdf.c
+++ b/src/pdf.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
diff --git a/src/pdf_cnki.c b/src/pdf_cnki.c
index a1c7a09..e8e2f90 100644
--- a/src/pdf_cnki.c
+++ b/src/pdf_cnki.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
diff --git a/src/pdf_get.c b/src/pdf_get.c
index 4e1ec56..a72c68d 100644
--- a/src/pdf_get.c
+++ b/src/pdf_get.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
diff --git a/src/version.h b/src/version.h
index 4731e6a..4e5cfa6 100644
--- a/src/version.h
+++ b/src/version.h
@@ -1,10 +1,10 @@
/*
- * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2022, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
#define VERSION "0"
-#define RELEASE "1"
+#define RELEASE "2"
#define PATCH "0"
#define EXTRA ""