aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authoryzrh <yzrh@noema.org>2020-12-31 22:36:28 +0000
committeryzrh <yzrh@noema.org>2021-01-03 03:01:28 +0000
commit1994f122cc29504862944cca1da1c5203c7e41eb (patch)
treed89e37c5f3443156116bd8476e0efc2d380acd55 /src
parentb20c6ad3ed930977990f3812b25b80d2ce282d79 (diff)
downloadmelon-1994f122cc29504862944cca1da1c5203c7e41eb.tar.gz
melon-1994f122cc29504862944cca1da1c5203c7e41eb.tar.zst
Decode JBIG and JPEG during HN conversion.
Diffstat (limited to 'src')
-rw-r--r--src/Makefile14
-rw-r--r--src/cnki.c2
-rw-r--r--src/cnki.h8
-rw-r--r--src/cnki_caj.c2
-rw-r--r--src/cnki_hn.c228
-rw-r--r--src/cnki_jbig.c89
-rw-r--r--src/cnki_jbig.h78
-rw-r--r--src/cnki_kdh.c2
-rw-r--r--src/cnki_outline_tree.c2
-rw-r--r--src/cnki_pdf.c632
-rw-r--r--src/cnki_xml.c2
-rw-r--r--src/cnki_zlib.c2
-rw-r--r--src/extern.h2
-rw-r--r--src/iconv.c2
-rw-r--r--src/iconv.h2
-rw-r--r--src/jbig.c41
-rw-r--r--src/jbig.h8
-rw-r--r--src/jpeg.c36
-rw-r--r--src/jpeg.h8
-rw-r--r--src/melon.c4
-rw-r--r--src/pdf.c76
-rw-r--r--src/pdf.h16
-rw-r--r--src/pdf_cnki.c6
-rw-r--r--src/pdf_cnki.h2
-rw-r--r--src/pdf_get.c2
-rw-r--r--src/pdf_parser.c2
-rw-r--r--src/pdf_writer.c2
-rw-r--r--src/version.h2
-rw-r--r--src/zlib.c28
-rw-r--r--src/zlib.h5
30 files changed, 1032 insertions, 273 deletions
diff --git a/src/Makefile b/src/Makefile
index f8c1e34..005f6da 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,22 +1,22 @@
#
-# Copyright (c) 2020, yzrh <yzrh@noema.org>
+# Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
#
# SPDX-License-Identifier: Apache-2.0
#
-src = melon.c iconv.c zlib.c \
+src = melon.c iconv.c zlib.c jbig.c jpeg.c \
cnki_caj.c cnki_hn.c cnki_kdh.c cnki_outline_tree.c \
- cnki_pdf.c cnki_xml.c cnki_zlib.c cnki.c \
+ cnki_pdf.c cnki_xml.c cnki_zlib.c cnki_jbig.c cnki.c \
pdf_cnki.c pdf_get.c pdf_parser.c pdf_writer.c pdf.c
-inc = extern.h version.h iconv.h zlib.h \
- cnki.h pdf_cnki.h pdf.h
+inc = extern.h version.h iconv.h zlib.h jbig.h jpeg.h \
+ cnki.h pdf_cnki.h cnki_jbig.h pdf.h
obj = ${src:.c=.o}
PREFIX = /usr/local
-CFLAGS = -O3 -march=native -pipe -flto=thin -Wall
-LDFLAGS = -Wl,-O3 -lcrypto -liconv -lz -Wl,--as-needed
+CFLAGS = -O3 -march=native -pipe -flto=thin -Wall -Wextra -Wno-unused-parameter
+LDFLAGS = -Wl,-O3 -lcrypto -liconv -lz -ljbig -ljpeg -Wl,--as-needed
CFLAGS += -I/usr/local/include
LDFLAGS += -L/usr/local/lib
diff --git a/src/cnki.c b/src/cnki.c
index 3234403..6866f8e 100644
--- a/src/cnki.c
+++ b/src/cnki.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
diff --git a/src/cnki.h b/src/cnki.h
index 30557c1..816bcc8 100644
--- a/src/cnki.h
+++ b/src/cnki.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@@ -82,6 +82,7 @@ typedef struct _cnki_t {
/* cnki_pdf.c */
int cnki_pdf(cnki_t **param);
+int cnki_pdf_hn(cnki_t **param);
/* cnki_outline_tree.c */
int cnki_outline_tree(object_outline_tree_t **outline_tree,
@@ -91,5 +92,10 @@ int cnki_outline_tree(object_outline_tree_t **outline_tree,
int cnki_zlib(char **dst, int *dst_size,
const char * restrict src, int src_size);
+/* cnki_jbig.c */
+int cnki_jbig(char **bitmap, int *bitmap_size,
+ int *bitmap_width, int *bitmap_height,
+ const char * restrict jbig, int jbig_size);
+
/* cnki_xml.c */
int cnki_xml(char **xml, FILE **fp);
diff --git a/src/cnki_caj.c b/src/cnki_caj.c
index 1e3bd8b..3c0b1b9 100644
--- a/src/cnki_caj.c
+++ b/src/cnki_caj.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
diff --git a/src/cnki_hn.c b/src/cnki_hn.c
index f6a4c24..0e16a76 100644
--- a/src/cnki_hn.c
+++ b/src/cnki_hn.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@@ -9,6 +9,8 @@
#include "cnki.h"
#include "iconv.h"
+#include "zlib.h"
+#include "jpeg.h"
#include "pdf.h"
#include "pdf_cnki.h"
@@ -131,231 +133,13 @@ cnki_hn(cnki_t **param)
ptr = ptr->next;
}
- if ((*param)->stat > 1)
- printf("Loaded %d page(s)\n", (*param)->file_stat->page);
-
- if ((*param)->stat > 1)
- printf("Generating PDF object(s)\n");
-
- pdf_object_t *pdf = NULL;
-
- if (pdf_obj_create(&pdf) != 0)
- return 1;
-
- int buf_size;
- char *buf;
-
- int str_size;
- char *str;
-
- int conv_size;
- char *conv_dst;
- char conv_src[2];
- char conv_hex[3];
-
- ptr = (*param)->object_hn;
- while (ptr != NULL) {
- if (strncmp(ptr->text + 8, "COMPRESSTEXT", 12) == 0) {
- cnki_zlib(&buf, &buf_size, ptr->text, ptr->text_size);
-
- str_size = buf_size / 8 + 7;
- str = malloc(str_size);
-
- if (str == NULL)
- return 1;
-
- memset(str, 0, str_size);
-
- strcat(str, "<feff");
-
- for (int i = 0; i < buf_size; i += 16) {
- conv_src[0] = buf[i + 7];
- conv_src[1] = buf[i + 6];
-
- conv_size = 6;
-
- if (strconv(&conv_dst, "UTF-16BE",
- conv_src, "GB18030", &conv_size) == 0) {
- for (int j = 0; j < conv_size - 2; j++) {
- snprintf(conv_hex, 3,
- "%02x", (unsigned char) conv_dst[j]);
- strcat(str, conv_hex);
- }
- free(conv_dst);
- }
- }
- free(buf);
-
- strcat(str, ">");
- } else {
- str_size = ptr->text_size;
- str = malloc(str_size);
-
- if (str == NULL)
- return 1;
-
- memset(str, 0, str_size);
-
- strcat(str, "<feff");
-
- for (int i = 0; i < ptr->text_size; i += 4) {
- conv_src[0] = ptr->text[i + 3];
- conv_src[1] = ptr->text[i + 2];
-
- conv_size = 6;
-
- if (strconv(&conv_dst, "UTF-16BE",
- conv_src, "GB18030", &conv_size) == 0) {
- for (int j = 0; j < conv_size - 2; j++) {
- snprintf(conv_hex, 3,
- "%02x", (unsigned char) conv_dst[j]);
- strcat(str, conv_hex);
- }
- free(conv_dst);
- }
- }
-
- strcat(str, ">");
- }
-
- pdf_obj_append(&pdf, 0, str, NULL, NULL);
-
- free(str);
-
- ptr = ptr->next;
- }
-
- if ((*param)->stat > 1) {
- printf("\t%8s\t%12s\t%12s\t%12s\n",
- "id",
- "object",
- "dictionary",
- "stream");
-
- pdf_object_t *ptr = pdf->next;
- while (ptr != NULL) {
- printf("\t%8d\t%12d\t%12d\t%12d\n",
- ptr->id,
- ptr->object_size,
- ptr->dictionary_size,
- ptr->stream_size);
- ptr = ptr->next;
- }
- }
-
if ((*param)->stat > 0)
- printf("Generated %d object(s)\n",
- pdf_get_count(&pdf));
-
- int *ids = NULL;
-
- if ((*param)->file_stat->outline > 0) {
- if ((*param)->stat > 1)
- printf("Generating outline object(s)\n\t%8s\n", "id");
-
- pdf_get_free_ids(&pdf, &ids, (*param)->file_stat->outline + 1);
- int outline = pdf_cnki_outline(&pdf, &(*param)->object_outline, &ids);
-
- if ((*param)->stat > 1)
- for (int i = 0; i < (*param)->file_stat->outline + 1; i++)
- printf("\t%8d\n", ids[i]);
-
- if ((*param)->stat > 0) {
- if (outline != 0)
- printf("No outline information\n");
- else
- printf("Generated %d outline object(s)\n",
- (*param)->file_stat->outline + 1);
- }
- }
-
- if ((*param)->stat > 1)
- printf("Writing header\n");
-
- long cur = 0;
-
- if ((*param)->stat > 0)
- cur = ftell((*param)->fp_o);
-
- if (pdf_dump_header(&pdf, &(*param)->fp_o) != 0) {
- fprintf(stderr, "Header not written\n");
- return 1;
- } else {
- if ((*param)->stat > 0)
- printf("Header %ld byte(s) written\n",
- ftell((*param)->fp_o) - cur);
- }
-
- if ((*param)->stat > 1)
- printf("Writing object(s)\n");
-
- pdf_dump_obj(&pdf, &(*param)->fp_o);
-
- if ((*param)->stat > 1) {
- printf("\t%8s\t%8s\t%8s\t%12s\t%12s\t%12s\n",
- "address",
- "size",
- "id",
- "object",
- "dictionary",
- "stream");
-
- pdf_object_t *ptr = pdf->next;
- while (ptr != NULL) {
- printf("\t%08x\t%8d\t%8d\t%12d\t%12d\t%12d\n",
- ptr->address,
- ptr->size,
- ptr->id,
- ptr->object_size,
- ptr->dictionary_size,
- ptr->stream_size);
- ptr = ptr->next;
- }
- }
-
- if ((*param)->stat > 0)
- printf("%d object(s) %ld byte(s) written\n",
- pdf_get_count(&pdf),
- ftell((*param)->fp_o));
-
- long xref = ftell((*param)->fp_o);
-
- if ((*param)->stat > 1)
- printf("Writing cross-reference table\n");
-
- if (pdf_dump_xref(&pdf, &(*param)->fp_o) != 0) {
- if ((*param)->stat > 0)
- printf("Cross-reference table not written\n");
- } else {
- if ((*param)->stat > 0)
- printf("Cross-reference table %ld byte(s) written\n",
- ftell((*param)->fp_o) - xref);
- }
-
- if ((*param)->stat > 1)
- printf("Writing trailer\n");
-
- if ((*param)->stat > 0)
- cur = ftell((*param)->fp_o);
-
- if (pdf_dump_trailer(&pdf, &(*param)->fp_o, xref) != 0) {
- if ((*param)->stat > 0)
- printf("Trailer not written\n");
- } else {
- if ((*param)->stat > 0)
- printf("Trailer %ld byte(s) written\n",
- ftell((*param)->fp_o) - cur);
- }
-
- if ((*param)->stat > 0)
- printf("Total %ld byte(s) written\n",
- ftell((*param)->fp_o));
+ printf("Loaded %d page(s)\n", (*param)->file_stat->page);
- pdf_obj_destroy(&pdf);
+ cnki_pdf_hn(param);
if ((*param)->stat > 0)
- printf("Conversion ended (partial)\n");
+ printf("Conversion ended\n");
- /* TODO: Finish me please :) */
return 0;
}
diff --git a/src/cnki_jbig.c b/src/cnki_jbig.c
new file mode 100644
index 0000000..02040be
--- /dev/null
+++ b/src/cnki_jbig.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "cnki_jbig.h"
+#include "jbig.h"
+
+/*
+ * Decode a CNKI JBIG image into a raw bitmap.
+ *
+ * The input starts with a 40-byte DIB header, followed by 8 bytes that
+ * are skipped (presumably the two-entry colour table -- TODO confirm),
+ * followed by JBIG data that lacks its bi-level image header (BIH).
+ * A 20-byte BIH is synthesised from the DIB dimensions and prepended
+ * before the data is handed to strdec_jbig().
+ *
+ * All multi-byte BIH fields are written MSB first regardless of host
+ * byte order; the DIB dimensions are read as little-endian values.
+ *
+ * Returns 0 on success, in which case *bitmap is a malloc()ed buffer of
+ * *bitmap_size bytes owned by the caller and *bitmap_width and
+ * *bitmap_height hold the image dimensions. Returns 1 on any failure;
+ * the dimension outputs are then left untouched.
+ */
+int
+cnki_jbig(char **bitmap, int *bitmap_size,
+	int *bitmap_width, int *bitmap_height,
+	const char * restrict jbig, int jbig_size)
+{
+	enum {
+		DATA_OFFSET = 48,	/* 40-byte DIB header + 8 skipped bytes */
+		BIH_SIZE = 20
+	};
+
+	if (bitmap == NULL || bitmap_size == NULL ||
+		bitmap_width == NULL || bitmap_height == NULL ||
+		jbig == NULL || jbig_size < DATA_OFFSET)
+		return 1;
+
+	const unsigned char *src = (const unsigned char *) jbig;
+
+	/* DIB width at offset 4, height at offset 8 */
+	uint32_t width = 0;
+	uint32_t height = 0;
+
+	for (int i = 3; i >= 0; i--) {
+		width = width << 8 | src[4 + i];
+		height = height << 8 | src[8 + i];
+	}
+
+	/* Dimensions are reported through int, reject anything else */
+	if (width == 0 || width > 0x7fffffff ||
+		height == 0 || height > 0x7fffffff)
+		return 1;
+
+	/* Lines per stripe, clamped to [2, 128] */
+	uint32_t l_0 = height / 35;
+
+	if (l_0 > 128)
+		l_0 = 128;
+	if (l_0 < 2)
+		l_0 = 2;
+
+	int bie_size = BIH_SIZE + (jbig_size - DATA_OFFSET);
+	unsigned char *bie = malloc(bie_size);
+
+	if (bie == NULL)
+		return 1;
+
+	bie[0] = 0;	/* D_L, initial resolution layer */
+	bie[1] = 0;	/* D, final resolution layer */
+	bie[2] = 1;	/* P, single bit-plane */
+	bie[3] = 0;	/* Fill */
+
+	for (int i = 0; i < 4; i++) {
+		int shift = 24 - 8 * i;
+
+		bie[4 + i] = (width >> shift) & 0xff;	/* X_D */
+		bie[8 + i] = (height >> shift) & 0xff;	/* Y_D */
+		bie[12 + i] = (l_0 >> shift) & 0xff;	/* L_0 */
+	}
+
+	bie[16] = 8;	/* M_X */
+	bie[17] = 0;	/* M_Y */
+	bie[18] = 1 << 1 | 1 << 0;		/* ILEAVE | SMID */
+	bie[19] = 1 << 4 | 1 << 3 | 1 << 2;	/* TPDON | TPBON | DPON */
+
+	memcpy(bie + BIH_SIZE, src + DATA_OFFSET, jbig_size - DATA_OFFSET);
+
+	int ret = strdec_jbig(bitmap, bitmap_size,
+		(const char *) bie, bie_size);
+
+	free(bie);
+
+	/* A decoder that reports success must also have produced a bitmap */
+	if (ret != 0 || *bitmap == NULL)
+		return 1;
+
+	*bitmap_width = (int) width;
+	*bitmap_height = (int) height;
+
+	return 0;
+}
diff --git a/src/cnki_jbig.h b/src/cnki_jbig.h
new file mode 100644
index 0000000..96e4ea8
--- /dev/null
+++ b/src/cnki_jbig.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdint.h>
+
+/*
+ * Fields of a JBIG bi-level image header (BIH), see bih_t below.
+ *
+ * order (MSB first):
+ * 0
+ * 0
+ * 0
+ * 0
+ * HITOLO
+ * SEQ
+ * ILEAVE (default)
+ * SMID (default)
+ *
+ * options (MSB first):
+ * 0
+ * LRLTWO
+ * VLENGTH
+ * TPDON (default)
+ * TPBON (default)
+ * DPON (default)
+ * DPPRIV
+ * DPLAST
+ */
+typedef struct _bih_t {
+ char d_l; /* Initial resolution layer */
+ char d; /* Final resolution layer */
+ char p; /* Number of bit-planes, for bi-level image, always 1 */
+ char fill; /* Always 0 */
+ /* MSB first */
+ int32_t x_d; /* Horizontal dimension at highest resolution */
+ int32_t y_d; /* Vertical dimension at highest resolution */
+ int32_t l_0; /* Number of lines per stripe at lowest resolution */
+ char m_x; /* Maximum horizontal offsets (default: 8) */
+ char m_y; /* Maximum vertical offsets (default: 0) */
+ char order; /* Bit flags, see the "order" list above */
+ char options; /* Bit flags, see the "options" list above */
+ char *dptable; /* 0 or 1728 */
+} bih_t;
+
+/* Values of the "compression" field of dib_t */
+typedef enum _dib_compression_code {
+ BI_RGB,
+ BI_RLE8,
+ BI_RLE4,
+ BI_BITFIELDS,
+ BI_JPEG,
+ BI_PNG,
+ BI_ALPHABITFIELDS,
+ /* Values 7 to 10 are not assigned here */
+ BI_CMYK = 11,
+ BI_CMYKRLE8 = 12,
+ BI_CMYKRLE4 = 13
+} dib_compression_code;
+
+/*
+ * 40-byte device-independent bitmap (DIB) header.
+ * NOTE(review): the field order looks like BITMAPINFOHEADER -- confirm.
+ */
+typedef struct _dib_t {
+ uint32_t dib_size; /* Always 40 */
+ int32_t width;
+ int32_t height;
+ uint16_t plane; /* Always 1 */
+ uint16_t depth;
+ uint32_t compression; /* dib_compression_code */
+ uint32_t size;
+ uint32_t resolution_h;
+ uint32_t resolution_v;
+ uint32_t colour;
+ uint32_t colour_used;
+} dib_t;
+
+/*
+ * One colour table entry, stored in blue, green, red order.
+ * NOTE(review): each channel is 16 bits wide here, so an entry is
+ * 8 bytes; confirm against the on-disk layout before using it.
+ */
+typedef struct _colour_table {
+ uint16_t blue;
+ uint16_t green;
+ uint16_t red;
+ uint16_t fill; /* Always 0 */
+} colour_table;
diff --git a/src/cnki_kdh.c b/src/cnki_kdh.c
index 78e8957..b13434d 100644
--- a/src/cnki_kdh.c
+++ b/src/cnki_kdh.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
diff --git a/src/cnki_outline_tree.c b/src/cnki_outline_tree.c
index 58294c3..cf59d10 100644
--- a/src/cnki_outline_tree.c
+++ b/src/cnki_outline_tree.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
diff --git a/src/cnki_pdf.c b/src/cnki_pdf.c
index 8c9fbb8..bb76a04 100644
--- a/src/cnki_pdf.c
+++ b/src/cnki_pdf.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@@ -8,6 +8,9 @@
#include <string.h>
#include "cnki.h"
+#include "iconv.h"
+#include "zlib.h"
+#include "jpeg.h"
#include "pdf.h"
#include "pdf_cnki.h"
@@ -57,6 +60,11 @@ cnki_pdf(cnki_t **param)
printf("Loaded %d object(s)\n",
pdf_get_count(&pdf));
+ int dictionary_size;
+ char *dictionary;
+
+ char buf[64];
+
if ((*param)->stat > 1)
printf("Searching for parent object(s)\n");
@@ -69,12 +77,8 @@ cnki_pdf(cnki_t **param)
if ((*param)->stat > 0)
printf("Discovered %d parent object(s)\n", parent[0]);
- char buf[64];
-
int parent_missing[parent[0]];
int *kid;
- int dictionary_size;
- char *dictionary;
for (int i = 1; i <= parent[0]; i++) {
if ((*param)->stat > 1)
@@ -101,20 +105,23 @@ cnki_pdf(cnki_t **param)
snprintf(buf, 64,
"<<\n/Type /Pages\n/Kids [");
strcat(dictionary, buf);
+
for (int j = 1; j <= kid[0]; j++) {
snprintf(buf, 64,
"%d 0 R",
kid[j]);
strcat(dictionary, buf);
+
if (j < kid[0])
strcat(dictionary, " ");
}
+
snprintf(buf, 64,
"]\n/Count %d\n>>",
pdf_get_kid_count(&pdf, parent[i]));
strcat(dictionary, buf);
- pdf_obj_prepend(&pdf, parent[i], NULL, dictionary, NULL);
+ pdf_obj_prepend(&pdf, parent[i], NULL, dictionary, NULL, 0);
parent_missing[i - 1] = 1;
@@ -185,6 +192,7 @@ cnki_pdf(cnki_t **param)
if (parent_missing[i]) {
snprintf(buf, 64, "%d 0 R", parent[i + 1]);
strcat(dictionary, buf);
+
if (i < root_kid)
strcat(dictionary, " ");
}
@@ -200,7 +208,7 @@ cnki_pdf(cnki_t **param)
strcat(dictionary, ">>");
- pdf_obj_prepend(&pdf, root, NULL, dictionary, NULL);
+ pdf_obj_prepend(&pdf, root, NULL, dictionary, NULL, 0);
memset(dictionary, 0, dictionary_size);
@@ -260,7 +268,7 @@ cnki_pdf(cnki_t **param)
strcat(dictionary, ">>");
- pdf_obj_append(&pdf, 0, NULL, dictionary, NULL);
+ pdf_obj_append(&pdf, 0, NULL, dictionary, NULL, 0);
if ((*param)->stat > 0)
printf("Generated catalog object\n");
@@ -383,3 +391,611 @@ cnki_pdf(cnki_t **param)
return 0;
}
+
+/*
+ * Build a PDF hex string ("<feff...>") from two-byte GB18030 characters
+ * embedded in src. One character is taken every `stride` bytes; its two
+ * bytes are read from `offset + 1` and `offset` (in that order) and
+ * converted to UTF-16BE with strconv().
+ *
+ * Returns a malloc()ed, NUL-terminated string owned by the caller, or
+ * NULL if src is NULL or memory is exhausted. Output that would not fit
+ * the buffer is dropped instead of being written out of bounds.
+ */
+static char *
+cnki_pdf_hn_text(const char *src, int src_size, int stride, int offset)
+{
+	if (src == NULL || src_size < 0 || stride <= 0)
+		return NULL;
+
+	/* Up to 8 hex digits per character, plus "<feff", ">" and NUL */
+	int dst_size = (src_size / stride + 1) * 8 + 8;
+	char *dst = malloc(dst_size);
+
+	if (dst == NULL)
+		return NULL;
+
+	int len = snprintf(dst, dst_size, "<feff");
+
+	int conv_size;
+	char *conv_dst;
+	char conv_src[2];
+
+	for (int i = 0; i + offset + 1 < src_size; i += stride) {
+		conv_src[0] = src[i + offset + 1];
+		conv_src[1] = src[i + offset];
+
+		conv_size = 6;
+
+		if (strconv(&conv_dst, "UTF-16BE",
+			conv_src, "GB18030", &conv_size) != 0)
+			continue;
+
+		for (int j = 0; j < conv_size - 2; j++) {
+			/* Keep room for the closing ">" and the terminator */
+			if (len + 4 > dst_size)
+				break;
+
+			snprintf(dst + len, 3,
+				"%02x", (unsigned char) conv_dst[j]);
+			len += 2;
+		}
+
+		free(conv_dst);
+	}
+
+	dst[len++] = '>';
+	dst[len] = '\0';
+
+	return dst;
+}
+
+/*
+ * Convert the HN pages loaded into (*param)->object_hn to a PDF written
+ * to (*param)->fp_o.
+ *
+ * For every page one image XObject per decodable image, a resource
+ * object, a content stream and a page object are generated. Afterwards
+ * the optional outline, the page tree root and the catalog are added and
+ * the header, objects, cross-reference table and trailer are written.
+ *
+ * At most (*param)->file_stat->page pages are converted; the page tree
+ * lists the pages that were actually generated.
+ *
+ * Returns 0 on success and 1 on failure. Every buffer allocated here and
+ * the object list are released on both paths.
+ */
+int
+cnki_pdf_hn(cnki_t **param)
+{
+	if (param == NULL || *param == NULL)
+		return 1;
+
+	pdf_object_t *pdf = NULL;
+
+	if (pdf_obj_create(&pdf) != 0)
+		return 1;
+
+	if ((*param)->stat > 1)
+		printf("Generating PDF object(s)\n");
+
+	/* Everything released at the fail label starts out as NULL */
+	int dictionary_size;
+	char *dictionary = NULL;
+
+	int stream_size;
+	char *stream = NULL;
+
+	char buf[64];
+
+	int *ids = NULL;
+	int *dim = NULL;
+
+	int page = (*param)->file_stat->page;
+
+	int cnt = 0;
+	int *root_kid = calloc(page > 0 ? page : 1, sizeof(int));
+
+	if (root_kid == NULL)
+		goto fail;
+
+	object_hn_t *ptr = (*param)->object_hn;
+	while (ptr != NULL && cnt < page) {
+		/*
+		 * External object (ptr->image_length) +
+		 * content object +
+		 * resource object +
+		 * page object
+		 */
+		pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3);
+
+		if (ids == NULL)
+			goto fail;
+
+		/* One extra slot so a page without images is not an error */
+		dim = malloc((2 * ptr->image_length + 1) * sizeof(int));
+
+		if (dim == NULL)
+			goto fail;
+
+		int ret;
+		int wh[2];
+
+		for (int i = 0; i < ptr->image_length; i++) {
+			/* The longest image dictionary is about 160 bytes */
+			dictionary_size = 256;
+			dictionary = malloc(dictionary_size);
+
+			if (dictionary == NULL)
+				goto fail;
+
+			memset(dictionary, 0, dictionary_size);
+
+			strcat(dictionary, "<<\n/Type /XObject\n"
+				"/Subtype /Image\n");
+
+			if ((*param)->stat > 2)
+				printf("\tDecoding data, page %04d item %02d... ",
+					ptr->page, i);
+
+			switch (ptr->image_data[i].format) {
+			case JBIG:
+				ret = cnki_jbig(&stream,
+					&stream_size,
+					&wh[0],
+					&wh[1],
+					ptr->image_data[i].image,
+					ptr->image_data[i].size);
+
+				if (ret != 0) {
+					dim[i * 2] = 0;
+					dim[i * 2 + 1] = 0;
+					break;
+				}
+
+				snprintf(buf, 64, "/Width %d\n/Height %d\n",
+					wh[0], wh[1]);
+				strcat(dictionary, buf);
+
+				strcat(dictionary, "/ColorSpace /DeviceGray\n"
+					"/BitsPerComponent 1\n");
+
+				snprintf(buf, 64, "/Length %d\n",
+					stream_size);
+				strcat(dictionary, buf);
+
+				strcat(dictionary, "/Filter /CCITTFaxDecode\n");
+
+				dim[i * 2] = wh[0];
+				dim[i * 2 + 1] = wh[1];
+				break;
+			case DCT_0:
+			case DCT_1:
+				ret = strinfo_jpeg_dim(&wh[0],
+					&wh[1],
+					ptr->image_data[i].image,
+					ptr->image_data[i].size);
+
+				if (ret != 0) {
+					dim[i * 2] = 0;
+					dim[i * 2 + 1] = 0;
+					break;
+				}
+
+				stream_size = ptr->image_data[i].size;
+				stream = malloc(stream_size);
+
+				if (stream == NULL)
+					goto fail;
+
+				memcpy(stream, ptr->image_data[i].image, stream_size);
+
+				snprintf(buf, 64, "/Width %d\n/Height %d\n",
+					wh[0], wh[1]);
+				strcat(dictionary, buf);
+
+				strcat(dictionary, "/ColorSpace /DeviceRGB\n"
+					"/BitsPerComponent 8\n");
+
+				snprintf(buf, 64, "/Length %d\n",
+					stream_size);
+				strcat(dictionary, buf);
+
+				strcat(dictionary, "/Filter /DCTDecode\n");
+
+				dim[i * 2] = wh[0];
+				dim[i * 2 + 1] = wh[1];
+				break;
+			case JBIG2:
+			case JPX:
+			default:
+				ret = -1;
+				dim[i * 2] = -1;
+				dim[i * 2 + 1] = -1;
+				break;
+			}
+
+			strcat(dictionary, ">>");
+
+			if (ret == 0) {
+				if ((*param)->stat > 2)
+					printf("Succeed\n");
+
+				pdf_obj_append(&pdf, ids[i],
+					NULL, dictionary, stream, stream_size);
+
+				free(stream);
+				stream = NULL;
+			} else if (ret == 1) {
+				if ((*param)->stat > 2)
+					printf("; Failed\n");
+
+				pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
+			}
+
+			free(dictionary);
+			dictionary = NULL;
+		}
+
+		/* "/Im%d %d 0 R " needs at most 29 bytes per image */
+		dictionary_size = 32 + 32 * ptr->image_length;
+		dictionary = malloc(dictionary_size);
+
+		if (dictionary == NULL)
+			goto fail;
+
+		memset(dictionary, 0, dictionary_size);
+
+		strcat(dictionary, "<<\n/XObject <<");
+
+		for (int i = 0; i < ptr->image_length; i++) {
+			snprintf(buf, 64, "/Im%d %d 0 R", i, ids[i]);
+			strcat(dictionary, buf);
+
+			if (i + 1 < ptr->image_length)
+				strcat(dictionary, " ");
+		}
+
+		strcat(dictionary, ">>\n>>");
+
+		pdf_obj_append(&pdf, ids[ptr->image_length], NULL, dictionary, NULL, 0);
+
+		free(dictionary);
+		dictionary = NULL;
+
+		/*
+		 * The "COMPRESSTEXT" marker occupies bytes 8 to 19, so shorter
+		 * text cannot be compressed. Compressed text holds one
+		 * character per 16 bytes, plain text one per 4 bytes.
+		 */
+		if (ptr->text != NULL && ptr->text_size >= 20 &&
+			strncmp(ptr->text + 8, "COMPRESSTEXT", 12) == 0) {
+			/*
+			 * Assumes cnki_zlib() returns 0 on success like the
+			 * other helpers used here; the text is skipped otherwise.
+			 */
+			if (cnki_zlib(&stream, &stream_size,
+				ptr->text, ptr->text_size) == 0) {
+				dictionary = cnki_pdf_hn_text(stream,
+					stream_size, 16, 6);
+				free(stream);
+			}
+
+			stream = NULL;
+		} else if (ptr->text != NULL) {
+			dictionary = cnki_pdf_hn_text(ptr->text,
+				ptr->text_size, 4, 2);
+		}
+
+		/* FIXME: Use the text somehow? */
+		free(dictionary);
+		dictionary = NULL;
+
+		/*
+		 * Shared by the content stream, its dictionary and the page
+		 * dictionary. The content stream needs up to 67 bytes per
+		 * image, the page dictionary up to about 100 bytes.
+		 */
+		dictionary_size = 128 + 96 * ptr->image_length;
+		dictionary = malloc(dictionary_size);
+
+		if (dictionary == NULL)
+			goto fail;
+
+		memset(dictionary, 0, dictionary_size);
+
+		strcat(dictionary, "q\n");
+
+		strcat(dictionary, "0.120000 0 0 0.120000 0 0 cm\n");
+
+		for (int i = 0; i < ptr->image_length; i++) {
+			if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0)
+				continue;
+
+			/* Apply transformation matrix */
+			if (ptr->image_data[i].format == DCT_1)
+				strcat(dictionary, "-1 0 0 -1 0 0 cm\n");
+
+			snprintf(buf, 64, "%d 0 0 %d 0 0 cm\n",
+				dim[i * 2], dim[i * 2 + 1]);
+			strcat(dictionary, buf);
+
+			snprintf(buf, 64, "/Im%d Do\n", i);
+			strcat(dictionary, buf);
+		}
+
+		strcat(dictionary, "Q");
+
+		if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0) {
+			/* Nothing of stream is known to be valid after a failure */
+			stream = NULL;
+			goto fail;
+		}
+
+		memset(dictionary, 0, dictionary_size);
+
+		strcat(dictionary, "<<\n");
+
+		snprintf(buf, 64, "/Length %d\n", stream_size);
+		strcat(dictionary, buf);
+
+		strcat(dictionary, "/Filter /FlateDecode\n");
+
+		strcat(dictionary, ">>");
+
+		pdf_obj_append(&pdf, ids[ptr->image_length + 1],
+			NULL, dictionary, stream, stream_size);
+
+		free(stream);
+		stream = NULL;
+
+		memset(dictionary, 0, dictionary_size);
+
+		strcat(dictionary, "<<\n/Type /Page\n");
+
+		snprintf(buf, 64, "/Resources %d 0 R\n", ids[ptr->image_length]);
+		strcat(dictionary, buf);
+
+		snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length + 1]);
+		strcat(dictionary, buf);
+
+		/* A4 paper */
+		strcat(dictionary, "/MediaBox [ 0 0 595.276 841.89 ]\n");
+
+		/* Left unterminated: /Parent and ">>" follow once root is known */
+		pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0);
+
+		free(dictionary);
+		dictionary = NULL;
+
+		root_kid[cnt++] = ids[ptr->image_length + 2];
+
+		free(ids);
+		ids = NULL;
+
+		free(dim);
+		dim = NULL;
+
+		ptr = ptr->next;
+	}
+
+	if ((*param)->stat > 1) {
+		printf("\t%8s\t%12s\t%12s\t%12s\n",
+			"id",
+			"object",
+			"dictionary",
+			"stream");
+
+		for (pdf_object_t *obj = pdf->next; obj != NULL; obj = obj->next)
+			printf("\t%8d\t%12d\t%12d\t%12d\n",
+				obj->id,
+				obj->object_size,
+				obj->dictionary_size,
+				obj->stream_size);
+	}
+
+	if ((*param)->stat > 0)
+		printf("Generated %d object(s)\n",
+			pdf_get_count(&pdf));
+
+	if ((*param)->file_stat->outline > 0) {
+		if ((*param)->stat > 1)
+			printf("Generating outline object(s)\n\t%8s\n", "id");
+
+		pdf_get_free_ids(&pdf, &ids, (*param)->file_stat->outline + 1);
+
+		if (ids == NULL)
+			goto fail;
+
+		int outline = pdf_cnki_outline(&pdf, &(*param)->object_outline, &ids);
+
+		if ((*param)->stat > 1)
+			for (int i = 0; i < (*param)->file_stat->outline + 1; i++)
+				printf("\t%8d\n", ids[i]);
+
+		if ((*param)->stat > 0) {
+			if (outline != 0)
+				printf("No outline information\n");
+			else
+				printf("Generated %d outline object(s)\n",
+					(*param)->file_stat->outline + 1);
+		}
+	}
+
+	if ((*param)->stat > 1)
+		printf("Generating root object\n");
+
+	/* "%d 0 R " needs at most 15 bytes per page */
+	dictionary_size = 64 + 16 * cnt;
+	dictionary = malloc(dictionary_size);
+
+	if (dictionary == NULL)
+		goto fail;
+
+	memset(dictionary, 0, dictionary_size);
+
+	int root = pdf_get_free_id(&pdf);
+
+	/* /Kids has to be an array even when there is a single page */
+	snprintf(buf, 64, "<<\n/Type /Pages\n/Kids [");
+	strcat(dictionary, buf);
+
+	for (int i = 0; i < cnt; i++) {
+		snprintf(buf, 64, "%d 0 R", root_kid[i]);
+		strcat(dictionary, buf);
+
+		if (i + 1 < cnt)
+			strcat(dictionary, " ");
+	}
+
+	strcat(dictionary, "]\n");
+
+	snprintf(buf, 64, "/Count %d\n", cnt);
+	strcat(dictionary, buf);
+
+	strcat(dictionary, ">>");
+
+	pdf_obj_prepend(&pdf, root, NULL, dictionary, NULL, 0);
+
+	free(dictionary);
+	dictionary = NULL;
+
+	dictionary_size = 256;
+	dictionary = malloc(dictionary_size);
+
+	if (dictionary == NULL)
+		goto fail;
+
+	pdf_object_t *tmp = NULL;
+
+	/* Add /Parent to page object */
+	for (int i = 0; i < cnt; i++) {
+		if (pdf_get_obj(&pdf, root_kid[i], &tmp) != 0 ||
+			tmp == NULL || tmp->dictionary == NULL)
+			goto fail;
+
+		int len = snprintf(dictionary, dictionary_size,
+			"%s/Parent %d 0 R\n>>", tmp->dictionary, root);
+
+		if (len < 0 || len >= dictionary_size)
+			goto fail;
+
+		if (pdf_obj_replace(&pdf, root_kid[i], NULL, dictionary, NULL, 0) != 0)
+			goto fail;
+	}
+
+	free(root_kid);
+	root_kid = NULL;
+
+	memset(dictionary, 0, dictionary_size);
+
+	if ((*param)->stat > 0)
+		printf("Generated root object %d.\n",
+			root);
+
+	if ((*param)->stat > 1)
+		printf("Generating catalog object\n");
+
+	snprintf(buf, 64,
+		"<<\n/Type /Catalog\n/Pages %d 0 R\n",
+		root);
+	strcat(dictionary, buf);
+
+	if (ids != NULL) {
+		snprintf(buf, 64,
+			"/Outlines %d 0 R\n/PageMode /UseOutlines\n",
+			ids[0]);
+		strcat(dictionary, buf);
+	}
+
+	strcat(dictionary, ">>");
+
+	pdf_obj_append(&pdf, 0, NULL, dictionary, NULL, 0);
+
+	free(dictionary);
+	dictionary = NULL;
+
+	/* The outline ids are not referenced past this point */
+	free(ids);
+	ids = NULL;
+
+	if ((*param)->stat > 0)
+		printf("Generated catalog object\n");
+
+	if ((*param)->stat > 1)
+		printf("Sorting object(s)\n");
+
+	pdf_obj_sort(&pdf);
+
+	if ((*param)->stat > 0)
+		printf("Sorted object(s)\n");
+
+	if ((*param)->stat > 1)
+		printf("Writing header\n");
+
+	long cur = 0;
+
+	if ((*param)->stat > 0)
+		cur = ftell((*param)->fp_o);
+
+	if (pdf_dump_header(&pdf, &(*param)->fp_o) != 0) {
+		fprintf(stderr, "Header not written\n");
+		goto fail;
+	} else {
+		if ((*param)->stat > 0)
+			printf("Header %ld byte(s) written\n",
+				ftell((*param)->fp_o) - cur);
+	}
+
+	if ((*param)->stat > 1)
+		printf("Writing object(s)\n");
+
+	pdf_dump_obj(&pdf, &(*param)->fp_o);
+
+	if ((*param)->stat > 1) {
+		printf("\t%8s\t%8s\t%8s\t%12s\t%12s\t%12s\n",
+			"address",
+			"size",
+			"id",
+			"object",
+			"dictionary",
+			"stream");
+
+		for (pdf_object_t *obj = pdf->next; obj != NULL; obj = obj->next)
+			printf("\t%08x\t%8d\t%8d\t%12d\t%12d\t%12d\n",
+				obj->address,
+				obj->size,
+				obj->id,
+				obj->object_size,
+				obj->dictionary_size,
+				obj->stream_size);
+	}
+
+	if ((*param)->stat > 0)
+		printf("%d object(s) %ld byte(s) written\n",
+			pdf_get_count(&pdf),
+			ftell((*param)->fp_o));
+
+	long xref = ftell((*param)->fp_o);
+
+	if ((*param)->stat > 1)
+		printf("Writing cross-reference table\n");
+
+	if (pdf_dump_xref(&pdf, &(*param)->fp_o) != 0) {
+		if ((*param)->stat > 0)
+			printf("Cross-reference table not written\n");
+	} else {
+		if ((*param)->stat > 0)
+			printf("Cross-reference table %ld byte(s) written\n",
+				ftell((*param)->fp_o) - xref);
+	}
+
+	if ((*param)->stat > 1)
+		printf("Writing trailer\n");
+
+	if ((*param)->stat > 0)
+		cur = ftell((*param)->fp_o);
+
+	if (pdf_dump_trailer(&pdf, &(*param)->fp_o, xref) != 0) {
+		if ((*param)->stat > 0)
+			printf("Trailer not written\n");
+	} else {
+		if ((*param)->stat > 0)
+			printf("Trailer %ld byte(s) written\n",
+				ftell((*param)->fp_o) - cur);
+	}
+
+	if ((*param)->stat > 0)
+		printf("Total %ld byte(s) written\n",
+			ftell((*param)->fp_o));
+
+	pdf_obj_destroy(&pdf);
+
+	return 0;
+
+fail:
+	/* Pointers are reset to NULL after every release, so this is safe */
+	free(dictionary);
+	free(stream);
+	free(dim);
+	free(ids);
+	free(root_kid);
+
+	pdf_obj_destroy(&pdf);
+
+	return 1;
+}
diff --git a/src/cnki_xml.c b/src/cnki_xml.c
index f8e693c..7f870d1 100644
--- a/src/cnki_xml.c
+++ b/src/cnki_xml.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
diff --git a/src/cnki_zlib.c b/src/cnki_zlib.c
index fd4cedf..7731036 100644
--- a/src/cnki_zlib.c
+++ b/src/cnki_zlib.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
diff --git a/src/extern.h b/src/extern.h
index 4109396..3d71df3 100644
--- a/src/extern.h
+++ b/src/extern.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
diff --git a/src/iconv.c b/src/iconv.c
index f5a3dbe..ae7764b 100644
--- a/src/iconv.c
+++ b/src/iconv.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
diff --git a/src/iconv.h b/src/iconv.h
index da7fefa..5a2bb6b 100644
--- a/src/iconv.h
+++ b/src/iconv.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
diff --git a/src/jbig.c b/src/jbig.c
new file mode 100644
index 0000000..1e9afb3
--- /dev/null
+++ b/src/jbig.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdio.h> /* FIXME: test */
+#include <stdlib.h>
+#include <string.h>
+
+#include <jbig.h>
+
+/*
+ * Decode a complete JBIG bi-level image entity (BIE) held in memory.
+ *
+ * data points to data_size bytes starting with the 20-byte image header.
+ * On success 0 is returned and *bitmap is a malloc()ed copy of the first
+ * decoded bit-plane, *bitmap_size bytes long and owned by the caller.
+ * On failure 1 is returned and *bitmap is set to NULL; a decoder error
+ * message is printed to standard output.
+ */
+int
+strdec_jbig(char **bitmap, int *bitmap_size,
+	const char * restrict data, int data_size)
+{
+	if (bitmap == NULL || bitmap_size == NULL)
+		return 1;
+
+	*bitmap = NULL;
+
+	if (data == NULL || data_size <= 0)
+		return 1;
+
+	struct jbg_dec_state sd;
+
+	jbg_dec_init(&sd);
+
+	/* The decoder takes the data itself, it does not modify it */
+	int ret = jbg_dec_in(&sd, (unsigned char *) data,
+		data_size, NULL);
+
+	/* FIXME: test */
+	if (ret != JBG_EOK) {
+		printf("%s", jbg_strerror(ret));
+		jbg_dec_free(&sd);
+		return 1;
+	}
+
+	int size = jbg_dec_getsize(&sd);
+	unsigned char *image = jbg_dec_getimage(&sd, 0);
+
+	/* The image belongs to sd, copy it before the state is freed */
+	if (size > 0 && image != NULL)
+		*bitmap = malloc(size);
+
+	if (*bitmap != NULL) {
+		memcpy(*bitmap, image, size);
+		*bitmap_size = size;
+	}
+
+	jbg_dec_free(&sd);
+
+	return *bitmap != NULL ? 0 : 1;
+}
diff --git a/src/jbig.h b/src/jbig.h
new file mode 100644
index 0000000..170eda1
--- /dev/null
+++ b/src/jbig.h
@@ -0,0 +1,8 @@
+/*
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+int strdec_jbig(char **bitmap, int *bitmap_size,
+ const char * restrict data, int data_size);
diff --git a/src/jpeg.c b/src/jpeg.c
new file mode 100644
index 0000000..4ea4d7f
--- /dev/null
+++ b/src/jpeg.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <setjmp.h>
+#include <stdio.h>
+
+#include <jpeglib.h>
+
+/* Error manager extended with the jump target used to leave libjpeg */
+struct strinfo_jpeg_err {
+	struct jpeg_error_mgr mgr;	/* Must stay the first member */
+	jmp_buf env;
+};
+
+/* Replace the default handler, which terminates the whole process */
+static void
+strinfo_jpeg_error_exit(j_common_ptr cinfo)
+{
+	struct strinfo_jpeg_err *err = (struct strinfo_jpeg_err *) cinfo->err;
+
+	longjmp(err->env, 1);
+}
+
+/*
+ * Read the dimensions of a JPEG image held in memory.
+ *
+ * Only the header is parsed, no image data is decoded. Returns 0 and
+ * stores the width and height in pixels on success. Returns 1 if an
+ * argument is invalid or the data is not a usable JPEG stream; the
+ * outputs are left untouched in that case.
+ */
+int
+strinfo_jpeg_dim(int *jpeg_width, int *jpeg_height,
+	const char * restrict data, int data_size)
+{
+	if (jpeg_width == NULL || jpeg_height == NULL ||
+		data == NULL || data_size <= 0)
+		return 1;
+
+	struct jpeg_decompress_struct cinfo;
+	struct strinfo_jpeg_err jerr;
+
+	cinfo.err = jpeg_std_error(&jerr.mgr);
+	jerr.mgr.error_exit = strinfo_jpeg_error_exit;
+
+	/* Fatal libjpeg errors come back here through longjmp() */
+	if (setjmp(jerr.env) != 0) {
+		jpeg_destroy_decompress(&cinfo);
+		return 1;
+	}
+
+	jpeg_create_decompress(&cinfo);
+
+	jpeg_mem_src(&cinfo, (unsigned char *) data, data_size);
+
+	if (jpeg_read_header(&cinfo, TRUE) != JPEG_HEADER_OK) {
+		jpeg_destroy_decompress(&cinfo);
+		return 1;
+	}
+
+	jpeg_calc_output_dimensions(&cinfo);
+
+	*jpeg_width = cinfo.output_width;
+	*jpeg_height = cinfo.output_height;
+
+	/* Releases everything, a separate jpeg_destroy() is not needed */
+	jpeg_destroy_decompress(&cinfo);
+
+	return 0;
+}
diff --git a/src/jpeg.h b/src/jpeg.h
new file mode 100644
index 0000000..db35d94
--- /dev/null
+++ b/src/jpeg.h
@@ -0,0 +1,8 @@
+/*
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+int strinfo_jpeg_dim(int *jpeg_width, int *jpeg_height,
+ const char * restrict data, int data_size);
diff --git a/src/melon.c b/src/melon.c
index 375cf09..9a90551 100644
--- a/src/melon.c
+++ b/src/melon.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@@ -82,7 +82,7 @@ main(int argc, char **argv, char **envp)
if (param->stat > 0)
printf("Melon " VERSION "." RELEASE "." PATCH EXTRA "\n"
- "Copyright (c) 2020, yzrh <yzrh@noema.org>\n\n");
+ "Copyright (c) 2020-2021, yzrh <yzrh@noema.org>\n\n");
cnki_info(&param);
diff --git a/src/pdf.c b/src/pdf.c
index 202b6d2..6700e5b 100644
--- a/src/pdf.c
+++ b/src/pdf.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@@ -67,7 +67,8 @@ int
pdf_obj_add(pdf_object_t **pdf, int id,
const char * restrict object,
const char * restrict dictionary,
- const char * restrict stream)
+ const char * restrict stream,
+ int stream_size)
{
if (*pdf != NULL || id <= 0 ||
(object != NULL && dictionary != NULL))
@@ -112,14 +113,15 @@ pdf_obj_add(pdf_object_t **pdf, int id,
(*pdf)->dictionary = NULL;
}
- if (stream != NULL) {
- (*pdf)->stream_size = sizeof(stream);
+ if (stream != NULL && stream_size > 0) {
+ (*pdf)->stream_size = stream_size + 1;
(*pdf)->stream = malloc((*pdf)->stream_size);
if ((*pdf)->stream == NULL)
return 1;
memcpy((*pdf)->stream, stream, (*pdf)->stream_size);
+ (*pdf)->stream[(*pdf)->stream_size - 1] = '\n';
} else {
(*pdf)->stream_size = 0;
(*pdf)->stream = NULL;
@@ -153,7 +155,8 @@ int
pdf_obj_prepend(pdf_object_t **pdf, int id,
const char * restrict object,
const char * restrict dictionary,
- const char * restrict stream)
+ const char * restrict stream,
+ int stream_size)
{
if (*pdf == NULL)
return 1;
@@ -163,7 +166,8 @@ pdf_obj_prepend(pdf_object_t **pdf, int id,
pdf_object_t *ptr = NULL;
- if (pdf_obj_add(&ptr, id, object, dictionary, stream) != 0) {
+ if (pdf_obj_add(&ptr, id, object, dictionary,
+ stream, stream_size) != 0) {
free(ptr);
return 1;
}
@@ -178,7 +182,8 @@ int
pdf_obj_append(pdf_object_t **pdf, int id,
const char * restrict object,
const char * restrict dictionary,
- const char * restrict stream)
+ const char * restrict stream,
+ int stream_size)
{
if (*pdf == NULL)
return 1;
@@ -190,13 +195,68 @@ pdf_obj_append(pdf_object_t **pdf, int id,
while (ptr->next != NULL)
ptr = ptr->next;
- if (pdf_obj_add(&ptr->next, id, object, dictionary, stream) != 0)
+ if (pdf_obj_add(&ptr->next, id, object, dictionary,
+ stream, stream_size) != 0)
return 1;
return 0;
}
+/*
+ * Replace the contents of the existing object `id` in the list `pdf`.
+ *
+ * At most one of `object` and `dictionary` may be given; whichever is
+ * non-NULL replaces the corresponding buffer. If `stream` is non-NULL and
+ * `stream_size` is positive, the stream is replaced by a copy of those
+ * `stream_size` bytes followed by a '\n'.
+ *
+ * Returns 0 on success. Returns 1 if pdf_get_obj() fails, if both `object`
+ * and `dictionary` are given, or if an allocation fails; a buffer that was
+ * already replaced before a failed allocation stays replaced.
+ */
int
+pdf_obj_replace(pdf_object_t **pdf, int id,
+	const char * restrict object,
+	const char * restrict dictionary,
+	const char * restrict stream,
+	int stream_size)
+{
+	pdf_object_t *ptr;
+	char *ret;
+	size_t len;
+
+	if (pdf_get_obj(pdf, id, &ptr) != 0)
+		return 1;
+
+	if (object != NULL && dictionary != NULL)
+		return 1;
+
+	/*
+	 * NOTE(review): the dictionary and object buffers are stored without a
+	 * terminating NUL; confirm every reader honours the *_size fields.
+	 */
+	if (dictionary != NULL) {
+		len = strlen(dictionary);
+		ret = realloc(ptr->dictionary, len);
+
+		if (ret == NULL)
+			return 1;
+
+		ptr->dictionary_size = len;
+		ptr->dictionary = ret;
+
+		memcpy(ptr->dictionary, dictionary, len);
+	} else if (object != NULL) {
+		len = strlen(object);
+		ret = realloc(ptr->object, len);
+
+		if (ret == NULL)
+			return 1;
+
+		ptr->object_size = len;
+		ptr->object = ret;
+
+		memcpy(ptr->object, object, len);
+	}
+
+	if (stream != NULL && stream_size > 0) {
+		ret = realloc(ptr->stream, stream_size + 1);
+
+		if (ret == NULL)
+			return 1;
+
+		ptr->stream_size = stream_size + 1;
+		ptr->stream = ret;
+
+		/*
+		 * Copy only the caller's stream_size bytes; the extra slot is
+		 * for the trailing newline, not for a byte read past `stream`.
+		 */
+		memcpy(ptr->stream, stream, stream_size);
+		ptr->stream[stream_size] = '\n';
+	}
+
+	return 0;
+}
+
+int
pdf_obj_sort(pdf_object_t **pdf)
{
if (*pdf == NULL)
diff --git a/src/pdf.h b/src/pdf.h
index a5630d8..f02035a 100644
--- a/src/pdf.h
+++ b/src/pdf.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@@ -26,16 +26,24 @@ void pdf_obj_destroy(pdf_object_t **pdf);
int pdf_obj_add(pdf_object_t **pdf, int id,
const char * restrict object,
const char * restrict dictionary,
- const char * restrict stream);
+ const char * restrict stream,
+ int stream_size);
int pdf_obj_del(pdf_object_t **pdf, int id);
int pdf_obj_prepend(pdf_object_t **pdf, int id,
const char * restrict object,
const char * restrict dictionary,
- const char * restrict stream);
+ const char * restrict stream,
+ int stream_size);
int pdf_obj_append(pdf_object_t **pdf, int id,
const char * restrict object,
const char * restrict dictionary,
- const char * restrict stream);
+ const char * restrict stream,
+ int stream_size);
+/*
+ * Replace the object text or the dictionary (not both) and/or the stream
+ * of the existing object `id`. Returns 0 on success and 1 on failure.
+ */
+int pdf_obj_replace(pdf_object_t **pdf, int id,
+ const char * restrict object,
+ const char * restrict dictionary,
+ const char * restrict stream,
+ int stream_size);
int pdf_obj_sort(pdf_object_t **pdf);
/* pdf_parser.c */
diff --git a/src/pdf_cnki.c b/src/pdf_cnki.c
index 6e5f810..84274b8 100644
--- a/src/pdf_cnki.c
+++ b/src/pdf_cnki.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@@ -93,7 +93,7 @@ _outline(pdf_object_t **pdf, object_outline_tree_t **outline_tree, int id, int *
atoi(ptr->item->page) - 1);
strcat(dictionary, buf);
- pdf_obj_append(pdf, ptr->id, NULL, dictionary, NULL);
+ pdf_obj_append(pdf, ptr->id, NULL, dictionary, NULL, 0);
if (ptr->left == NULL)
(*stat)[1] = ptr->id;
@@ -128,7 +128,7 @@ pdf_cnki_outline(pdf_object_t **pdf, object_outline_t **outline, int **ids)
free(ret);
- pdf_obj_append(pdf, (*ids)[0], NULL, buf, NULL);
+ pdf_obj_append(pdf, (*ids)[0], NULL, buf, NULL, 0);
return 0;
}
diff --git a/src/pdf_cnki.h b/src/pdf_cnki.h
index 6885d4f..9ec2e1c 100644
--- a/src/pdf_cnki.h
+++ b/src/pdf_cnki.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
diff --git a/src/pdf_get.c b/src/pdf_get.c
index c5ab788..f72f4aa 100644
--- a/src/pdf_get.c
+++ b/src/pdf_get.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
diff --git a/src/pdf_parser.c b/src/pdf_parser.c
index 9531d28..3b29c52 100644
--- a/src/pdf_parser.c
+++ b/src/pdf_parser.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
diff --git a/src/pdf_writer.c b/src/pdf_writer.c
index cda998a..cd188fc 100644
--- a/src/pdf_writer.c
+++ b/src/pdf_writer.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
diff --git a/src/version.h b/src/version.h
index ea04c55..4731e6a 100644
--- a/src/version.h
+++ b/src/version.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
diff --git a/src/zlib.c b/src/zlib.c
index 49004b7..76f049e 100644
--- a/src/zlib.c
+++ b/src/zlib.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
@@ -20,12 +20,34 @@ strinflate(char **dst, int dst_size,
unsigned long size = dst_size;
- uncompress((Bytef *) *dst, &size, (const Bytef *) src, src_size);
+ if (uncompress((Bytef *) *dst,
+ &size, (const Bytef *) src, src_size) != Z_OK) {
+ free(*dst);
+ return 1;
+ }
+
+ return 0;
+}
- if (size != dst_size) {
+/*
+ * Compress src_size bytes at src with zlib's compress().
+ *
+ * On success *dst points to a malloc'ed buffer holding the compressed data
+ * (owned by the caller), *dst_size is its length in bytes, and 0 is
+ * returned. On failure 1 is returned and *dst is NULL.
+ */
+int
+strdeflate(char **dst, int *dst_size,
+	const char * restrict src, int src_size)
+{
+	/* A negative length would wrap when converted for compressBound() */
+	if (src_size < 0) {
+		*dst = NULL;
+		return 1;
+	}
+
+	*dst_size = compressBound(src_size);
+	*dst = malloc(*dst_size);
+
+	if (*dst == NULL)
+		return 1;
+
+	unsigned long size = *dst_size;
+
+	if (compress((Bytef *) *dst, &size,
+		(const Bytef *) src, src_size) != Z_OK) {
free(*dst);
+		/* Do not hand the caller a dangling pointer */
+		*dst = NULL;
return 1;
}
+	/* compress() updated size to the number of bytes actually written */
+	*dst_size = size;
+
return 0;
}
diff --git a/src/zlib.h b/src/zlib.h
index 1563c6c..6c9f36a 100644
--- a/src/zlib.h
+++ b/src/zlib.h
@@ -1,8 +1,11 @@
/*
- * Copyright (c) 2020, yzrh <yzrh@noema.org>
+ * Copyright (c) 2020-2021, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
int strinflate(char **dst, int dst_size,
const char * restrict src, int src_size);
+
+/*
+ * Compress src (src_size bytes) into a newly malloc'ed buffer returned in
+ * *dst, with its length stored in *dst_size. Returns 0 on success and 1 on
+ * failure.
+ */
+int strdeflate(char **dst, int *dst_size,
+ const char * restrict src, int src_size);