aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author yzrh <yzrh@tuta.io> 2020-08-14 22:04:26 +0000
committer yzrh <yzrh@tuta.io> 2020-08-14 22:04:26 +0000
commit 12ecdd71592eccf7bdb6214edbc7318246469c1c (patch)
tree fda27e41c37a2345702ad3e90480154d975e426f
download melon-12ecdd71592eccf7bdb6214edbc7318246469c1c.tar.gz
melon-12ecdd71592eccf7bdb6214edbc7318246469c1c.tar.zst
Initial commit.
-rw-r--r-- CHANGE.md 4
-rw-r--r-- COPYING 202
-rw-r--r-- README.md 40
-rw-r--r-- src/GNUmakefile 28
-rw-r--r-- src/Makefile 28
-rw-r--r-- src/cnki.c 168
-rw-r--r-- src/cnki.h 86
-rw-r--r-- src/cnki_caj.c 40
-rw-r--r-- src/cnki_kdh.c 49
-rw-r--r-- src/cnki_nh.c 110
-rw-r--r-- src/cnki_outline_tree.c 73
-rw-r--r-- src/cnki_pdf.c 351
-rw-r--r-- src/cnki_xml.c 14
-rw-r--r-- src/extern.h 21
-rw-r--r-- src/iconv.c 70
-rw-r--r-- src/iconv.h 12
-rw-r--r-- src/melon.c 127
-rw-r--r-- src/pdf.c 228
-rw-r--r-- src/pdf.h 61
-rw-r--r-- src/pdf_cnki.c 134
-rw-r--r-- src/pdf_cnki.h 7
-rw-r--r-- src/pdf_get.c 296
-rw-r--r-- src/pdf_parser.c 216
-rw-r--r-- src/pdf_writer.c 188
-rw-r--r-- src/version.h 10
25 files changed, 2563 insertions, 0 deletions
diff --git a/CHANGE.md b/CHANGE.md
new file mode 100644
index 0000000..1622de1
--- /dev/null
+++ b/CHANGE.md
@@ -0,0 +1,4 @@
+0.1.0 (2020-04-08)
+==================
+
+* Initial release
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..55e0fa9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,40 @@
+Melon
+=====
+
+Melon: Converter that produces PDF from CNKI proprietary formats
+
+Development
+-----------
+
+Currently, PDF, CAJ, and KDH can be converted. Please report
+any failures with a sample that can reproduce the behaviour.
+
+KDH is essentially an invalid PDF file xor'ed with a predetermined key.
+You may want to convert the decrypted KDH to valid PDF, although some
+PDF readers can display the invalid PDF.
+
+Usage
+=====
+
+`make`
+
+Optionally, `make install`
+
+`melon -o OUTPUT INPUT`
+
+Options
+-------
+
+* `-o`, `--output`:
+  Specify output file
+
+* `-b`, `--buffer`:
+  Set buffer size (default 512k)
+
+* `-v`, `--verbose`:
+  Print more information (twice for even more)
+
+Thanks
+======
+
+This project is inspired by [https://github.com/JeziL/caj2pdf](https://github.com/JeziL/caj2pdf)
diff --git a/src/GNUmakefile b/src/GNUmakefile
new file mode 100644
index 0000000..118501a
--- /dev/null
+++ b/src/GNUmakefile
@@ -0,0 +1,28 @@
+#
+# Copyright (c) 2020, yzrh <yzrh@tuta.io>
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+src != ls *.c
+obj = ${src:.c=.o}
+
+PREFIX = /usr/local
+
+CFLAGS = -O3 -march=native -pipe -Wall
+# --as-needed only affects libraries named after it on the link line,
+# so it lives here and the libraries themselves go in LDLIBS.
+LDFLAGS = -Wl,-O3 -Wl,--as-needed
+LDLIBS = -lcrypto
+
+all: melon
+
+# A real (non-phony) target, so the binary is relinked only when an
+# object changed.  Libraries follow the objects that reference them.
+melon: ${obj}
+	${CC} ${LDFLAGS} -o $@ ${obj} ${LDLIBS}
+
+clean:
+	rm -f melon ${obj}
+
+install:
+	install -d ${PREFIX}/bin
+	install melon ${PREFIX}/bin/
+
+deinstall:
+	rm -f ${PREFIX}/bin/melon
+
+.PHONY: all clean install deinstall
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..33da2cd
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,28 @@
+#
+# Copyright (c) 2020, yzrh <yzrh@tuta.io>
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+src != ls *.c
+obj = ${src:.c=.o}
+
+PREFIX = /usr/local
+
+CFLAGS = -O3 -march=native -pipe -flto=thin -Wall
+# --as-needed only affects libraries named after it on the link line,
+# so it lives here and the libraries themselves go in LDLIBS.
+LDFLAGS = -Wl,-O3 -Wl,--as-needed
+LDLIBS = -lcrypto
+
+all: melon
+
+# A real (non-phony) target, so the binary is relinked only when an
+# object changed.  Libraries follow the objects that reference them.
+melon: ${obj}
+	${CC} ${LDFLAGS} -o $@ ${obj} ${LDLIBS}
+
+clean:
+	rm -f melon ${obj}
+
+install:
+	install -d ${PREFIX}/bin
+	install melon ${PREFIX}/bin/
+
+deinstall:
+	rm -f ${PREFIX}/bin/melon
+
+.PHONY: all clean install deinstall
diff --git a/src/cnki.c b/src/cnki.c
new file mode 100644
index 0000000..001be54
--- /dev/null
+++ b/src/cnki.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "cnki.h"
+
+/*
+ * Allocate a cnki_t together with its file_stat_t and store it in *param.
+ *
+ * *param must be NULL on entry; if it is not, 1 is returned and *param is
+ * left untouched.  On success 0 is returned and the new object has stat 0,
+ * size_buf 524288, no streams, zeroed file statistics and empty outline
+ * and page lists.  If an allocation fails, everything allocated so far is
+ * released, *param stays NULL and 1 is returned.
+ */
+int
+cnki_create(cnki_t **param)
+{
+	if (*param != NULL)
+		return 1;
+
+	cnki_t *cnki = malloc(sizeof *cnki);
+
+	if (cnki == NULL)
+		return 1;
+
+	/* calloc gives the zeroed statistics the original memset produced */
+	cnki->file_stat = calloc(1, sizeof *cnki->file_stat);
+
+	if (cnki->file_stat == NULL) {
+		/* Do not hand a half-built object back to the caller */
+		free(cnki);
+		return 1;
+	}
+
+	cnki->stat = 0;
+	cnki->size_buf = 524288;
+	cnki->fp_i = NULL;
+	cnki->fp_o = NULL;
+	cnki->object_outline = NULL;
+	cnki->object_nh = NULL;
+
+	*param = cnki;
+
+	return 0;
+}
+
+/*
+ * Release a cnki_t created by cnki_create() and reset *param to NULL.
+ *
+ * Frees the file statistics, every node of the outline list, every node
+ * of the page list together with its text and image buffers, and finally
+ * the object itself.  The streams fp_i and fp_o are not closed here.
+ * Calling it with *param == NULL does nothing.
+ */
+void
+cnki_destroy(cnki_t **param)
+{
+	if (*param == NULL)
+		return;
+
+	free((*param)->file_stat);
+
+	/* Both lists are singly linked through `next'; walk them entirely */
+	object_outline_t *outline = (*param)->object_outline;
+	while (outline != NULL) {
+		object_outline_t *next = outline->next;
+		free(outline);
+		outline = next;
+	}
+
+	object_nh_t *nh = (*param)->object_nh;
+	while (nh != NULL) {
+		object_nh_t *next = nh->next;
+		free(nh->text);
+		free(nh->image);
+		free(nh);
+		nh = next;
+	}
+
+	free(*param);
+
+	/* Guard the caller against use-after-free and double destroy */
+	*param = NULL;
+}
+
+/*
+ * Identify the input file and load its page count, outline count and
+ * outline entries into *param.
+ *
+ * The four bytes at ADDRESS_HEAD select the format.  "%PDF" and "KDH "
+ * return 0 immediately; "CAJ" and "HN" continue with the counts stored at
+ * the format specific offsets; anything else returns 1.  The counts are
+ * read as raw 4-byte values in host byte order.  When the outline count
+ * is positive, that many fixed-size records following the count are
+ * appended to (*param)->object_outline.
+ *
+ * Returns 0 on success and 1 if *param is NULL, the type is unknown, an
+ * allocation fails or the file is too short.  After a failure a partially
+ * built outline list may remain attached to *param.
+ */
+int
+cnki_info(cnki_t **param)
+{
+	if (*param == NULL)
+		return 1;
+
+	FILE *fp = (*param)->fp_i;
+	file_stat_t *info = (*param)->file_stat;
+	const int verbose = (*param)->stat;
+	int addr[2];
+
+	if (verbose > 1)
+		printf("Reading file header at %x\n", ADDRESS_HEAD);
+
+	if (fseek(fp, ADDRESS_HEAD, SEEK_SET) != 0 ||
+	    fread(info->type, sizeof info->type, 1, fp) != 1)
+		return 1;
+
+	/*
+	 * type holds exactly four bytes without a terminator of its own,
+	 * so it is printed and compared with explicit lengths.
+	 */
+	if (verbose > 0)
+		printf("File type is '%.4s'\n", info->type);
+
+	if (memcmp(info->type, "%PDF", 4) == 0) {
+		return 0;
+	} else if (memcmp(info->type, "CAJ", 4) == 0) {
+		addr[0] = ADDRESS_CAJ_PAGE;
+		addr[1] = ADDRESS_CAJ_OUTLINE;
+	} else if (memcmp(info->type, "HN", 3) == 0) {
+		addr[0] = ADDRESS_HN_PAGE;
+		addr[1] = ADDRESS_HN_OUTLINE;
+	} else if (memcmp(info->type, "KDH ", 4) == 0) {
+		return 0;
+	} else {
+		return 1;
+	}
+
+	if (verbose > 1)
+		printf("Reading page count at %x\n", addr[0]);
+
+	if (fseek(fp, addr[0], SEEK_SET) != 0 ||
+	    fread(&info->page, sizeof info->page, 1, fp) != 1)
+		return 1;
+
+	if (verbose > 0)
+		printf("Advised %d page(s)\n",
+		    info->page);
+
+	if (verbose > 1)
+		printf("Reading outline count at %x\n", addr[1]);
+
+	if (fseek(fp, addr[1], SEEK_SET) != 0 ||
+	    fread(&info->outline, sizeof info->outline, 1, fp) != 1)
+		return 1;
+
+	if (verbose > 0)
+		printf("Advised %d outline(s)\n",
+		    info->outline);
+
+	if (info->outline <= 0)
+		return 0;
+
+	if (verbose > 1) {
+		printf("Loading outline(s)\n");
+		printf("\t%16s\t%-24s\t%12s\t%12s\t%5s\n",
+		    "title",
+		    "hierarchy",
+		    "page",
+		    "text",
+		    "depth");
+	}
+
+	/* Points at the link that receives the next node */
+	object_outline_t **tail = &(*param)->object_outline;
+	*tail = NULL;
+
+	for (int i = 0; i < info->outline; i++) {
+		object_outline_t *ptr = calloc(1, sizeof *ptr);
+
+		if (ptr == NULL)
+			return 1;
+
+		/* Link first so the node is reachable even if reading fails */
+		*tail = ptr;
+		tail = &ptr->next;
+
+		if (fread(ptr->title, sizeof ptr->title, 1, fp) != 1 ||
+		    fread(ptr->hierarchy, sizeof ptr->hierarchy, 1, fp) != 1 ||
+		    fread(ptr->page, sizeof ptr->page, 1, fp) != 1 ||
+		    fread(ptr->text, sizeof ptr->text, 1, fp) != 1 ||
+		    fread(&ptr->depth, sizeof ptr->depth, 1, fp) != 1)
+			return 1;
+
+		if (verbose > 1) {
+			/* Hex dump of the title, eight bytes per row */
+			printf("\t");
+			for (int j = 1; j <= 256; j++) {
+				printf("%02x", (unsigned char) ptr->title[j - 1]);
+
+				/* Stop at a row end once the title is over */
+				if (j % 8 == 0 &&
+				    (j == 256 || ptr->title[j] == '\0'))
+					break;
+
+				if (j % 8 == 0)
+					printf("\n\t");
+				else if (j % 2 == 0)
+					printf(" ");
+			}
+			/* Fixed-width fields may fill their array completely */
+			printf("\t%-24.24s\t%12.12s\t%12.12s\t%5d\n",
+			    ptr->hierarchy,
+			    ptr->page,
+			    ptr->text,
+			    ptr->depth);
+		}
+	}
+
+	if (verbose > 0)
+		printf("Loaded %d outline(s)\n",
+		    info->outline);
+
+	return 0;
+}
diff --git a/src/cnki.h b/src/cnki.h
new file mode 100644
index 0000000..6e3565f
--- /dev/null
+++ b/src/cnki.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef CNKI_H
+#define CNKI_H
+
+#include <stdint.h>
+#include <stdio.h>
+
+/* File offset of the four type bytes that identify the format */
+#define ADDRESS_HEAD	0x0000
+
+/* 'CAJ': offsets of the page count, the outline count and the body pointer */
+#define ADDRESS_CAJ_PAGE	0x0010
+#define ADDRESS_CAJ_OUTLINE	0x0110
+#define ADDRESS_CAJ_BODY	0x0014
+
+/* 'HN': offsets of the page count and the outline count */
+#define ADDRESS_HN_PAGE	0x0090
+#define ADDRESS_HN_OUTLINE	0x0158
+
+/* 'KDH': offset at which the xor'ed body starts */
+#define ADDRESS_KDH_BODY	0x00fe
+
+/* Repeating xor key applied to the 'KDH' body, and its length in bytes */
+#define KEY_KDH		"FZHMEI"
+#define KEY_KDH_LENGTH	6
+
+/* Basic facts read from the file header */
+typedef struct _file_stat_t {
+	char type[4];		/* Raw type bytes; no room for a terminator */
+	int32_t page;		/* Page count advised by the file */
+	int32_t outline;	/* Outline count advised by the file */
+} file_stat_t;
+
+/* One outline record as stored in the file, kept as a singly linked list */
+typedef struct _object_outline_t {
+	char title[256]; /* Starting at file_stat_t->outline + 4 */
+	char hierarchy[24];
+	char page[12];
+	char text[12];
+	int32_t depth;
+	struct _object_outline_t *next;
+} object_outline_t;
+
+/*
+ * Node of the tree built by cnki_outline_tree(): `left' chains entries
+ * appended at the same level, `right' holds the first entry one level
+ * deeper and `up' is the node the entry was attached to.  `item' points
+ * into the outline list and is not owned by the tree.
+ */
+typedef struct _object_outline_tree_t {
+	int id;
+	struct _object_outline_t *item;
+	struct _object_outline_tree_t *up;
+	struct _object_outline_tree_t *left;
+	struct _object_outline_tree_t *right;
+} object_outline_tree_t;
+
+/* Values of object_nh_t.image_format; presumably image codecs (confirm) */
+typedef enum _nh_code {
+	CCITTFAX,
+	DCT_0,
+	DCT_1,
+	JBIG2,
+	JPX
+} nh_code;
+
+/* One page record of an 'HN' file, kept as a singly linked list */
+typedef struct _object_nh_t {
+	int32_t address; /* Starting at end of object_outline_t */
+	int32_t size;
+	int16_t page[2];
+	int32_t zero[2];
+	char *text;		/* `size' bytes read from `address' */
+	int32_t image_format; /* nh_code */
+	int32_t image_address;
+	int32_t image_size;
+	char *image;		/* `image_size' bytes read from `image_address' */
+	struct _object_nh_t *next;
+} object_nh_t;
+
+/* Conversion state shared by all cnki_* functions */
+typedef struct _cnki_t {
+	int stat;		/* Verbosity: 0 quiet, 1 progress, 2 details */
+	int size_buf;		/* I/O buffer size in bytes */
+	FILE *fp_i;		/* Input stream */
+	FILE *fp_o;		/* Output stream */
+	file_stat_t *file_stat;
+	object_outline_t *object_outline;
+	object_nh_t *object_nh;
+} cnki_t;
+
+/* cnki_pdf.c */
+int cnki_pdf(cnki_t **param);
+
+/* cnki_outline_tree.c */
+int cnki_outline_tree(object_outline_tree_t **outline_tree,
+	object_outline_t **outline, int *ids);
+
+/* cnki_xml.c */
+int cnki_xml(char **xml, FILE **fp);
+
+#endif /* CNKI_H */
diff --git a/src/cnki_caj.c b/src/cnki_caj.c
new file mode 100644
index 0000000..c541064
--- /dev/null
+++ b/src/cnki_caj.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+
+#include "cnki.h"
+
+/*
+ * Convert a 'CAJ' input to PDF.
+ *
+ * The body is located through two levels of indirection: the 4-byte
+ * value stored at ADDRESS_CAJ_BODY is a file offset, and the 4-byte value
+ * stored at that offset is the offset of the body itself.  The input
+ * stream is left positioned there and cnki_pdf() does the conversion.
+ *
+ * Returns 1 if *param is NULL or the offsets cannot be read or sought
+ * to; otherwise returns the result of cnki_pdf().
+ */
+int
+cnki_caj(cnki_t **param)
+{
+	if (*param == NULL)
+		return 1;
+
+	if ((*param)->stat > 0)
+		printf("Begin 'CAJ' conversion\n");
+
+	if ((*param)->stat > 1)
+		printf("Reading document body address at %x\n", ADDRESS_CAJ_BODY);
+
+	FILE *fp = (*param)->fp_i;
+	int32_t addr = 0;
+
+	/* Any short read or failed seek would leave addr meaningless */
+	if (fseek(fp, ADDRESS_CAJ_BODY, SEEK_SET) != 0 ||
+	    fread(&addr, sizeof addr, 1, fp) != 1 ||
+	    fseek(fp, addr, SEEK_SET) != 0 ||
+	    fread(&addr, sizeof addr, 1, fp) != 1 ||
+	    fseek(fp, addr, SEEK_SET) != 0)
+		return 1;
+
+	if ((*param)->stat > 0)
+		printf("Advised document body address is %x\n", addr);
+
+	/* Report a failed conversion instead of always claiming success */
+	int ret = cnki_pdf(param);
+
+	if ((*param)->stat > 0)
+		printf("Conversion ended\n");
+
+	return ret;
+}
diff --git a/src/cnki_kdh.c b/src/cnki_kdh.c
new file mode 100644
index 0000000..8441319
--- /dev/null
+++ b/src/cnki_kdh.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "cnki.h"
+
+/*
+ * Decrypt a 'KDH' input: copy everything from ADDRESS_KDH_BODY to the end
+ * of the input stream to the output stream, xor'ing each byte with the
+ * repeating key KEY_KDH.
+ *
+ * Exactly as many bytes are written as were read, including a final
+ * chunk that is shorter than the buffer.  Returns 0 on success and 1 if
+ * *param is NULL, size_buf is not positive, or seeking, reading or
+ * writing fails.
+ */
+int
+cnki_kdh(cnki_t **param)
+{
+	if (*param == NULL)
+		return 1;
+
+	/* size_buf sizes the buffer below and must therefore be positive */
+	if ((*param)->size_buf <= 0)
+		return 1;
+
+	if ((*param)->stat > 0)
+		printf("Begin 'KDH' decryption\n");
+
+	if (fseek((*param)->fp_i, ADDRESS_KDH_BODY, SEEK_SET) != 0)
+		return 1;
+
+	const char key[] = KEY_KDH;
+	const int key_len = KEY_KDH_LENGTH;
+	int key_cur = 0;
+
+	char buf[(*param)->size_buf];
+	const size_t cap = (size_t) (*param)->size_buf;
+	size_t len;
+
+	/* Byte-sized items make fread report how much a short read got */
+	while ((len = fread(buf, 1, cap, (*param)->fp_i)) > 0) {
+		for (size_t i = 0; i < len; i++) {
+			buf[i] ^= key[key_cur];
+			key_cur = (key_cur + 1) % key_len;
+		}
+
+		if (fwrite(buf, 1, len, (*param)->fp_o) != len)
+			return 1;
+	}
+
+	if (ferror((*param)->fp_i))
+		return 1;
+
+	if ((*param)->stat > 0)
+		printf("Decryption ended total %ld byte(s) written\n",
+		    ftell((*param)->fp_o));
+
+	return 0;
+}
diff --git a/src/cnki_nh.c b/src/cnki_nh.c
new file mode 100644
index 0000000..7b9378f
--- /dev/null
+++ b/src/cnki_nh.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+
+#include "cnki.h"
+
+/*
+ * Seek to `address' in fp and read `size' bytes into a newly allocated
+ * buffer stored in *dst.  Returns 0 on success and 1 on a negative size,
+ * allocation failure, failed seek or short read; *dst may then hold a
+ * buffer that the list owner is expected to free.
+ */
+static int
+cnki_nh_read(char **dst, int32_t address, int32_t size, FILE *fp)
+{
+	if (size < 0)
+		return 1;
+
+	/* malloc(0) may legitimately return NULL; always ask for a byte */
+	*dst = malloc(size > 0 ? (size_t) size : 1);
+
+	if (*dst == NULL)
+		return 1;
+
+	if (fseek(fp, address, SEEK_SET) != 0)
+		return 1;
+
+	if (size > 0 && fread(*dst, (size_t) size, 1, fp) != 1)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Load the page table of an 'HN' input into (*param)->object_nh.
+ *
+ * One record per advised page is read from the current position of the
+ * input stream, then the text block and image block of every page are
+ * loaded from the addresses given in the records.
+ *
+ * The conversion itself is not implemented yet, so this function returns
+ * 1 on every path, including after a complete load.
+ */
+int
+cnki_nh(cnki_t **param)
+{
+	if (*param == NULL)
+		return 1;
+
+	if ((*param)->stat > 0)
+		printf("Begin 'HN' conversion\n");
+
+	if ((*param)->file_stat->page <= 0)
+		return 1;
+
+	if ((*param)->stat > 1) {
+		printf("Loading page(s)\n");
+		printf("\t%8s\t%8s\t%13s\t%6s\t%4s\t%8s\t%8s\n",
+		    "address",
+		    "text",
+		    "page",
+		    "zero",
+		    "code",
+		    "address",
+		    "image");
+	}
+
+	FILE *fp = (*param)->fp_i;
+
+	/* Points at the link that receives the next node */
+	object_nh_t **tail = &(*param)->object_nh;
+	*tail = NULL;
+
+	for (int i = 0; i < (*param)->file_stat->page; i++) {
+		object_nh_t *ptr = calloc(1, sizeof *ptr);
+
+		if (ptr == NULL)
+			return 1;
+
+		ptr->text = NULL;
+		ptr->image_format = -1;
+		ptr->image = NULL;
+		ptr->next = NULL;
+
+		/* Link first so the node is reachable even if reading fails */
+		*tail = ptr;
+		tail = &ptr->next;
+
+		if (fread(&ptr->address, 4, 1, fp) != 1 ||
+		    fread(&ptr->size, 4, 1, fp) != 1 ||
+		    fread(&ptr->page, 4, 1, fp) != 1 ||
+		    fread(&ptr->zero, 8, 1, fp) != 1)
+			return 1;
+	}
+
+	for (object_nh_t *ptr = (*param)->object_nh; ptr != NULL;
+	    ptr = ptr->next) {
+		if (cnki_nh_read(&ptr->text, ptr->address, ptr->size, fp) != 0)
+			return 1;
+
+		/* The image descriptor directly follows the text block */
+		if (fread(&ptr->image_format, 4, 1, fp) != 1 ||
+		    fread(&ptr->image_address, 4, 1, fp) != 1 ||
+		    fread(&ptr->image_size, 4, 1, fp) != 1)
+			return 1;
+
+		if (cnki_nh_read(&ptr->image, ptr->image_address,
+		    ptr->image_size, fp) != 0)
+			return 1;
+
+		if ((*param)->stat > 1)
+			printf("\t%08x\t%8d\t{%d, %8d}\t{%d, %d}\t%4d\t%08x\t%8d\n",
+			    ptr->address,
+			    ptr->size,
+			    ptr->page[0],
+			    ptr->page[1],
+			    ptr->zero[0],
+			    ptr->zero[1],
+			    ptr->image_format,
+			    ptr->image_address,
+			    ptr->image_size);
+	}
+
+	if ((*param)->stat > 1)
+		printf("Loaded %d page(s)\n", (*param)->file_stat->page);
+
+	/* TODO: Study signed int __fastcall CAJDoc::OpenNHCAJFile(int a1, int a2) */
+
+	if ((*param)->stat > 0)
+		printf("Conversion ended\n");
+
+	/* TODO: Finish me please :) */
+	return 1;
+}
diff --git a/src/cnki_outline_tree.c b/src/cnki_outline_tree.c
new file mode 100644
index 0000000..7d16ddb
--- /dev/null
+++ b/src/cnki_outline_tree.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+
+#include "cnki.h"
+
+/*
+ * Allocate a tree node referring to `item' and attached below `up'.
+ * Both child links start out empty; the id is left for the caller to
+ * assign.  Returns NULL when the allocation fails.
+ */
+static object_outline_tree_t *
+outline_tree_node(object_outline_t *item, object_outline_tree_t *up)
+{
+	object_outline_tree_t *node = malloc(sizeof(object_outline_tree_t));
+
+	if (node != NULL) {
+		node->item = item;
+		node->up = up;
+		node->left = NULL;
+		node->right = NULL;
+	}
+
+	return node;
+}
+
+/*
+ * Build a tree from the flat outline list.
+ *
+ * *outline_tree must be NULL and *outline non-NULL, otherwise 1 is
+ * returned.  A root node without an item is created first; every list
+ * entry then gets its own node.  Ids are taken from `ids' in creation
+ * order, the root using ids[0].  Returns 0 on success and 1 when an
+ * allocation fails, in which case the nodes created so far stay linked
+ * under *outline_tree.
+ */
+int
+cnki_outline_tree(object_outline_tree_t **outline_tree,
+	object_outline_t **outline, int *ids)
+{
+	if (*outline_tree != NULL || *outline == NULL)
+		return 1;
+
+	int next_id = 0;
+
+	*outline_tree = outline_tree_node(NULL, NULL);
+
+	if (*outline_tree == NULL)
+		return 1;
+
+	object_outline_tree_t *cur = *outline_tree;
+	cur->id = ids[next_id++];
+
+	object_outline_t *entry = *outline;
+	while (entry != NULL) {
+		object_outline_tree_t *node;
+
+		if (cur->item != NULL && entry->depth != cur->item->depth) {
+			if (entry->depth != cur->item->depth + 1) {
+				/* Neither sibling nor child: retry one node up */
+				cur = cur->up;
+				continue;
+			}
+
+			/* One level deeper: hang it on the right link */
+			node = outline_tree_node(entry, cur);
+			cur->right = node;
+		} else {
+			/* Same level (or at the root): append to the left chain */
+			while (cur->left != NULL)
+				cur = cur->left;
+
+			node = outline_tree_node(entry, cur);
+			cur->left = node;
+		}
+
+		if (node == NULL)
+			return 1;
+
+		node->id = ids[next_id++];
+
+		cur = node;
+		entry = entry->next;
+	}
+
+	return 0;
+}
diff --git a/src/cnki_pdf.c b/src/cnki_pdf.c
new file mode 100644
index 0000000..e56decb
--- /dev/null
+++ b/src/cnki_pdf.c
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "cnki.h"
+#include "pdf.h"
+#include "pdf_cnki.h"
+
+/*
+ * Append `src' to the NUL-terminated string held in `dst', which has room
+ * for `cap' bytes in total.  Returns 0 on success and 1, leaving `dst'
+ * unchanged, when the result would not fit.
+ */
+static int
+cnki_pdf_cat(char *dst, size_t cap, const char *src)
+{
+	size_t used = strlen(dst);
+	size_t len = strlen(src);
+
+	if (len >= cap - used)
+		return 1;
+
+	memcpy(dst + used, src, len + 1);
+
+	return 0;
+}
+
+/* Print one table row per object that follows the list head `pdf'. */
+static void
+cnki_pdf_print_objects(pdf_object_t *pdf)
+{
+	printf("\t%8s\t%8s\t%8s\t%12s\t%12s\t%12s\n",
+	    "address",
+	    "size",
+	    "id",
+	    "object",
+	    "dictionary",
+	    "stream");
+
+	for (pdf_object_t *ptr = pdf->next; ptr != NULL; ptr = ptr->next)
+		printf("\t%08x\t%8d\t%8d\t%12d\t%12d\t%12d\n",
+		    ptr->address,
+		    ptr->size,
+		    ptr->id,
+		    ptr->object_size,
+		    ptr->dictionary_size,
+		    ptr->stream_size);
+}
+
+/*
+ * Load the PDF objects found in the input stream, add the structure that
+ * is missing (page tree nodes, a root page tree node, outline objects and
+ * the catalog) and write a complete PDF to the output stream.
+ *
+ * Returns 0 when the header and objects were written; as before, a failed
+ * cross-reference table or trailer is only reported in verbose mode.
+ * Returns 1 if *param is NULL, loading fails, no parent object is found,
+ * memory runs out or the header cannot be written.  All temporary
+ * storage is released on every path.
+ */
+int
+cnki_pdf(cnki_t **param)
+{
+	if (*param == NULL)
+		return 1;
+
+	pdf_object_t *pdf = NULL;
+
+	if (pdf_obj_create(&pdf) != 0)
+		return 1;
+
+	int ret = 1;
+	int err = 0;
+	int root = 0;
+	int root_kid = 0;
+	long cur = 0;
+	long xref;
+	/* NOTE(review): parent, kid and ids are assumed to be malloc'ed by
+	 * the pdf_get_* helpers, as the original free(kid) implies */
+	int *parent = NULL;
+	int *parent_missing = NULL;
+	int *kid = NULL;
+	int *ids = NULL;
+	char *dictionary = NULL;
+	size_t dictionary_size;
+	char buf[64];
+
+	if ((*param)->stat > 0)
+		printf("Begin processing PDF\n");
+
+	if ((*param)->stat > 1)
+		printf("Loading object(s)\n");
+
+	if (pdf_load(&pdf, &(*param)->fp_i, (*param)->size_buf) != 0)
+		goto out;
+
+	if ((*param)->stat > 1)
+		cnki_pdf_print_objects(pdf);
+
+	if ((*param)->stat > 0)
+		printf("Loaded %d object(s)\n",
+		    pdf_get_count(&pdf));
+
+	if ((*param)->stat > 1)
+		printf("Searching for parent object(s)\n");
+
+	/* parent[0] is the count, parent[1..count] are the object ids */
+	pdf_get_parent_id(&pdf, &parent);
+
+	if (parent == NULL || parent[0] <= 0)
+		goto out;
+
+	if ((*param)->stat > 0)
+		printf("Discovered %d parent object(s)\n", parent[0]);
+
+	/* parent_missing[i] belongs to parent[i + 1] */
+	parent_missing = calloc((size_t) parent[0], sizeof *parent_missing);
+
+	if (parent_missing == NULL)
+		goto out;
+
+	for (int i = 1; i <= parent[0]; i++) {
+		if ((*param)->stat > 1)
+			printf("Searching for object %d\n", parent[i]);
+
+		pdf_get_kid_id(&pdf, parent[i], &kid);
+
+		if (kid == NULL)
+			goto out;
+
+		if (kid[0] != 0) {
+			if ((*param)->stat > 0)
+				printf("Object is missing\n");
+
+			if ((*param)->stat > 1)
+				printf("Generating object\n");
+
+			/* Fixed text plus room for one reference per kid */
+			dictionary_size = 64;
+			if (kid[0] > 0)
+				dictionary_size += 16 * (size_t) kid[0];
+
+			dictionary = calloc(1, dictionary_size);
+
+			if (dictionary == NULL)
+				goto out;
+
+			err |= cnki_pdf_cat(dictionary, dictionary_size,
+			    "<<\n/Type /Pages\n/Kids [");
+			for (int j = 1; j <= kid[0]; j++) {
+				snprintf(buf, sizeof buf,
+				    "%d 0 R",
+				    kid[j]);
+				err |= cnki_pdf_cat(dictionary,
+				    dictionary_size, buf);
+				if (j < kid[0])
+					err |= cnki_pdf_cat(dictionary,
+					    dictionary_size, " ");
+			}
+			snprintf(buf, sizeof buf,
+			    "]\n/Count %d\n>>\n",
+			    pdf_get_kid_count(&pdf, parent[i]));
+			err |= cnki_pdf_cat(dictionary, dictionary_size, buf);
+
+			if (err != 0)
+				goto out;
+
+			pdf_obj_prepend(&pdf, parent[i], NULL, dictionary, NULL);
+
+			parent_missing[i - 1] = 1;
+
+			if ((*param)->stat > 0)
+				printf("Generated object for %d child(ren)\n",
+				    kid[0]);
+
+			free(dictionary);
+			dictionary = NULL;
+		} else {
+			parent_missing[i - 1] = 0;
+
+			if ((*param)->stat > 0)
+				printf("Object exists\n");
+		}
+
+		free(kid);
+		kid = NULL;
+	}
+
+	if ((*param)->stat > 1)
+		printf("Searching for root object\n");
+
+	for (int i = 0; i < parent[0]; i++)
+		if (parent_missing[i])
+			root_kid++;
+
+	if (root_kid <= 1) {
+		if (root_kid == 0) {
+			/* Nothing was generated: use the highest parent id */
+			for (int i = 1; i <= parent[0]; i++)
+				if (root == 0 || root < parent[i])
+					root = parent[i];
+		} else {
+			/* The single generated node is the root; take its
+			 * object id, not its position in the array */
+			for (int i = 0; i < parent[0]; i++)
+				if (parent_missing[i])
+					root = parent[i + 1];
+		}
+
+		if ((*param)->stat > 0)
+			printf("Root object is %d.\n",
+			    root);
+	} else {
+		if ((*param)->stat > 0)
+			printf("Root object is missing\n");
+
+		if ((*param)->stat > 1)
+			printf("Generating root object\n");
+
+		root = pdf_get_free_id(&pdf);
+
+		/* Fixed text plus room for one reference per parent */
+		dictionary_size = 128 + 16 * (size_t) parent[0];
+		dictionary = calloc(1, dictionary_size);
+
+		if (dictionary == NULL)
+			goto out;
+
+		err |= cnki_pdf_cat(dictionary, dictionary_size,
+		    "<<\n/Type /Pages\n/Kids [");
+
+		int listed = 0;
+		for (int i = 0; i < parent[0]; i++) {
+			if (!parent_missing[i])
+				continue;
+
+			/* Separate from the previous reference, if any */
+			if (listed++ > 0)
+				err |= cnki_pdf_cat(dictionary,
+				    dictionary_size, " ");
+
+			snprintf(buf, sizeof buf, "%d 0 R", parent[i + 1]);
+			err |= cnki_pdf_cat(dictionary, dictionary_size, buf);
+		}
+
+		err |= cnki_pdf_cat(dictionary, dictionary_size, "]\n");
+
+		snprintf(buf, sizeof buf, "/Count %d\n",
+		    (*param)->file_stat->page);
+		err |= cnki_pdf_cat(dictionary, dictionary_size, buf);
+
+		err |= cnki_pdf_cat(dictionary, dictionary_size, ">>\n");
+
+		if (err != 0)
+			goto out;
+
+		pdf_obj_prepend(&pdf, root, NULL, dictionary, NULL);
+
+		free(dictionary);
+		dictionary = NULL;
+
+		if ((*param)->stat > 0)
+			printf("Generated root object %d.\n",
+			    root);
+	}
+
+	if ((*param)->file_stat->outline > 0) {
+		if ((*param)->stat > 1)
+			printf("Generating outline object(s)\n\t%8s\n", "id");
+
+		pdf_get_free_ids(&pdf, &ids, (*param)->file_stat->outline + 1);
+
+		if (ids == NULL)
+			goto out;
+
+		int outline = pdf_cnki_outline(&pdf, &(*param)->object_outline, &ids);
+
+		if ((*param)->stat > 1)
+			for (int i = 0; i < (*param)->file_stat->outline + 1; i++)
+				printf("\t%8d\n", ids[i]);
+
+		if ((*param)->stat > 0) {
+			if (outline != 0)
+				printf("No outline information\n");
+			else
+				printf("Generated %d outline object(s)\n",
+				    (*param)->file_stat->outline + 1);
+		}
+	}
+
+	if ((*param)->stat > 1)
+		printf("Generating '/Catalog' dictionary\n");
+
+	dictionary_size = 128;
+	dictionary = calloc(1, dictionary_size);
+
+	if (dictionary == NULL)
+		goto out;
+
+	snprintf(buf, sizeof buf,
+	    "<<\n/Type /Catalog\n/Pages %d 0 R\n",
+	    root);
+	err |= cnki_pdf_cat(dictionary, dictionary_size, buf);
+
+	if (ids != NULL) {
+		snprintf(buf, sizeof buf,
+		    "/Outlines %d 0 R\n/PageMode /UseOutlines\n",
+		    ids[0]);
+		err |= cnki_pdf_cat(dictionary, dictionary_size, buf);
+	}
+
+	err |= cnki_pdf_cat(dictionary, dictionary_size, ">>\n");
+
+	if (err != 0)
+		goto out;
+
+	pdf_obj_append(&pdf, 0, NULL, dictionary, NULL);
+
+	free(dictionary);
+	dictionary = NULL;
+
+	if ((*param)->stat > 0)
+		printf("Generated '/Catalog' dictionary\n");
+
+	if ((*param)->stat > 1)
+		printf("Sorting object(s)\n");
+
+	pdf_obj_sort(&pdf);
+
+	if ((*param)->stat > 0)
+		printf("Sorted object(s)\n");
+
+	if ((*param)->stat > 1)
+		printf("Writing header\n");
+
+	if ((*param)->stat > 0)
+		cur = ftell((*param)->fp_o);
+
+	if (pdf_dump_header(&pdf, &(*param)->fp_o) != 0) {
+		fprintf(stderr, "Header not written\n");
+		goto out;
+	} else {
+		if ((*param)->stat > 0)
+			printf("Header %ld byte(s) written\n",
+			    ftell((*param)->fp_o) - cur);
+	}
+
+	if ((*param)->stat > 1)
+		printf("Writing object(s)\n");
+
+	pdf_dump_obj(&pdf, &(*param)->fp_o);
+
+	if ((*param)->stat > 1)
+		cnki_pdf_print_objects(pdf);
+
+	if ((*param)->stat > 0)
+		printf("%d object(s) %ld byte(s) written\n",
+		    pdf_get_count(&pdf),
+		    ftell((*param)->fp_o));
+
+	/* The trailer needs the offset at which the table starts */
+	xref = ftell((*param)->fp_o);
+
+	if ((*param)->stat > 1)
+		printf("Writing cross-reference table\n");
+
+	if (pdf_dump_xref(&pdf, &(*param)->fp_o) != 0) {
+		if ((*param)->stat > 0)
+			printf("Cross-reference table not written\n");
+	} else {
+		if ((*param)->stat > 0)
+			printf("Cross-reference table %ld byte(s) written\n",
+			    ftell((*param)->fp_o) - xref);
+	}
+
+	if ((*param)->stat > 1)
+		printf("Writing trailer\n");
+
+	if ((*param)->stat > 0)
+		cur = ftell((*param)->fp_o);
+
+	if (pdf_dump_trailer(&pdf, &(*param)->fp_o, xref) != 0) {
+		if ((*param)->stat > 0)
+			printf("Trailer not written\n");
+	} else {
+		if ((*param)->stat > 0)
+			printf("Trailer %ld byte(s) written\n",
+			    ftell((*param)->fp_o) - cur);
+	}
+
+	if ((*param)->stat > 0)
+		printf("Total %ld byte(s) written\n",
+		    ftell((*param)->fp_o));
+
+	ret = 0;
+
+out:
+	free(dictionary);
+	free(kid);
+	free(ids);
+	free(parent_missing);
+	free(parent);
+
+	pdf_obj_destroy(&pdf);
+
+	return ret;
+}
diff --git a/src/cnki_xml.c b/src/cnki_xml.c
new file mode 100644
index 0000000..7933738
--- /dev/null
+++ b/src/cnki_xml.c
@@ -0,0 +1,14 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdio.h>
+
+/*
+ * Placeholder for XML metadata extraction.
+ *
+ * Nothing is implemented yet, so both arguments are ignored and the
+ * call always reports failure (1).
+ */
+int
+cnki_xml(char **xml, FILE **fp)
+{
+	/* TODO: Extract XML and embed into `/Metadata' */
+	(void) xml;
+	(void) fp;
+
+	return 1;
+}
diff --git a/src/extern.h b/src/extern.h
new file mode 100644
index 0000000..b7abc6e
--- /dev/null
+++ b/src/extern.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "cnki.h"
+
+/*
+ * Entry points used by main() in melon.c.  main() treats a non-zero
+ * return from cnki_create() and from the converters as failure.
+ */
+
+/* cnki.c */
+/* Set up *param; main() passes a NULL handle and expects it filled */
+int cnki_create(cnki_t **param);
+/* Presumably releases what cnki_create() set up -- TODO confirm */
+void cnki_destroy(cnki_t **param);
+/* Called before main() reads param->file_stat->type; result is ignored there */
+int cnki_info(cnki_t **param);
+
+/* cnki_caj.c */
+/* Converter main() selects when the detected type is "CAJ" */
+int cnki_caj(cnki_t **param);
+
+/* cnki_nh.c */
+/* Converter main() selects when the detected type is "HN" */
+int cnki_nh(cnki_t **param);
+
+/* cnki_kdh.c */
+/* Converter main() selects when the detected type is "KDH " */
+int cnki_kdh(cnki_t **param);
diff --git a/src/iconv.c b/src/iconv.c
new file mode 100644
index 0000000..1bf4d94
--- /dev/null
+++ b/src/iconv.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <iconv.h>
+
+/* So, why would anyone use something other than UTF-8? */
+/*
+ * Convert the NUL-terminated string `src' from `src_code' to `dst_code'.
+ *
+ * On entry *size is the capacity in bytes of the scratch buffer used for
+ * the conversion.  On success *dst receives a newly allocated buffer
+ * (the caller frees it) and *size its length in bytes.  The converted
+ * terminator is dropped; it is assumed to occupy two bytes, which holds
+ * for the UTF-16 output this is used with.
+ *
+ * Returns 0 on success and 1 on any failure, in which case neither *dst
+ * nor *size is modified.  The conversion descriptor and the scratch
+ * buffers are released on every path.
+ */
+int
+strconv(char **dst,
+    const char * restrict dst_code,
+    const char * restrict src,
+    const char * restrict src_code,
+    int *size)
+{
+	if (dst == NULL || dst_code == NULL || src == NULL ||
+	    src_code == NULL || size == NULL || *size <= 0)
+		return 1;
+
+	int ok = 0;
+	size_t written = 0;
+	size_t dst_cap = (size_t) *size;
+	size_t dst_left = dst_cap;
+	size_t src_left = strlen(src) + 1;
+	char *out = NULL;
+	char *in_pos;
+	char *out_pos;
+	iconv_t cd = (iconv_t) -1;
+
+	char *dst_buf = malloc(dst_cap);
+	/* iconv() wants a mutable input pointer, so work on a copy */
+	char *src_buf = malloc(src_left);
+
+	if (dst_buf == NULL || src_buf == NULL)
+		goto done;
+
+	memcpy(src_buf, src, src_left);
+
+	cd = iconv_open(dst_code, src_code);
+
+	if (cd == (iconv_t) -1)
+		goto done;
+
+	in_pos = src_buf;
+	out_pos = dst_buf;
+
+	if (iconv(cd, &in_pos, &src_left, &out_pos, &dst_left) == (size_t) -1)
+		goto done;
+
+	written = dst_cap - dst_left;
+
+	/* The converted terminator must be present before it is dropped */
+	if (written < 2)
+		goto done;
+
+	written -= 2;
+
+	/* Never request zero bytes: malloc(0) may legitimately return NULL */
+	out = malloc(written > 0 ? written : 1);
+
+	if (out == NULL)
+		goto done;
+
+	memcpy(out, dst_buf, written);
+	ok = 1;
+
+done:
+	if (cd != (iconv_t) -1 && iconv_close(cd) != 0)
+		ok = 0;
+
+	free(dst_buf);
+	free(src_buf);
+
+	if (!ok) {
+		free(out);
+		return 1;
+	}
+
+	*dst = out;
+	*size = (int) written;
+
+	return 0;
+}
diff --git a/src/iconv.h b/src/iconv.h
new file mode 100644
index 0000000..81af034
--- /dev/null
+++ b/src/iconv.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * Convert the NUL-terminated `src' from encoding `src_code' to
+ * `dst_code' (see iconv.c).
+ *
+ * *size is the scratch capacity on entry.  On success it is reduced to
+ * the number of bytes produced minus 2, and *dst is a malloc()ed buffer
+ * of that length which the caller frees.  Returns 0 on success, 1 on
+ * failure.
+ */
+int
+strconv(char **dst,
+ const char * restrict dst_code,
+ const char * restrict src,
+ const char * restrict src_code,
+ int *size);
diff --git a/src/melon.c b/src/melon.c
new file mode 100644
index 0000000..d1a08d1
--- /dev/null
+++ b/src/melon.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+
+#include "extern.h"
+#include "version.h"
+
+/*
+ * Command line entry point.
+ *
+ * Options: --output/-o FILE, --buffer/-b SIZE and --verbose/-v (may be
+ * repeated).  Exactly one input file is required.  Without --output the
+ * result goes to stdout, which is refused together with --verbose
+ * because progress messages are printed on stdout as well.
+ *
+ * Input and output are opened in binary mode since the handled formats
+ * are not text.  The banner is kept off stdout whenever the converted
+ * document itself is written there.
+ *
+ * Returns EXIT_SUCCESS after the converter matching the detected file
+ * type has finished, EXIT_FAILURE on any error.
+ */
+int
+main(int argc, char **argv, char **envp)
+{
+	static struct option long_options[] = {
+		{"output", required_argument, 0, 'o'},
+		{"buffer", required_argument, 0, 'b'},
+		{"verbose", no_argument, 0, 'v'},
+		{0, 0, 0, 0}
+	};
+
+	(void) envp;
+
+	cnki_t *param = NULL;
+
+	if (cnki_create(&param) != 0) {
+		fprintf(stderr, "%s: %s\n", argv[0], strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	int c;
+	int option_index = 0;
+
+	while ((c = getopt_long(argc, argv, "o:b:v",
+	    long_options, &option_index)) != -1) {
+		switch (c) {
+		case 'o':
+			/* A repeated --output replaces the earlier file */
+			if (param->fp_o != NULL)
+				fclose(param->fp_o);
+
+			if ((param->fp_o = fopen(optarg, "wb")) == NULL) {
+				fprintf(stderr, "%s: %s\n", argv[0],
+				    strerror(errno));
+				return EXIT_FAILURE;
+			}
+			break;
+		case 'b':
+			param->size_buf = atoi(optarg);
+			break;
+		case 'v':
+			param->stat += 1;
+			break;
+		case '?':
+			/* getopt_long() has already reported the problem */
+			break;
+		default:
+			abort();
+		}
+	}
+
+	/* With no --output the document occupies stdout; keep it clean */
+	FILE *banner = (param->fp_o != NULL) ? stdout : stderr;
+
+	fprintf(banner, "Melon " VERSION "." RELEASE "." PATCH EXTRA "\n");
+	fprintf(banner, "Copyright (c) 2020, yzrh <yzrh@tuta.io>\n\n");
+
+	if (argc - optind != 1) {
+		fprintf(stderr, "Usage: %s ", argv[0]);
+		fprintf(stderr, "[--output --buffer --verbose] file\n");
+		return EXIT_FAILURE;
+	}
+
+	if (param->fp_o == NULL) {
+		if (param->stat != 0) {
+			fprintf(stderr, "%s: --verbose ", argv[0]);
+			fprintf(stderr, "must not be set ");
+			fprintf(stderr, "when using stdout\n");
+			return EXIT_FAILURE;
+		}
+
+		param->fp_o = stdout;
+	}
+
+	if ((param->fp_i = fopen(argv[optind], "rb")) == NULL) {
+		fprintf(stderr, "%s: %s\n", argv[0], strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	/* NOTE(review): the result of cnki_info() is not examined */
+	cnki_info(&param);
+
+	const char *type = param->file_stat->type;
+	int status;
+
+	if (strcmp(type, "%PDF") == 0) {
+		status = cnki_pdf(&param);
+	} else if (strcmp(type, "CAJ") == 0) {
+		status = cnki_caj(&param);
+	} else if (strcmp(type, "HN") == 0) {
+		status = cnki_nh(&param);
+	} else if (strcmp(type, "KDH ") == 0) {
+		status = cnki_kdh(&param);
+	} else {
+		fprintf(stderr, "%s: %s\n", argv[0], "Invalid file");
+		return EXIT_FAILURE;
+	}
+
+	if (status != 0) {
+		fprintf(stderr, "%s: %s\n", argv[0], strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	fclose(param->fp_i);
+
+	/* Buffered output is flushed here, so a failure means data loss */
+	if (fclose(param->fp_o) != 0) {
+		fprintf(stderr, "%s: %s\n", argv[0], strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	cnki_destroy(&param);
+
+	return EXIT_SUCCESS;
+}
diff --git a/src/pdf.c b/src/pdf.c
new file mode 100644
index 0000000..92dd717
--- /dev/null
+++ b/src/pdf.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "pdf.h"
+
+/*
+ * Return the smallest id found among the nodes that follow *pdf (the
+ * node *pdf itself is not examined).
+ *
+ * A running minimum of 0 counts as "not set yet", so 0 is returned when
+ * there are no following nodes.
+ */
+static int
+_min_id(pdf_object_t **pdf)
+{
+	int lowest = 0;
+
+	for (pdf_object_t *node = (*pdf)->next; node != NULL; node = node->next) {
+		if (lowest != 0 && node->id >= lowest)
+			continue;
+
+		lowest = node->id;
+	}
+
+	return lowest;
+}
+
+/*
+ * Allocate the list head and store it in *pdf.
+ *
+ * The head carries id 0 and no payload; real objects hang off its
+ * `next' pointer.  Returns 1 when *pdf is already set or the allocation
+ * fails, 0 otherwise.
+ */
+int
+pdf_obj_create(pdf_object_t **pdf)
+{
+	if (*pdf != NULL)
+		return 1;
+
+	pdf_object_t *head = malloc(sizeof(*head));
+
+	if (head == NULL)
+		return 1;
+
+	/* Every counter becomes 0 and every pointer NULL */
+	*head = (pdf_object_t) {0};
+	*pdf = head;
+
+	return 0;
+}
+
+/*
+ * Free every node reachable from *pdf, including its payload buffers,
+ * and leave *pdf set to NULL.
+ */
+void
+pdf_obj_destroy(pdf_object_t **pdf)
+{
+	pdf_object_t *node = *pdf;
+
+	while (node != NULL) {
+		pdf_object_t *following = node->next;
+
+		free(node->object);
+		free(node->dictionary);
+		free(node->stream);
+		free(node);
+
+		node = following;
+	}
+
+	*pdf = NULL;
+}
+
+/*
+ * Allocate a detached object node and store it in *pdf.
+ *
+ * `id' must be positive and *pdf must be NULL on entry.  At most one of
+ * `object' and `dictionary' may be given; both are NUL-terminated text
+ * and are copied, the recorded size including the terminator.
+ * `stream' is optional; as no length is passed it is treated as
+ * NUL-terminated text too and stream_size excludes the terminator.
+ *
+ * Returns 0 on success.  On failure 1 is returned, nothing is leaked
+ * and *pdf is left untouched, so a caller never sees a half built node.
+ */
+int
+pdf_obj_add(pdf_object_t **pdf, int id,
+    const char * restrict object,
+    const char * restrict dictionary,
+    const char * restrict stream)
+{
+	if (*pdf != NULL || id <= 0 ||
+	    (object != NULL && dictionary != NULL))
+		return 1;
+
+	pdf_object_t *node = malloc(sizeof(*node));
+
+	if (node == NULL)
+		return 1;
+
+	/* Sizes and address start at 0, payload and next pointers at NULL */
+	*node = (pdf_object_t) { .id = id };
+
+	if (dictionary != NULL) {
+		size_t len = strlen(dictionary) + 1;
+
+		node->dictionary = malloc(len);
+
+		if (node->dictionary == NULL)
+			goto fail;
+
+		memcpy(node->dictionary, dictionary, len);
+		node->dictionary_size = (int) len;
+	} else if (object != NULL) {
+		size_t len = strlen(object) + 1;
+
+		node->object = malloc(len);
+
+		if (node->object == NULL)
+			goto fail;
+
+		memcpy(node->object, object, len);
+		node->object_size = (int) len;
+	}
+
+	if (stream != NULL) {
+		/*
+		 * sizeof(stream) would only be the size of the pointer;
+		 * measure the data instead.
+		 */
+		size_t len = strlen(stream);
+
+		/* One spare byte keeps an empty stream from requesting 0 bytes */
+		node->stream = malloc(len + 1);
+
+		if (node->stream == NULL)
+			goto fail;
+
+		memcpy(node->stream, stream, len + 1);
+		node->stream_size = (int) len;
+	}
+
+	*pdf = node;
+
+	return 0;
+
+fail:
+	free(node->object);
+	free(node->dictionary);
+	free(node->stream);
+	free(node);
+
+	return 1;
+}
+
+/*
+ * Unlink the first node after *pdf whose id equals `id'.
+ *
+ * The node is only detached, not freed: pdf_obj_sort() links it back in
+ * at another position.  Returns 1 for a NULL list or a non-positive id
+ * and 0 otherwise, whether or not a matching node was found.
+ */
+int
+pdf_obj_del(pdf_object_t **pdf, int id)
+{
+	if (*pdf == NULL || id <= 0)
+		return 1;
+
+	for (pdf_object_t *prev = *pdf; prev->next != NULL; prev = prev->next) {
+		pdf_object_t *victim = prev->next;
+
+		if (victim->id != id)
+			continue;
+
+		prev->next = victim->next;
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Create a new object and insert it directly after the list head.
+ *
+ * A non-positive `id' is replaced by the value of pdf_get_free_id().
+ * Returns 0 on success, 1 if the list is NULL or the node could not be
+ * created.
+ */
+int
+pdf_obj_prepend(pdf_object_t **pdf, int id,
+    const char * restrict object,
+    const char * restrict dictionary,
+    const char * restrict stream)
+{
+	if (*pdf == NULL)
+		return 1;
+
+	int use_id = (id > 0) ? id : pdf_get_free_id(pdf);
+	pdf_object_t *node = NULL;
+
+	if (pdf_obj_add(&node, use_id, object, dictionary, stream) == 0) {
+		node->next = (*pdf)->next;
+		(*pdf)->next = node;
+
+		return 0;
+	}
+
+	free(node);
+
+	return 1;
+}
+
+/*
+ * Create a new object and link it after the last node of the list.
+ *
+ * A non-positive `id' is replaced by the value of pdf_get_free_id().
+ * Returns 0 on success, 1 if the list is NULL or the node could not be
+ * created.
+ */
+int
+pdf_obj_append(pdf_object_t **pdf, int id,
+    const char * restrict object,
+    const char * restrict dictionary,
+    const char * restrict stream)
+{
+	if (*pdf == NULL)
+		return 1;
+
+	if (id <= 0)
+		id = pdf_get_free_id(pdf);
+
+	/* Walk to the `next' slot of the final node */
+	pdf_object_t **slot = &(*pdf)->next;
+
+	while (*slot != NULL)
+		slot = &(*slot)->next;
+
+	return pdf_obj_add(slot, id, object, dictionary, stream) != 0 ? 1 : 0;
+}
+
+/*
+ * Sort the objects that follow the list head by ascending id, using a
+ * selection sort that relinks nodes in place.
+ *
+ * Returns 0 once the list is ordered; an empty or single element list
+ * is trivially ordered.  Returns 1 if the list is NULL, if the smallest
+ * remaining id is reported as 0, or if the node carrying the smallest
+ * id cannot be found again.
+ */
+int
+pdf_obj_sort(pdf_object_t **pdf)
+{
+	if (*pdf == NULL)
+		return 1;
+
+	pdf_object_t *ptr = *pdf;
+
+	/*
+	 * _min_id() only looks past ptr->next, so stop as soon as that
+	 * node has no successor: the last node is already in place.
+	 * Asking anyway yields 0 and used to turn every successful sort
+	 * into a reported failure.
+	 */
+	while (ptr->next != NULL && ptr->next->next != NULL) {
+		int id = _min_id(&ptr->next);
+
+		if (id == 0)
+			return 1;
+
+		if (id < ptr->next->id) {
+			pdf_object_t *tmp = NULL;
+
+			if (pdf_get_obj(&ptr->next, id, &tmp) != 0)
+				return 1;
+
+			/* Detach the smaller node and put it in front */
+			pdf_obj_del(&ptr->next, id);
+
+			tmp->next = ptr->next;
+			ptr->next = tmp;
+		}
+
+		ptr = ptr->next;
+	}
+
+	return 0;
+}
diff --git a/src/pdf.h b/src/pdf.h
new file mode 100644
index 0000000..61f64d5
--- /dev/null
+++ b/src/pdf.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdio.h>
+
+/*
+ * One node of the singly linked object list.  The first node of a list
+ * is a head created by pdf_obj_create() with id 0 and no payload.
+ */
+typedef struct _pdf_object_t {
+ /* File offset: set by the parser when loading, by pdf_dump_obj() when writing */
+ int address;
+ /* Number of bytes the object occupies at `address' */
+ int size;
+ /* Object number, as in "<id> 0 obj" */
+ int id;
+ /* Payload of an object that is not a dictionary; NULL when unused */
+ int object_size;
+ char *object;
+ /* Dictionary text "<< ... >>", NUL-terminated; NULL when unused */
+ int dictionary_size;
+ char *dictionary;
+ /* Stream bytes written between "stream" and "endstream"; NULL when unused */
+ int stream_size;
+ char *stream;
+ /* Following node, NULL at the end of the list */
+ struct _pdf_object_t *next;
+} pdf_object_t;
+
+/*
+ * All functions take the address of the list head pointer.  Functions
+ * returning a status use 0 for success and 1 for failure.  The getters
+ * that return a value also return 1 when the list is NULL.
+ */
+
+/* pdf.c */
+/* TODO: Rewrite object dictionary */
+/* TODO: Compact object id */
+/* TODO: `mutool clean -gggsz' */
+/* Allocate the list head (*pdf must be NULL) / free the whole list */
+int pdf_obj_create(pdf_object_t **pdf);
+void pdf_obj_destroy(pdf_object_t **pdf);
+/* Build a detached node in *pdf; `object' and `dictionary' are exclusive */
+int pdf_obj_add(pdf_object_t **pdf, int id,
+ const char * restrict object,
+ const char * restrict dictionary,
+ const char * restrict stream);
+/* Unlink (without freeing) the node carrying `id' */
+int pdf_obj_del(pdf_object_t **pdf, int id);
+/* Insert a new node first / last; id <= 0 picks a free id */
+int pdf_obj_prepend(pdf_object_t **pdf, int id,
+ const char * restrict object,
+ const char * restrict dictionary,
+ const char * restrict stream);
+int pdf_obj_append(pdf_object_t **pdf, int id,
+ const char * restrict object,
+ const char * restrict dictionary,
+ const char * restrict stream);
+/* Order the nodes by ascending id */
+int pdf_obj_sort(pdf_object_t **pdf);
+
+/* pdf_parser.c */
+/* Locate and read all objects from *fp; size_buf must be at least 7 */
+int pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf);
+
+/* pdf_writer.c */
+/* Write the objects, header, cross-reference table or trailer to *fp */
+int pdf_dump_obj(pdf_object_t **pdf, FILE **fp);
+int pdf_dump_header(pdf_object_t **pdf, FILE **fp);
+int pdf_dump_xref(pdf_object_t **pdf, FILE **fp);
+/* `xref' is the file offset written after "startxref" */
+int pdf_dump_trailer(pdf_object_t **pdf, FILE **fp, int xref);
+
+/* pdf_get.c */
+/* Store the node carrying `id' in *obj */
+int pdf_get_obj(pdf_object_t **pdf, int id, pdf_object_t **obj);
+/* Number of nodes after the head / sum of their `size' fields */
+int pdf_get_count(pdf_object_t **pdf);
+int pdf_get_size(pdf_object_t **pdf);
+/* Smallest unused positive id / a malloc()ed array of unused ids */
+int pdf_get_free_id(pdf_object_t **pdf);
+int pdf_get_free_ids(pdf_object_t **pdf, int **ids, int count);
+/* Id of the last object whose dictionary contains "/Catalog", else 0 */
+int pdf_get_catalog_id(pdf_object_t **pdf);
+/* Collect ids named by "/Parent" entries; element 0 holds the count */
+int pdf_get_parent_id(pdf_object_t **pdf, int **id);
+/* Collect ids of objects naming `id' as parent; element 0 holds the count */
+int pdf_get_kid_id(pdf_object_t **pdf, int id, int **kid);
+/* Sum of the "/Count" values of the objects naming `id' as parent */
+int pdf_get_kid_count(pdf_object_t **pdf, int id);
diff --git a/src/pdf_cnki.c b/src/pdf_cnki.c
new file mode 100644
index 0000000..2dec0b6
--- /dev/null
+++ b/src/pdf_cnki.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "cnki.h"
+#include "iconv.h"
+#include "pdf.h"
+
+/*
+ * Append `src' to the NUL-terminated string in `dst', which has room
+ * for `cap' bytes in total.  Returns 0 on success; returns 1 and leaves
+ * `dst' unchanged when the result would not fit.
+ */
+static int
+_dict_cat(char *dst, size_t cap, const char *src)
+{
+	size_t used = strlen(dst);
+	size_t extra = strlen(src);
+
+	if (extra >= cap - used)
+		return 1;
+
+	memcpy(dst + used, src, extra + 1);
+
+	return 0;
+}
+
+/*
+ * Emit one outline item object for every node of the sibling chain that
+ * starts at *outline_tree (siblings are linked through `left', children
+ * through `right'), recursing into children first.  `id' is the object
+ * id of the parent.
+ *
+ * *stat receives a malloc()ed array of three ints the caller frees:
+ * the id of the first item, the id of the last item and the number of
+ * items on this level.  The recursion uses it to fill in /First, /Last
+ * and /Count.  All three are 0 if the chain is empty.
+ *
+ * Returns 0 on success.  Returns 1 if allocating *stat fails (*stat is
+ * then NULL), if the chain is empty, if a child level fails, if an
+ * object cannot be appended or if a dictionary would not fit.
+ */
+static int
+_outline(pdf_object_t **pdf, object_outline_tree_t **outline_tree, int id, int **stat)
+{
+	*stat = malloc(3 * sizeof(int));
+
+	if (*stat == NULL)
+		return 1;
+
+	(*stat)[0] = 0;
+	(*stat)[1] = 0;
+	(*stat)[2] = 0;
+
+	object_outline_tree_t *ptr = *outline_tree;
+
+	if (ptr == NULL)
+		return 1;
+
+	(*stat)[0] = ptr->id;
+
+	char buf[64];
+	/*
+	 * A title converted from at most 512 bytes needs up to 1020 hex
+	 * digits; the remaining entries are short, so 2048 is ample.
+	 */
+	char dictionary[2048];
+
+	for (; ptr != NULL; ptr = ptr->left) {
+		int full = 0;
+		int size = 512;
+		char *str = NULL;
+
+		dictionary[0] = '\0';
+
+		full |= _dict_cat(dictionary, sizeof(dictionary), "<<\n");
+
+		/* The title is written as a hex string with a UTF-16 BOM */
+		if (strconv(&str, "UTF-16BE",
+		    ptr->item->title, "GB18030",
+		    &size) == 0) {
+			full |= _dict_cat(dictionary, sizeof(dictionary),
+			    "/Title <feff");
+
+			for (int i = 0; i < size; i++) {
+				snprintf(buf, sizeof(buf), "%02x",
+				    (unsigned char) str[i]);
+				full |= _dict_cat(dictionary,
+				    sizeof(dictionary), buf);
+			}
+
+			full |= _dict_cat(dictionary, sizeof(dictionary), ">\n");
+		}
+
+		free(str);
+
+		snprintf(buf, sizeof(buf), "/Parent %d 0 R\n", id);
+		full |= _dict_cat(dictionary, sizeof(dictionary), buf);
+
+		/* The first item of a level has its parent as `up' */
+		if (ptr->up != NULL && ptr->up->id != id) {
+			snprintf(buf, sizeof(buf), "/Prev %d 0 R\n", ptr->up->id);
+			full |= _dict_cat(dictionary, sizeof(dictionary), buf);
+		}
+
+		if (ptr->left != NULL) {
+			snprintf(buf, sizeof(buf), "/Next %d 0 R\n", ptr->left->id);
+			full |= _dict_cat(dictionary, sizeof(dictionary), buf);
+		}
+
+		if (ptr->right != NULL) {
+			int *ret = NULL;
+
+			if (_outline(pdf, &ptr->right, ptr->id, &ret) != 0) {
+				free(ret);
+				return 1;
+			}
+
+			snprintf(buf, sizeof(buf), "/First %d 0 R\n", ret[0]);
+			full |= _dict_cat(dictionary, sizeof(dictionary), buf);
+
+			snprintf(buf, sizeof(buf), "/Last %d 0 R\n", ret[1]);
+			full |= _dict_cat(dictionary, sizeof(dictionary), buf);
+
+			snprintf(buf, sizeof(buf), "/Count -%d\n", ret[2]);
+			full |= _dict_cat(dictionary, sizeof(dictionary), buf);
+
+			free(ret);
+		}
+
+		/* Page starts from 0 */
+		snprintf(buf, sizeof(buf), "/Dest [%d /XYZ null null null]\n>>\n",
+		    atoi(ptr->item->page) - 1);
+		full |= _dict_cat(dictionary, sizeof(dictionary), buf);
+
+		if (full ||
+		    pdf_obj_append(pdf, ptr->id, NULL, dictionary, NULL) != 0)
+			return 1;
+
+		if (ptr->left == NULL)
+			(*stat)[1] = ptr->id;
+
+		(*stat)[2]++;
+	}
+
+	return 0;
+}
+
+/*
+ * Build the document outline from *outline and append it to the object
+ * list: one object per outline item plus the outline root, which takes
+ * the id (*ids)[0].
+ *
+ * Returns 0 on success.  Returns 1 if an argument is NULL, if no
+ * outline tree with at least one item could be built, or if writing an
+ * object fails.
+ */
+int
+pdf_cnki_outline(pdf_object_t **pdf, object_outline_t **outline, int **ids)
+{
+	if (*pdf == NULL || *outline == NULL || *ids == NULL)
+		return 1;
+
+	object_outline_tree_t *outline_tree = NULL;
+	cnki_outline_tree(&outline_tree, outline, *ids);
+
+	if (outline_tree == NULL)
+		return 1;
+
+	int *ret = NULL;
+	int failed = 1;
+
+	/* The items of the top level hang off the root through `left' */
+	if (outline_tree->left != NULL)
+		failed = _outline(pdf, &outline_tree->left,
+		    outline_tree->id, &ret);
+
+	/*
+	 * NOTE(review): only the root node is released here; whether the
+	 * remaining tree nodes need freeing depends on cnki_outline_tree().
+	 */
+	free(outline_tree);
+
+	if (failed != 0) {
+		free(ret);
+		return 1;
+	}
+
+	char buf[128];
+
+	/* Name objects carry a leading slash: /Outlines, not Outlines */
+	snprintf(buf, sizeof(buf),
+	    "<<\n/Type /Outlines\n/First %d 0 R\n/Last %d 0 R\n/Count %d\n>>\n",
+	    ret[0], ret[1], ret[2]);
+
+	free(ret);
+
+	if (pdf_obj_append(pdf, (*ids)[0], NULL, buf, NULL) != 0)
+		return 1;
+
+	return 0;
+}
diff --git a/src/pdf_cnki.h b/src/pdf_cnki.h
new file mode 100644
index 0000000..f0210d0
--- /dev/null
+++ b/src/pdf_cnki.h
@@ -0,0 +1,7 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * Append the outline objects built from *outline to the list *pdf.  The
+ * outline root uses the id (*ids)[0].  Fails with 1 when *pdf, *outline
+ * or *ids is NULL.
+ */
+int pdf_cnki_outline(pdf_object_t **pdf, object_outline_t **outline, int **ids);
diff --git a/src/pdf_get.c b/src/pdf_get.c
new file mode 100644
index 0000000..33fb271
--- /dev/null
+++ b/src/pdf_get.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "pdf.h"
+
+/*
+ * Tell whether `id' occurs in `ids'.
+ *
+ * ids[0] holds the number of entries and the entries themselves start
+ * at ids[1].  Returns 1 if found, 0 otherwise.
+ */
+static int
+_id_in(int id, int *ids)
+{
+	int remaining = ids[0];
+	const int *entry = ids + 1;
+
+	while (remaining-- > 0)
+		if (*entry++ == id)
+			return 1;
+
+	return 0;
+}
+
+/*
+ * Look up the first node after *pdf whose id equals `id' and store it
+ * in *obj.
+ *
+ * Returns 0 when found.  Returns 1, leaving *obj untouched, for a NULL
+ * list, a non-positive id or when no node matches.
+ */
+int
+pdf_get_obj(pdf_object_t **pdf, int id, pdf_object_t **obj)
+{
+	if (*pdf == NULL || id <= 0)
+		return 1;
+
+	for (pdf_object_t *node = (*pdf)->next; node != NULL; node = node->next) {
+		if (node->id == id) {
+			*obj = node;
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
+/*
+ * Count the nodes that follow the list head.
+ *
+ * Note that a NULL list yields 1, not 0.
+ */
+int
+pdf_get_count(pdf_object_t **pdf)
+{
+	if (*pdf == NULL)
+		return 1;
+
+	int total = 0;
+
+	for (pdf_object_t *node = (*pdf)->next; node != NULL; node = node->next)
+		total++;
+
+	return total;
+}
+
+/*
+ * Add up the `size' field of every node that follows the list head.
+ *
+ * Note that a NULL list yields 1, not 0.
+ */
+int
+pdf_get_size(pdf_object_t **pdf)
+{
+	if (*pdf == NULL)
+		return 1;
+
+	int total = 0;
+
+	for (pdf_object_t *node = (*pdf)->next; node != NULL; node = node->next)
+		total += node->size;
+
+	return total;
+}
+
+/*
+ * Return the smallest id in the range 1 to 99999998 that no node of the
+ * list uses.
+ *
+ * Returns 0 when every id in that range is taken and 1 for a NULL list.
+ */
+int
+pdf_get_free_id(pdf_object_t **pdf)
+{
+	if (*pdf == NULL)
+		return 1;
+
+	for (int candidate = 1; candidate < 99999999; candidate++) {
+		int taken = 0;
+
+		for (pdf_object_t *node = (*pdf)->next; node != NULL;
+		    node = node->next) {
+			if (node->id == candidate) {
+				taken = 1;
+				break;
+			}
+		}
+
+		if (!taken)
+			return candidate;
+	}
+
+	return 0;
+}
+
+/*
+ * Collect the `count' smallest ids in the range 1 to 99999998 that no
+ * node of the list uses.
+ *
+ * *ids must be NULL on entry and receives a malloc()ed array of exactly
+ * `count' ints in ascending order, which the caller frees.
+ *
+ * Returns 0 on success.  Returns 1 for a NULL list, a non-NULL *ids, a
+ * non-positive count or a failed allocation.  Also returns 1 when fewer
+ * than `count' ids are free; *ids then stays allocated and only
+ * partially filled.
+ */
+int
+pdf_get_free_ids(pdf_object_t **pdf, int **ids, int count)
+{
+	if (*pdf == NULL || *ids != NULL || count <= 0)
+		return 1;
+
+	*ids = malloc((size_t) count * sizeof(int));
+
+	if (*ids == NULL)
+		return 1;
+
+	int pos = 0;
+
+	for (int candidate = 1; candidate < 99999999; candidate++) {
+		int taken = 0;
+
+		for (pdf_object_t *node = (*pdf)->next; node != NULL;
+		    node = node->next) {
+			if (node->id == candidate) {
+				taken = 1;
+				break;
+			}
+		}
+
+		if (taken)
+			continue;
+
+		(*ids)[pos++] = candidate;
+
+		/* Stop after the last slot instead of writing one past it */
+		if (pos == count)
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Return the id of the object whose dictionary mentions "/Catalog".
+ *
+ * If several objects match, the one furthest down the list wins.
+ * Returns 0 if none matches and 1 for a NULL list.
+ */
+int
+pdf_get_catalog_id(pdf_object_t **pdf)
+{
+	if (*pdf == NULL)
+		return 1;
+
+	int found = 0;
+
+	for (pdf_object_t *node = (*pdf)->next; node != NULL; node = node->next) {
+		if (node->dictionary == NULL)
+			continue;
+
+		if (strstr(node->dictionary, "/Catalog") != NULL)
+			found = node->id;
+	}
+
+	return found;
+}
+
+/*
+ * Collect the distinct ids that objects reference through a "/Parent "
+ * entry.  Only the first such entry of a dictionary is considered.
+ *
+ * *id must be NULL on entry and receives a malloc()ed array the caller
+ * frees: element 0 holds the number of ids, the ids follow.
+ *
+ * Returns 0 on success.  Returns 1 for a NULL list, a non-NULL *id or a
+ * failed allocation; after a failed enlargement *id still holds the ids
+ * gathered so far.
+ */
+int
+pdf_get_parent_id(pdf_object_t **pdf, int **id)
+{
+	if (*pdf == NULL || *id != NULL)
+		return 1;
+
+	int id_size = 1;
+	*id = malloc(sizeof(int));
+
+	if (*id == NULL)
+		return 1;
+
+	(*id)[0] = 0;
+
+	char *head;
+	char *tail;
+
+	/* Large enough for any int written in decimal plus the terminator */
+	char str[16];
+	int str_val;
+
+	int *ret;
+
+	for (pdf_object_t *ptr = (*pdf)->next; ptr != NULL; ptr = ptr->next) {
+		if (ptr->dictionary == NULL ||
+		    (head = strstr(ptr->dictionary, "/Parent ")) == NULL ||
+		    (tail = strchr(head + 8, ' ')) == NULL)
+			continue;
+
+		size_t len = (size_t) (tail - head) - 8;
+
+		/* A token that does not fit cannot be an object number */
+		if (len >= sizeof(str))
+			continue;
+
+		memcpy(str, head + 8, len);
+		str[len] = '\0';
+		str_val = atoi(str);
+
+		if (_id_in(str_val, *id))
+			continue;
+
+		ret = realloc(*id, (size_t) (id_size + 1) * sizeof(int));
+
+		if (ret == NULL)
+			return 1;
+
+		*id = ret;
+		id_size++;
+
+		(*id)[0]++;
+		(*id)[id_size - 1] = str_val;
+	}
+
+	return 0;
+}
+
+/*
+ * Collect the ids of the objects whose dictionary contains
+ * "/Parent <id> 0 R".
+ *
+ * *kid must be NULL on entry and receives a malloc()ed array the caller
+ * frees: element 0 holds the number of kids, the ids follow.
+ *
+ * Returns 0 on success.  Returns 1 for a NULL list, a non-NULL *kid or
+ * a failed allocation.  Also returns 1, with element 0 set to 0, as
+ * soon as an object carrying `id' itself is met in the list.
+ */
+int
+pdf_get_kid_id(pdf_object_t **pdf, int id, int **kid)
+{
+	if (*pdf == NULL || *kid != NULL)
+		return 1;
+
+	int slots = 1;
+	*kid = malloc(sizeof(int));
+
+	if (*kid == NULL)
+		return 1;
+
+	char needle[32];
+
+	snprintf(needle, 32, "/Parent %d 0 R", id);
+
+	for (pdf_object_t *node = (*pdf)->next; node != NULL; node = node->next) {
+		if (node->id == id) {
+			(*kid)[0] = 0;
+			return 1;
+		}
+
+		if (node->dictionary == NULL ||
+		    strstr(node->dictionary, needle) == NULL)
+			continue;
+
+		int *grown = realloc(*kid, ++slots * sizeof(int));
+
+		if (grown == NULL)
+			return 1;
+
+		*kid = grown;
+		(*kid)[slots - 1] = node->id;
+	}
+
+	(*kid)[0] = slots - 1;
+
+	return 0;
+}
+
+/*
+ * Add up the "/Count " values of all objects whose dictionary contains
+ * "/Parent <id> 0 R".
+ *
+ * Only the first "/Count " entry of a dictionary is used and at most
+ * the nine characters following it are examined for the number.
+ * Returns the sum, or 1 for a NULL list or a non-positive id.
+ */
+int
+pdf_get_kid_count(pdf_object_t **pdf, int id)
+{
+	if (*pdf == NULL || id <= 0)
+		return 1;
+
+	int count = 0;
+
+	char id_str[32];
+	char *pos;
+
+	/* Nine characters of input plus the terminator */
+	char str[10];
+
+	snprintf(id_str, 32, "/Parent %d 0 R", id);
+
+	for (pdf_object_t *ptr = (*pdf)->next; ptr != NULL; ptr = ptr->next) {
+		if (ptr->dictionary == NULL ||
+		    strstr(ptr->dictionary, id_str) == NULL ||
+		    (pos = strstr(ptr->dictionary, "/Count ")) == NULL)
+			continue;
+
+		/*
+		 * Copy the window after "/Count ", stopping at the end of
+		 * the dictionary text, and let atoi() take the leading
+		 * number; it yields 0 when there is none.
+		 */
+		size_t len = 0;
+
+		while (len < sizeof(str) - 1 && pos[7 + len] != '\0') {
+			str[len] = pos[7 + len];
+			len++;
+		}
+
+		str[len] = '\0';
+
+		count += atoi(str);
+	}
+
+	return count;
+}
diff --git a/src/pdf_parser.c b/src/pdf_parser.c
new file mode 100644
index 0000000..1da8dff
--- /dev/null
+++ b/src/pdf_parser.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifdef __linux__
+
+#define _GNU_SOURCE
+
+#endif /* __linux__ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "pdf.h"
+
+/*
+ * Find the earliest occurrence of the `s1' bytes at `p1' inside the
+ * `s0' bytes at `p0' that is immediately followed by a white-space
+ * byte (CR, LF, FF, TAB, NUL or space).
+ *
+ * Returns a pointer to the start of the match, or NULL if there is
+ * none or the haystack is too short to hold the pattern plus one byte.
+ */
+static void *
+_memmem_whitespace(const void *p0, size_t s0, const void *p1, size_t s1)
+{
+	static const char whitespace[6] = {
+		'\r',
+		'\n',
+		'\f',
+		'\t',
+		'\0',
+		' '
+	};
+
+	const char *hay = p0;
+
+	/* The pattern and its trailing delimiter must both fit */
+	if (s0 <= s1)
+		return NULL;
+
+	for (size_t off = 0; off + s1 < s0; off++) {
+		if (memcmp(hay + off, p1, s1) != 0)
+			continue;
+
+		if (memchr(whitespace, hay[off + s1], sizeof(whitespace)) != NULL)
+			return (void *) (hay + off);
+	}
+
+	return NULL;
+}
+
+/*
+ * Scan *fp from its current position in chunks of `size_buf' bytes and
+ * append one node per object found to the list, recording only where
+ * the object body starts (`address') and how long it is (`size').
+ *
+ * Returns 0 when the scan reaches the end of the file and 1 if a node
+ * cannot be allocated.
+ *
+ * NOTE(review): the result of fread() is not checked, so a short final
+ * read leaves bytes of the previous chunk in the buffer.
+ */
+static int
+_locate(pdf_object_t **pdf, FILE **fp, int size_buf)
+{
+ /* New nodes are linked after the current last node */
+ pdf_object_t *ptr = *pdf;
+ while (ptr->next != NULL)
+ ptr = ptr->next;
+
+ char buf[size_buf];
+
+ /* Remember the start position and determine the end offset */
+ long cur = ftell(*fp);
+ long end;
+
+ fseek(*fp, 0, SEEK_END);
+ end = ftell(*fp);
+ fseek(*fp, cur, SEEK_SET);
+
+ /* File offsets of the pending object body start and end; 0 means unknown */
+ int head = 0;
+ int tail = 0;
+ char *pos;
+ char *tmp;
+
+ for (;;) {
+ fread(buf, size_buf, 1, *fp);
+
+ /* The body starts after " 0 obj" and the white-space byte following it */
+ if (head == 0 && (pos = _memmem_whitespace(buf, size_buf, " 0 obj", 6)) != NULL)
+ head = cur + (pos - buf) + 7;
+
+ if (tail == 0 && (pos = _memmem_whitespace(buf, size_buf, "endobj", 6)) != NULL) {
+ /* We need to check if it is the object stored in stream */
+ while (memcmp(pos + 7,
+ "\r\nendstream", 11) == 0 &&
+ (tmp = _memmem_whitespace(pos + 6,
+ size_buf - (pos - buf) - 6,
+ "endobj", 6)) != NULL)
+ pos = tmp;
+
+ /* Accept the marker only if it lies at least 7 bytes before the chunk end */
+ if (pos - buf < size_buf - 7)
+ tail = cur + (pos - buf);
+ }
+
+ if (tail > head) {
+ /* A complete object was seen: create its node */
+ if (ptr->next == NULL) {
+ ptr->next = malloc(sizeof(pdf_object_t));
+
+ if (ptr->next == NULL)
+ return 1;
+
+ ptr->next->id = 0;
+ ptr->next->object_size = 0;
+ ptr->next->object = NULL;
+ ptr->next->dictionary_size = 0;
+ ptr->next->dictionary = NULL;
+ ptr->next->stream_size = 0;
+ ptr->next->stream = NULL;
+ ptr->next->next = NULL;
+ ptr = ptr->next;
+ }
+
+ ptr->address = head;
+ ptr->size = tail - head;
+
+ /* Continue right after "endobj" and look for the next object */
+ fseek(*fp, tail + 6, SEEK_SET);
+ head = tail = 0;
+ } else {
+ /* Step back so a marker split across two chunks is still seen */
+ fseek(*fp, -6, SEEK_CUR);
+ }
+
+ /* Fewer than 7 bytes left cannot hold another marker */
+ if ((cur = ftell(*fp)) + 6 >= end)
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * Read every object of the file *fp into the list *pdf.
+ *
+ * _locate() first records where each object body lies.  Each body is
+ * then read and split into either a dictionary with an optional stream
+ * or, if it holds no dictionary, a plain object.  The object number is
+ * taken from the 8 bytes that start 12 bytes before the body.
+ *
+ * `size_buf' is the chunk size used for scanning and must be at least 7.
+ * Returns 0 on success and 1 on invalid arguments or a failed
+ * allocation; the scratch buffer is released on every path.
+ */
+int
+pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
+{
+	if (*pdf == NULL || *fp == NULL || size_buf < 7)
+		return 1;
+
+	if (_locate(pdf, fp, size_buf) != 0)
+		return 1;
+
+	/* Own buffer for the object number: the body may be under 8 bytes */
+	char id_buf[9];
+
+	char *buf;
+	char *head;
+	char *tail;
+	char *tmp;
+
+	for (pdf_object_t *ptr = (*pdf)->next; ptr != NULL; ptr = ptr->next) {
+		buf = malloc(ptr->size);
+
+		if (buf == NULL)
+			return 1;
+
+		memset(buf, 0, ptr->size);
+
+		/* The trailing zero byte terminates the text for atoi() */
+		memset(id_buf, 0, sizeof(id_buf));
+
+		fseek(*fp, ptr->address - 12, SEEK_SET);
+		fread(id_buf, 8, 1, *fp);
+
+		for (int i = 0; i < 8; i++) {
+			if (id_buf[i] >= '0' && id_buf[i] <= '9') {
+				ptr->id = atoi(id_buf + i);
+				break;
+			}
+		}
+
+		fseek(*fp, ptr->address, SEEK_SET);
+		fread(buf, ptr->size, 1, *fp);
+
+		if ((head = memmem(buf, ptr->size, "<<", 2)) != NULL &&
+		    (tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL) {
+			/* A dictionary object may have nested dictionary */
+			while ((tmp = _memmem_whitespace(tail + 2,
+			    ptr->size - (tail - buf) - 2,
+			    ">>", 2)) != NULL)
+				tail = tmp;
+
+			ptr->dictionary_size = tail - head + 2;
+			ptr->dictionary = malloc(ptr->dictionary_size + 1);
+
+			if (ptr->dictionary == NULL) {
+				free(buf);
+				return 1;
+			}
+
+			memset(ptr->dictionary, 0, ptr->dictionary_size + 1);
+			memcpy(ptr->dictionary, head, ptr->dictionary_size);
+
+			if ((head = memmem(tail,
+			    ptr->size - (tail - buf),
+			    "stream\r\n", 8)) != NULL &&
+			    (tail = _memmem_whitespace(head,
+			    ptr->size - (head - buf),
+			    "endstream", 9)) != NULL) {
+				/*
+				 * An object may contain a stream that
+				 * contains another object that
+				 * contains another stream
+				 */
+				while (_memmem_whitespace(tail,
+				    ptr->size - (tail - buf),
+				    "endobj", 6) != NULL &&
+				    (tmp = _memmem_whitespace(tail + 9,
+				    ptr->size - (tail - buf) - 9,
+				    "endstream", 9)) != NULL)
+					tail = tmp;
+
+				ptr->stream_size = (tail - head) - 8;
+				ptr->stream = malloc(ptr->stream_size);
+
+				if (ptr->stream == NULL) {
+					free(buf);
+					return 1;
+				}
+
+				memcpy(ptr->stream, head + 8, ptr->stream_size);
+			}
+		} else {
+			ptr->object_size = ptr->size;
+			ptr->object = malloc(ptr->object_size + 1);
+
+			if (ptr->object == NULL) {
+				free(buf);
+				return 1;
+			}
+
+			memset(ptr->object, 0, ptr->object_size + 1);
+			memcpy(ptr->object, buf, ptr->object_size);
+		}
+
+		free(buf);
+	}
+
+	return 0;
+}
diff --git a/src/pdf_writer.c b/src/pdf_writer.c
new file mode 100644
index 0000000..3cf4f7c
--- /dev/null
+++ b/src/pdf_writer.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <openssl/md5.h>
+
+#include "pdf.h"
+
+/*
+ * Write every object of the list to *fp in "<id> 0 obj ... endobj" form
+ * and record in each node the offset it was written at (`address') and
+ * the number of bytes it takes (`size').
+ *
+ * The dictionary is preferred over the plain object text; an object
+ * with neither those nor a stream is written as "null".
+ * Returns 1 if the list or the file is NULL, 0 otherwise.
+ */
+int
+pdf_dump_obj(pdf_object_t **pdf, FILE **fp)
+{
+	if (*pdf == NULL || *fp == NULL)
+		return 1;
+
+	FILE *out = *fp;
+
+	for (pdf_object_t *node = (*pdf)->next; node != NULL; node = node->next) {
+		long start = ftell(out);
+
+		node->address = start;
+
+		fprintf(out, "%d 0 obj\n", node->id);
+
+		if (node->dictionary != NULL)
+			fputs(node->dictionary, out);
+		else if (node->object != NULL)
+			fputs(node->object, out);
+		else if (node->stream == NULL)
+			fputs("null\n", out);
+
+		if (node->stream != NULL) {
+			fputs("stream\r\n", out);
+			fwrite(node->stream, node->stream_size, 1, out);
+			fputs("endstream\n", out);
+		}
+
+		fputs("endobj\n", out);
+
+		node->size = ftell(out) - start;
+	}
+
+	return 0;
+}
+
+/*
+ * Write the file header to *fp: the "%PDF-1.7" line followed by a
+ * comment line made of four bytes with the high bit set.
+ *
+ * Returns 1 if the list or the file is NULL, 0 otherwise.
+ */
+int
+pdf_dump_header(pdf_object_t **pdf, FILE **fp)
+{
+	/* '%', the four marker bytes and the closing newline */
+	static const unsigned char marker[] = {
+		'%', 0xf6, 0xe4, 0xfc, 0xdf, '\n'
+	};
+
+	if (*pdf == NULL || *fp == NULL)
+		return 1;
+
+	fputs("%PDF-1.7\n", *fp);
+	fwrite(marker, sizeof(marker), 1, *fp);
+
+	return 0;
+}
+
+/*
+ * Write the cross-reference table to *fp.
+ *
+ * The list is walked from the head, whose id 0 provides the entry for
+ * object 0.  Nodes with consecutive ids are grouped into one subsection
+ * introduced by "<first id> <count>".  An entry is marked "n" when the
+ * node has a size and "f" otherwise; the generation is 0 for a non-zero
+ * address and 65535 otherwise.
+ *
+ * Returns 1 if the list or the file is NULL, 0 otherwise.
+ */
+int
+pdf_dump_xref(pdf_object_t **pdf, FILE **fp)
+{
+	if (*pdf == NULL || *fp == NULL)
+		return 1;
+
+	fputs("xref\n", *fp);
+
+	/* First node of the subsection being collected and its length */
+	pdf_object_t *run = *pdf;
+	int len = 1;
+
+	for (pdf_object_t *cur = *pdf; cur != NULL; cur = cur->next, len++) {
+		pdf_object_t *succ = cur->next;
+
+		/* Still inside a run of consecutive ids */
+		if (succ != NULL && succ->id == cur->id + 1)
+			continue;
+
+		fprintf(*fp, "%d %d\n", run->id, len);
+
+		while (len > 0) {
+			fprintf(*fp, "%010d %05d %s\r\n",
+			    run->address,
+			    run->address > 0 ? 0 : 65535,
+			    run->size > 0 ? "n" : "f");
+			run = run->next;
+			len--;
+		}
+
+		if (succ != NULL)
+			run = succ;
+	}
+
+	return 0;
+}
+
+/*
+ * Write the trailer dictionary, the "startxref" section and the end of
+ * file marker to *fp.
+ *
+ * /Size is the id of the last node plus one, so the list is expected to
+ * be sorted by id.  /Root is the id reported by pdf_get_catalog_id().
+ * `xref' is the file offset of the cross-reference table.
+ *
+ * Returns 1 if the list or the file is NULL, 0 otherwise.
+ */
+int
+pdf_dump_trailer(pdf_object_t **pdf, FILE **fp, int xref)
+{
+	if (*pdf == NULL || *fp == NULL)
+		return 1;
+
+	/*
+	 * File identifiers should be generated using
+	 * (a) Current time
+	 * (b) File path
+	 * (c) Size of file
+	 * (d) Values of all entries in the
+	 * file's document information dictionary
+	 *
+	 * It is recommended to be computed according to RFC 1321
+	 */
+
+	time_t timestamp = time(NULL);
+	int size = pdf_get_size(pdf);
+
+	char buf[64];
+
+	/* The casts make the arguments match the conversions exactly */
+	int buf_size = snprintf(buf, sizeof(buf), "%lx%x",
+	    (unsigned long) timestamp, (unsigned int) size);
+
+	if (buf_size < 0)
+		buf_size = 0;
+	else if ((size_t) buf_size >= sizeof(buf))
+		buf_size = sizeof(buf) - 1;
+
+	unsigned char fid[MD5_DIGEST_LENGTH];
+	MD5((const unsigned char *) buf, (size_t) buf_size, fid);
+
+	pdf_object_t *last = *pdf;
+	while (last->next != NULL)
+		last = last->next;
+
+	fputs("trailer\n", *fp);
+
+	fputs("<<\n", *fp);
+
+	/*
+	 * TODO: Document information dictionary
+	 * `"/Producer (Melon)"'
+	 * `"/CreationDate (D:YYYYMMDDHHmmSS+00'00')"'
+	 *
+	 * Trailer dictionary
+	 * `"/Info %d 0 R"'
+	 */
+	fprintf(*fp,
+	    "/Size %d\n/Root %d 0 R\n",
+	    last->id + 1,
+	    pdf_get_catalog_id(pdf));
+
+	fputs("/ID [", *fp);
+
+	/* Both halves of the identifier carry the same digest */
+	for (int i = 0; i < 2; i++) {
+		fputs("<", *fp);
+
+		for (int j = 0; j < MD5_DIGEST_LENGTH; j++)
+			fprintf(*fp, "%02x", fid[j]);
+
+		fputs(">", *fp);
+
+		if (i < 1)
+			fputs(" ", *fp);
+	}
+
+	fputs("]\n", *fp);
+
+	fputs(">>\n", *fp);
+
+	fputs("startxref\n", *fp);
+
+	fprintf(*fp, "%d\n", xref);
+
+	/* fputs() does no formatting: this writes two percent signs */
+	fputs("%%EOF\n", *fp);
+
+	return 0;
+}
diff --git a/src/version.h b/src/version.h
new file mode 100644
index 0000000..3773617
--- /dev/null
+++ b/src/version.h
@@ -0,0 +1,10 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * Version components as string literals; melon.c joins them into the
+ * banner as VERSION "." RELEASE "." PATCH EXTRA.
+ */
+#define VERSION "0"
+#define RELEASE "1"
+#define PATCH "0"
+#define EXTRA ""