aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authoryzrh <yzrh@tuta.io>2020-08-14 22:04:26 +0000
committeryzrh <yzrh@tuta.io>2020-08-14 22:04:26 +0000
commit12ecdd71592eccf7bdb6214edbc7318246469c1c (patch)
treefda27e41c37a2345702ad3e90480154d975e426f /src
downloadmelon-12ecdd71592eccf7bdb6214edbc7318246469c1c.tar.gz
melon-12ecdd71592eccf7bdb6214edbc7318246469c1c.tar.zst
Initial commit.
Diffstat (limited to 'src')
-rw-r--r--src/GNUmakefile28
-rw-r--r--src/Makefile28
-rw-r--r--src/cnki.c168
-rw-r--r--src/cnki.h86
-rw-r--r--src/cnki_caj.c40
-rw-r--r--src/cnki_kdh.c49
-rw-r--r--src/cnki_nh.c110
-rw-r--r--src/cnki_outline_tree.c73
-rw-r--r--src/cnki_pdf.c351
-rw-r--r--src/cnki_xml.c14
-rw-r--r--src/extern.h21
-rw-r--r--src/iconv.c70
-rw-r--r--src/iconv.h12
-rw-r--r--src/melon.c127
-rw-r--r--src/pdf.c228
-rw-r--r--src/pdf.h61
-rw-r--r--src/pdf_cnki.c134
-rw-r--r--src/pdf_cnki.h7
-rw-r--r--src/pdf_get.c296
-rw-r--r--src/pdf_parser.c216
-rw-r--r--src/pdf_writer.c188
-rw-r--r--src/version.h10
22 files changed, 2317 insertions, 0 deletions
diff --git a/src/GNUmakefile b/src/GNUmakefile
new file mode 100644
index 0000000..118501a
--- /dev/null
+++ b/src/GNUmakefile
@@ -0,0 +1,28 @@
+#
+# Copyright (c) 2020, yzrh <yzrh@tuta.io>
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# Build every .c file in this directory into the `melon' executable.
+src != ls *.c
+obj = ${src:.c=.o}
+
+PREFIX = /usr/local
+
+CFLAGS = -O3 -march=native -pipe -Wall
+# Linker options only.  Libraries are kept in LDLIBS so that they follow
+# the objects on the link line and come after --as-needed.
+LDFLAGS = -Wl,-O3 -Wl,--as-needed
+LDLIBS = -lcrypto
+
+all: ${obj}
+	${CC} ${LDFLAGS} -o melon $^ ${LDLIBS}
+
+clean:
+	rm -f melon ${obj}
+
+install:
+	install -d ${PREFIX}/bin
+	install melon ${PREFIX}/bin/
+
+deinstall:
+	rm -f ${PREFIX}/bin/melon
+
+.PHONY: all clean install deinstall
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..33da2cd
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,28 @@
+#
+# Copyright (c) 2020, yzrh <yzrh@tuta.io>
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# Build every .c file in this directory into the `melon' executable.
+src != ls *.c
+obj = ${src:.c=.o}
+
+PREFIX = /usr/local
+
+CFLAGS = -O3 -march=native -pipe -flto=thin -Wall
+# Linker options only.  Libraries are kept in LDLIBS so that they follow
+# the objects on the link line and come after --as-needed.
+LDFLAGS = -Wl,-O3 -Wl,--as-needed
+LDLIBS = -lcrypto
+
+all: ${obj}
+	${CC} ${LDFLAGS} -o melon $> ${LDLIBS}
+
+clean:
+	rm -f melon ${obj}
+
+install:
+	install -d ${PREFIX}/bin
+	install melon ${PREFIX}/bin/
+
+deinstall:
+	rm -f ${PREFIX}/bin/melon
+
+.PHONY: all clean install deinstall
diff --git a/src/cnki.c b/src/cnki.c
new file mode 100644
index 0000000..001be54
--- /dev/null
+++ b/src/cnki.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "cnki.h"
+
+/*
+ * Allocate a cnki_t together with its zeroed file_stat_t and set the
+ * default options (quiet, 512 KiB buffer, no streams, no lists).
+ *
+ * *param must be NULL on entry.  Returns 0 on success.  Returns 1 when
+ * *param is already set or when an allocation fails; in the failure case
+ * nothing stays allocated and *param is left NULL.
+ */
+int
+cnki_create(cnki_t **param)
+{
+	if (*param != NULL)
+		return 1;
+
+	cnki_t *ctx = malloc(sizeof(*ctx));
+
+	if (ctx == NULL)
+		return 1;
+
+	/* calloc hands back the same all-zero file_stat_t as malloc + memset */
+	ctx->file_stat = calloc(1, sizeof(*ctx->file_stat));
+
+	if (ctx->file_stat == NULL) {
+		/* Do not hand a half-built object back to the caller */
+		free(ctx);
+		return 1;
+	}
+
+	ctx->stat = 0;
+	ctx->size_buf = 524288;
+	ctx->fp_i = NULL;
+	ctx->fp_o = NULL;
+	ctx->object_outline = NULL;
+	ctx->object_nh = NULL;
+
+	*param = ctx;
+
+	return 0;
+}
+
+/*
+ * Release everything owned by *param: the file_stat_t, every node of the
+ * outline list, every node of the page list (including each page's text
+ * and image buffers) and the cnki_t itself.  *param is set to NULL so a
+ * second call is harmless.  The streams fp_i/fp_o are not closed here.
+ */
+void
+cnki_destroy(cnki_t **param)
+{
+	if (*param == NULL)
+		return;
+
+	/* Both lists are singly linked; walk them instead of freeing the head only */
+	object_outline_t *outline = (*param)->object_outline;
+	while (outline != NULL) {
+		object_outline_t *next = outline->next;
+		free(outline);
+		outline = next;
+	}
+
+	object_nh_t *nh = (*param)->object_nh;
+	while (nh != NULL) {
+		object_nh_t *next = nh->next;
+		free(nh->text);
+		free(nh->image);
+		free(nh);
+		nh = next;
+	}
+
+	free((*param)->file_stat);
+	free(*param);
+
+	*param = NULL;
+}
+
+/*
+ * Read the 4-byte file type at ADDRESS_HEAD and, for 'CAJ' and 'HN'
+ * files, the page count, the outline count and the outline records that
+ * directly follow the outline count.
+ *
+ * Returns 0 for '%PDF' and 'KDH ' files (nothing more is read) and for
+ * 'CAJ'/'HN' files whose header and outlines were read completely.
+ * Returns 1 when *param is NULL, the type is unknown, a read fails or an
+ * allocation fails.  When the outline records end early,
+ * file_stat->outline is lowered to the number of records actually loaded
+ * so that it always matches the length of the object_outline list.
+ */
+int
+cnki_info(cnki_t **param)
+{
+	if (*param == NULL)
+		return 1;
+
+	cnki_t *ctx = *param;
+	file_stat_t *fs = ctx->file_stat;
+	int addr[2];
+
+	if (ctx->stat > 1)
+		printf("Reading file header at %x\n", ADDRESS_HEAD);
+
+	if (fseek(ctx->fp_i, ADDRESS_HEAD, SEEK_SET) != 0 ||
+	    fread(fs->type, 4, 1, ctx->fp_i) != 1) {
+		/* Make sure a short read is not mistaken for a valid type */
+		memset(fs->type, 0, sizeof(fs->type));
+		return 1;
+	}
+
+	/* type is a 4 byte field without terminator: bound every access */
+	if (ctx->stat > 0)
+		printf("File type is '%.4s'\n", fs->type);
+
+	if (memcmp(fs->type, "%PDF", 4) == 0) {
+		return 0;
+	} else if (memcmp(fs->type, "CAJ", 4) == 0) {
+		addr[0] = ADDRESS_CAJ_PAGE;
+		addr[1] = ADDRESS_CAJ_OUTLINE;
+	} else if (memcmp(fs->type, "HN", 3) == 0) {
+		addr[0] = ADDRESS_HN_PAGE;
+		addr[1] = ADDRESS_HN_OUTLINE;
+	} else if (memcmp(fs->type, "KDH ", 4) == 0) {
+		return 0;
+	} else {
+		return 1;
+	}
+
+	if (ctx->stat > 1)
+		printf("Reading page count at %x\n", addr[0]);
+
+	if (fseek(ctx->fp_i, addr[0], SEEK_SET) != 0 ||
+	    fread(&fs->page, 4, 1, ctx->fp_i) != 1) {
+		fs->page = 0;
+		return 1;
+	}
+
+	if (ctx->stat > 0)
+		printf("Advised %d page(s)\n",
+		    fs->page);
+
+	if (ctx->stat > 1)
+		printf("Reading outline count at %x\n", addr[1]);
+
+	if (fseek(ctx->fp_i, addr[1], SEEK_SET) != 0 ||
+	    fread(&fs->outline, 4, 1, ctx->fp_i) != 1) {
+		fs->outline = 0;
+		return 1;
+	}
+
+	if (ctx->stat > 0)
+		printf("Advised %d outline(s)\n",
+		    fs->outline);
+
+	if (fs->outline <= 0)
+		return 0;
+
+	if (ctx->stat > 1) {
+		printf("Loading outline(s)\n");
+		printf("\t%16s\t%-24s\t%12s\t%12s\t%5s\n",
+		    "title",
+		    "hierarchy",
+		    "page",
+		    "text",
+		    "depth");
+	}
+
+	/* Where the next fully read record gets linked in */
+	object_outline_t **link = &ctx->object_outline;
+
+	for (int i = 0; i < fs->outline; i++) {
+		object_outline_t *ptr = calloc(1, sizeof(*ptr));
+
+		if (ptr == NULL) {
+			fs->outline = i;
+			return 1;
+		}
+
+		if (fread(ptr->title, 256, 1, ctx->fp_i) != 1 ||
+		    fread(ptr->hierarchy, 24, 1, ctx->fp_i) != 1 ||
+		    fread(ptr->page, 12, 1, ctx->fp_i) != 1 ||
+		    fread(ptr->text, 12, 1, ctx->fp_i) != 1 ||
+		    fread(&ptr->depth, 4, 1, ctx->fp_i) != 1) {
+			/* Truncated record: keep what was loaded so far */
+			free(ptr);
+			fs->outline = i;
+			return 1;
+		}
+
+		*link = ptr;
+		link = &ptr->next;
+
+		if (ctx->stat > 1) {
+			printf("\t");
+			for (int j = 1; j <= 256; j++) {
+				printf("%02x", (unsigned char) ptr->title[j - 1]);
+
+				/* Stop at the end of the field or of the title */
+				if (j % 8 == 0 &&
+				    (j == 256 || ptr->title[j] == '\0'))
+					break;
+
+				if (j % 8 == 0)
+					printf("\n\t");
+				else if (j % 2 == 0)
+					printf(" ");
+			}
+			/* Fixed width fields may lack a terminator */
+			printf("\t%-24.24s\t%12.12s\t%12.12s\t%5d\n",
+			    ptr->hierarchy,
+			    ptr->page,
+			    ptr->text,
+			    ptr->depth);
+		}
+	}
+
+	if (ctx->stat > 0)
+		printf("Loaded %d outline(s)\n",
+		    fs->outline);
+
+	return 0;
+}
diff --git a/src/cnki.h b/src/cnki.h
new file mode 100644
index 0000000..6e3565f
--- /dev/null
+++ b/src/cnki.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define ADDRESS_HEAD 0x0000
+
+#define ADDRESS_CAJ_PAGE 0x0010
+#define ADDRESS_CAJ_OUTLINE 0x0110
+#define ADDRESS_CAJ_BODY 0x0014
+
+#define ADDRESS_HN_PAGE 0x0090
+#define ADDRESS_HN_OUTLINE 0x0158
+
+#define ADDRESS_KDH_BODY 0x00fe
+
+#define KEY_KDH "FZHMEI"
+#define KEY_KDH_LENGTH 6
+
+/* Header values that cnki_info() reads from the input file */
+typedef struct _file_stat_t {
+ char type[4]; /* 4 bytes read at ADDRESS_HEAD; no terminator is stored */
+ int32_t page; /* Page count read at ADDRESS_CAJ_PAGE or ADDRESS_HN_PAGE */
+ int32_t outline; /* Outline count read at ADDRESS_CAJ_OUTLINE or ADDRESS_HN_OUTLINE */
+} file_stat_t;
+
+/*
+ * One outline record; cnki_info() reads the first five fields from the
+ * file in declaration order and chains the records through `next'.
+ */
+typedef struct _object_outline_t {
+ char title[256]; /* Starting at file_stat_t->outline + 4 */
+ char hierarchy[24];
+ char page[12]; /* Parsed with atoi() when the outline is written */
+ char text[12];
+ int32_t depth; /* Compared between records to build the outline tree */
+ struct _object_outline_t *next; /* NULL on the last record */
+} object_outline_t;
+
+/*
+ * Node of the tree built by cnki_outline_tree().  The root node has no
+ * item; every other node refers to one outline record.
+ */
+typedef struct _object_outline_tree_t {
+ int id; /* Taken from the ids array passed to cnki_outline_tree() */
+ struct _object_outline_t *item; /* NULL on the root node */
+ struct _object_outline_tree_t *up; /* Node this one was attached to */
+ struct _object_outline_tree_t *left; /* Next record of the same depth */
+ struct _object_outline_tree_t *right; /* First record one level deeper */
+} object_outline_tree_t;
+
+/*
+ * Values for object_nh_t.image_format.
+ * NOTE(review): the names look like image codec identifiers; the meaning
+ * of DCT_0 versus DCT_1 is not visible here -- confirm.
+ */
+typedef enum _nh_code {
+ CCITTFAX,
+ DCT_0,
+ DCT_1,
+ JBIG2,
+ JPX
+} nh_code;
+
+/*
+ * One page record of an 'HN' file as loaded by cnki_nh().  address, size,
+ * page and zero come from the page table; text, the image_* fields and
+ * image are filled in afterwards from the location given by `address'.
+ */
+typedef struct _object_nh_t {
+ int32_t address; /* Starting at end of object_outline_t */
+ int32_t size; /* Byte length of `text' */
+ int16_t page[2];
+ int32_t zero[2];
+ char *text; /* `size' bytes read at `address'; heap allocated */
+ int32_t image_format; /* nh_code */
+ int32_t image_address;
+ int32_t image_size; /* Byte length of `image' */
+ char *image; /* `image_size' bytes read at `image_address'; heap allocated */
+ struct _object_nh_t *next; /* NULL on the last page */
+} object_nh_t;
+
+/* Conversion context created by cnki_create() and passed to every cnki_*() call */
+typedef struct _cnki_t {
+ int stat; /* Verbosity: messages are printed when > 0, tables when > 1 */
+ int size_buf; /* I/O buffer size in bytes; defaults to 524288 */
+ FILE *fp_i; /* Input stream */
+ FILE *fp_o; /* Output stream */
+ file_stat_t *file_stat;
+ object_outline_t *object_outline; /* Head of the outline list or NULL */
+ object_nh_t *object_nh; /* Head of the page list or NULL */
+} cnki_t;
+
+/* cnki_pdf.c */
+int cnki_pdf(cnki_t **param);
+
+/* cnki_outline_tree.c */
+int cnki_outline_tree(object_outline_tree_t **outline_tree,
+ object_outline_t **outline, int *ids);
+
+/* cnki_xml.c */
+int cnki_xml(char **xml, FILE **fp);
diff --git a/src/cnki_caj.c b/src/cnki_caj.c
new file mode 100644
index 0000000..c541064
--- /dev/null
+++ b/src/cnki_caj.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+
+#include "cnki.h"
+
+/*
+ * Convert a 'CAJ' file: follow the two stored offsets that lead from
+ * ADDRESS_CAJ_BODY to the embedded document body, position the input
+ * stream there and hand over to cnki_pdf().
+ *
+ * Returns 0 on success and 1 when *param is NULL, an offset cannot be
+ * read or followed, or cnki_pdf() fails.
+ */
+int
+cnki_caj(cnki_t **param)
+{
+	if (*param == NULL)
+		return 1;
+
+	FILE *fp = (*param)->fp_i;
+
+	if ((*param)->stat > 0)
+		printf("Begin 'CAJ' conversion\n");
+
+	if ((*param)->stat > 1)
+		printf("Reading document body address at %x\n", ADDRESS_CAJ_BODY);
+
+	int32_t addr = 0;
+
+	/* The body offset is stored behind two levels of indirection */
+	if (fseek(fp, ADDRESS_CAJ_BODY, SEEK_SET) != 0 ||
+	    fread(&addr, 4, 1, fp) != 1)
+		return 1;
+
+	if (fseek(fp, addr, SEEK_SET) != 0 ||
+	    fread(&addr, 4, 1, fp) != 1)
+		return 1;
+
+	if (fseek(fp, addr, SEEK_SET) != 0)
+		return 1;
+
+	if ((*param)->stat > 0)
+		printf("Advised document body address is %x\n",
+		    (unsigned int) addr);
+
+	/* Report a failed conversion instead of claiming success */
+	if (cnki_pdf(param) != 0)
+		return 1;
+
+	if ((*param)->stat > 0)
+		printf("Conversion ended\n");
+
+	return 0;
+}
diff --git a/src/cnki_kdh.c b/src/cnki_kdh.c
new file mode 100644
index 0000000..8441319
--- /dev/null
+++ b/src/cnki_kdh.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+
+#include "cnki.h"
+
+/*
+ * Decrypt a 'KDH' file: every byte from ADDRESS_KDH_BODY to the end of
+ * the input is XORed with the repeating key KEY_KDH and written to the
+ * output stream.
+ *
+ * Returns 0 on success and 1 when *param is NULL, the buffer size is not
+ * positive, the buffer cannot be allocated or reading/writing fails.
+ */
+int
+cnki_kdh(cnki_t **param)
+{
+	if (*param == NULL)
+		return 1;
+
+	if ((*param)->size_buf <= 0)
+		return 1;
+
+	FILE *in = (*param)->fp_i;
+	FILE *out = (*param)->fp_o;
+
+	if ((*param)->stat > 0)
+		printf("Begin 'KDH' decryption\n");
+
+	if (fseek(in, ADDRESS_KDH_BODY, SEEK_SET) != 0)
+		return 1;
+
+	const char key[] = KEY_KDH;
+	const int key_len = KEY_KDH_LENGTH;
+	int key_cur = 0;
+
+	/* The buffer size is user controlled; keep it off the stack */
+	const size_t size_buf = (*param)->size_buf;
+	char *buf = malloc(size_buf);
+
+	if (buf == NULL)
+		return 1;
+
+	int ret = 0;
+	size_t len;
+
+	/*
+	 * Count bytes rather than whole buffers so that the final partial
+	 * chunk is written with its real length, and stop at end of file
+	 * even when the input is shorter than ADDRESS_KDH_BODY.
+	 */
+	while ((len = fread(buf, 1, size_buf, in)) > 0) {
+		for (size_t i = 0; i < len; i++) {
+			buf[i] ^= key[key_cur];
+			/* The key position carries over between chunks */
+			key_cur = (key_cur + 1) % key_len;
+		}
+
+		if (fwrite(buf, 1, len, out) != len) {
+			ret = 1;
+			break;
+		}
+	}
+
+	if (ferror(in))
+		ret = 1;
+
+	free(buf);
+
+	if (ret != 0)
+		return 1;
+
+	if ((*param)->stat > 0)
+		printf("Decryption ended total %ld byte(s) written\n",
+		    ftell(out));
+
+	return 0;
+}
diff --git a/src/cnki_nh.c b/src/cnki_nh.c
new file mode 100644
index 0000000..7b9378f
--- /dev/null
+++ b/src/cnki_nh.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+
+#include "cnki.h"
+
+/*
+ * Load the page table of an 'HN' file and, for every page, its text
+ * block, its image header and its image data into (*param)->object_nh.
+ *
+ * The conversion itself is not implemented yet, so this function always
+ * returns 1.  It stops early (still returning 1) when *param is NULL,
+ * the page count is not positive, an allocation fails, a size read from
+ * the file is negative or a read comes up short.
+ */
+int
+cnki_nh(cnki_t **param)
+{
+	if (*param == NULL)
+		return 1;
+
+	FILE *fp = (*param)->fp_i;
+	const int pages = (*param)->file_stat->page;
+
+	if ((*param)->stat > 0)
+		printf("Begin 'HN' conversion\n");
+
+	if (pages <= 0)
+		return 1;
+
+	/* calloc leaves text, image and next NULL and the sizes 0 */
+	(*param)->object_nh = calloc(1, sizeof(object_nh_t));
+
+	if ((*param)->object_nh == NULL)
+		return 1;
+
+	if ((*param)->stat > 1) {
+		printf("Loading page(s)\n");
+		printf("\t%8s\t%8s\t%13s\t%6s\t%4s\t%8s\t%8s\n",
+		    "address",
+		    "text",
+		    "page",
+		    "zero",
+		    "code",
+		    "address",
+		    "image");
+	}
+
+	object_nh_t *ptr = (*param)->object_nh;
+	for (int i = 0; i < pages; i++) {
+		ptr->image_format = -1;
+
+		if (fread(&ptr->address, 4, 1, fp) != 1 ||
+		    fread(&ptr->size, 4, 1, fp) != 1 ||
+		    fread(&ptr->page, 4, 1, fp) != 1 ||
+		    fread(&ptr->zero, 8, 1, fp) != 1)
+			return 1;
+
+		if (i < pages - 1) {
+			ptr->next = calloc(1, sizeof(object_nh_t));
+
+			if (ptr->next == NULL)
+				return 1;
+		}
+
+		ptr = ptr->next;
+	}
+
+	for (ptr = (*param)->object_nh; ptr != NULL; ptr = ptr->next) {
+		/* Sizes come from the file; never pass a negative one to malloc */
+		if (ptr->size < 0)
+			return 1;
+
+		/* Ask for at least one byte: malloc(0) may return NULL */
+		ptr->text = malloc(ptr->size > 0 ? (size_t) ptr->size : 1);
+
+		if (ptr->text == NULL)
+			return 1;
+
+		if (fseek(fp, ptr->address, SEEK_SET) != 0)
+			return 1;
+
+		if (ptr->size > 0 &&
+		    fread(ptr->text, ptr->size, 1, fp) != 1)
+			return 1;
+
+		if (fread(&ptr->image_format, 4, 1, fp) != 1 ||
+		    fread(&ptr->image_address, 4, 1, fp) != 1 ||
+		    fread(&ptr->image_size, 4, 1, fp) != 1)
+			return 1;
+
+		if (ptr->image_size < 0)
+			return 1;
+
+		ptr->image = malloc(ptr->image_size > 0 ?
+		    (size_t) ptr->image_size : 1);
+
+		if (ptr->image == NULL)
+			return 1;
+
+		if (fseek(fp, ptr->image_address, SEEK_SET) != 0)
+			return 1;
+
+		if (ptr->image_size > 0 &&
+		    fread(ptr->image, ptr->image_size, 1, fp) != 1)
+			return 1;
+
+		if ((*param)->stat > 1)
+			printf("\t%08x\t%8d\t{%d, %8d}\t{%d, %d}\t%4d\t%08x\t%8d\n",
+			    ptr->address,
+			    ptr->size,
+			    ptr->page[0],
+			    ptr->page[1],
+			    ptr->zero[0],
+			    ptr->zero[1],
+			    ptr->image_format,
+			    ptr->image_address,
+			    ptr->image_size);
+	}
+
+	if ((*param)->stat > 1)
+		printf("Loaded %d page(s)\n", pages);
+
+	/* TODO: Study signed int __fastcall CAJDoc::OpenNHCAJFile(int a1, int a2) */
+
+	if ((*param)->stat > 0)
+		printf("Conversion ended\n");
+
+	/* TODO: Finish me please :) */
+	return 1;
+}
diff --git a/src/cnki_outline_tree.c b/src/cnki_outline_tree.c
new file mode 100644
index 0000000..7d16ddb
--- /dev/null
+++ b/src/cnki_outline_tree.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+
+#include "cnki.h"
+
+/*
+ * Allocate one tree node with the given id, item and `up' link and no
+ * children.  Returns NULL when the allocation fails.
+ */
+static object_outline_tree_t *
+_tree_node(int id, object_outline_t *item, object_outline_tree_t *up)
+{
+	object_outline_tree_t *node = malloc(sizeof(object_outline_tree_t));
+
+	if (node != NULL) {
+		node->id = id;
+		node->item = item;
+		node->up = up;
+		node->left = NULL;
+		node->right = NULL;
+	}
+
+	return node;
+}
+
+/*
+ * Turn the flat outline list into a tree.  The root carries no item;
+ * records of equal depth are chained through `left', a record one level
+ * deeper than the current node hangs off `right'.  Ids are handed out
+ * from `ids' in order, the root first.
+ *
+ * Returns 0 on success and 1 when *outline_tree is already set, *outline
+ * is NULL or an allocation fails.
+ */
+int
+cnki_outline_tree(object_outline_tree_t **outline_tree,
+	object_outline_t **outline, int *ids)
+{
+	if (*outline_tree != NULL || *outline == NULL)
+		return 1;
+
+	int next_id = 0;
+
+	*outline_tree = _tree_node(ids[next_id++], NULL, NULL);
+
+	if (*outline_tree == NULL)
+		return 1;
+
+	object_outline_tree_t *cur = *outline_tree;
+	object_outline_t *entry = *outline;
+
+	while (entry != NULL) {
+		if (cur->item != NULL && entry->depth != cur->item->depth) {
+			if (entry->depth != cur->item->depth + 1) {
+				/* Neither sibling nor child: retry one node up */
+				cur = cur->up;
+				continue;
+			}
+
+			/* Exactly one level deeper: first child */
+			cur->right = _tree_node(ids[next_id++], entry, cur);
+
+			if (cur->right == NULL)
+				return 1;
+
+			cur = cur->right;
+		} else {
+			/* Root or same depth: append after the last sibling */
+			while (cur->left != NULL)
+				cur = cur->left;
+
+			cur->left = _tree_node(ids[next_id++], entry, cur);
+
+			if (cur->left == NULL)
+				return 1;
+
+			cur = cur->left;
+		}
+
+		entry = entry->next;
+	}
+
+	return 0;
+}
diff --git a/src/cnki_pdf.c b/src/cnki_pdf.c
new file mode 100644
index 0000000..e56decb
--- /dev/null
+++ b/src/cnki_pdf.c
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "cnki.h"
+#include "pdf.h"
+#include "pdf_cnki.h"
+
+/*
+ * Print one table row per object in the list (verbose output only).
+ */
+static void
+_print_objects(pdf_object_t *pdf)
+{
+	printf("\t%8s\t%8s\t%8s\t%12s\t%12s\t%12s\n",
+	    "address",
+	    "size",
+	    "id",
+	    "object",
+	    "dictionary",
+	    "stream");
+
+	for (pdf_object_t *ptr = pdf->next; ptr != NULL; ptr = ptr->next)
+		printf("\t%08x\t%8d\t%8d\t%12d\t%12d\t%12d\n",
+		    ptr->address,
+		    ptr->size,
+		    ptr->id,
+		    ptr->object_size,
+		    ptr->dictionary_size,
+		    ptr->stream_size);
+}
+
+/*
+ * Load the PDF objects found at the current position of the input
+ * stream, generate the `/Pages' parents that are referenced but absent,
+ * a root `/Pages' object when several parents had to be generated, the
+ * outline objects and the `/Catalog', then write header, objects,
+ * cross-reference table and trailer to the output stream.
+ *
+ * Returns 0 on success.  Returns 1 when *param is NULL, the objects
+ * cannot be loaded, no parent object is found, an allocation fails or
+ * the header cannot be written.  Everything allocated here is released
+ * on every path.
+ *
+ * NOTE(review): `parent' and `ids' are freed here on the assumption that
+ * pdf_get_parent_id() and pdf_get_free_ids() allocate them with malloc(),
+ * like the `kid' array this function has always freed -- confirm.
+ */
+int
+cnki_pdf(cnki_t **param)
+{
+	if (*param == NULL)
+		return 1;
+
+	int ret = 1;
+
+	pdf_object_t *pdf = NULL;
+
+	int *parent = NULL;
+	int *parent_missing = NULL;
+	int *kid = NULL;
+	int *ids = NULL;
+
+	char buf[64];
+	int dictionary_size;
+	char *dictionary = NULL;
+
+	int root = 0;
+	int root_kid = 0;
+
+	long cur = 0;
+	long xref;
+
+	if (pdf_obj_create(&pdf) != 0)
+		return 1;
+
+	if ((*param)->stat > 0)
+		printf("Begin processing PDF\n");
+
+	if ((*param)->stat > 1)
+		printf("Loading object(s)\n");
+
+	if (pdf_load(&pdf, &(*param)->fp_i, (*param)->size_buf) != 0)
+		goto out;
+
+	if ((*param)->stat > 1)
+		_print_objects(pdf);
+
+	if ((*param)->stat > 0)
+		printf("Loaded %d object(s)\n",
+		    pdf_get_count(&pdf));
+
+	if ((*param)->stat > 1)
+		printf("Searching for parent object(s)\n");
+
+	pdf_get_parent_id(&pdf, &parent);
+
+	/* parent[0] holds the number of ids that follow */
+	if (parent == NULL || parent[0] <= 0)
+		goto out;
+
+	if ((*param)->stat > 0)
+		printf("Discovered %d parent object(s)\n", parent[0]);
+
+	/* One flag per parent; heap allocated because the count is file data */
+	parent_missing = calloc(parent[0], sizeof(int));
+
+	if (parent_missing == NULL)
+		goto out;
+
+	for (int i = 1; i <= parent[0]; i++) {
+		if ((*param)->stat > 1)
+			printf("Searching for object %d\n", parent[i]);
+
+		kid = NULL;
+		pdf_get_kid_id(&pdf, parent[i], &kid);
+
+		if (kid == NULL)
+			goto out;
+
+		if (kid[0] != 0) {
+			if ((*param)->stat > 0)
+				printf("Object is missing\n");
+
+			if ((*param)->stat > 1)
+				printf("Generating object\n");
+
+			/* Up to 16 bytes per "<id> 0 R " reference */
+			dictionary_size = 64 + 16 * kid[0];
+			dictionary = calloc(1, dictionary_size);
+
+			if (dictionary == NULL)
+				goto out;
+
+			snprintf(buf, 64,
+			    "<<\n/Type /Pages\n/Kids [");
+			strcat(dictionary, buf);
+			for (int j = 1; j <= kid[0]; j++) {
+				snprintf(buf, 64,
+				    "%d 0 R",
+				    kid[j]);
+				strcat(dictionary, buf);
+				if (j < kid[0])
+					strcat(dictionary, " ");
+			}
+			snprintf(buf, 64,
+			    "]\n/Count %d\n>>\n",
+			    pdf_get_kid_count(&pdf, parent[i]));
+			strcat(dictionary, buf);
+
+			pdf_obj_prepend(&pdf, parent[i], NULL, dictionary, NULL);
+
+			parent_missing[i - 1] = 1;
+
+			if ((*param)->stat > 0)
+				printf("Generated object for %d child(ren)\n",
+				    kid[0]);
+
+			free(dictionary);
+			dictionary = NULL;
+		} else {
+			parent_missing[i - 1] = 0;
+
+			if ((*param)->stat > 0)
+				printf("Object exists\n");
+		}
+
+		free(kid);
+		kid = NULL;
+	}
+
+	if ((*param)->stat > 1)
+		printf("Searching for root object\n");
+
+	/*
+	 * Shared by the root object and the catalog.  The root lists up to
+	 * parent[0] references, so the size has to grow with that count.
+	 */
+	dictionary_size = 128 + 16 * parent[0];
+	dictionary = calloc(1, dictionary_size);
+
+	if (dictionary == NULL)
+		goto out;
+
+	for (int i = 0; i < parent[0]; i++)
+		if (parent_missing[i])
+			root_kid++;
+
+	if (root_kid <= 1) {
+		if (root_kid == 0) {
+			for (int i = 1; i <= parent[0]; i++)
+				if (root == 0 || root < parent[i])
+					root = parent[i];
+		} else {
+			/* parent_missing[i] belongs to the id in parent[i + 1] */
+			for (int i = 0; i < parent[0]; i++)
+				if (parent_missing[i])
+					root = parent[i + 1];
+		}
+
+		if ((*param)->stat > 0)
+			printf("Root object is %d.\n",
+			    root);
+	} else {
+		if ((*param)->stat > 0)
+			printf("Root object is missing\n");
+
+		if ((*param)->stat > 1)
+			printf("Generating root object\n");
+
+		root = pdf_get_free_id(&pdf);
+
+		snprintf(buf, 64,
+		    "<<\n/Type /Pages\n/Kids ");
+		strcat(dictionary, buf);
+
+		if (parent[0] > 1)
+			strcat(dictionary, "[");
+
+		for (int i = 0; i < parent[0]; i++) {
+			if (parent_missing[i]) {
+				snprintf(buf, 64, "%d 0 R", parent[i + 1]);
+				strcat(dictionary, buf);
+				if (i < root_kid)
+					strcat(dictionary, " ");
+			}
+		}
+
+		if (parent[0] > 1)
+			strcat(dictionary, "]");
+
+		strcat(dictionary, "\n");
+
+		snprintf(buf, 64, "/Count %d\n", (*param)->file_stat->page);
+		strcat(dictionary, buf);
+
+		strcat(dictionary, ">>\n");
+
+		pdf_obj_prepend(&pdf, root, NULL, dictionary, NULL);
+
+		memset(dictionary, 0, dictionary_size);
+
+		if ((*param)->stat > 0)
+			printf("Generated root object %d.\n",
+			    root);
+	}
+
+	if ((*param)->file_stat->outline > 0) {
+		if ((*param)->stat > 1)
+			printf("Generating outline object(s)\n\t%8s\n", "id");
+
+		pdf_get_free_ids(&pdf, &ids, (*param)->file_stat->outline + 1);
+		int outline = pdf_cnki_outline(&pdf, &(*param)->object_outline, &ids);
+
+		if ((*param)->stat > 1 && ids != NULL)
+			for (int i = 0; i < (*param)->file_stat->outline + 1; i++)
+				printf("\t%8d\n", ids[i]);
+
+		if ((*param)->stat > 0) {
+			if (outline != 0)
+				printf("No outline information\n");
+			else
+				printf("Generated %d outline object(s)\n",
+				    (*param)->file_stat->outline + 1);
+		}
+	}
+
+	if ((*param)->stat > 1)
+		printf("Generating '/Catalog' dictionary\n");
+
+	snprintf(buf, 64,
+	    "<<\n/Type /Catalog\n/Pages %d 0 R\n",
+	    root);
+	strcat(dictionary, buf);
+
+	if (ids != NULL) {
+		snprintf(buf, 64,
+		    "/Outlines %d 0 R\n/PageMode /UseOutlines\n",
+		    ids[0]);
+		strcat(dictionary, buf);
+	}
+
+	strcat(dictionary, ">>\n");
+
+	pdf_obj_append(&pdf, 0, NULL, dictionary, NULL);
+
+	free(dictionary);
+	dictionary = NULL;
+
+	if ((*param)->stat > 0)
+		printf("Generated '/Catalog' dictionary\n");
+
+	if ((*param)->stat > 1)
+		printf("Sorting object(s)\n");
+
+	pdf_obj_sort(&pdf);
+
+	if ((*param)->stat > 0)
+		printf("Sorted object(s)\n");
+
+	if ((*param)->stat > 1)
+		printf("Writing header\n");
+
+	if ((*param)->stat > 0)
+		cur = ftell((*param)->fp_o);
+
+	if (pdf_dump_header(&pdf, &(*param)->fp_o) != 0) {
+		fprintf(stderr, "Header not written\n");
+		goto out;
+	} else {
+		if ((*param)->stat > 0)
+			printf("Header %ld byte(s) written\n",
+			    ftell((*param)->fp_o) - cur);
+	}
+
+	if ((*param)->stat > 1)
+		printf("Writing object(s)\n");
+
+	pdf_dump_obj(&pdf, &(*param)->fp_o);
+
+	if ((*param)->stat > 1)
+		_print_objects(pdf);
+
+	if ((*param)->stat > 0)
+		printf("%d object(s) %ld byte(s) written\n",
+		    pdf_get_count(&pdf),
+		    ftell((*param)->fp_o));
+
+	xref = ftell((*param)->fp_o);
+
+	if ((*param)->stat > 1)
+		printf("Writing cross-reference table\n");
+
+	if (pdf_dump_xref(&pdf, &(*param)->fp_o) != 0) {
+		if ((*param)->stat > 0)
+			printf("Cross-reference table not written\n");
+	} else {
+		if ((*param)->stat > 0)
+			printf("Cross-reference table %ld byte(s) written\n",
+			    ftell((*param)->fp_o) - xref);
+	}
+
+	if ((*param)->stat > 1)
+		printf("Writing trailer\n");
+
+	if ((*param)->stat > 0)
+		cur = ftell((*param)->fp_o);
+
+	if (pdf_dump_trailer(&pdf, &(*param)->fp_o, xref) != 0) {
+		if ((*param)->stat > 0)
+			printf("Trailer not written\n");
+	} else {
+		if ((*param)->stat > 0)
+			printf("Trailer %ld byte(s) written\n",
+			    ftell((*param)->fp_o) - cur);
+	}
+
+	if ((*param)->stat > 0)
+		printf("Total %ld byte(s) written\n",
+		    ftell((*param)->fp_o));
+
+	ret = 0;
+
+out:
+	free(dictionary);
+	free(kid);
+	free(ids);
+	free(parent_missing);
+	free(parent);
+	pdf_obj_destroy(&pdf);
+
+	return ret;
+}
diff --git a/src/cnki_xml.c b/src/cnki_xml.c
new file mode 100644
index 0000000..7933738
--- /dev/null
+++ b/src/cnki_xml.c
@@ -0,0 +1,14 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdio.h>
+
+/*
+ * Placeholder: neither argument is used yet and the call always
+ * reports failure by returning 1.
+ */
+int
+cnki_xml(char **xml, FILE **fp)
+{
+	(void) xml;
+	(void) fp;
+
+	/* TODO: Extract XML and embed into `/Metadata' */
+	const int not_implemented = 1;
+
+	return not_implemented;
+}
diff --git a/src/extern.h b/src/extern.h
new file mode 100644
index 0000000..b7abc6e
--- /dev/null
+++ b/src/extern.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "cnki.h"
+
+/* cnki.c */
+int cnki_create(cnki_t **param);
+void cnki_destroy(cnki_t **param);
+int cnki_info(cnki_t **param);
+
+/* cnki_caj.c */
+int cnki_caj(cnki_t **param);
+
+/* cnki_nh.c */
+int cnki_nh(cnki_t **param);
+
+/* cnki_kdh.c */
+int cnki_kdh(cnki_t **param);
diff --git a/src/iconv.c b/src/iconv.c
new file mode 100644
index 0000000..1bf4d94
--- /dev/null
+++ b/src/iconv.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <iconv.h>
+
+/* So, why would anyone use something other than UTF-8? */
+/*
+ * Convert the NUL-terminated string `src' from encoding `src_code' to
+ * encoding `dst_code'.
+ *
+ * On entry *size is the capacity in bytes of the scratch buffer used for
+ * the conversion.  On success *dst points to a newly allocated buffer
+ * that the caller must free and *size is the number of converted bytes
+ * without the terminator; 0 is returned.  On failure 1 is returned.
+ *
+ * NOTE(review): the converted terminator is assumed to occupy two bytes,
+ * which holds for the UTF-16BE target of the caller visible here;
+ * confirm before using another destination encoding.
+ */
+int
+strconv(char **dst,
+	const char * restrict dst_code,
+	const char * restrict src,
+	const char * restrict src_code,
+	int *size)
+{
+	if (*size <= 0)
+		return 1;
+
+	int ret = 1;
+	int converted;
+
+	size_t dst_size = *size;
+	size_t src_size = strlen(src) + 1;
+
+	char *dst_start = malloc(dst_size);
+	char *src_start = malloc(src_size);
+
+	/* iconv() advances these; the *_start copies are kept for free() */
+	char *dst_conv = dst_start;
+	char *src_conv = src_start;
+
+	iconv_t conv_src_dst = (iconv_t) -1;
+
+	if (dst_start == NULL || src_start == NULL)
+		goto out;
+
+	/* iconv() wants a writable source pointer, so work on a copy */
+	memcpy(src_start, src, src_size);
+
+	conv_src_dst = iconv_open(dst_code, src_code);
+
+	if (conv_src_dst == (iconv_t) -1)
+		goto out;
+
+	if (iconv(conv_src_dst,
+	    &src_conv, &src_size,
+	    &dst_conv, &dst_size) == (size_t) -1)
+		goto out;
+
+	/* Not including NULL */
+	converted = (int) ((size_t) *size - dst_size) - 2;
+
+	if (converted < 0)
+		goto out;
+
+	*size = converted;
+
+	/* Ask for at least one byte: malloc(0) may return NULL */
+	*dst = malloc(converted > 0 ? (size_t) converted : 1);
+
+	if (*dst == NULL)
+		goto out;
+
+	memcpy(*dst, dst_start, converted);
+
+	ret = 0;
+
+out:
+	free(dst_start);
+	free(src_start);
+
+	/* Close the descriptor on every path, including a failed iconv() */
+	if (conv_src_dst != (iconv_t) -1 &&
+	    iconv_close(conv_src_dst) != 0)
+		ret = 1;
+
+	return ret;
+}
diff --git a/src/iconv.h b/src/iconv.h
new file mode 100644
index 0000000..81af034
--- /dev/null
+++ b/src/iconv.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+int
+strconv(char **dst,
+ const char * restrict dst_code,
+ const char * restrict src,
+ const char * restrict src_code,
+ int *size);
diff --git a/src/melon.c b/src/melon.c
new file mode 100644
index 0000000..d1a08d1
--- /dev/null
+++ b/src/melon.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+
+#include "extern.h"
+#include "version.h"
+
+/*
+ * Command line entry point.
+ *
+ *   melon [--output file] [--buffer bytes] [--verbose] file
+ *
+ * Detects the type of the input file and runs the matching converter.
+ * Without --output the result goes to stdout, which cannot be combined
+ * with --verbose.  Returns EXIT_SUCCESS when the conversion succeeded
+ * and EXIT_FAILURE otherwise.
+ */
+int
+main(int argc, char **argv, char **envp)
+{
+	(void) envp;
+
+	static struct option long_options[] = {
+		{"output", required_argument, 0, 'o'},
+		{"buffer", required_argument, 0, 'b'},
+		{"verbose", no_argument, 0, 'v'},
+		{0, 0, 0, 0}
+	};
+
+	printf("Melon " VERSION "." RELEASE "." PATCH EXTRA "\n");
+	printf("Copyright (c) 2020, yzrh <yzrh@tuta.io>\n\n");
+
+	cnki_t *param = NULL;
+
+	if (cnki_create(&param) != 0) {
+		fprintf(stderr, "%s: %s\n", argv[0], strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	for (;;) {
+		int option_index = 0;
+
+		int c = getopt_long(argc, argv, "o:b:v",
+		    long_options, &option_index);
+
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'o':
+			/* A repeated --output replaces the earlier one */
+			if (param->fp_o != NULL)
+				fclose(param->fp_o);
+
+			/* The output is binary data */
+			if ((param->fp_o = fopen(optarg, "wb")) == NULL) {
+				fprintf(stderr, "%s: %s\n", argv[0],
+				    strerror(errno));
+				return EXIT_FAILURE;
+			}
+			break;
+		case 'b': {
+			char *end = NULL;
+			long size;
+
+			errno = 0;
+			size = strtol(optarg, &end, 10);
+
+			/* size_buf is an int and is used as an allocation size */
+			if (errno != 0 || end == optarg || *end != '\0' ||
+			    size <= 0 || size > 0x7fffffffL) {
+				fprintf(stderr, "%s: invalid buffer size '%s'\n",
+				    argv[0], optarg);
+				return EXIT_FAILURE;
+			}
+
+			param->size_buf = (int) size;
+			break;
+		}
+		case 'v':
+			param->stat += 1;
+			break;
+		case '?':
+			break;
+		default:
+			abort();
+		}
+	}
+
+	if (argc - optind != 1) {
+		fprintf(stderr, "Usage: %s ", argv[0]);
+		fprintf(stderr, "[--output --buffer --verbose] file\n");
+		return EXIT_FAILURE;
+	}
+
+	if (param->fp_o == NULL) {
+		if (param->stat == 0) {
+			param->fp_o = stdout;
+		} else {
+			fprintf(stderr, "%s: --verbose ", argv[0]);
+			fprintf(stderr, "must not be set ");
+			fprintf(stderr, "when using stdout\n");
+			return EXIT_FAILURE;
+		}
+	}
+
+	if ((param->fp_i = fopen(argv[optind], "rb")) == NULL) {
+		fprintf(stderr, "%s: %s\n", argv[0],
+		    strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	cnki_info(&param);
+
+	/* type is a 4 byte field without terminator: compare by length */
+	const char *type = param->file_stat->type;
+	int (*convert)(cnki_t **) = NULL;
+
+	if (memcmp(type, "%PDF", 4) == 0)
+		convert = cnki_pdf;
+	else if (memcmp(type, "CAJ", 4) == 0)
+		convert = cnki_caj;
+	else if (memcmp(type, "HN", 3) == 0)
+		convert = cnki_nh;
+	else if (memcmp(type, "KDH ", 4) == 0)
+		convert = cnki_kdh;
+
+	if (convert == NULL) {
+		fprintf(stderr, "%s: %s\n", argv[0],
+		    "Invalid file");
+		return EXIT_FAILURE;
+	}
+
+	if (convert(&param) != 0) {
+		fprintf(stderr, "%s: %s\n", argv[0],
+		    strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	fclose(param->fp_i);
+
+	/* Buffered output is flushed here; a failure means a short file */
+	if (fclose(param->fp_o) != 0) {
+		fprintf(stderr, "%s: %s\n", argv[0],
+		    strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	cnki_destroy(&param);
+
+	return EXIT_SUCCESS;
+}
diff --git a/src/pdf.c b/src/pdf.c
new file mode 100644
index 0000000..92dd717
--- /dev/null
+++ b/src/pdf.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "pdf.h"
+
+/*
+ * Smallest id among the objects that follow *pdf (the node *pdf itself
+ * is not examined).  Returns 0 when nothing follows *pdf.
+ */
+static int
+_min_id(pdf_object_t **pdf)
+{
+	int lowest = 0;
+
+	for (pdf_object_t *node = (*pdf)->next; node != NULL; node = node->next)
+		if (lowest == 0 || node->id < lowest)
+			lowest = node->id;
+
+	return lowest;
+}
+
+/*
+ * Allocate the head node of an object list: id 0, no content and no
+ * successor.  *pdf must be NULL on entry.  Returns 0 on success and 1
+ * when *pdf is already set or the allocation fails.
+ */
+int
+pdf_obj_create(pdf_object_t **pdf)
+{
+	if (*pdf != NULL)
+		return 1;
+
+	pdf_object_t *head = malloc(sizeof(pdf_object_t));
+
+	*pdf = head;
+
+	if (head == NULL)
+		return 1;
+
+	*head = (pdf_object_t) {
+		.address = 0,
+		.size = 0,
+		.id = 0,
+		.object_size = 0,
+		.object = NULL,
+		.dictionary_size = 0,
+		.dictionary = NULL,
+		.stream_size = 0,
+		.stream = NULL,
+		.next = NULL
+	};
+
+	return 0;
+}
+
+/*
+ * Free every node of the list starting at *pdf together with its
+ * object, dictionary and stream buffers.  *pdf is NULL afterwards.
+ */
+void
+pdf_obj_destroy(pdf_object_t **pdf)
+{
+	pdf_object_t *node = *pdf;
+
+	while (node != NULL) {
+		pdf_object_t *following = node->next;
+
+		free(node->object);
+		free(node->dictionary);
+		free(node->stream);
+		free(node);
+
+		node = following;
+	}
+
+	*pdf = NULL;
+}
+
+/*
+ * Allocate a single object node with the given id and store it in *pdf.
+ * The node receives private copies of `dictionary' or `object' (at most
+ * one of the two may be given) and of `stream'.
+ *
+ * Returns 0 on success.  Returns 1 when *pdf is not NULL, id is not
+ * positive, both object and dictionary are given, or an allocation
+ * fails; in all of these cases *pdf is left untouched and nothing stays
+ * allocated.
+ *
+ * NOTE(review): the interface carries no stream length, so `stream' is
+ * treated as a NUL-terminated string and stream_size excludes the
+ * terminator.  Binary streams with embedded NUL bytes need a length
+ * parameter -- confirm against the writer.
+ */
+int
+pdf_obj_add(pdf_object_t **pdf, int id,
+	const char * restrict object,
+	const char * restrict dictionary,
+	const char * restrict stream)
+{
+	if (*pdf != NULL || id <= 0 ||
+	    (object != NULL && dictionary != NULL))
+		return 1;
+
+	/* Zeroed: address, size, all sizes 0 and all pointers NULL */
+	pdf_object_t *obj = calloc(1, sizeof(*obj));
+
+	if (obj == NULL)
+		return 1;
+
+	obj->id = id;
+
+	if (dictionary != NULL) {
+		obj->dictionary_size = strlen(dictionary) + 1;
+		obj->dictionary = malloc(obj->dictionary_size);
+
+		if (obj->dictionary == NULL)
+			goto fail;
+
+		memcpy(obj->dictionary, dictionary, obj->dictionary_size);
+	} else if (object != NULL) {
+		obj->object_size = strlen(object) + 1;
+		obj->object = malloc(obj->object_size);
+
+		if (obj->object == NULL)
+			goto fail;
+
+		memcpy(obj->object, object, obj->object_size);
+	}
+
+	if (stream != NULL) {
+		/* sizeof(stream) would be the size of the pointer */
+		obj->stream_size = strlen(stream);
+		obj->stream = malloc(obj->stream_size > 0 ?
+		    (size_t) obj->stream_size : 1);
+
+		if (obj->stream == NULL)
+			goto fail;
+
+		memcpy(obj->stream, stream, obj->stream_size);
+	}
+
+	*pdf = obj;
+
+	return 0;
+
+fail:
+	/* Never leave a half-built node linked into a caller's list */
+	free(obj->object);
+	free(obj->dictionary);
+	free(obj->stream);
+	free(obj);
+
+	return 1;
+}
+
+/*
+ * Unlink the first object with the given id from the nodes that follow
+ * *pdf.  The node is only detached, not freed.  Returns 1 when *pdf is
+ * NULL or id is not positive, otherwise 0 whether or not a node matched.
+ */
+int
+pdf_obj_del(pdf_object_t **pdf, int id)
+{
+	if (*pdf == NULL || id <= 0)
+		return 1;
+
+	for (pdf_object_t *prev = *pdf; prev->next != NULL; prev = prev->next) {
+		if (prev->next->id != id)
+			continue;
+
+		prev->next = prev->next->next;
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Create a new object and insert it directly after the head node *pdf.
+ * A non-positive id is replaced by one from pdf_get_free_id().  Returns
+ * 0 on success and 1 when *pdf is NULL or the object cannot be created.
+ */
+int
+pdf_obj_prepend(pdf_object_t **pdf, int id,
+	const char * restrict object,
+	const char * restrict dictionary,
+	const char * restrict stream)
+{
+	if (*pdf == NULL)
+		return 1;
+
+	if (id <= 0)
+		id = pdf_get_free_id(pdf);
+
+	pdf_object_t *fresh = NULL;
+	const int failed = pdf_obj_add(&fresh, id, object, dictionary, stream);
+
+	if (failed != 0) {
+		free(fresh);
+		return 1;
+	}
+
+	/* Splice the new node in between the head and its old successor */
+	pdf_object_t *head = *pdf;
+
+	fresh->next = head->next;
+	head->next = fresh;
+
+	return 0;
+}
+
+/*
+ * Create a new object and link it after the last node of the list.  A
+ * non-positive id is replaced by one from pdf_get_free_id().  Returns 0
+ * on success and 1 when *pdf is NULL or the object cannot be created.
+ */
+int
+pdf_obj_append(pdf_object_t **pdf, int id,
+	const char * restrict object,
+	const char * restrict dictionary,
+	const char * restrict stream)
+{
+	if (*pdf == NULL)
+		return 1;
+
+	if (id <= 0)
+		id = pdf_get_free_id(pdf);
+
+	/* Find the link that terminates the list */
+	pdf_object_t **tail = &(*pdf)->next;
+
+	while (*tail != NULL)
+		tail = &(*tail)->next;
+
+	return pdf_obj_add(tail, id, object, dictionary, stream) != 0 ? 1 : 0;
+}
+
+/*
+ * Sort the objects that follow the head node *pdf by ascending id with a
+ * selection sort: at every position the object with the smallest id in
+ * the remainder is unlinked and moved to the front of the remainder.
+ *
+ * Returns 0 when the list is sorted.  Returns 1 when *pdf is NULL, when
+ * the smallest id in the remainder is reported as 0, or when the object
+ * with the smallest id cannot be looked up.
+ */
+int
+pdf_obj_sort(pdf_object_t **pdf)
+{
+	if (*pdf == NULL)
+		return 1;
+
+	int id;
+	pdf_object_t *tmp;
+	pdf_object_t *ptr;
+
+	ptr = *pdf;
+	while (ptr->next != NULL) {
+		/*
+		 * _min_id() looks at the nodes after ptr->next only.  Once
+		 * ptr->next is the last node there is nothing left to compare
+		 * and the list is sorted; this is success, not an error.
+		 */
+		if (ptr->next->next == NULL)
+			break;
+
+		id = _min_id(&ptr->next);
+
+		if (id == 0)
+			return 1;
+
+		if (id < ptr->next->id) {
+			tmp = NULL;
+			pdf_get_obj(&ptr->next, id, &tmp);
+
+			if (tmp == NULL)
+				return 1;
+
+			/* Detach the node, then put it in front of the rest */
+			pdf_obj_del(&ptr->next, id);
+
+			tmp->next = ptr->next;
+			ptr->next = tmp;
+		}
+
+		ptr = ptr->next;
+	}
+
+	return 0;
+}
diff --git a/src/pdf.h b/src/pdf.h
new file mode 100644
index 0000000..61f64d5
--- /dev/null
+++ b/src/pdf.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdio.h>
+
+/*
+ * One PDF object in a singly linked list.  The list starts with a head
+ * node of id 0 created by pdf_obj_create(); real objects follow it.
+ * object, dictionary and stream are owned by the node and released by
+ * pdf_obj_destroy().
+ */
+typedef struct _pdf_object_t {
+ int address; /* NOTE(review): presumably the byte offset of the object in the file -- confirm */
+ int size; /* NOTE(review): presumably the byte length of the whole object -- confirm */
+ int id; /* Object id; positive for every node made by pdf_obj_add() */
+ int object_size; /* Bytes in `object'; includes the terminator when set by pdf_obj_add() */
+ char *object;
+ int dictionary_size; /* Bytes in `dictionary'; includes the terminator when set by pdf_obj_add() */
+ char *dictionary;
+ int stream_size; /* Bytes in `stream' */
+ char *stream;
+ struct _pdf_object_t *next; /* NULL on the last node */
+} pdf_object_t;
+
+/* pdf.c */
+/* TODO: Rewrite object dictionary */
+/* TODO: Compact object id */
+/* TODO: `mutool clean -gggsz' */
+int pdf_obj_create(pdf_object_t **pdf);
+void pdf_obj_destroy(pdf_object_t **pdf);
+int pdf_obj_add(pdf_object_t **pdf, int id,
+ const char * restrict object,
+ const char * restrict dictionary,
+ const char * restrict stream);
+int pdf_obj_del(pdf_object_t **pdf, int id);
+int pdf_obj_prepend(pdf_object_t **pdf, int id,
+ const char * restrict object,
+ const char * restrict dictionary,
+ const char * restrict stream);
+int pdf_obj_append(pdf_object_t **pdf, int id,
+ const char * restrict object,
+ const char * restrict dictionary,
+ const char * restrict stream);
+int pdf_obj_sort(pdf_object_t **pdf);
+
+/* pdf_parser.c */
+int pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf);
+
+/* pdf_writer.c */
+int pdf_dump_obj(pdf_object_t **pdf, FILE **fp);
+int pdf_dump_header(pdf_object_t **pdf, FILE **fp);
+int pdf_dump_xref(pdf_object_t **pdf, FILE **fp);
+int pdf_dump_trailer(pdf_object_t **pdf, FILE **fp, int xref);
+
+/* pdf_get.c */
+int pdf_get_obj(pdf_object_t **pdf, int id, pdf_object_t **obj);
+int pdf_get_count(pdf_object_t **pdf);
+int pdf_get_size(pdf_object_t **pdf);
+int pdf_get_free_id(pdf_object_t **pdf);
+int pdf_get_free_ids(pdf_object_t **pdf, int **ids, int count);
+int pdf_get_catalog_id(pdf_object_t **pdf);
+int pdf_get_parent_id(pdf_object_t **pdf, int **id);
+int pdf_get_kid_id(pdf_object_t **pdf, int id, int **kid);
+int pdf_get_kid_count(pdf_object_t **pdf, int id);
diff --git a/src/pdf_cnki.c b/src/pdf_cnki.c
new file mode 100644
index 0000000..2dec0b6
--- /dev/null
+++ b/src/pdf_cnki.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "cnki.h"
+#include "iconv.h"
+#include "pdf.h"
+
+/*
+ * Append src to the NUL-terminated string held in dst, which has room
+ * for cap bytes in total. Returns 0 on success, or 1 when src does not
+ * fit, in which case dst is left untouched.
+ */
+static int
+_dict_append(char *dst, size_t cap, const char *src)
+{
+	size_t used = strlen(dst);
+	size_t need = strlen(src);
+
+	if (need >= cap - used)
+		return 1;
+
+	memcpy(dst + used, src, need + 1);
+
+	return 0;
+}
+
+/*
+ * Append one outline item object to *pdf for every node of the sibling
+ * chain that starts at *outline_tree, recursing into child chains.
+ *
+ * Links used: ->left is followed to the next sibling (/Next), ->right
+ * starts the child chain (/First, /Last, /Count), and ->up supplies
+ * /Prev unless its id equals the parent id. `id' is written as /Parent.
+ *
+ * *stat is always (re)assigned: NULL if its allocation failed,
+ * otherwise a malloc'd array the caller must free, holding
+ *   [0] id of the first item of the chain,
+ *   [1] id of the last item of the chain,
+ *   [2] number of items in the chain,
+ * so that the caller can fill in its own /First, /Last and /Count.
+ * All three are 0 for an empty chain.
+ *
+ * Returns 0 on success and 1 on failure (allocation failure, empty
+ * chain, or a failed recursive call).
+ */
+static int
+_outline(pdf_object_t **pdf, object_outline_tree_t **outline_tree, int id, int **stat)
+{
+	*stat = malloc(3 * sizeof(int));
+
+	if (*stat == NULL)
+		return 1;
+
+	(*stat)[0] = 0;
+	(*stat)[1] = 0;
+	(*stat)[2] = 0;
+
+	object_outline_tree_t *ptr = *outline_tree;
+
+	/* An empty chain has no first item to report */
+	if (ptr == NULL)
+		return 1;
+
+	(*stat)[0] = ptr->id;
+
+	char buf[64];
+
+	while (ptr != NULL) {
+		int size = 512;
+		char *str = NULL;
+
+		int titled = strconv(&str, "UTF-16BE",
+		    ptr->item->title, "GB18030",
+		    &size) == 0;
+
+		/*
+		 * The fixed entries need well under 512 bytes; the title
+		 * takes two hex digits per converted byte on top of that.
+		 */
+		size_t cap = 512;
+
+		if (titled && str != NULL && size > 0)
+			cap += 2 * (size_t) size;
+
+		char *dictionary = malloc(cap);
+
+		if (dictionary == NULL) {
+			free(str);
+			return 1;
+		}
+
+		dictionary[0] = '\0';
+
+		int rc = _dict_append(dictionary, cap, "<<\n");
+
+		if (titled) {
+			/* UTF-16BE hex string, introduced by its byte order mark */
+			rc |= _dict_append(dictionary, cap, "/Title <feff");
+
+			if (rc == 0 && str != NULL) {
+				size_t used = strlen(dictionary);
+
+				for (int i = 0; i < size; i++, used += 2)
+					snprintf(dictionary + used, 3, "%02x",
+					    (unsigned char) str[i]);
+			}
+
+			rc |= _dict_append(dictionary, cap, ">\n");
+		}
+
+		free(str);
+
+		snprintf(buf, sizeof(buf), "/Parent %d 0 R\n", id);
+		rc |= _dict_append(dictionary, cap, buf);
+
+		if (ptr->up != NULL && ptr->up->id != id) {
+			snprintf(buf, sizeof(buf), "/Prev %d 0 R\n", ptr->up->id);
+			rc |= _dict_append(dictionary, cap, buf);
+		}
+
+		if (ptr->left != NULL) {
+			snprintf(buf, sizeof(buf), "/Next %d 0 R\n", ptr->left->id);
+			rc |= _dict_append(dictionary, cap, buf);
+		}
+
+		if (ptr->right != NULL) {
+			int *ret = NULL;
+
+			/* Children are emitted before their parent */
+			if (_outline(pdf, &ptr->right, ptr->id, &ret) != 0) {
+				free(ret);
+				free(dictionary);
+				return 1;
+			}
+
+			snprintf(buf, sizeof(buf), "/First %d 0 R\n", ret[0]);
+			rc |= _dict_append(dictionary, cap, buf);
+
+			snprintf(buf, sizeof(buf), "/Last %d 0 R\n", ret[1]);
+			rc |= _dict_append(dictionary, cap, buf);
+
+			/* A negative count marks the item as closed */
+			snprintf(buf, sizeof(buf), "/Count -%d\n", ret[2]);
+			rc |= _dict_append(dictionary, cap, buf);
+
+			free(ret);
+		}
+
+		/* Page starts from 0 */
+		snprintf(buf, sizeof(buf), "/Dest [%d /XYZ null null null]\n>>\n",
+		    atoi(ptr->item->page) - 1);
+		rc |= _dict_append(dictionary, cap, buf);
+
+		if (rc != 0) {
+			free(dictionary);
+			return 1;
+		}
+
+		/*
+		 * The text is handed over as a temporary, exactly as the
+		 * previous stack buffer was, so it is released right after.
+		 */
+		pdf_obj_append(pdf, ptr->id, NULL, dictionary, NULL);
+
+		free(dictionary);
+
+		if (ptr->left == NULL)
+			(*stat)[1] = ptr->id;
+
+		(*stat)[2]++;
+
+		ptr = ptr->left;
+	}
+
+	return 0;
+}
+
+/*
+ * Build the outline tree from *outline and append the resulting
+ * outline item objects to *pdf, followed by the outline root
+ * dictionary, which is stored under the id (*ids)[0].
+ *
+ * An outline without any item still gets a root dictionary, with
+ * /Count 0, so that the id (*ids)[0] always resolves to an object.
+ *
+ * Returns 0 on success and 1 on invalid input, when no tree could be
+ * built, or when emitting the items failed.
+ */
+int
+pdf_cnki_outline(pdf_object_t **pdf, object_outline_t **outline, int **ids)
+{
+	if (*pdf == NULL || *outline == NULL || *ids == NULL)
+		return 1;
+
+	object_outline_tree_t *outline_tree = NULL;
+	cnki_outline_tree(&outline_tree, outline, *ids);
+
+	if (outline_tree == NULL)
+		return 1;
+
+	char buf[128];
+	int *ret = NULL;
+	int rc = 0;
+
+	if (outline_tree->left == NULL) {
+		snprintf(buf, sizeof(buf),
+		    "<<\n/Type /Outlines\n/Count 0\n>>\n");
+	} else if (_outline(pdf, &outline_tree->left, outline_tree->id, &ret) == 0 &&
+	    ret != NULL) {
+		/* /Outlines is a name object and therefore needs its slash */
+		snprintf(buf, sizeof(buf),
+		    "<<\n/Type /Outlines\n/First %d 0 R\n/Last %d 0 R\n/Count %d\n>>\n",
+		    ret[0], ret[1], ret[2]);
+	} else {
+		rc = 1;
+	}
+
+	free(ret);
+	free(outline_tree);
+
+	if (rc == 0)
+		pdf_obj_append(pdf, (*ids)[0], NULL, buf, NULL);
+
+	return rc;
+}
diff --git a/src/pdf_cnki.h b/src/pdf_cnki.h
new file mode 100644
index 0000000..f0210d0
--- /dev/null
+++ b/src/pdf_cnki.h
@@ -0,0 +1,7 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+int pdf_cnki_outline(pdf_object_t **pdf, object_outline_t **outline, int **ids); /* Appends the outline item objects and the outline root, which takes the id (*ids)[0]; returns 1 if *pdf, *outline or *ids is NULL */
diff --git a/src/pdf_get.c b/src/pdf_get.c
new file mode 100644
index 0000000..33fb271
--- /dev/null
+++ b/src/pdf_get.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "pdf.h"
+
+/*
+ * Tell whether id occurs in a counted array: ids[0] holds the number
+ * of entries and the entries themselves follow from ids[1] onwards.
+ * Returns 1 when id is among them, 0 otherwise.
+ */
+static int
+_id_in(int id, int *ids)
+{
+	int remaining = ids[0];
+	const int *entry = ids + 1;
+
+	while (remaining-- > 0) {
+		if (*entry++ == id)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Find the object numbered id, skipping the list head.
+ * Stores it in *obj and returns 0 when found; returns 1 when *pdf is
+ * NULL, id is not positive, or no such object exists.
+ */
+int
+pdf_get_obj(pdf_object_t **pdf, int id, pdf_object_t **obj)
+{
+	if (*pdf == NULL || id <= 0)
+		return 1;
+
+	for (pdf_object_t *node = (*pdf)->next; node != NULL; node = node->next) {
+		if (node->id != id)
+			continue;
+
+		*obj = node;
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Count the objects that follow the list head.
+ * Note that a NULL *pdf yields 1, not 0.
+ */
+int
+pdf_get_count(pdf_object_t **pdf)
+{
+	if (*pdf == NULL)
+		return 1;
+
+	int total = 0;
+
+	for (pdf_object_t *node = (*pdf)->next; node != NULL; node = node->next)
+		total++;
+
+	return total;
+}
+
+/*
+ * Add up the size field of every object that follows the list head.
+ * Note that a NULL *pdf yields 1, not 0.
+ */
+int
+pdf_get_size(pdf_object_t **pdf)
+{
+	if (*pdf == NULL)
+		return 1;
+
+	int total = 0;
+
+	for (pdf_object_t *node = (*pdf)->next; node != NULL; node = node->next)
+		total += node->size;
+
+	return total;
+}
+
+/*
+ * Return the smallest id, counting up from 1, that no object in the
+ * list uses. Returns 0 when every candidate below 99999999 is taken,
+ * and 1 when *pdf is NULL.
+ */
+int
+pdf_get_free_id(pdf_object_t **pdf)
+{
+	if (*pdf == NULL)
+		return 1;
+
+	for (int candidate = 1; candidate < 99999999; candidate++) {
+		int taken = 0;
+
+		for (pdf_object_t *node = (*pdf)->next; node != NULL; node = node->next) {
+			if (node->id == candidate) {
+				taken = 1;
+				break;
+			}
+		}
+
+		if (!taken)
+			return candidate;
+	}
+
+	return 0;
+}
+
+/*
+ * Collect `count' ids that no object in the list uses, in ascending
+ * order starting from 1.
+ *
+ * *ids must be NULL on entry. On success it points to a malloc'd array
+ * of exactly `count' ints that the caller must free, and 0 is
+ * returned. On failure 1 is returned and *ids is NULL.
+ */
+int
+pdf_get_free_ids(pdf_object_t **pdf, int **ids, int count)
+{
+	if (*pdf == NULL || *ids != NULL || count <= 0)
+		return 1;
+
+	*ids = malloc(count * sizeof(int));
+
+	if (*ids == NULL)
+		return 1;
+
+	int stored = 0;
+
+	for (int candidate = 1; candidate < 99999999; candidate++) {
+		int taken = 0;
+
+		for (pdf_object_t *node = (*pdf)->next; node != NULL; node = node->next) {
+			if (node->id == candidate) {
+				taken = 1;
+				break;
+			}
+		}
+
+		if (taken)
+			continue;
+
+		(*ids)[stored++] = candidate;
+
+		/* Stop once the array is full; index `count' is out of bounds */
+		if (stored == count)
+			return 0;
+	}
+
+	/* Not enough free ids: do not hand back a partially filled array */
+	free(*ids);
+	*ids = NULL;
+
+	return 1;
+}
+
+/*
+ * Return the id of the last object in the list whose dictionary text
+ * contains "/Catalog", or 0 when there is none.
+ * Note that a NULL *pdf yields 1.
+ */
+int
+pdf_get_catalog_id(pdf_object_t **pdf)
+{
+	if (*pdf == NULL)
+		return 1;
+
+	int found = 0;
+
+	for (pdf_object_t *node = (*pdf)->next; node != NULL; node = node->next) {
+		if (node->dictionary == NULL)
+			continue;
+
+		if (strstr(node->dictionary, "/Catalog") == NULL)
+			continue;
+
+		/* Keep scanning: a later match overrides an earlier one */
+		found = node->id;
+	}
+
+	return found;
+}
+
+/*
+ * Collect the distinct ids referenced by the first "/Parent " entry of
+ * each object's dictionary.
+ *
+ * *id must be NULL on entry. It is replaced by a malloc'd counted
+ * array that the caller must free: (*id)[0] holds the number of ids
+ * and the ids follow from (*id)[1] onwards.
+ *
+ * Returns 0 on success and 1 on invalid input or allocation failure;
+ * after a failed reallocation *id still holds the ids gathered so far.
+ */
+int
+pdf_get_parent_id(pdf_object_t **pdf, int **id)
+{
+	if (*pdf == NULL || *id != NULL)
+		return 1;
+
+	int id_size = 1;
+	*id = malloc(sizeof(int));
+
+	if (*id == NULL)
+		return 1;
+
+	(*id)[0] = 0;
+
+	char str[16];
+
+	for (pdf_object_t *ptr = (*pdf)->next; ptr != NULL; ptr = ptr->next) {
+		char *head;
+		char *tail;
+
+		if (ptr->dictionary == NULL ||
+		    (head = strstr(ptr->dictionary, "/Parent ")) == NULL ||
+		    (tail = strchr(head + 8, ' ')) == NULL)
+			continue;
+
+		/*
+		 * The text between "/Parent " and the next space comes
+		 * straight from the file. Nine characters is the most
+		 * that is guaranteed to fit an int, so anything longer
+		 * is not treated as an id.
+		 */
+		size_t len = (size_t) (tail - (head + 8));
+
+		if (len > 9)
+			continue;
+
+		memcpy(str, head + 8, len);
+		str[len] = '\0';
+
+		int str_val = atoi(str);
+
+		if (_id_in(str_val, *id))
+			continue;
+
+		int *ret = realloc(*id, (id_size + 1) * sizeof(int));
+
+		if (ret == NULL)
+			return 1;
+
+		*id = ret;
+
+		(*id)[id_size++] = str_val;
+		(*id)[0]++;
+	}
+
+	return 0;
+}
+
+/*
+ * Collect the ids of the objects whose dictionary contains
+ * "/Parent <id> 0 R".
+ *
+ * *kid must be NULL on entry. It is replaced by a malloc'd counted
+ * array that the caller must free: (*kid)[0] holds the number of ids
+ * and the ids follow from (*kid)[1] onwards. The count is kept valid
+ * at all times, including on the failure paths.
+ *
+ * Returns 0 on success. Returns 1 on invalid input, on allocation
+ * failure, and also when an object numbered `id' is itself present in
+ * the list, in which case the count is reset to 0.
+ */
+int
+pdf_get_kid_id(pdf_object_t **pdf, int id, int **kid)
+{
+	if (*pdf == NULL || *kid != NULL)
+		return 1;
+
+	int kid_size = 1;
+	*kid = malloc(sizeof(int));
+
+	if (*kid == NULL)
+		return 1;
+
+	(*kid)[0] = 0;
+
+	char str[32];
+
+	snprintf(str, sizeof(str), "/Parent %d 0 R", id);
+
+	for (pdf_object_t *ptr = (*pdf)->next; ptr != NULL; ptr = ptr->next) {
+		if (ptr->id == id) {
+			(*kid)[0] = 0;
+			return 1;
+		}
+
+		if (ptr->dictionary == NULL ||
+		    strstr(ptr->dictionary, str) == NULL)
+			continue;
+
+		int *ret = realloc(*kid, (kid_size + 1) * sizeof(int));
+
+		if (ret == NULL)
+			return 1;
+
+		*kid = ret;
+
+		(*kid)[kid_size++] = ptr->id;
+		(*kid)[0] = kid_size - 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Sum the /Count values of all objects whose dictionary contains
+ * "/Parent <id> 0 R". Only the first "/Count " of each dictionary is
+ * considered, and at most nine characters after it are parsed.
+ * Note that a NULL *pdf or a non-positive id yields 1.
+ */
+int
+pdf_get_kid_count(pdf_object_t **pdf, int id)
+{
+	if (*pdf == NULL || id <= 0)
+		return 1;
+
+	int count = 0;
+
+	char id_str[32];
+	char str[10];
+
+	snprintf(id_str, sizeof(id_str), "/Parent %d 0 R", id);
+
+	for (pdf_object_t *ptr = (*pdf)->next; ptr != NULL; ptr = ptr->next) {
+		char *pos;
+
+		if (ptr->dictionary == NULL ||
+		    strstr(ptr->dictionary, id_str) == NULL ||
+		    (pos = strstr(ptr->dictionary, "/Count ")) == NULL)
+			continue;
+
+		/*
+		 * Characters that may be inspected after "/Count ": up to
+		 * and including the offset dictionary_size, capped at
+		 * nine so that the copy always fits in str and the
+		 * value always fits in an int.
+		 */
+		long span = (long) ptr->dictionary_size -
+		    (long) (pos - ptr->dictionary) - 7 + 1;
+
+		if (span > 9)
+			span = 9;
+
+		if (span <= 0)
+			continue;
+
+		snprintf(str, sizeof(str), "%.*s", (int) span, pos + 7);
+
+		count += atoi(str);
+	}
+
+	return count;
+}
diff --git a/src/pdf_parser.c b/src/pdf_parser.c
new file mode 100644
index 0000000..1da8dff
--- /dev/null
+++ b/src/pdf_parser.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifdef __linux__
+
+#define _GNU_SOURCE
+
+#endif /* __linux__ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "pdf.h"
+
+/*
+ * Like memmem(), but a match only counts when the byte right after it
+ * is PDF white-space (CR, LF, FF, TAB, NUL or space) and that byte
+ * also lies within the s0 bytes of p0.
+ *
+ * Returns a pointer to the earliest such match in p0, or NULL.
+ */
+static void *
+_memmem_whitespace(const void *p0, size_t s0, const void *p1, size_t s1)
+{
+	const char *hay = p0;
+	size_t left = s0;
+
+	/* A match needs s1 bytes for the token plus the trailing byte */
+	while (left > s1) {
+		const char *hit = memmem(hay, left, p1, s1);
+
+		if (hit == NULL)
+			break;
+
+		size_t off = (size_t) (hit - hay);
+
+		/* No trailing byte left here, nor for any later match */
+		if (off + s1 >= left)
+			break;
+
+		switch (hit[s1]) {
+		case '\r':
+		case '\n':
+		case '\f':
+		case '\t':
+		case '\0':
+		case ' ':
+			return (void *) hit;
+		default:
+			break;
+		}
+
+		/* Not followed by white-space: resume one byte further on */
+		hay = hit + 1;
+		left -= off + 1;
+	}
+
+	return NULL;
+}
+
+/*
+ * Scan *fp from its current position to the end of the file and append
+ * one node to the list for every object found, recording only where
+ * the object lives:
+ *   address - offset of the first byte after " 0 obj" and the
+ *             white-space byte that follows it,
+ *   size    - number of bytes from there up to "endobj".
+ *
+ * The file is read in windows of size_buf bytes. When no complete
+ * object has been seen, the next window overlaps the previous one by
+ * 6 bytes so that a token straddling the boundary can still be found.
+ *
+ * Returns 0 on success and 1 if a node could not be allocated.
+ */
+static int
+_locate(pdf_object_t **pdf, FILE **fp, int size_buf)
+{
+	pdf_object_t *ptr = *pdf;
+	while (ptr->next != NULL)
+		ptr = ptr->next;
+
+	char buf[size_buf];
+
+	long cur = ftell(*fp);
+	long end;
+
+	fseek(*fp, 0, SEEK_END);
+	end = ftell(*fp);
+	fseek(*fp, cur, SEEK_SET);
+
+	int head = 0;
+	int tail = 0;
+	char *pos;
+	char *tmp;
+
+	for (;;) {
+		/*
+		 * Search only what this read delivered; the rest of the
+		 * window still holds bytes of an earlier read (or nothing
+		 * meaningful at all) and would yield bogus offsets.
+		 */
+		size_t got = fread(buf, 1, size_buf, *fp);
+
+		if (got == 0)
+			break;
+
+		if (head == 0 && (pos = _memmem_whitespace(buf, got, " 0 obj", 6)) != NULL)
+			head = cur + (pos - buf) + 7;
+
+		if (tail == 0 && (pos = _memmem_whitespace(buf, got, "endobj", 6)) != NULL) {
+			/*
+			 * We need to check if it is the object stored in
+			 * stream; the look-ahead is only done when all of
+			 * its 11 bytes are inside the window.
+			 */
+			while ((size_t) (pos - buf) + 7 + 11 <= got &&
+			    memcmp(pos + 7,
+			    "\r\nendstream", 11) == 0 &&
+			    (tmp = _memmem_whitespace(pos + 6,
+			    got - (pos - buf) - 6,
+			    "endobj", 6)) != NULL)
+				pos = tmp;
+
+			/*
+			 * NOTE(review): a terminator at offset size_buf - 7
+			 * is rejected here, and the 6-byte overlap of the
+			 * next window does not cover its first byte --
+			 * confirm this cannot drop an object boundary.
+			 */
+			if (pos - buf < size_buf - 7)
+				tail = cur + (pos - buf);
+		}
+
+		if (tail > head) {
+			if (ptr->next == NULL) {
+				ptr->next = malloc(sizeof(pdf_object_t));
+
+				if (ptr->next == NULL)
+					return 1;
+
+				ptr->next->id = 0;
+				ptr->next->object_size = 0;
+				ptr->next->object = NULL;
+				ptr->next->dictionary_size = 0;
+				ptr->next->dictionary = NULL;
+				ptr->next->stream_size = 0;
+				ptr->next->stream = NULL;
+				ptr->next->next = NULL;
+				ptr = ptr->next;
+			}
+
+			ptr->address = head;
+			ptr->size = tail - head;
+
+			/* Continue right after the "endobj" keyword */
+			fseek(*fp, tail + 6, SEEK_SET);
+			head = tail = 0;
+		} else {
+			fseek(*fp, -6, SEEK_CUR);
+		}
+
+		/* Fewer than 7 bytes left cannot hold another token */
+		if ((cur = ftell(*fp)) + 6 >= end)
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * Load the objects of the PDF in *fp into the list *pdf.
+ *
+ * The objects are first located with _locate(), then each one is read
+ * back and split up:
+ *   id         - the digits directly in front of " 0 obj",
+ *   dictionary - the text from the first "<<" to the last ">>" that is
+ *                followed by white-space, NUL-terminated,
+ *   stream     - the bytes between "stream\r\n" and "endstream",
+ *   object     - the whole body, NUL-terminated, when the object has
+ *                no dictionary.
+ *
+ * size_buf is the scan window in bytes and must be at least 7.
+ * Returns 0 on success and 1 on invalid input or allocation failure.
+ */
+int
+pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
+{
+	if (*pdf == NULL || *fp == NULL || size_buf < 7)
+		return 1;
+
+	if (_locate(pdf, fp, size_buf) != 0)
+		return 1;
+
+	char *buf;
+	char *head;
+	char *tail;
+	char *tmp;
+
+	for (pdf_object_t *ptr = (*pdf)->next; ptr != NULL; ptr = ptr->next) {
+		/* Nothing was located for this node, so nothing can be read */
+		if (ptr->size <= 0)
+			continue;
+
+		/*
+		 * address points 7 bytes past the start of " 0 obj"; the
+		 * object number is the run of digits that ends right
+		 * there. It is read into its own small buffer, at most
+		 * nine digits so that the value always fits in an int.
+		 */
+		long mark = (long) ptr->address - 7;
+
+		if (mark > 0) {
+			char num[10];
+			long from = mark > 9 ? mark - 9 : 0;
+			size_t want = (size_t) (mark - from);
+
+			if (fseek(*fp, from, SEEK_SET) == 0 &&
+			    fread(num, 1, want, *fp) == want) {
+				size_t first = want;
+
+				num[want] = '\0';
+
+				while (first > 0 &&
+				    num[first - 1] >= '0' && num[first - 1] <= '9')
+					first--;
+
+				if (first < want)
+					ptr->id = atoi(num + first);
+			}
+		}
+
+		/* Zero-filled, so a short read leaves defined content */
+		buf = calloc(1, ptr->size);
+
+		if (buf == NULL)
+			return 1;
+
+		fseek(*fp, ptr->address, SEEK_SET);
+		fread(buf, ptr->size, 1, *fp);
+
+		/* Look for the closing ">>" only behind the opening "<<" */
+		head = memmem(buf, ptr->size, "<<", 2);
+		tail = NULL;
+
+		if (head != NULL)
+			tail = _memmem_whitespace(head,
+			    ptr->size - (head - buf),
+			    ">>", 2);
+
+		if (tail != NULL) {
+			/* A dictionary object may have nested dictionary */
+			while ((tmp = _memmem_whitespace(tail + 2,
+			    ptr->size - (tail - buf) - 2,
+			    ">>", 2)) != NULL)
+				tail = tmp;
+
+			ptr->dictionary_size = tail - head + 2;
+			ptr->dictionary = calloc(1, ptr->dictionary_size + 1);
+
+			if (ptr->dictionary == NULL) {
+				free(buf);
+				return 1;
+			}
+
+			memcpy(ptr->dictionary, head, ptr->dictionary_size);
+
+			if ((head = memmem(tail,
+			    ptr->size - (tail - buf),
+			    "stream\r\n", 8)) != NULL &&
+			    (tail = _memmem_whitespace(head,
+			    ptr->size - (head - buf),
+			    "endstream", 9)) != NULL) {
+				/*
+				 * An object may contain a stream that
+				 * contains another object that
+				 * contains another stream
+				 */
+				while (_memmem_whitespace(tail,
+				    ptr->size - (tail - buf),
+				    "endobj", 6) != NULL &&
+				    (tmp = _memmem_whitespace(tail + 9,
+				    ptr->size - (tail - buf) - 9,
+				    "endstream", 9)) != NULL)
+					tail = tmp;
+
+				ptr->stream_size = (tail - head) - 8;
+
+				/* Never ask for 0 bytes: that may return NULL */
+				ptr->stream = malloc(ptr->stream_size > 0 ?
+				    ptr->stream_size : 1);
+
+				if (ptr->stream == NULL) {
+					free(buf);
+					return 1;
+				}
+
+				memcpy(ptr->stream, head + 8, ptr->stream_size);
+			}
+		} else {
+			ptr->object_size = ptr->size;
+			ptr->object = calloc(1, ptr->object_size + 1);
+
+			if (ptr->object == NULL) {
+				free(buf);
+				return 1;
+			}
+
+			memcpy(ptr->object, buf, ptr->object_size);
+		}
+
+		free(buf);
+	}
+
+	return 0;
+}
diff --git a/src/pdf_writer.c b/src/pdf_writer.c
new file mode 100644
index 0000000..3cf4f7c
--- /dev/null
+++ b/src/pdf_writer.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <openssl/md5.h>
+
+#include "pdf.h"
+
+/*
+ * Write every object that follows the list head to *fp as
+ * "<id> 0 obj ... endobj" and store the offset it was written at and
+ * the number of bytes it took in the object's address and size.
+ *
+ * The body is the dictionary if there is one, else the raw object
+ * text, else "null" unless the object only carries a stream. A stream,
+ * if present, follows the body.
+ *
+ * Returns 0, or 1 when *pdf or *fp is NULL.
+ */
+int
+pdf_dump_obj(pdf_object_t **pdf, FILE **fp)
+{
+	if (*pdf == NULL || *fp == NULL)
+		return 1;
+
+	for (pdf_object_t *node = (*pdf)->next; node != NULL; node = node->next) {
+		long start = ftell(*fp);
+
+		node->address = start;
+
+		fprintf(*fp, "%d 0 obj\n", node->id);
+
+		const char *body = node->dictionary;
+
+		if (body == NULL)
+			body = node->object;
+
+		if (body != NULL)
+			fputs(body, *fp);
+		else if (node->stream == NULL)
+			fputs("null\n", *fp);
+
+		if (node->stream != NULL) {
+			fputs("stream\r\n", *fp);
+			fwrite(node->stream, node->stream_size, 1, *fp);
+			fputs("endstream\n", *fp);
+		}
+
+		fputs("endobj\n", *fp);
+
+		node->size = ftell(*fp) - start;
+	}
+
+	return 0;
+}
+
+/*
+ * Write the file header to *fp: the "%PDF-1.7" line followed by a
+ * comment line made of four bytes above 0x7f.
+ * Returns 0, or 1 when *pdf or *fp is NULL.
+ */
+int
+pdf_dump_header(pdf_object_t **pdf, FILE **fp)
+{
+	/* '%', the four high bytes, and the closing newline */
+	static const unsigned char marker[6] = {
+		'%',
+		0xf6,
+		0xe4,
+		0xfc,
+		0xdf,
+		'\n',
+	};
+
+	if (*pdf == NULL || *fp == NULL)
+		return 1;
+
+	fputs("%PDF-1.7\n", *fp);
+	fwrite(marker, 1, sizeof(marker), *fp);
+
+	return 0;
+}
+
+/*
+ * Write the cross-reference table to *fp. The list, head included, is
+ * cut into runs of consecutive ids; each run becomes a subsection
+ * "<first id> <length>" followed by one 20-byte entry per object.
+ * An entry is marked "n" when the object's size is positive and "f"
+ * otherwise; its generation is 0 when the address is positive and
+ * 65535 otherwise.
+ *
+ * Returns 0, or 1 when *pdf or *fp is NULL.
+ */
+int
+pdf_dump_xref(pdf_object_t **pdf, FILE **fp)
+{
+	if (*pdf == NULL || *fp == NULL)
+		return 1;
+
+	fputs("xref\n", *fp);
+
+	pdf_object_t *run = *pdf;
+
+	while (run != NULL) {
+		/* Measure the run of consecutive ids starting here */
+		int length = 1;
+
+		for (pdf_object_t *probe = run;
+		    probe->next != NULL && probe->next->id == probe->id + 1;
+		    probe = probe->next)
+			length++;
+
+		fprintf(*fp, "%d %d\n", run->id, length);
+
+		while (length-- > 0) {
+			fprintf(*fp, "%010d %05d %s\r\n",
+			    run->address,
+			    run->address > 0 ? 0 : 65535,
+			    run->size > 0 ? "n" : "f");
+			run = run->next;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Write the trailer to *fp: the trailer dictionary (/Size, /Root and
+ * /ID), "startxref" with the offset `xref', and the end-of-file marker.
+ *
+ * /Size is the id of the last object in the list plus one, so the
+ * list is expected to be sorted by id. /Root is looked up with
+ * pdf_get_catalog_id().
+ *
+ * Returns 0, or 1 when *pdf or *fp is NULL.
+ */
+int
+pdf_dump_trailer(pdf_object_t **pdf, FILE **fp, int xref)
+{
+	if (*pdf == NULL || *fp == NULL)
+		return 1;
+
+	fputs("trailer\n", *fp);
+
+	fputs("<<\n", *fp);
+
+	/*
+	 * File identifiers should be generated using
+	 * (a) Current time
+	 * (b) File path
+	 * (c) Size of file
+	 * (d) Values of all entries in the
+	 *     file's document information dictionary
+	 *
+	 * It is recommended to be computed according to RFC 1321
+	 */
+
+	time_t timestamp = time(NULL);
+	int size = pdf_get_size(pdf);
+
+	char seed[64];
+
+	/* The casts make the arguments match %lx and %x on every platform */
+	int seed_len = snprintf(seed, sizeof(seed), "%lx%x",
+	    (unsigned long) timestamp, (unsigned int) size);
+
+	if (seed_len < 0)
+		seed_len = 0;
+	else if (seed_len >= (int) sizeof(seed))
+		seed_len = (int) sizeof(seed) - 1;
+
+	unsigned char fid[MD5_DIGEST_LENGTH];
+	MD5((const unsigned char *) seed, seed_len, fid);
+
+	pdf_object_t *ptr = *pdf;
+	while (ptr->next != NULL)
+		ptr = ptr->next;
+
+	/*
+	 * TODO: Document information dictionary
+	 * `"/Producer (Melon)"'
+	 * `"/CreationDate (D:YYYYMMDDHHmmSS+00'00')"'
+	 *
+	 * Trailer dictionary
+	 * `"/Info %d 0 R"'
+	 */
+	fprintf(*fp,
+	    "/Size %d\n/Root %d 0 R\n",
+	    ptr->id + 1,
+	    pdf_get_catalog_id(pdf));
+
+	fputs("/ID [", *fp);
+
+	/* Both halves of the identifier carry the same digest */
+	for (int i = 0; i < 2; i++) {
+		fputs("<", *fp);
+
+		for (int j = 0; j < MD5_DIGEST_LENGTH; j++)
+			fprintf(*fp, "%02x", fid[j]);
+
+		fputs(">", *fp);
+
+		if (i < 1)
+			fputs(" ", *fp);
+	}
+
+	fputs("]\n", *fp);
+
+	fputs(">>\n", *fp);
+
+	fputs("startxref\n", *fp);
+
+	fprintf(*fp, "%d\n", xref);
+
+	/* fputs() does no formatting: both percent signs are written */
+	fputs("%%EOF\n", *fp);
+
+	return 0;
+}
diff --git a/src/version.h b/src/version.h
new file mode 100644
index 0000000..3773617
--- /dev/null
+++ b/src/version.h
@@ -0,0 +1,10 @@
+/*
+ * Copyright (c) 2020, yzrh <yzrh@tuta.io>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#define VERSION "0" /* Version components as strings; presumably joined as VERSION.RELEASE.PATCH plus EXTRA where they are printed -- TODO confirm at the use site */
+#define RELEASE "1"
+#define PATCH "0"
+#define EXTRA ""