about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author yzrh <yzrh@noema.org> 2020-12-30 17:06:55 +0000
committer yzrh <yzrh@noema.org> 2020-12-30 21:12:52 +0000
commit 1f62c53da6edc5a82a1e0eceb401b2274cd4a0d1 (patch)
tree 24359d29576a26ae70c446d4d01a8a93a61d62c2 /src
parent 98691d4203f4e578b84b2014db0fbe0c1209cc48 (diff)
download melon-1f62c53da6edc5a82a1e0eceb401b2274cd4a0d1.tar.gz
melon-1f62c53da6edc5a82a1e0eceb401b2274cd4a0d1.tar.zst
Produce PDF directly from KDH.
Diffstat (limited to 'src')
-rw-r--r-- src/cnki_kdh.c 23
-rw-r--r-- src/cnki_pdf.c 66
-rw-r--r-- src/pdf.h 1
-rw-r--r-- src/pdf_get.c 21
-rw-r--r-- src/pdf_parser.c 10
-rw-r--r-- src/pdf_writer.c 4
6 files changed, 102 insertions, 23 deletions
diff --git a/src/cnki_kdh.c b/src/cnki_kdh.c
index 450242d..78e8957 100644
--- a/src/cnki_kdh.c
+++ b/src/cnki_kdh.c
@@ -27,6 +27,11 @@ cnki_kdh(cnki_t **param)
char buf[(*param)->size_buf];
+ FILE *tmp = tmpfile();
+
+ if (tmp == NULL)
+ return 1;
+
for (;;) {
fread(buf, (*param)->size_buf, 1, (*param)->fp_i);
@@ -35,15 +40,27 @@ cnki_kdh(cnki_t **param)
key_cur++;
}
- fwrite(buf, (*param)->size_buf, 1, (*param)->fp_o);
+ fwrite(buf, (*param)->size_buf, 1, tmp);
if (ftell((*param)->fp_i) == size)
break;
}
if ((*param)->stat > 0)
- printf("Decryption ended total %ld byte(s) written\n",
- ftell((*param)->fp_o));
+ printf("Decrypted %ld byte(s)\n", ftell(tmp));
+
+ fseek(tmp, 0, SEEK_SET);
+
+ FILE *orig = (*param)->fp_i;
+ (*param)->fp_i = tmp;
+
+ cnki_pdf(param);
+
+ (*param)->fp_i = orig;
+ fclose(tmp);
+
+ if ((*param)->stat > 0)
+ printf("Conversion ended\n");
return 0;
}
diff --git a/src/cnki_pdf.c b/src/cnki_pdf.c
index 6964cac..d8eabff 100644
--- a/src/cnki_pdf.c
+++ b/src/cnki_pdf.c
@@ -232,28 +232,62 @@ cnki_pdf(cnki_t **param)
}
if ((*param)->stat > 1)
- printf("Generating '/Catalog' dictionary\n");
+ printf("Searching for catalog object\n");
- snprintf(buf, 64,
- "<<\n/Type /Catalog\n/Pages %d 0 R\n",
- root);
- strcat(dictionary, buf);
+ int catalog = pdf_get_catalog_id(&pdf);
+
+ if (catalog != 0) {
+ if ((*param)->stat > 0)
+ printf("catalog object is %d.\n", catalog);
+ } else {
+ if ((*param)->stat > 0)
+ printf("catalog object is missing\n");
+
+ if ((*param)->stat > 1)
+ printf("Generating catalog object\n");
- if (ids != NULL) {
snprintf(buf, 64,
- "/Outlines %d 0 R\n/PageMode /UseOutlines\n",
- ids[0]);
+ "<<\n/Type /Catalog\n/Pages %d 0 R\n",
+ root);
strcat(dictionary, buf);
+
+ if (ids != NULL) {
+ snprintf(buf, 64,
+ "/Outlines %d 0 R\n/PageMode /UseOutlines\n",
+ ids[0]);
+ strcat(dictionary, buf);
+ }
+
+ strcat(dictionary, ">>\n");
+
+ pdf_obj_append(&pdf, 0, NULL, dictionary, NULL);
+
+ if ((*param)->stat > 0)
+ printf("Generated catalog object\n");
}
- strcat(dictionary, ">>\n");
+ if ((*param)->stat > 1)
+ printf("Searching for xref object\n");
- pdf_obj_append(&pdf, 0, NULL, dictionary, NULL);
+ int xref = pdf_get_xref_id(&pdf);
- free(dictionary);
+ if (xref != 0) {
+ if ((*param)->stat > 0)
+ printf("xref object is %d.\n", xref);
- if ((*param)->stat > 0)
- printf("Generated '/Catalog' dictionary\n");
+ if ((*param)->stat > 1)
+ printf("Deleting xref object\n");
+
+ pdf_obj_del(&pdf, xref);
+
+ if ((*param)->stat > 0)
+ printf("Deleted xref object\n");
+ } else {
+ if ((*param)->stat > 0)
+ printf("xref object is missing\n");
+ }
+
+ free(dictionary);
if ((*param)->stat > 1)
printf("Sorting object(s)\n");
@@ -312,7 +346,7 @@ cnki_pdf(cnki_t **param)
pdf_get_count(&pdf),
ftell((*param)->fp_o));
- long xref = ftell((*param)->fp_o);
+ long cur_xref = ftell((*param)->fp_o);
if ((*param)->stat > 1)
printf("Writing cross-reference table\n");
@@ -323,7 +357,7 @@ cnki_pdf(cnki_t **param)
} else {
if ((*param)->stat > 0)
printf("Cross-reference table %ld byte(s) written\n",
- ftell((*param)->fp_o) - xref);
+ ftell((*param)->fp_o) - cur_xref);
}
if ((*param)->stat > 1)
@@ -332,7 +366,7 @@ cnki_pdf(cnki_t **param)
if ((*param)->stat > 0)
cur = ftell((*param)->fp_o);
- if (pdf_dump_trailer(&pdf, &(*param)->fp_o, xref) != 0) {
+ if (pdf_dump_trailer(&pdf, &(*param)->fp_o, cur_xref) != 0) {
if ((*param)->stat > 0)
printf("Trailer not written\n");
} else {
diff --git a/src/pdf.h b/src/pdf.h
index 394da5a..a5630d8 100644
--- a/src/pdf.h
+++ b/src/pdf.h
@@ -54,6 +54,7 @@ int pdf_get_size(pdf_object_t **pdf);
int pdf_get_free_id(pdf_object_t **pdf);
int pdf_get_free_ids(pdf_object_t **pdf, int **ids, int count);
int pdf_get_catalog_id(pdf_object_t **pdf);
+int pdf_get_xref_id(pdf_object_t **pdf);
int pdf_get_parent_id(pdf_object_t **pdf, int **id);
int pdf_get_kid_id(pdf_object_t **pdf, int id, int **kid);
int pdf_get_kid_count(pdf_object_t **pdf, int id);
diff --git a/src/pdf_get.c b/src/pdf_get.c
index a441b1e..95d5b66 100644
--- a/src/pdf_get.c
+++ b/src/pdf_get.c
@@ -162,6 +162,27 @@ pdf_get_catalog_id(pdf_object_t **pdf)
}
int
+pdf_get_xref_id(pdf_object_t **pdf) /* Return the id of the object whose dictionary contains "/XRef", or 0 if none does. */
+{
+ if (*pdf == NULL)
+ return 1; /* NOTE(review): 1 is non-zero, so cnki_pdf() would treat it as a found xref id and pass it to pdf_obj_del() - confirm this is intended */
+
+ int xref_id = 0; /* 0 is the "not found" value that cnki_pdf() tests for */
+
+ pdf_object_t *ptr = (*pdf)->next; /* the scan starts at the node after *pdf */
+
+ while (ptr != NULL) {
+ if (ptr->dictionary != NULL &&
+ strstr(ptr->dictionary, "/XRef") != NULL) /* substring match; stops at the first NUL byte in the dictionary */
+ xref_id = ptr->id; /* no break: when several objects match, the last one wins */
+
+ ptr = ptr->next;
+ }
+
+ return xref_id;
+}
+
+int
pdf_get_parent_id(pdf_object_t **pdf, int **id)
{
if (*pdf == NULL || *id != NULL)
diff --git a/src/pdf_parser.c b/src/pdf_parser.c
index 9c361a2..9531d28 100644
--- a/src/pdf_parser.c
+++ b/src/pdf_parser.c
@@ -154,10 +154,16 @@ pdf_load(pdf_object_t **pdf, FILE **fp, int size_buf)
if ((head = memmem(buf, ptr->size, "<<", 2)) != NULL &&
(tail = _memmem_whitespace(buf, ptr->size, ">>", 2)) != NULL) {
- /* A dictionary object may have nested dictionary */
+ /*
+ * A dictionary object may have nested dictionary,
+ * but it should not be in a stream
+ */
while ((tmp = _memmem_whitespace(tail + 2,
ptr->size - (tail - buf) - 2,
- ">>", 2)) != NULL)
+ ">>", 2)) != NULL &&
+ memmem(tail + 2,
+ ptr->size - (tail - buf) - 2,
+ "stream\r\n", 8) == NULL)
tail = tmp;
ptr->dictionary_size = tail - head + 2;
diff --git a/src/pdf_writer.c b/src/pdf_writer.c
index 8d5fc16..cda998a 100644
--- a/src/pdf_writer.c
+++ b/src/pdf_writer.c
@@ -27,10 +27,10 @@ pdf_dump_obj(pdf_object_t **pdf, FILE **fp)
fprintf(*fp, "%d 0 obj\n", ptr->id);
if (ptr->dictionary != NULL) {
- fputs(ptr->dictionary, *fp);
+ fwrite(ptr->dictionary, ptr->dictionary_size, 1, *fp);
fputs("\n", *fp);
} else if (ptr->object != NULL) {
- fputs(ptr->object, *fp);
+ fwrite(ptr->object, ptr->object_size, 1, *fp);
fputs("\n", *fp);
} else if (ptr->stream == NULL) {
fputs("null\n", *fp);