about summary refs log tree commit diff stats
path: root/src/cnki_pdf.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/cnki_pdf.c')
-rw-r--r-- src/cnki_pdf.c 147
1 files changed, 107 insertions, 40 deletions
diff --git a/src/cnki_pdf.c b/src/cnki_pdf.c
index dcf6d30..b59b7c6 100644
--- a/src/cnki_pdf.c
+++ b/src/cnki_pdf.c
@@ -238,7 +238,7 @@ cnki_pdf(cnki_t **param)
if ((*param)->stat > 1)
printf("Generating object\n");
- dictionary_size = 64 + 12 * kid[0];
+ dictionary_size = 64 + 16 * kid[0];
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
@@ -483,6 +483,9 @@ cnki_pdf_hn(cnki_t **param)
int *ids = NULL;
pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3);
+ int bitmap_size;
+ char *bitmap;
+
int stream_size;
char *stream;
@@ -493,32 +496,34 @@ cnki_pdf_hn(cnki_t **param)
if (dim == NULL) {
free(root_kid);
+ free(ids);
return 1;
}
- for (int i = 0; i < ptr->image_length; i++) {
- dictionary_size = 128;
- dictionary = malloc(dictionary_size);
+ dictionary_size = 256;
+ dictionary = malloc(dictionary_size);
- if (dictionary == NULL) {
- free(root_kid);
- free(dim);
- return 1;
- }
+ if (dictionary == NULL) {
+ free(root_kid);
+ free(ids);
+ free(dim);
+ return 1;
+ }
+ for (int i = 0; i < ptr->image_length; i++) {
memset(dictionary, 0, dictionary_size);
strcat(dictionary, "<<\n/Type /XObject\n"
"/Subtype /Image\n");
if ((*param)->stat > 2)
- printf("\tDecoding data, page %04d item %02d... ",
- ptr->page, i);
+ printf("\tDecoding data, page %04d item %02d format %d... ",
+ ptr->page, i, ptr->image_data[i].format);
switch (ptr->image_data[i].format) {
case JBIG:
- ret = cnki_jbig(&stream,
- &stream_size,
+ ret = cnki_jbig(&bitmap,
+ &bitmap_size,
&wh[0],
&wh[1],
ptr->image_data[i].image,
@@ -530,18 +535,30 @@ cnki_pdf_hn(cnki_t **param)
break;
}
+ if (strdeflate(&stream, &stream_size,
+ bitmap, bitmap_size) != 0) {
+ free(root_kid);
+ free(ids);
+ free(dim);
+ free(dictionary);
+ return 1;
+ }
+
+ free(bitmap);
+
snprintf(buf, 64, "/Width %d\n/Height %d\n",
wh[0], wh[1]);
strcat(dictionary, buf);
strcat(dictionary, "/ColorSpace /DeviceGray\n"
"/BitsPerComponent 1\n");
+ strcat(dictionary, "/Decode [1.0 0.0]\n");
snprintf(buf, 64, "/Length %d\n",
stream_size);
strcat(dictionary, buf);
- strcat(dictionary, "/Filter /CCITTFaxDecode\n");
+ strcat(dictionary, "/Filter /FlateDecode\n");
dim[i * 2] = wh[0];
dim[i * 2 + 1] = wh[1];
@@ -562,9 +579,10 @@ cnki_pdf_hn(cnki_t **param)
stream_size = ptr->image_data[i].size;
stream = malloc(stream_size);
if (stream == NULL) {
- free(dictionary);
free(root_kid);
+ free(ids);
free(dim);
+ free(dictionary);
return 1;
}
memcpy(stream, ptr->image_data[i].image, stream_size);
@@ -573,7 +591,7 @@ cnki_pdf_hn(cnki_t **param)
wh[0], wh[1]);
strcat(dictionary, buf);
- strcat(dictionary, "/ColorSpace /DeviceRGB\n"
+ strcat(dictionary, "/ColorSpace /DeviceGray\n"
"/BitsPerComponent 8\n");
snprintf(buf, 64, "/Length %d\n",
@@ -586,6 +604,47 @@ cnki_pdf_hn(cnki_t **param)
dim[i * 2 + 1] = wh[1];
break;
case JBIG2:
+ ret = cnki_jbig2(&bitmap,
+ &bitmap_size,
+ &wh[0],
+ &wh[1],
+ ptr->image_data[i].image,
+ ptr->image_data[i].size);
+
+ if (ret != 0) {
+ dim[i * 2] = 0;
+ dim[i * 2 + 1] = 0;
+ break;
+ }
+
+ if (strdeflate(&stream, &stream_size,
+ bitmap, bitmap_size) != 0) {
+ free(root_kid);
+ free(ids);
+ free(dim);
+ free(dictionary);
+ return 1;
+ }
+
+ free(bitmap);
+
+ snprintf(buf, 64, "/Width %d\n/Height %d\n",
+ wh[0], wh[1]);
+ strcat(dictionary, buf);
+
+ strcat(dictionary, "/ColorSpace /DeviceGray\n"
+ "/BitsPerComponent 1\n");
+ strcat(dictionary, "/Decode [1.0 0.0]\n");
+
+ snprintf(buf, 64, "/Length %d\n",
+ stream_size);
+ strcat(dictionary, buf);
+
+ strcat(dictionary, "/Filter /FlateDecode\n");
+
+ dim[i * 2] = wh[0];
+ dim[i * 2 + 1] = wh[1];
+ break;
case JPX:
default:
ret = -1;
@@ -598,37 +657,26 @@ cnki_pdf_hn(cnki_t **param)
if (ret == 0) {
if ((*param)->stat > 2)
- printf("Done\n");
+ printf("%6d byte(s), width %4d, height %4d.\n",
+ stream_size, wh[0], wh[1]);
pdf_obj_append(&pdf, ids[i],
NULL, dictionary, stream, stream_size);
- free(dictionary);
free(stream);
} else if (ret == 1) {
if ((*param)->stat > 2)
- printf("Failed\n");
-
- free(dictionary);
+ printf("Not extracted.\n");
pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
} else {
if ((*param)->stat > 2)
- printf("Unsupported format\n");
+ printf("Unsupported format.\n");
- free(dictionary);
+ pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
}
}
- dictionary_size = 128;
- dictionary = malloc(dictionary_size);
-
- if (dictionary == NULL) {
- free(root_kid);
- free(dim);
- return 1;
- }
-
memset(dictionary, 0, dictionary_size);
strcat(dictionary, "<<\n/XObject <<");
@@ -655,11 +703,12 @@ cnki_pdf_hn(cnki_t **param)
if (strncmp(ptr->text + 8, "COMPRESSTEXT", 12) == 0) {
cnki_zlib(&stream, &stream_size, ptr->text, ptr->text_size);
- dictionary_size = stream_size / 8 + 7;
+ dictionary_size = 64 + 2 * stream_size;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
+ free(ids);
free(dim);
return 1;
}
@@ -688,11 +737,12 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, ">");
} else {
- dictionary_size = ptr->text_size;
+ dictionary_size = 64 + 2 * ptr->text_size;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
+ free(ids);
free(dim);
return 1;
}
@@ -724,11 +774,12 @@ cnki_pdf_hn(cnki_t **param)
/* FIXME: Use the text somehow? */
free(dictionary);
- dictionary_size = 64 + 12 * ptr->image_length;
+ dictionary_size = 64 + 64 * ptr->image_length;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
free(root_kid);
+ free(ids);
free(dim);
return 1;
}
@@ -739,12 +790,27 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, "0.25 0 0 0.25 0 0 cm\n");
+ double resize_x;
+ double resize_y;
+
for (int i = 0; i < ptr->image_length; i++) {
if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0)
continue;
+ /* Scale within bound of A4 paper */
+ resize_x = 595.276 * 4 / dim[i * 2];
+ resize_y = 841.89 * 4 / dim[i * 2 + 1];
+
+ if (resize_y < resize_x)
+ snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n",
+ resize_y, resize_y);
+ else
+ snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n",
+ resize_x, resize_x);
+ strcat(dictionary, buf);
+
/* Apply transformation matrix */
- if (ptr->image_data[i].format == DCT_1) {
+ if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1) {
snprintf(buf, 64, "1 0 0 1 0 %d cm\n",
dim[i * 2 + 1]);
strcat(dictionary, buf);
@@ -763,9 +829,10 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, "Q");
if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0) {
- free(dictionary);
free(root_kid);
+ free(ids);
free(dim);
+ free(dictionary);
return 1;
}
@@ -796,7 +863,7 @@ cnki_pdf_hn(cnki_t **param)
strcat(dictionary, buf);
/* A4 paper */
- strcat(dictionary, "/MediaBox [ 0 0 595.276 841.89 ]\n");
+ strcat(dictionary, "/MediaBox [0 0 595.276 841.89]\n");
/* Add /Parent when we know root */
pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0);
@@ -838,7 +905,7 @@ cnki_pdf_hn(cnki_t **param)
if ((*param)->stat > 1)
printf("Generating root object\n");
- dictionary_size = 64 + 12 * (*param)->file_stat->page;
+ dictionary_size = 64 + 64 * (*param)->file_stat->page;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {
@@ -877,7 +944,7 @@ cnki_pdf_hn(cnki_t **param)
free(dictionary);
- dictionary_size = 128;
+ dictionary_size = 256;
dictionary = malloc(dictionary_size);
if (dictionary == NULL) {