/*
* Copyright (c) 2020-2023, yzrh <yzrh@noema.org>
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdlib.h>
#include <string.h>
#include "cnki.h"
#include "iconv.h"
#include "zlib.h"
#include "jpeg.h"
#include "jp2.h"
#include "pdf.h"
#include "pdf_cnki.h"
/*
 * Serialise the object list to (*param)->fp_o: header, objects,
 * cross-reference table and trailer, in that order.
 *
 * Progress is reported on stdout according to (*param)->stat
 * (> 0: summaries, > 1: step announcements and the object table).
 *
 * Returns the output stream position after the trailer, or -1 when the
 * header could not be written.  A failed cross-reference table or trailer
 * is only reported (when stat > 0); writing continues regardless.
 */
static long
_pdf_dump(cnki_t **param, pdf_object_t **pdf)
{
	cnki_t *ctx = *param;
	long mark = 0;
	long xref_pos;

	if (ctx->stat > 1)
		printf("Writing header\n");

	if (ctx->stat > 0)
		mark = ftell(ctx->fp_o);

	if (pdf_dump_header(pdf, &ctx->fp_o) != 0) {
		fprintf(stderr, "Header not written\n");
		return -1;
	}

	if (ctx->stat > 0)
		printf("Header %ld byte(s) written\n",
		    ftell(ctx->fp_o) - mark);

	if (ctx->stat > 1)
		printf("Writing object(s)\n");

	pdf_dump_obj(pdf, &ctx->fp_o);

	if (ctx->stat > 1) {
		printf("\t%8s\t%8s\t%8s\t%12s\t%12s\t%12s\n",
		    "address",
		    "size",
		    "id",
		    "object",
		    "dictionary",
		    "stream");

		/* The first node is skipped; listing starts at its successor */
		for (pdf_object_t *obj = (*pdf)->next; obj != NULL; obj = obj->next)
			printf("\t%08x\t%8d\t%8d\t%12d\t%12d\t%12d\n",
			    obj->address,
			    obj->size,
			    obj->id,
			    obj->object_size,
			    obj->dictionary_size,
			    obj->stream_size);
	}

	if (ctx->stat > 0)
		printf("%d object(s) %ld byte(s) written\n",
		    pdf_get_count(pdf),
		    ftell(ctx->fp_o));

	/* The trailer needs the offset at which the xref table starts */
	xref_pos = ftell(ctx->fp_o);

	if (ctx->stat > 1)
		printf("Writing cross-reference table\n");

	if (pdf_dump_xref(pdf, &ctx->fp_o) == 0) {
		if (ctx->stat > 0)
			printf("Cross-reference table %ld byte(s) written\n",
			    ftell(ctx->fp_o) - xref_pos);
	} else if (ctx->stat > 0) {
		printf("Cross-reference table not written\n");
	}

	if (ctx->stat > 1)
		printf("Writing trailer\n");

	if (ctx->stat > 0)
		mark = ftell(ctx->fp_o);

	if (pdf_dump_trailer(pdf, &ctx->fp_o, xref_pos) == 0) {
		if (ctx->stat > 0)
			printf("Trailer %ld byte(s) written\n",
			    ftell(ctx->fp_o) - mark);
	} else if (ctx->stat > 0) {
		printf("Trailer not written\n");
	}

	if (ctx->stat > 0)
		printf("Total %ld byte(s) written\n",
		    ftell(ctx->fp_o));

	return ftell(ctx->fp_o);
}
/*
 * Generate the outline objects when the input declares any
 * ((*param)->file_stat->outline > 0).
 *
 * One id more than the declared outline count is reserved with
 * pdf_get_free_ids() and handed to pdf_cnki_outline().
 *
 * Returns ids[0], which the callers reference as /Outlines in the catalog,
 * or -1 when there is no outline or the ids could not be reserved.
 * A non-zero result from pdf_cnki_outline() is only reported; ids[0] is
 * still returned in that case.
 */
static int
_pdf_cnki_outline(cnki_t **param, pdf_object_t **pdf)
{
	int count = (*param)->file_stat->outline;

	if (count <= 0)
		return -1;

	if ((*param)->stat > 1)
		printf("Generating outline object(s)\n\t%8s\n", "id");

	int *ids = NULL;

	pdf_get_free_ids(pdf, &ids, count + 1);

	/* Without an id array there is nothing to index below */
	if (ids == NULL)
		return -1;

	int outline = pdf_cnki_outline(pdf, &(*param)->object_outline, &ids);

	if ((*param)->stat > 1)
		for (int i = 0; i < count + 1; i++)
			printf("\t%8d\n", ids[i]);

	if ((*param)->stat > 0) {
		if (outline != 0)
			printf("No outline information\n");
		else
			printf("Generated %d outline object(s)\n",
			    count + 1);
	}

	int ret = ids[0];

	free(ids);

	return ret;
}
/*
 * Logging wrapper around pdf_obj_sort(): announces the sort when
 * (*param)->stat > 1, confirms it when (*param)->stat > 0, and passes the
 * result of pdf_obj_sort() through unchanged.
 */
static int
_pdf_obj_sort(cnki_t **param, pdf_object_t **pdf)
{
	if ((*param)->stat > 1)
		printf("Sorting object(s)\n");

	int status = pdf_obj_sort(pdf);

	if ((*param)->stat > 0)
		printf("Sorted object(s)\n");

	return status;
}
/*
 * Remove adjacent objects that share an id, keeping the larger of each
 * pair.  The list is expected to be sorted so that duplicates are
 * neighbours (cnki_pdf() sorts before calling this).
 *
 * Returns the number of objects removed.
 *
 * NOTE(review): this relies on pdf_get_obj()/pdf_obj_del() searching from
 * the successor of the node they are given, so that passing ptr targets
 * the first duplicate and passing ptr->next targets the second -- confirm
 * against their implementation.
 */
static int
_pdf_obj_dedup(cnki_t **param, pdf_object_t **pdf)
{
	int ret = 0;
	pdf_object_t *ptr = *pdf;

	if ((*param)->stat > 1)
		printf("Deleting duplicated object\n");

	while (ptr != NULL && ptr->next != NULL && ptr->next->next != NULL) {
		if (ptr->next->id != ptr->next->next->id) {
			ptr = ptr->next;
			continue;
		}

		pdf_object_t *tmp = NULL;
		int missing;

		/* Keep the bigger one, the smaller one is usually incomplete */
		if (ptr->next->size < ptr->next->next->size) {
			missing = pdf_get_obj(&ptr, ptr->next->id, &tmp);
			if (missing == 0 && tmp != NULL)
				pdf_obj_del(&ptr, ptr->next->id);
		} else {
			missing = pdf_get_obj(&ptr->next, ptr->next->id, &tmp);
			if (missing == 0 && tmp != NULL)
				pdf_obj_del(&ptr->next, ptr->next->id);
		}

		/*
		 * The lookup did not yield a node: nothing was unlinked, so
		 * there is nothing to destroy.  Step forward so the same pair
		 * is not examined again forever.
		 */
		if (missing != 0 || tmp == NULL) {
			ptr = ptr->next;
			continue;
		}

		/* Detach before destroying so only this node is released */
		tmp->next = NULL;
		pdf_obj_destroy(&tmp);

		ret++;

		if ((*param)->stat > 1)
			printf("Deleted duplicated object %d.\n", ptr->next->id);

		/* Stay on ptr: the survivor may have yet another duplicate */
	}

	if ((*param)->stat > 0) {
		if (ret == 0)
			printf("No duplicated object\n");
		else
			printf("Deleted %d duplicated object(s)\n", ret);
	}

	return ret;
}
/*
 * Load the PDF objects from (*param)->fp_i, regenerate the parts of the
 * page tree that are referenced but absent, and write the result to
 * (*param)->fp_o.
 *
 * Steps, in order:
 *   1. load objects, collect referenced parent ids, sort and deduplicate;
 *   2. for every parent id for which pdf_get_kid_id() reports kids,
 *      prepend a generated /Pages object;
 *   3. choose the root id and generate the root /Pages object if it is
 *      not present;
 *   4. generate outline objects and build a catalog; the catalog replaces
 *      an existing one only when the root had to be generated, and is
 *      appended when none exists;
 *   5. delete the xref object if there is one, sort, and dump.
 *
 * Returns 0 on success and 1 on failure.  All buffers and the object list
 * are released on every path.  The result of _pdf_dump() is not reflected
 * in the return value.
 */
int
cnki_pdf(cnki_t **param)
{
	if (*param == NULL)
		return 1;

	pdf_object_t *pdf = NULL;

	if (pdf_obj_create(&pdf) != 0)
		return 1;

	/* Everything released at cleanup starts out NULL */
	int ret = 1;
	int dictionary_size;
	char *dictionary = NULL;
	char buf[64];
	int *parent = NULL;
	int *kid = NULL;
	int8_t *parent_missing = NULL;
	pdf_object_t *tmp = NULL;

	if ((*param)->stat > 0)
		printf("Begin processing PDF\n");

	if ((*param)->stat > 1)
		printf("Loading object(s)\n");

	if (pdf_load(&pdf, &(*param)->fp_i, (*param)->size_buf) != 0)
		goto cleanup;

	if ((*param)->stat > 1) {
		printf("\t%8s\t%8s\t%8s\t%12s\t%12s\t%12s\n",
		    "address",
		    "size",
		    "id",
		    "object",
		    "dictionary",
		    "stream");

		for (pdf_object_t *ptr = pdf->next; ptr != NULL; ptr = ptr->next)
			printf("\t%08x\t%8d\t%8d\t%12d\t%12d\t%12d\n",
			    ptr->address,
			    ptr->size,
			    ptr->id,
			    ptr->object_size,
			    ptr->dictionary_size,
			    ptr->stream_size);
	}

	if ((*param)->stat > 0)
		printf("Loaded %d object(s)\n",
		    pdf_get_count(&pdf));

	if ((*param)->stat > 1)
		printf("Searching for parent object(s)\n");

	/* parent[0] is the count, parent[1..parent[0]] are the ids */
	pdf_get_parent_id(&pdf, &parent);
	if (parent == NULL)
		goto cleanup;

	if ((*param)->stat > 0)
		printf("Discovered %d parent object(s)\n", parent[0]);

	pdf_obj_sort(&pdf);
	_pdf_obj_dedup(param, &pdf);

	/* parent_missing[i] == 1 when an object was generated for parent[i + 1] */
	if (parent[0] > 0) {
		parent_missing = malloc(parent[0] * sizeof(int8_t));
		if (parent_missing == NULL)
			goto cleanup;
	}

	for (int i = 1; i <= parent[0]; i++) {
		if ((*param)->stat > 1)
			printf("Searching for object %d\n", parent[i]);

		/* kid[0] is the count, kid[1..kid[0]] are the ids */
		kid = NULL;
		pdf_get_kid_id(&pdf, parent[i], &kid);
		if (kid == NULL)
			goto cleanup;

		if (kid[0] != 0) {
			if ((*param)->stat > 0)
				printf("Object %d is missing\n", parent[i]);
			if ((*param)->stat > 1)
				printf("Generating object\n");

			/* "<id> 0 R " needs at most 16 bytes per kid */
			dictionary_size = 64 + 16 * kid[0];
			dictionary = malloc(dictionary_size);
			if (dictionary == NULL)
				goto cleanup;
			memset(dictionary, 0, dictionary_size);

			snprintf(buf, 64,
			    "<<\n/Type /Pages\n/Kids [");
			strcat(dictionary, buf);

			for (int j = 1; j <= kid[0]; j++) {
				snprintf(buf, 64,
				    "%d 0 R",
				    kid[j]);
				strcat(dictionary, buf);

				if (j < kid[0])
					strcat(dictionary, " ");
			}

			/* Fall back to the number of kids when no count is reported */
			int kid_count = pdf_get_kid_count(&pdf, parent[i]);

			snprintf(buf, 64,
			    "]\n/Count %d\n>>",
			    kid_count > 0 ? kid_count : kid[0]);
			strcat(dictionary, buf);

			pdf_obj_prepend(&pdf, parent[i], NULL, dictionary, NULL, 0);

			parent_missing[i - 1] = 1;

			if ((*param)->stat > 0)
				printf("Generated object for %d child(ren)\n",
				    kid[0]);

			free(dictionary);
			dictionary = NULL;
		} else {
			parent_missing[i - 1] = 0;

			if ((*param)->stat > 0)
				printf("Object %d exists\n", parent[i]);
		}

		free(kid);
		kid = NULL;
	}

	if ((*param)->stat > 1)
		printf("Searching for root object\n");

	/*
	 * This buffer first holds the root /Pages dictionary and is then
	 * reused for the catalog.  16 bytes per reference, as for the kids
	 * above.
	 */
	dictionary_size = 128 + 16 * parent[0];
	dictionary = malloc(dictionary_size);
	if (dictionary == NULL)
		goto cleanup;
	memset(dictionary, 0, dictionary_size);

	int root = 0;
	int root_kid = 0;

	for (int i = 0; i < parent[0]; i++)
		if (parent_missing[i] == 1)
			root_kid++;

	if (root_kid > 1) {
		/* Several generated parents: they need a new common root */
		root = pdf_get_free_id(&pdf);
	} else {
		if (root_kid == 0) {
			/* Nothing was generated: take the highest parent id */
			for (int i = 1; i <= parent[0]; i++)
				if (root == 0 || root < parent[i])
					root = parent[i];
		} else {
			/* Exactly one generated parent: it is the root */
			for (int i = 0; i < parent[0]; i++)
				if (parent_missing[i] == 1)
					root = parent[i + 1];
		}

		if (root == 0)
			root = pdf_get_free_id(&pdf);
		else if ((*param)->stat > 0)
			printf("Root object is %d.\n", root);
	}

	/* Non-zero means the root object is not in the list */
	int root_gen = pdf_get_obj(&pdf, root, &tmp);

	if (root_gen != 0) {
		if ((*param)->stat > 0)
			printf("Root object is missing\n");
		if ((*param)->stat > 1)
			printf("Generating root object\n");

		snprintf(buf, 64,
		    "<<\n/Type /Pages\n/Kids [");
		strcat(dictionary, buf);

		for (int i = 0, j = 0; i < parent[0]; i++) {
			if (parent_missing[i] == 1) {
				snprintf(buf, 64, "%d 0 R", parent[i + 1]);
				strcat(dictionary, buf);

				if (++j < root_kid)
					strcat(dictionary, " ");
			}
		}

		snprintf(buf, 64, "]\n/Count %d\n", (*param)->file_stat->page);
		strcat(dictionary, buf);

		strcat(dictionary, ">>");

		pdf_obj_prepend(&pdf, root, NULL, dictionary, NULL, 0);

		/* Clear the buffer so the catalog can be built in it */
		memset(dictionary, 0, dictionary_size);

		if ((*param)->stat > 0)
			printf("Generated root object %d.\n",
			    root);
	}

	free(parent_missing);
	parent_missing = NULL;
	free(parent);
	parent = NULL;

	int outline = _pdf_cnki_outline(param, &pdf);

	snprintf(buf, 64,
	    "<<\n/Type /Catalog\n/Pages %d 0 R\n",
	    root);
	strcat(dictionary, buf);

	if (outline != -1) {
		snprintf(buf, 64,
		    "/Outlines %d 0 R\n/PageMode /UseOutlines\n",
		    outline);
		strcat(dictionary, buf);
	}

	strcat(dictionary, ">>");

	if ((*param)->stat > 1)
		printf("Searching for catalog object\n");

	int catalog = pdf_get_catalog_id(&pdf);

	if (catalog != 0) {
		if ((*param)->stat > 0)
			printf("Catalog object is %d.\n", catalog);

		/* An existing catalog is only replaced when the root is new */
		if (root_gen != 0) {
			if ((*param)->stat > 1)
				printf("Replacing catalog object\n");

			pdf_obj_replace(&pdf, catalog, NULL, dictionary, NULL, 0);

			if ((*param)->stat > 0)
				printf("Replaced catalog object\n");
		}
	} else {
		if ((*param)->stat > 0)
			printf("Catalog object is missing\n");
		if ((*param)->stat > 1)
			printf("Generating catalog object\n");

		pdf_obj_append(&pdf, 0, NULL, dictionary, NULL, 0);

		if ((*param)->stat > 0)
			printf("Generated catalog object\n");
	}

	if ((*param)->stat > 1)
		printf("Searching for xref object\n");

	int xref = pdf_get_xref_id(&pdf);

	if (xref != 0) {
		if ((*param)->stat > 0)
			printf("Xref object is %d.\n", xref);
		if ((*param)->stat > 1)
			printf("Deleting xref object\n");

		/*
		 * Only unlink and destroy what this lookup returned; tmp may
		 * otherwise still refer to the root object found earlier.
		 */
		tmp = NULL;
		if (pdf_get_obj(&pdf, xref, &tmp) == 0 && tmp != NULL) {
			pdf_obj_del(&pdf, xref);

			tmp->next = NULL;
			pdf_obj_destroy(&tmp);

			if ((*param)->stat > 0)
				printf("Deleted xref object\n");
		}
	} else {
		if ((*param)->stat > 0)
			printf("Xref object is missing\n");
	}

	free(dictionary);
	dictionary = NULL;

	_pdf_obj_sort(param, &pdf);

	_pdf_dump(param, &pdf);

	ret = 0;

cleanup:
	free(dictionary);
	free(kid);
	free(parent_missing);
	free(parent);
	pdf_obj_destroy(&pdf);

	return ret;
}
/*
 * Append src to the NUL-terminated string in dst, but only when the result
 * including its terminator fits in dst_size bytes.  Otherwise dst is left
 * unchanged.
 */
static void
_pdf_hn_cat(char *dst, int dst_size, const char *src)
{
	size_t used = strlen(dst);
	size_t len = strlen(src);

	if (dst_size > 0 && used + len < (size_t) dst_size)
		memcpy(dst + used, src, len + 1);
}

/*
 * Convert the two-byte sequence (first, second) from GB18030 to UTF-16BE
 * with strconv() and append it to dst as "<feff....> Tj\n", bounded by
 * dst_size.  Nothing is appended when the conversion fails or yields no
 * more than two bytes.
 */
static void
_pdf_hn_show_char(char *dst, int dst_size, char first, char second)
{
	char conv_src[2];
	char conv_hex[3];
	char *conv_dst;
	int conv_size = 6;

	conv_src[0] = first;
	conv_src[1] = second;

	if (strconv(&conv_dst, "UTF-16BE",
	    conv_src, "GB18030", &conv_size) != 0)
		return;

	if (conv_size - 2 > 0) {
		_pdf_hn_cat(dst, dst_size, "<feff");
		for (int k = 0; k < conv_size - 2; k++) {
			snprintf(conv_hex, 3,
			    "%02x", (unsigned char) conv_dst[k]);
			_pdf_hn_cat(dst, dst_size, conv_hex);
		}
		_pdf_hn_cat(dst, dst_size, "> Tj\n");
	}

	free(conv_dst);
}

/*
 * Build a PDF from the page list in (*param)->object_hn and write it to
 * (*param)->fp_o.
 *
 * For every page the following objects are appended: one XObject per image
 * (an empty object when the image cannot be used), a resource dictionary,
 * a content stream that places the images on an A4 page, and the page
 * object.  A root /Pages object, the outline and a catalog are generated
 * afterwards, then the list is sorted and dumped.
 *
 * The page text is parsed because its 0x800a records carry the image
 * placement, but the text operators produced from it are discarded (see
 * the FIXME below).
 *
 * Returns 0 on success and 1 on failure.  All buffers and the object list
 * are released on every path.  The result of _pdf_dump() is not reflected
 * in the return value.
 */
int
cnki_pdf_hn(cnki_t **param)
{
	if (*param == NULL)
		return 1;

	pdf_object_t *pdf = NULL;

	if (pdf_obj_create(&pdf) != 0)
		return 1;

	/* Everything released at cleanup starts out NULL */
	int status = 1;
	int dictionary_size = 0;
	char *dictionary = NULL;
	char buf[64];
	pdf_object_t *tmp = NULL;
	int cnt = 0;
	int *root_kid = NULL;
	int *ids = NULL;
	double *dim = NULL;
	int outline;
	int root;

	int font = pdf_get_free_id(&pdf);

	pdf_obj_append(&pdf, font, NULL, "<<\n/Type /Font\n/Subtype /TrueType\n/BaseFont /NotoSansCJKSC\n>>", NULL, 0);

	if ((*param)->stat > 1)
		printf("Generating PDF object(s)\n");

	/* One page object id per page, in document order */
	root_kid = malloc((*param)->file_stat->page * sizeof(int));
	if (root_kid == NULL)
		goto cleanup;
	memset(root_kid, 0, (*param)->file_stat->page * sizeof(int));

	for (object_hn_t *ptr = (*param)->object_hn; ptr != NULL; ptr = ptr->next) {
		int bitmap_size;
		char *bitmap;
		int stream_size;
		char *stream;

		/* root_kid only has room for file_stat->page entries */
		if (cnt >= (*param)->file_stat->page)
			goto cleanup;

		/*
		 * External object (ptr->image_length) +
		 * resource object +
		 * content object +
		 * page object
		 */
		ids = NULL;
		pdf_get_free_ids(&pdf, &ids, ptr->image_length + 3);
		if (ids == NULL)
			goto cleanup;

		/* dim[2 * i] and dim[2 * i + 1] are width and height of image i */
		if (ptr->image_length > 0) {
			dim = malloc(2 * ptr->image_length * sizeof(double));
			if (dim == NULL)
				goto cleanup;

			dictionary_size = 256;
			dictionary = malloc(dictionary_size);
			if (dictionary == NULL)
				goto cleanup;
		}

		/* 0: usable, 1: not extracted, anything else: unsupported */
		int ret;
		int info[3];

		for (int i = 0; i < ptr->image_length; i++) {
			memset(dictionary, 0, dictionary_size);

			strcat(dictionary, "<<\n/Type /XObject\n"
			    "/Subtype /Image\n");

			if ((*param)->stat > 2)
				printf("\tProcessing image, page %04d item %d format %d... ",
				    ptr->page, i, ptr->image_data[i].format);

			switch (ptr->image_data[i].format) {
			case JBIG:
			case JBIG2:
				/* Both are decoded to a bitmap and re-compressed */
				if (ptr->image_data[i].format == JBIG)
					ret = cnki_jbig(&bitmap,
					    &bitmap_size,
					    &info[0],
					    &info[1],
					    ptr->image_data[i].image,
					    ptr->image_data[i].size);
				else
					ret = cnki_jbig2(&bitmap,
					    &bitmap_size,
					    &info[0],
					    &info[1],
					    ptr->image_data[i].image,
					    ptr->image_data[i].size);

				if (ret != 0) {
					dim[i * 2] = 0;
					dim[i * 2 + 1] = 0;
					break;
				}

				if (strdeflate(&stream, &stream_size,
				    bitmap, bitmap_size) != 0) {
					free(bitmap);
					goto cleanup;
				}

				free(bitmap);

				snprintf(buf, 64, "/Width %d\n/Height %d\n",
				    info[0], info[1]);
				strcat(dictionary, buf);

				strcat(dictionary, "/ColorSpace /DeviceGray\n"
				    "/BitsPerComponent 1\n");

				strcat(dictionary, "/Decode [1.0 0.0]\n");

				snprintf(buf, 64, "/Length %d\n",
				    stream_size);
				strcat(dictionary, buf);

				strcat(dictionary, "/Filter /FlateDecode\n");

				dim[i * 2] = info[0];
				dim[i * 2 + 1] = info[1];
				break;
			case DCT_0:
			case DCT_1:
				/* info[2] is used to choose the colour space */
				ret = strinfo_jpeg_dim(&info[0],
				    &info[1],
				    &info[2],
				    ptr->image_data[i].image,
				    ptr->image_data[i].size);

				if (ret != 0) {
					dim[i * 2] = 0;
					dim[i * 2 + 1] = 0;
					break;
				}

				/* The image data is embedded as is */
				stream_size = ptr->image_data[i].size;
				stream = malloc(stream_size);
				if (stream == NULL)
					goto cleanup;
				memcpy(stream, ptr->image_data[i].image, stream_size);

				snprintf(buf, 64, "/Width %d\n/Height %d\n",
				    info[0], info[1]);
				strcat(dictionary, buf);

				if (info[2] == 1)
					strcat(dictionary, "/ColorSpace /DeviceGray\n");
				else if (info[2] == 3)
					strcat(dictionary, "/ColorSpace /DeviceRGB\n");
				else
					strcat(dictionary, "/ColorSpace /DeviceCMYK\n");

				strcat(dictionary, "/BitsPerComponent 8\n");

				snprintf(buf, 64, "/Length %d\n",
				    stream_size);
				strcat(dictionary, buf);

				strcat(dictionary, "/Filter /DCTDecode\n");

				dim[i * 2] = info[0];
				dim[i * 2 + 1] = info[1];
				break;
			case JPX:
				ret = strinfo_jp2_dim(&info[0],
				    &info[1],
				    ptr->image_data[i].image,
				    ptr->image_data[i].size);

				if (ret != 0) {
					dim[i * 2] = 0;
					dim[i * 2 + 1] = 0;
					break;
				}

				/* The image data is embedded as is */
				stream_size = ptr->image_data[i].size;
				stream = malloc(stream_size);
				if (stream == NULL)
					goto cleanup;
				memcpy(stream, ptr->image_data[i].image, stream_size);

				snprintf(buf, 64, "/Width %d\n/Height %d\n",
				    info[0], info[1]);
				strcat(dictionary, buf);

				snprintf(buf, 64, "/Length %d\n",
				    stream_size);
				strcat(dictionary, buf);

				strcat(dictionary, "/Filter /JPXDecode\n");

				dim[i * 2] = info[0];
				dim[i * 2 + 1] = info[1];
				break;
			default:
				ret = -1;

				dim[i * 2] = -1;
				dim[i * 2 + 1] = -1;
				break;
			}

			strcat(dictionary, ">>");

			/* Every image id gets an object, empty when unusable */
			if (ret == 0) {
				if ((*param)->stat > 2)
					printf("%6d byte(s), width %4d, height %4d.\n",
					    stream_size, info[0], info[1]);

				pdf_obj_append(&pdf, ids[i],
				    NULL, dictionary, stream, stream_size);

				free(stream);
			} else if (ret == 1) {
				if ((*param)->stat > 2)
					printf("Not extracted.\n");

				pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
			} else {
				if ((*param)->stat > 2)
					printf("Unsupported format.\n");

				pdf_obj_append(&pdf, ids[i], NULL, NULL, NULL, 0);
			}
		}

		free(dictionary);
		dictionary = NULL;

		/*
		 * Shared by the resource dictionary, the content stream and
		 * the page dictionary.  Each image contributes at most five
		 * snprintf'ed lines of up to 63 bytes plus 25 bytes of fixed
		 * operators to the content stream, hence 384 bytes per image.
		 */
		dictionary_size = 128 + 2 * ptr->text_size + 384 * ptr->image_length;
		dictionary = malloc(dictionary_size);
		if (dictionary == NULL)
			goto cleanup;
		memset(dictionary, 0, dictionary_size);

		strcat(dictionary, "<<\n");

		if (ptr->text_size > 0) {
			snprintf(buf, 64, "/Font <</F0 %d 0 R>>\n", font);
			strcat(dictionary, buf);
		}

		if (ptr->image_length > 0) {
			strcat(dictionary, "/XObject <<");

			for (int i = 0; i < ptr->image_length; i++) {
				snprintf(buf, 64, "/Im%d %d 0 R", i, ids[i]);
				strcat(dictionary, buf);

				if (i < ptr->image_length - 1)
					strcat(dictionary, " ");
			}

			strcat(dictionary, ">>\n");
		}

		strcat(dictionary, ">>");

		pdf_obj_append(&pdf, ids[ptr->image_length], NULL, dictionary, NULL, 0);

		memset(dictionary, 0, dictionary_size);

		if (ptr->text_size > 0) {
			/* Only compare the marker when the text is long enough to hold it */
			if ((ptr->text_size >= 20 &&
			    strncmp(ptr->text + 8, "COMPRESSTEXT", 12) == 0) ||
			    (ptr->text_size >= 12 &&
			    strncmp(ptr->text, "COMPRESSTEXT", 12) == 0)) {
				stream = NULL;
				stream_size = 0;

				cnki_zlib(&stream, &stream_size, ptr->text, ptr->text_size);

				/* Keep the original text when nothing was produced */
				if (stream != NULL) {
					free(ptr->text);

					ptr->text_size = stream_size;
					ptr->text = stream;
				}
			}

			/*
			 * The text may now be larger than when the buffer was
			 * sized, so every append in this section is bounded.
			 */
			_pdf_hn_cat(dictionary, dictionary_size, "BT\n");
			_pdf_hn_cat(dictionary, dictionary_size, "/F0 10 Tf\n");

			/* i walks the text, j is the image the next 0x800a record describes */
			for (int i = 0, j = 0; i < ptr->text_size - 1;) {
				/* Records start with a little-endian 16-bit tag */
				switch (((unsigned char) ptr->text[i + 1] << 8) + (unsigned char) ptr->text[i]) {
				case 0x8001:
					if (ptr->address_next > ptr->address)
						_pdf_hn_cat(dictionary, dictionary_size, "T*\n");
					/* fallthrough */
				case 0x8070:
					if (ptr->address_next > ptr->address) {
						i += 4;

						/* Four-byte entries until the next 0x80xx tag */
						for (;;) {
							if (i + 3 >= ptr->text_size ||
							    (unsigned char) ptr->text[i + 1] == 0x80)
								break;

							/* No positioning (Td) is emitted yet */
							_pdf_hn_show_char(dictionary, dictionary_size,
							    ptr->text[i + 3], ptr->text[i + 2]);

							i += 4;
						}
						break;
					}

					if (i + 7 >= ptr->text_size) {
						i += 2;
						break;
					}

					/* No positioning (Td) is emitted yet */
					_pdf_hn_show_char(dictionary, dictionary_size,
					    ptr->text[i + 7], ptr->text[i + 6]);

					i += 8;
					break;
				case 0x800a:
					if (i + 27 >= ptr->text_size || j >= ptr->image_length) {
						i += 2;
						if (j >= ptr->image_length)
							i += 26;
						break;
					}

					if (ptr->image_length > 0) {
						/* Little-endian 16-bit x, y, w, h at offsets 4, 6, 8, 10 */
						ptr->image_data[j].x += (unsigned char) ptr->text[i + 5] << 8;
						ptr->image_data[j].x += (unsigned char) ptr->text[i + 4];

						ptr->image_data[j].y += (unsigned char) ptr->text[i + 7] << 8;
						ptr->image_data[j].y += (unsigned char) ptr->text[i + 6];

						ptr->image_data[j].w += (unsigned char) ptr->text[i + 9] << 8;
						ptr->image_data[j].w += (unsigned char) ptr->text[i + 8];

						ptr->image_data[j].h += (unsigned char) ptr->text[i + 11] << 8;
						ptr->image_data[j].h += (unsigned char) ptr->text[i + 10];

						if ((*param)->stat > 2)
							printf("\tItem %d: origin (%4d, %4d), width %4d, height %4d\n",
							    j,
							    ptr->image_data[j].x,
							    ptr->image_data[j].y,
							    ptr->image_data[j].w,
							    ptr->image_data[j].h);
					}

					i += 28;

					if (j == 0 || ptr->image_data[j].x > 0 || ptr->image_data[j].y > 0)
						j++;
					break;
				default:
					i += 4;
					break;
				}
			}

			_pdf_hn_cat(dictionary, dictionary_size, "ET");

			if (ptr->image_length > 0)
				_pdf_hn_cat(dictionary, dictionary_size, "\n");
		}

		/* FIXME: Use the text somehow? */
		memset(dictionary, 0, dictionary_size);

		if (ptr->image_length > 0) {
			double resize_x = 1;
			double resize_y = 1;
			double margin_x = 0;
			double margin_y = 0;

			if (ptr->image_data[0].x == 0 && ptr->image_data[0].y == 0 && dim[0] > 0 && dim[1] > 0) {
				/* Scale within bound of A4 paper */
				resize_x = 2480.315 / dim[0];
				resize_y = 3507.874 / dim[1];

				/* Apply the smaller factor to every image */
				if (resize_y < resize_x) {
					for (int i = 0; i < ptr->image_length; i++) {
						dim[i * 2] *= resize_y;
						dim[i * 2 + 1] *= resize_y;
					}
				} else {
					for (int i = 0; i < ptr->image_length; i++) {
						dim[i * 2] *= resize_x;
						dim[i * 2 + 1] *= resize_x;
					}
				}

				/* Centre the first image */
				margin_x = (2480.315 - dim[0]) / 2;
				margin_y = (3507.874 - dim[1]) / 2;
			}

			/* Remove duplicated image, ptr->image_length is sometimes squared */
			for (int i = 1; i < ptr->image_length; i++) {
				if ((ptr->image_data[i].x > 0 || ptr->image_data[i].y > 0) &&
				    dim[i * 2] < dim[0] && dim[i * 2 + 1] < dim[1])
					continue;

				/* Replace this image and all following ones with empty objects */
				for (int j = i; j < ptr->image_length; j++) {
					tmp = NULL;
					if (pdf_get_obj(&pdf, ids[j], &tmp) == 0 && tmp != NULL) {
						pdf_obj_del(&pdf, ids[j]);

						tmp->next = NULL;
						pdf_obj_destroy(&tmp);
					}

					dim[j * 2] = -1;
					dim[j * 2 + 1] = -1;

					pdf_obj_append(&pdf, ids[j], NULL, NULL, NULL, 0);
				}
				break;
			}

			for (int i = 0; i < ptr->image_length; i++) {
				/* Unusable and removed images have no positive size */
				if (dim[i * 2] <= 0 || dim[i * 2 + 1] <= 0)
					continue;

				strcat(dictionary, "q\n");

				strcat(dictionary, "0.24 0 0 0.24 0 0 cm\n");

				/* Rotate image */
				if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1) {
					snprintf(buf, 64, "1 0 0 -1 0 %f cm\n", dim[i * 2 + 1]);
					strcat(dictionary, buf);
				}

				/* Translate figure */
				if (ptr->image_data[i].x > 0 || ptr->image_data[i].y > 0) {
					double origin_x = ptr->image_data[i].x * 0.40433;
					double origin_y = ptr->image_data[i].y * 0.40433;

					if (resize_y < resize_x) {
						origin_x *= resize_y;
						origin_y *= resize_y;
					} else {
						origin_x *= resize_x;
						origin_y *= resize_x;
					}

					if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1)
						origin_y = -3507.874 + origin_y + dim[i * 2 + 1];
					else
						origin_y = 3507.874 - origin_y - dim[i * 2 + 1];

					snprintf(buf, 64, "1 0 0 1 %f %f cm\n", origin_x, origin_y);
					strcat(dictionary, buf);
				}

				if (margin_x > 0 || margin_y > 0) {
					if (ptr->image_data[i].format == JBIG || ptr->image_data[i].format == DCT_1) {
						snprintf(buf, 64, "1 0 0 1 %f %f cm\n", margin_x, -margin_y);
						strcat(dictionary, buf);
					} else {
						snprintf(buf, 64, "1 0 0 1 %f %f cm\n", margin_x, margin_y);
						strcat(dictionary, buf);
					}
				}

				snprintf(buf, 64, "%f 0 0 %f 0 0 cm\n", dim[i * 2], dim[i * 2 + 1]);
				strcat(dictionary, buf);

				snprintf(buf, 64, "/Im%d Do\n", i);
				strcat(dictionary, buf);

				strcat(dictionary, "Q");

				if (i < ptr->image_length - 1)
					strcat(dictionary, "\n");
			}

			free(dim);
			dim = NULL;
		}

		if (strlen(dictionary) > 0) {
			if (strdeflate(&stream, &stream_size, dictionary, strlen(dictionary)) != 0)
				goto cleanup;

			memset(dictionary, 0, dictionary_size);

			strcat(dictionary, "<<\n");

			snprintf(buf, 64, "/Length %d\n", stream_size);
			strcat(dictionary, buf);

			strcat(dictionary, "/Filter /FlateDecode\n");

			strcat(dictionary, ">>");

			pdf_obj_append(&pdf, ids[ptr->image_length + 1],
			    NULL, dictionary, stream, stream_size);

			free(stream);
		} else {
			pdf_obj_append(&pdf, ids[ptr->image_length + 1],
			    NULL, NULL, NULL, 0);
		}

		memset(dictionary, 0, dictionary_size);

		strcat(dictionary, "<<\n/Type /Page\n");

		/* A4 paper */
		strcat(dictionary, "/MediaBox [0 0 595.2756 841.8898]\n");

		snprintf(buf, 64, "/Resources %d 0 R\n", ids[ptr->image_length]);
		strcat(dictionary, buf);

		snprintf(buf, 64, "/Contents %d 0 R\n", ids[ptr->image_length + 1]);
		strcat(dictionary, buf);

		/* Add /Parent when we know root */

		pdf_obj_append(&pdf, ids[ptr->image_length + 2], NULL, dictionary, NULL, 0);

		root_kid[cnt++] = ids[ptr->image_length + 2];

		free(ids);
		ids = NULL;

		free(dictionary);
		dictionary = NULL;
	}

	if ((*param)->stat > 1) {
		printf("\t%8s\t%12s\t%12s\t%12s\n",
		    "id",
		    "object",
		    "dictionary",
		    "stream");

		for (pdf_object_t *obj = pdf->next; obj != NULL; obj = obj->next)
			printf("\t%8d\t%12d\t%12d\t%12d\n",
			    obj->id,
			    obj->object_size,
			    obj->dictionary_size,
			    obj->stream_size);
	}

	if ((*param)->stat > 0)
		printf("Generated %d object(s)\n",
		    pdf_get_count(&pdf));

	outline = _pdf_cnki_outline(param, &pdf);

	if ((*param)->stat > 1)
		printf("Generating root object\n");

	dictionary_size = 64 + 64 * (*param)->file_stat->page;
	dictionary = malloc(dictionary_size);
	if (dictionary == NULL)
		goto cleanup;
	memset(dictionary, 0, dictionary_size);

	root = pdf_get_free_id(&pdf);

	snprintf(buf, 64, "<<\n/Type /Pages\n/Kids [");
	strcat(dictionary, buf);

	for (int i = 0; i < (*param)->file_stat->page; i++) {
		snprintf(buf, 64, "%d 0 R", root_kid[i]);
		strcat(dictionary, buf);

		if (i < (*param)->file_stat->page - 1)
			strcat(dictionary, " ");
	}

	snprintf(buf, 64, "]\n/Count %d\n", (*param)->file_stat->page);
	strcat(dictionary, buf);

	strcat(dictionary, ">>");

	pdf_obj_prepend(&pdf, root, NULL, dictionary, NULL, 0);

	free(dictionary);
	dictionary = NULL;

	/* Add /Parent to page object */
	for (int i = 0; i < (*param)->file_stat->page; i++) {
		if (pdf_get_obj(&pdf, root_kid[i], &tmp) != 0)
			goto cleanup;

		/* Room for "/Parent <id> 0 R\n>>" and its terminator */
		dictionary_size = tmp->dictionary_size + 32;
		dictionary = malloc(dictionary_size);
		if (dictionary == NULL)
			goto cleanup;

		memcpy(dictionary, tmp->dictionary, tmp->dictionary_size);
		memset(dictionary + tmp->dictionary_size, 0, 32);

		/* The page dictionary was left open; this closes it */
		snprintf(buf, 64, "/Parent %d 0 R\n>>", root);
		strcat(dictionary, buf);

		if (pdf_obj_replace(&pdf, root_kid[i], NULL, dictionary, NULL, 0) != 0)
			goto cleanup;

		free(dictionary);
		dictionary = NULL;
	}

	free(root_kid);
	root_kid = NULL;

	dictionary_size = 128;
	dictionary = malloc(dictionary_size);
	if (dictionary == NULL)
		goto cleanup;
	memset(dictionary, 0, dictionary_size);

	if ((*param)->stat > 0)
		printf("Generated root object %d.\n",
		    root);

	if ((*param)->stat > 1)
		printf("Generating catalog object\n");

	snprintf(buf, 64,
	    "<<\n/Type /Catalog\n/Pages %d 0 R\n",
	    root);
	strcat(dictionary, buf);

	if (outline != -1) {
		snprintf(buf, 64,
		    "/Outlines %d 0 R\n/PageMode /UseOutlines\n",
		    outline);
		strcat(dictionary, buf);
	}

	strcat(dictionary, ">>");

	pdf_obj_append(&pdf, 0, NULL, dictionary, NULL, 0);

	free(dictionary);
	dictionary = NULL;

	if ((*param)->stat > 0)
		printf("Generated catalog object\n");

	_pdf_obj_sort(param, &pdf);

	_pdf_dump(param, &pdf);

	status = 0;

cleanup:
	free(dictionary);
	free(dim);
	free(ids);
	free(root_kid);
	pdf_obj_destroy(&pdf);

	return status;
}