diff options
Diffstat (limited to 'src/cnki_pdf.c')
-rw-r--r-- | src/cnki_pdf.c | 30 |
1 files changed, 20 insertions, 10 deletions
diff --git a/src/cnki_pdf.c b/src/cnki_pdf.c index af38aa6..76931ea 100644 --- a/src/cnki_pdf.c +++ b/src/cnki_pdf.c @@ -163,10 +163,16 @@ _pdf_obj_dedup(cnki_t **param, pdf_object_t **pdf) printf("Deleting duplicated object\n"); ptr = *pdf; - while (ptr != NULL && ptr->next != NULL) { - if (ptr->id == ptr->next->id) { - pdf_get_obj(&ptr, ptr->id, &tmp); - pdf_obj_del(&ptr, ptr->id); + while (ptr->next != NULL && ptr->next->next != NULL) { + if (ptr->next->id == ptr->next->next->id) { + /* Keep the bigger one, the smaller one is usually incomplete */ + if (ptr->next->size < ptr->next->next->size) { + pdf_get_obj(&ptr, ptr->next->id, &tmp); + pdf_obj_del(&ptr, ptr->next->id); + } else { + pdf_get_obj(&ptr->next, ptr->next->id, &tmp); + pdf_obj_del(&ptr->next, ptr->next->id); + } tmp->next = NULL; pdf_obj_destroy(&tmp); @@ -174,7 +180,9 @@ _pdf_obj_dedup(cnki_t **param, pdf_object_t **pdf) ret++; if ((*param)->stat > 1) - printf("Deleted duplicated object %d.\n", ptr->id); + printf("Deleted duplicated object %d.\n", ptr->next->id); + + continue; } ptr = ptr->next; @@ -236,6 +244,10 @@ cnki_pdf(cnki_t **param) printf("Loaded %d object(s)\n", pdf_get_count(&pdf)); + pdf_obj_sort(&pdf); + + _pdf_obj_dedup(param, &pdf); + int dictionary_size; char *dictionary; @@ -301,7 +313,7 @@ cnki_pdf(cnki_t **param) snprintf(buf, 64, "]\n/Count %d\n>>", - pdf_get_kid_count(&pdf, parent[i])); + pdf_get_kid_count(&pdf, parent[i]) > 0 ? pdf_get_kid_count(&pdf, parent[i]) : kid[0]); strcat(dictionary, buf); pdf_obj_prepend(&pdf, parent[i], NULL, dictionary, NULL, 0); @@ -354,7 +366,7 @@ cnki_pdf(cnki_t **param) } else { for (int i = 0; i < parent[0]; i++) if (parent_missing[i] == 1) - root = i; + root = parent[i + 1]; } if (root == 0) @@ -471,8 +483,6 @@ cnki_pdf(cnki_t **param) _pdf_obj_sort(param, &pdf); - _pdf_obj_dedup(param, &pdf); - _pdf_dump(param, &pdf); pdf_obj_destroy(&pdf); @@ -510,7 +520,7 @@ cnki_pdf_hn(cnki_t **param) if (root_kid == NULL) return 1; - memset(root_kid, 0, (*param)->file_stat->page); + memset(root_kid, 0, (*param)->file_stat->page * sizeof(int)); object_hn_t *ptr = (*param)->object_hn; while (ptr != NULL) { |