about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author yzrh <yzrh@noema.org> 2023-01-14 23:52:28 +0000
committer yzrh <yzrh@noema.org> 2023-01-15 15:34:46 +0000
commit 2fa2b760aef552982250dad346bd255be08cd9bb (patch)
tree a7e2e20713498db57feebd68bbbec8fff61b737f
parent dd5854678cfe7bab499925175b5b40314d71fede (diff)
download melon-master.tar.gz
melon-master.tar.zst
Fix HN text parsing. (HEAD, master)
Signed-off-by: yzrh <yzrh@noema.org>
-rw-r--r-- src/cnki_pdf.c 102
1 files changed, 52 insertions, 50 deletions
diff --git a/src/cnki_pdf.c b/src/cnki_pdf.c
index 0c1ebb0..d96ea49 100644
--- a/src/cnki_pdf.c
+++ b/src/cnki_pdf.c
@@ -850,73 +850,75 @@ cnki_pdf_hn(cnki_t **param)
for (int i = 0, j = 0; i < ptr->text_size - 1;) {
switch (((unsigned char) ptr->text[i + 1] << 8) + (unsigned char) ptr->text[i]) {
case 0x8001:
- if (ptr->address_next > ptr->address)
- strcat(dictionary, "T*\n");
- case 0x8070:
- if (ptr->address_next > ptr->address) {
- i += 4;
+ if (ptr->address_next <= ptr->address) {
+ if (i + 7 >= ptr->text_size) {
+ i += 2;
+ break;
+ }
+
+ conv_src[0] = ptr->text[i + 7];
+ conv_src[1] = ptr->text[i + 6];
- for (;;) {
- if (i + 3 >= ptr->text_size ||
- (unsigned char) ptr->text[i + 1] == 0x80)
- break;
-
- conv_src[0] = ptr->text[i + 3];
- conv_src[1] = ptr->text[i + 2];
-
- //snprintf(buf, 64, "%f %f Td\n");
- //strcat(dictionary, buf);
-
- conv_size = 6;
-
- if (strconv(&conv_dst, "UTF-16BE",
- conv_src, "GB18030", &conv_size) == 0) {
- if (conv_size - 2 > 0) {
- strcat(dictionary, "<feff");
- for (int k = 0; k < conv_size - 2; k++) {
- snprintf(conv_hex, 3,
- "%02x", (unsigned char) conv_dst[k]);
- strcat(dictionary, conv_hex);
- }
- strcat(dictionary, "> Tj\n");
+ //snprintf(buf, 64, "1 0 0 1 %d %d Tm\n")
+ //strcat(dictionary, buf);
+
+ conv_size = 6;
+
+ if (strconv(&conv_dst, "UTF-16BE",
+ conv_src, "GB18030", &conv_size) == 0) {
+ if (conv_size - 2 > 0) {
+ strcat(dictionary, "<");
+ for (int k = 0; k < conv_size - 2; k++) {
+ snprintf(conv_hex, 3,
+ "%02x", (unsigned char) conv_dst[k]);
+ strcat(dictionary, conv_hex);
}
- free(conv_dst);
+ strcat(dictionary, "> Tj\n");
}
-
- i += 4;
+ free(conv_dst);
}
+ i += 8;
break;
}
- if (i + 7 >= ptr->text_size) {
- i += 2;
+ strcat(dictionary, "T*\n");
+ case 0x8070:
+ i += 4;
+
+ if (ptr->address_next <= ptr->address)
break;
- }
- conv_src[0] = ptr->text[i + 7];
- conv_src[1] = ptr->text[i + 6];
+ for (;;) {
+ if (i + 3 >= ptr->text_size ||
+ (unsigned char) ptr->text[i + 1] == 0x80)
+ break;
- //snprintf(buf, 64, "%f %f Td\n");
- //strcat(dictionary, buf);
+ conv_src[0] = ptr->text[i + 3];
+ conv_src[1] = ptr->text[i + 2];
- conv_size = 6;
+ //snprintf(buf, 64, "1 0 0 1 %d %d Tm\n")
+ //strcat(dictionary, buf);
- if (strconv(&conv_dst, "UTF-16BE",
- conv_src, "GB18030", &conv_size) == 0) {
- if (conv_size - 2 > 0) {
- strcat(dictionary, "<feff");
- for (int k = 0; k < conv_size - 2; k++) {
- snprintf(conv_hex, 3,
- "%02x", (unsigned char) conv_dst[k]);
- strcat(dictionary, conv_hex);
+ conv_size = 6;
+
+ if (strconv(&conv_dst, "UTF-16BE",
+ conv_src, "GB18030", &conv_size) == 0) {
+ if (conv_size - 2 > 0) {
+ strcat(dictionary, "<");
+ for (int k = 0; k < conv_size - 2; k++) {
+ snprintf(conv_hex, 3,
+ "%02x", (unsigned char) conv_dst[k]);
+ strcat(dictionary, conv_hex);
+ }
+ strcat(dictionary, "> Tj\n");
}
- strcat(dictionary, "> Tj\n");
+ free(conv_dst);
}
- free(conv_dst);
+
+ i += 4;
}
- i += 8;
break;
case 0x800a:
if (i + 27 >= ptr->text_size || j >= ptr->image_length) {