diff --git a/Makefile b/Makefile index 3e3fa91..a689fa6 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -VERSION := 2.08 +VERSION := 2.09 CC := gcc EXTRA_CFLAGS ?= EXTRA_LDLAGS ?= diff --git a/README.md b/README.md index 2ac80e3..b206a6f 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # epub2txt -- Extract text from EPUB documents -Version 2.07, June 2024 +Version 2.09, August 2024 ## What is this? @@ -244,6 +244,7 @@ covered. Date | Change -----|------- +2.09, Aug 2024 | Improved failure mode with certain corrupt EPUBs 2.08, Jun 2024 | Fixed a memory-management warning ?, Jun 2024 | Removed position-independent code attributes from defaults 2.07, Jun 2024 | Improved clean-up if program killed in a pipe diff --git a/TODO b/TODO index 674a405..87da0c6 100644 --- a/TODO +++ b/TODO @@ -1,7 +1,7 @@ Format characters include the white space after tokens, which doesn't show up with bold, etc., but is still wrong. -There is additional white-space inserted after a format change is +There is additional white-space inserted when a format change is followed by a numeric HTML entity. This _does_ show up, and it looks ugly. It's a format that's rarely used, but really needs fixing. Unfortunately, this will require a radical change to the formatting diff --git a/man1/epub2txt.1 b/man1/epub2txt.1 index b4ff2fb..f5b58e9 100644 --- a/man1/epub2txt.1 +++ b/man1/epub2txt.1 @@ -1,9 +1,9 @@ -.\" Copyright (C) 2013-22 Kevin Boone +.\" Copyright (C) 2013-24 Kevin Boone .\" Permission is granted to any individual or institution to use, copy, or .\" redistribute this software so long as all of the original files are .\" included, and that this copyright notice is retained. 
.\" -.TH epub2txt 1 "June 2024" +.TH epub2txt 1 "August 2024" .SH NAME epub2txt \- Extract text from EPUB documents .SH SYNOPSIS diff --git a/src/epub2txt.c b/src/epub2txt.c index 4b294fe..b637edf 100644 --- a/src/epub2txt.c +++ b/src/epub2txt.c @@ -257,17 +257,26 @@ List *epub2txt_get_items (const char *opf, char **error) { XMLNode *root = XMLDoc_root (&doc); - int i, l = root->n_children; - for (i = 0; i < l; i++) - { - XMLNode *r1 = root->children[i]; - // Add workaround for bug #4 - if (strcmp (r1->tag, "manifest") == 0 || strstr (r1->tag, ":manifest")) + int l; + if (root) + { + int i; + l = root->n_children; + for (i = 0; i < l; i++) { - manifest = r1; - got_manifest = TRUE; + XMLNode *r1 = root->children[i]; + // Add workaround for bug #4 + if (strcmp (r1->tag, "manifest") == 0 || strstr (r1->tag, ":manifest")) + { + manifest = r1; + got_manifest = TRUE; + } } - } + } + else + { + log_warning ("'%s' has no root element -- corrupt EPUB?", opf); + } if (!got_manifest) { @@ -278,7 +287,7 @@ List *epub2txt_get_items (const char *opf, char **error) ret = list_create_strings(); - for (i = 0; i < l; i++) + for (int i = 0; i < l; i++) { XMLNode *r1 = root->children[i]; // Add workaround for bug #4 @@ -358,33 +367,40 @@ String *epub2txt_get_root_file (const char *opf, char **error) if (XMLDoc_parse_buffer_DOM (buff_cstr, APPNAME, &doc)) { XMLNode *root = XMLDoc_root (&doc); - int i, l = root->n_children; - for (i = 0; i < l; i++) - { - XMLNode *r1 = root->children[i]; - if (strcmp (r1->tag, "rootfiles") == 0) + if (root) + { + int i, l = root->n_children; + for (i = 0; i < l; i++) { - XMLNode *rootfiles = r1; - int i, l = rootfiles->n_children; - for (i = 0; i < l; i++) + XMLNode *r1 = root->children[i]; + if (strcmp (r1->tag, "rootfiles") == 0) { - XMLNode *rootfiles = r1; - int i, l = rootfiles->n_children; - for (i = 0; i < l; i++) + XMLNode *rootfiles = r1; + int i, l = rootfiles->n_children; + for (i = 0; i < l; i++) { - int k, nattrs = r1->n_attributes; - for (k = 0; k 
< nattrs; k++) + XMLNode *r1 = rootfiles->children[i]; + if (strcmp (r1->tag, "rootfile") == 0) { - char *name = r1->attributes[k].name; - char *value = r1->attributes[k].value; - if (strcmp (name, "full-path") == 0) + int k, nattrs = r1->n_attributes; + for (k = 0; k < nattrs; k++) { - ret = string_create (value); + char *name = r1->attributes[k].name; + char *value = r1->attributes[k].value; + if (strcmp (name, "full-path") == 0) + { + ret = string_create (value); + } } } } } } - } + } + else + { + log_warning ("No root element in '%s' -- corrupt EPUB?", opf); + } if (ret == NULL) asprintf (error, "container.xml does not specify a root file");