wepub

Check-in [3a830eb6e5]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Initial bringup, ePub reader
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | descendants | master | trunk
Files: files | file ages | folders
SHA3-256:3a830eb6e544633a06267a4cf15f5a3274118ce108113ed2c8f18a57fb0b4cd2
User & Date: ajv-899-334-8894@vsta.org 2016-10-17 14:31:08
Context
2016-10-17
19:54
git status cleanup check-in: 323162f35a user: ajv-899-334-8894@vsta.org tags: master, trunk
14:31
Initial bringup, ePub reader check-in: 3a830eb6e5 user: ajv-899-334-8894@vsta.org tags: master, trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace

Added css/main.css.



>
1
../chore/css/main.css

Added css/normalize.css.



>
1
../chore/css/normalize.css

Added epub/__init__.py.













































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
# -*- coding: utf-8 -*-
"""Library to open and read files in the epub version 2."""
from __future__ import unicode_literals


__author__ = 'Florian Strzelecki <florian.strzelecki@gmail.com>'
__version__ = '0.5.3'
__all__ = ['opf', 'ncx', 'utils']


import os
import shutil
import tempfile
import uuid
import warnings
import zipfile

from xml.dom import minidom

from . import ncx, opf, utils


MIMETYPE_EPUB = 'application/epub+zip'
MIMETYPE_OPF = 'application/oebps-package+xml'
MIMETYPE_NCX = 'application/x-dtbncx+xml'

DEFAULT_OPF_PATH = 'OEBPS/content.opf'
DEFAULT_NCX_PATH = 'toc.ncx'


def open(filename, mode=None):
    """Open an epub file and return an EpubFile object.

    Deprecated since 0.5.0: this wrapper emits a DeprecationWarning and then
    delegates to `open_epub` with the same `filename` and `mode`.

    """
    # stacklevel=2 attributes the warning to the code calling `epub.open`
    # rather than to this line, so the deprecated call site can be located.
    warnings.warn('Function `epub.open` is deprecated since 0.5.0.',
                  DeprecationWarning, stacklevel=2)
    return open_epub(filename, mode)


def open_epub(filename, mode=None):
    """Build and return an EpubFile for `filename`.

    `mode` is handed to EpubFile unchanged.

    """
    epub_file = EpubFile(filename, mode)
    return epub_file


class BadEpubFile(zipfile.BadZipfile):
    """Specialization of zipfile.BadZipfile for epub archives.

    NOTE(review): presumably meant for malformed epub files; nothing in the
    code visible here raises it.

    """


class EpubFile(zipfile.ZipFile):
    """Represent an epub zip file, as described in version 2.0.1 of epub spec.

    This class gives access to the real epub file through a low-level API.
    It extends zipfile.ZipFile and only slightly modifies its behavior.

    Attributes set by the constructor: `uid`, `opf_path` (path of the OPF
    file inside the archive), `opf` (the Opf object) and `toc` (the Ncx
    object).

    See http://idpf.org/epub/201 for more information about Epub 2.0.1.

    """
    @property
    def content_path(self):
        """Return the content path, ie, the directory holding the OPF file.

        If OPF file is located in `OEBPS/content.opf`, then `content_path` is
        equal to `OEBPS`. Backslashes are converted to `/`.

        """
        return os.path.dirname(self.opf_path).replace('\\', '/')

    def __init__(self, filename, mode=None):
        """Open the Epub zip file with mode read "r", write "w" or append "a".

        A missing (None or empty) `mode` means read-only. In read mode the
        existing epub is parsed; in write mode an empty epub is prepared; in
        append mode the choice depends on whether the archive already has
        members.
        """
        mode = mode or 'r'
        zipfile.ZipFile.__init__(self, filename, mode)
        self.uid = None
        self.opf_path = None
        self.opf = None
        self.toc = None

        if self.mode == 'r':
            self._init_read()
        elif self.mode == 'w':
            self._init_new()
        elif self.mode == 'a':
            # An archive without any member is treated as a brand new epub.
            if not self.namelist():
                self._init_new()
            else:
                self._init_read()

    def _init_new(self):
        """Build an empty epub archive."""
        # Write mimetype file: 'application/epub+zip'
        self.writestr('mimetype', MIMETYPE_EPUB)
        # Default path for opf
        self.opf_path = DEFAULT_OPF_PATH
        # Uid & Uid's id
        uid_id = 'BookId'
        self.uid = '%s' % uuid.uuid4()
        # Create metadata, manifest, and spine, as minimalist as possible
        metadata = opf.Metadata()
        metadata.add_identifier(self.uid, uid_id, 'uid')
        manifest = opf.Manifest()
        manifest.add_item('ncx', 'toc.ncx', MIMETYPE_NCX)
        spine = opf.Spine('ncx')
        # Create Opf object
        self.opf = opf.Opf(uid_id=uid_id,
                           metadata=metadata, manifest=manifest, spine=spine)
        # Create Ncx object
        self.toc = ncx.Ncx()
        self.toc.uid = self.uid

    def _init_read(self):
        """Get content from existing epub file.

        Reads `META-INF/container.xml` to locate the OPF file, parses the
        OPF, then parses the NCX referenced by the spine. A SyntaxWarning is
        emitted when no uid or no NCX item can be found.
        """
        # Read container.xml to get OPF xml file path
        xmlstring = self.read('META-INF/container.xml')
        container_xml = minidom.parseString(xmlstring).documentElement

        for element in container_xml.getElementsByTagName('rootfile'):
            if element.getAttribute('media-type') == MIMETYPE_OPF:
                # Only take the first full-path available
                self.opf_path = element.getAttribute('full-path')
                break

        # Read OPF xml file
        xml_string = self.read(self.opf_path)
        self.opf = opf.parse_opf(xml_string)
        # Keep the identifier entries whose element at index 1 matches the
        # OPF's uid_id; the whole first matching entry becomes `self.uid`.
        uids = [x for x in self.opf.metadata.identifiers
                if x[1] == self.opf.uid_id]
        if uids:
            self.uid = uids[0]
        else:
            self.uid = None
            warnings.warn('The ePub does not define any uid', SyntaxWarning)

        item_toc = self.get_item(self.opf.spine.toc)

        # Inspect NCX toc file
        self.toc = None
        if item_toc is not None:
            self.toc = ncx.parse_toc(self.read_item(item_toc))
        else:
            warnings.warn('The ePub does not define any NCX file',
                          SyntaxWarning)
            # Fall back to an empty table of contents carrying the same uid.
            self.toc = ncx.Ncx()
            self.toc.uid = self.uid

    def close(self):
        """Close the archive, writing the generated epub files if needed.

        Does nothing when the archive is already closed. In write or append
        mode the container, OPF and NCX files are regenerated first.
        """
        if self.fp is None:
            return
        if self.mode in ('w', 'a'):
            self._write_close()
        zipfile.ZipFile.close(self)

    def remove_paths(self, paths):
        """Remove files from the archive

        Warning: This will be slow, it needs to recreate from scratch the
        complete archive.

        Every member whose filename is not in `paths` is copied to a
        temporary archive, which then replaces the original file; the
        archive is finally reopened so it can still be used.

        This method (well, the whole behavior of "write epub file") needs
        a rework in a future version.

        """
        original_mode = self.mode
        # Reopening with 'w' would truncate the archive that was just
        # rebuilt and lose every kept member, so 'w' is reopened as append.
        reopen_mode = 'a' if original_mode == 'w' else original_mode

        # Only a unique name is needed: the descriptor is closed right away
        # so the file is not held open while it is rewritten and moved.
        descriptor, temp_name = tempfile.mkstemp()
        os.close(descriptor)

        moved = False
        try:
            with zipfile.ZipFile(temp_name, 'w') as new_zip:
                for item in self.infolist():
                    if item.filename not in paths:
                        new_zip.writestr(item, self.read(item.filename))
            zipfile.ZipFile.close(self)
            shutil.move(temp_name, self.filename)
            moved = True
        finally:
            # Do not leave the temporary archive behind on failure.
            if not moved and os.path.exists(temp_name):
                os.remove(temp_name)

        zipfile.ZipFile.__init__(self, self.filename, reopen_mode)
        # Keep reporting the mode the epub was opened with.
        self.mode = original_mode

    def _write_close(self):
        """Handle writes when closing epub.

        Both new file mode (w) and append file mode (a), some files must be
        generated: container, OPF, and NCX.

        """
        item_toc = self.get_item(self.opf.spine.toc)

        toc_path = None
        if item_toc:
            # Replace \ by /, no matter what OS's separator could be
            toc_path = os.path.join(
                self.content_path, item_toc.href
            ).replace('\\', '/')

        # Remove the old files
        to_remove = ['META-INF/container.xml', self.opf_path]
        if item_toc:
            to_remove.append(toc_path)

        self.remove_paths(to_remove)

        # Write META-INF/container.xml
        self.writestr('META-INF/container.xml',
                      self._build_container().encode('utf-8'))
        # Write OPF File
        self.writestr(self.opf_path,
                      self.opf.as_xml_document().toxml().encode('utf-8'))
        # Write NCX File if exist
        if item_toc:
            toc_content = self.toc.as_xml_document().toxml().encode('utf-8')
            self.writestr(toc_path, toc_content)

    def _build_container(self):
        """Build a simple XML container as in epub 2.0.1 specification.

        Return the XML text with `opf_path` as the rootfile full-path.
        NOTE(review): `opf_path` is inserted without XML escaping.
        """
        template = """<?xml version="1.0" encoding="UTF-8"?>
    <container version="1.0"
               xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
        <rootfiles>
             <rootfile full-path="%s"
                       media-type="application/oebps-package+xml"/>
        </rootfiles>
    </container>"""
        return template % self.opf_path

    def add_item(self, filename, manifest_item,
                 append_to_spine=False, is_linear=True):
        """Add a file to epub.

        A manifest item must be provided to describe it; the file is stored
        at the item's href, relative to the content path.

        This function will raise a RuntimeError if epub is already closed. It
        will raise an IOError if epub is open in read-only (`r` mode).

        Optional: you can use `append_to_spine` flag (default=False) to append
        item to spine, and use `is_linear` (default=True) to specify if it is
        linear or not.

        """
        self.check_mode_write()
        self.opf.manifest.append(manifest_item)

        write_path = os.path.join(
            self.content_path, manifest_item.href
        ).replace('\\', '/')

        self.write(filename, write_path)

        if append_to_spine:
            self.opf.spine.add_itemref(manifest_item.identifier, is_linear)

    def check_mode_write(self):
        """Raise error if epub file is not writable.

        Raise RuntimeError if file is already closed.

        Raise IOError if file is opened read-only.

        """
        if not self.fp:
            raise RuntimeError(
                  'Attempt to write to EPUB file that was already closed')

        if self.mode == 'r':
            raise IOError(
                  'Attempt to write to EPUB file that was open as read-only.')

    # extract method is  zipfile.ZipFile.extract(member[, path[, pwd]])

    def extract_item(self, item, to_path=None):
        """Extract an item from its href in epub to `to_path` location.

        `item` is either an object with an `href` attribute or the href
        itself; it is resolved relative to the content path. Return the
        value returned by `extract`.
        """
        path = item if not hasattr(item, 'href') else item.href
        member_path = os.path.join(self.content_path, path).replace('\\', '/')

        return self.extract(member=member_path, path=to_path)

    def get_item(self, identifier):
        """Get an item from manifest through its "id" attribute.

        Return an EpubManifestItem if found, else None.

        """
        return self.opf.manifest.get(identifier, None)

    def get_item_by_href(self, href):
        """Get an item from manifest through its "href" attribute.

        Return an EpubManifestItem if found, else None. Raise LookupError
        when several items share this href.

        """
        found = [x for x in self.opf.manifest.values() if x.href == href]
        size = len(found)
        if size == 1:
            return found[0]
        elif size > 1:
            raise LookupError('Multiple items are found with this href.')
        else:
            return None

    # read method is zipfile.ZipFile.read(path)

    def read_item(self, item):
        """Read a file from the epub zipfile container.

        "item" parameter can be the relative path to the opf file or an
        EpubManifestItem object.

        Html fragments are not acceptable : the path must be exactly the same
        as indicated in the opf file.

        """
        path = item
        if hasattr(item, 'href'):
            path = item.href

        return self.read(
            # Replace \ by /, as ZipFile always uses / as path separator.
            os.path.join(self.content_path, path).replace('\\', '/')
        )


class Book(object):
    """Simpler object model wrapped around an EpubFile.

    Apart from `chapters` and `extra_chapters`, every property is a read-only
    shortcut to `epub_file.opf.metadata`.

    WARNING: Work in progress. Use with caution.

    """

    def __init__(self, epub_file):
        # The wrapped epub file; all properties read from its `opf`.
        self.epub_file = epub_file

    @property
    def _metadata(self):
        """Metadata object of the wrapped epub's OPF."""
        return self.epub_file.opf.metadata

    def _chapters_where(self, want_linear):
        """Build BookChapter objects for spine itemrefs whose linear flag
        has the requested truthiness, in spine order."""
        selected = []
        for identifier, linear in self.epub_file.opf.spine.itemrefs:
            if bool(linear) == want_linear:
                selected.append(BookChapter(self, identifier))
        return selected

    @property
    def creators(self):
        return self._metadata.creators

    @property
    def description(self):
        return self._metadata.description

    @property
    def isbn(self):
        # Unlike the other shortcuts, this one calls a metadata method.
        return self._metadata.get_isbn()

    @property
    def publisher(self):
        return self._metadata.publisher

    @property
    def contributors(self):
        return self._metadata.contributors

    @property
    def dates(self):
        return self._metadata.dates

    @property
    def dc_type(self):
        return self._metadata.dc_type

    @property
    def dc_format(self):
        # Exposed as `dc_format` but read from the `format` attribute.
        return self._metadata.format

    @property
    def identifiers(self):
        return self._metadata.identifiers

    @property
    def source(self):
        return self._metadata.source

    @property
    def languages(self):
        return self._metadata.languages

    @property
    def relation(self):
        return self._metadata.relation

    @property
    def coverage(self):
        return self._metadata.coverage

    @property
    def right(self):
        return self._metadata.right

    @property
    def metas(self):
        return self._metadata.metas

    @property
    def subjects(self):
        return self._metadata.subjects

    @property
    def titles(self):
        return self._metadata.titles

    @property
    def chapters(self):
        """
        Return the list of linear chapters found in the spine.
        """
        return self._chapters_where(True)

    @property
    def extra_chapters(self):
        """
        Return the list of non-linear chapters found in the spine.
        """
        return self._chapters_where(False)


class BookChapter(object):
    """A chapter of a Book, backed by one manifest item of its epub file."""

    def __init__(self, book, identifier, fragment=None):
        # The manifest item is looked up once, at construction time.
        epub_file = book.epub_file
        self._book = book
        self._manifest_item = epub_file.get_item(identifier)
        self._fragment = fragment

    @property
    def identifier(self):
        """Identifier of the underlying manifest item."""
        item = self._manifest_item
        return item.identifier

    def read(self):
        """Return the content of the manifest item, read from the epub."""
        epub_file = self._book.epub_file
        return epub_file.read_item(self._manifest_item)

Added epub/ncx.py.







































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
# -*- coding: utf-8 -*-
from __future__ import unicode_literals


"""
Python lib for reading NCX formatted files for epub.

There are some differences between the original NCX format and the one used
for Epub; see the official documentation for more information.

NCX doc: http://www.niso.org/workrooms/daisy/Z39-86-2005.html#NCX
NCX Epub spec: http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.4.1
"""


from xml.dom import minidom


def parse_toc(xmlstring):
    """Parse an NCX formatted xml document and return an Ncx object.

    The `<head>`, `<docTitle>` and `<navMap>` elements are accessed by index
    and therefore raise IndexError when missing; `<docAuthor>`, `<pageList>`
    and `<navList>` are optional.
    """
    toc = Ncx()
    root = minidom.parseString(xmlstring).documentElement

    # Root attributes only override the Ncx defaults when they are non-empty.
    for xml_name, field in (('xmlns', 'xmlns'),
                            ('version', 'version'),
                            ('xml:lang', 'lang')):
        value = root.getAttribute(xml_name)
        if value:
            setattr(toc, field, value)

    # head > meta: known names default to ''; unknown names are collected
    # too but never copied to the Ncx object.
    head = root.getElementsByTagName('head')[0]
    metas = dict.fromkeys(('dtb:uid',
                           'dtb:depth',
                           'dtb:totalPageCount',
                           'dtb:maxPageNumber',
                           'dtb:generator'), '')
    for meta_node in head.getElementsByTagName('meta'):
        meta_name = meta_node.getAttribute('name')
        metas[meta_name] = meta_node.getAttribute('content')

    toc.uid = metas['dtb:uid']
    toc.depth = metas['dtb:depth']
    toc.total_page_count = metas['dtb:totalPageCount']
    toc.max_page_number = metas['dtb:maxPageNumber']
    toc.generator = metas['dtb:generator']

    # Title: the first <docTitle> is used.
    toc.title = _parse_for_text_tag(root.getElementsByTagName('docTitle')[0])

    # Authors: zero or more <docAuthor>.
    toc.authors.extend([_parse_for_text_tag(author_node)
                        for author_node
                        in root.getElementsByTagName('docAuthor')])

    # Navigation map: the first <navMap> is used.
    toc.nav_map = _parse_xml_nav_map(root.getElementsByTagName('navMap')[0])

    # Page list: only the first <pageList>, when there is one.
    page_lists = root.getElementsByTagName('pageList')
    if page_lists:
        toc.page_list = _parse_xml_page_list(page_lists[0])

    # Navigation lists: every <navList> is kept.
    for nav_list_node in root.getElementsByTagName('navList'):
        toc.add_nav_list(_parse_xml_nav_list(nav_list_node))

    return toc


def _parse_xml_nav_map(element):
    """Build a NavMap object from an xml.dom.Element <navMap>.

    Only direct element children are inspected: <navLabel>, <navInfo> and
    <navPoint>; any other tag is skipped.
    """
    nav_map = NavMap()
    nav_map.identifier = element.getAttribute('id')

    child_elements = [child for child in element.childNodes
                      if child.nodeType == child.ELEMENT_NODE]
    for child in child_elements:
        tag = child.tagName
        if tag == 'navPoint':
            nav_map.add_point(_parse_xml_nav_point(child))
        elif tag in ('navLabel', 'navInfo'):
            # Labels and infos carry the same triple: text, language, dir.
            text = _parse_for_text_tag(child)
            lang = child.getAttribute('xml:lang')
            direction = child.getAttribute('dir')
            if tag == 'navLabel':
                nav_map.add_label(text, lang, direction)
            else:
                nav_map.add_info(text, lang, direction)

    return nav_map


def _parse_xml_nav_point(element):
    """Build a NavPoint object from an xml.dom.Element <navPoint>.

    Nested <navPoint> children are parsed recursively.
    """
    nav_point = NavPoint()
    # Copy the xml attributes onto the matching NavPoint fields.
    for field, xml_name in (('identifier', 'id'),
                            ('class_name', 'class'),
                            ('play_order', 'playOrder')):
        setattr(nav_point, field, element.getAttribute(xml_name))

    child_elements = [child for child in element.childNodes
                      if child.nodeType == child.ELEMENT_NODE]
    for child in child_elements:
        tag = child.tagName
        if tag == 'navLabel':
            text = _parse_for_text_tag(child)
            nav_point.add_label(text,
                                child.getAttribute('xml:lang'),
                                child.getAttribute('dir'))
        elif tag == 'content':
            nav_point.src = child.getAttribute('src')
        elif tag == 'navPoint':
            sub_point = _parse_xml_nav_point(child)
            nav_point.add_point(sub_point)

    return nav_point


def _parse_xml_page_list(element):
    """Build a PageList object from an xml.dom.Element <pageList>.

    Direct <navLabel>, <navInfo> and <pageTarget> children are handled;
    other tags are skipped.
    """
    page_list = PageList()
    page_list.identifier = element.getAttribute('id')
    page_list.class_name = element.getAttribute('class')

    child_elements = [child for child in element.childNodes
                      if child.nodeType == child.ELEMENT_NODE]
    for child in child_elements:
        tag = child.tagName
        if tag == 'pageTarget':
            page_list.add_target(_parse_xml_page_target(child))
            continue
        if tag not in ('navLabel', 'navInfo'):
            continue
        # Labels and infos carry the same triple: text, language, dir.
        text = _parse_for_text_tag(child)
        lang = child.getAttribute('xml:lang')
        direction = child.getAttribute('dir')
        if tag == 'navLabel':
            page_list.add_label(text, lang, direction)
        else:
            page_list.add_info(text, lang, direction)

    return page_list


def _parse_xml_page_target(element):
    """Build a PageTarget object from an xml.dom.Element <pageTarget>."""
    page_target = PageTarget()
    # Copy the xml attributes onto the matching PageTarget fields.
    for field, xml_name in (('identifier', 'id'),
                            ('value', 'value'),
                            ('target_type', 'type'),
                            ('class_name', 'class'),
                            ('play_order', 'playOrder')):
        setattr(page_target, field, element.getAttribute(xml_name))

    child_elements = [child for child in element.childNodes
                      if child.nodeType == child.ELEMENT_NODE]
    for child in child_elements:
        tag = child.tagName
        if tag == 'content':
            page_target.src = child.getAttribute('src')
        elif tag == 'navLabel':
            text = _parse_for_text_tag(child)
            page_target.add_label(text,
                                  child.getAttribute('xml:lang'),
                                  child.getAttribute('dir'))

    return page_target


def _parse_xml_nav_list(element):
    """Build a NavList object from an xml.dom.Element <navList>.

    Direct <navLabel>, <navInfo> and <navTarget> children are handled;
    other tags are skipped.
    """
    nav_list = NavList()
    nav_list.identifier = element.getAttribute('id')
    nav_list.class_name = element.getAttribute('class')

    child_elements = [child for child in element.childNodes
                      if child.nodeType == child.ELEMENT_NODE]
    for child in child_elements:
        tag = child.tagName
        if tag == 'navTarget':
            nav_list.add_target(_parse_xml_nav_target(child))
            continue
        if tag not in ('navLabel', 'navInfo'):
            continue
        # Labels and infos carry the same triple: text, language, dir.
        text = _parse_for_text_tag(child)
        lang = child.getAttribute('xml:lang')
        direction = child.getAttribute('dir')
        if tag == 'navLabel':
            nav_list.add_label(text, lang, direction)
        else:
            nav_list.add_info(text, lang, direction)

    return nav_list


def _parse_xml_nav_target(element):
    """Build a NavTarget object from an xml.dom.Element <navTarget>."""
    nav_target = NavTarget()
    # Copy the xml attributes onto the matching NavTarget fields.
    for field, xml_name in (('identifier', 'id'),
                            ('value', 'value'),
                            ('class_name', 'class'),
                            ('play_order', 'playOrder')):
        setattr(nav_target, field, element.getAttribute(xml_name))

    child_elements = [child for child in element.childNodes
                      if child.nodeType == child.ELEMENT_NODE]
    for child in child_elements:
        tag = child.tagName
        if tag == 'content':
            nav_target.src = child.getAttribute('src')
        elif tag == 'navLabel':
            text = _parse_for_text_tag(child)
            nav_target.add_label(text,
                                 child.getAttribute('xml:lang'),
                                 child.getAttribute('dir'))

    return nav_target


def _parse_for_text_tag(xml_element, name=None):
    """Inspect an xml.dom.Element with a child 'name' to get its text value.

    NCX file has many element with a child likes
    "navLabel" > "text" > TEXT_NODE
    and this function allow to avoid some boilerplate code.

    First parameter must be an xml.dom.Element, having one child named by the
    second parameter (by default a "text" tag).

    If nothing is founded, an empty string '' is returned.

    Whitespaces and tabulations are stripped."""
    name = name or 'text'
    tags = [e for e in xml_element.childNodes
              if e.nodeType == e.ELEMENT_NODE and e.tagName == name]
    text = ''
    if len(tags) > 0:
        tag = tags[0]
        if tag.firstChild and tag.firstChild.data:
            tag.normalize()
            text = tag.firstChild.data.strip()
    return text


def _create_xml_element_text(data, name=None):
    """Create a <text> ... </text> Element node.

    You can use a different tag name with the name argument
    (default is "text").

    If data is None or empty, it will create an empty element tag, eg. :
    <emptyTag/> instead of <emptyTag></emptyTag>"""
    if name is None:
        name = 'text'
    doc = minidom.Document()
    element = doc.createElement(name)
    if data:
        element.appendChild(doc.createTextNode(data))
    return element


class Ncx(object):
    """In-memory model of the structured content of a NCX file."""

    def __init__(self, nav_map=None, page_list=None):
        self.xmlns = 'http://www.daisy.org/z3986/2005/ncx/'
        self.version = '2005-1'
        self.lang = None
        # The five values below are written as <meta> entries of <head>.
        self.uid = None
        self.depth = None
        self.total_page_count = None
        self.max_page_number = None
        self.generator = None
        # Texts of the <docTitle> and <docAuthor> elements.
        self.title = None
        self.authors = []
        # Fresh containers are created only when none is handed in.
        self.nav_map = NavMap() if nav_map is None else nav_map
        self.page_list = PageList() if page_list is None else page_list
        self.nav_lists = []

    def add_nav_list(self, nav_list):
        """Append `nav_list` to the navigation lists of this NCX."""
        self.nav_lists.append(nav_list)

    def as_xml_document(self):
        """Return an xml dom Document node."""
        document = minidom.Document()
        root = document.createElement('ncx')
        root.setAttribute('xmlns', self.xmlns)
        root.setAttribute('version', self.version)
        if self.lang:
            root.setAttribute('xml:lang', self.lang)

        # <head> with the meta-data
        root.appendChild(self._head_as_xml_element())

        # <docTitle> is always written, even without a title
        doc_title = document.createElement('docTitle')
        doc_title.appendChild(_create_xml_element_text(self.title))
        root.appendChild(doc_title)

        # one <docAuthor> per author
        for author_name in self.authors:
            doc_author = document.createElement('docAuthor')
            doc_author.appendChild(_create_xml_element_text(author_name))
            root.appendChild(doc_author)

        # <navMap>, then <pageList> (when truthy), then every <navList>
        root.appendChild(self.nav_map.as_xml_element())
        if self.page_list:
            root.appendChild(self.page_list.as_xml_element())
        for entry in self.nav_lists:
            root.appendChild(entry.as_xml_element())

        document.appendChild(root)
        return document

    def _head_as_xml_element(self):
        """Create an xml Element node <head> with meta-data of Ncx item."""
        head = minidom.Document().createElement('head')
        entries = (
            ('dtb:uid', self.uid),
            ('dtb:depth', self.depth),
            ('dtb:totalPageCount', self.total_page_count),
            ('dtb:maxPageNumber', self.max_page_number),
            ('dtb:generator', self.generator),
        )
        for meta_name, meta_value in entries:
            # Unset (falsy) values produce no <meta> at all.
            if meta_value:
                head.appendChild(
                    self._meta_as_xml_element(meta_name, meta_value))
        return head

    def _meta_as_xml_element(self, name, content):
        """Create an xml Element node <meta> with attributes name & content."""
        element = minidom.Document().createElement('meta')
        element.setAttribute('name', name)
        element.setAttribute('content', content)
        return element


class NavMap(object):
    """Represent the navMap tag of an NCX file."""

    def __init__(self):
        self.identifier = None
        # Labels and infos are stored as (text, lang, direction) tuples.
        self.labels = []
        self.infos = []
        self.nav_point = []

    def add_label(self, label, lang=None, direction=None):
        """Store a label; a missing lang or direction is kept as ''."""
        self.labels.append((label, lang or '', direction or ''))

    def add_info(self, label, lang=None, direction=None):
        """Store an info; a missing lang or direction is kept as ''."""
        self.infos.append((label, lang or '', direction or ''))

    def add_point(self, point):
        """Append a navigation point to this map."""
        self.nav_point.append(point)

    def as_xml_element(self):
        """Return an xml dom Element node."""
        factory = minidom.Document()
        element = factory.createElement('navMap')

        if self.identifier:
            element.setAttribute('id', self.identifier)

        # Every <navLabel> comes first, then every <navInfo>.
        groups = (('navLabel', self.labels), ('navInfo', self.infos))
        for tag_name, entries in groups:
            for text, lang, direction in entries:
                node = factory.createElement(tag_name)
                node.appendChild(_create_xml_element_text(text))
                if lang:
                    node.setAttribute('xml:lang', lang)
                if direction:
                    node.setAttribute('dir', direction)
                element.appendChild(node)

        # The navigation points close the element.
        for point in self.nav_point:
            element.appendChild(point.as_xml_element())

        return element


class NavPoint(object):
    """A navPoint entry, which may hold nested navigation points."""

    def __init__(self):
        self.identifier = None
        self.class_name = None
        self.play_order = None
        # Each label is stored as a (text, lang, direction) tuple.
        self.labels = []
        self.src = None
        # Child navigation points, serialized inside this one.
        self.nav_point = []

    def add_label(self, label, lang=None, direction=None):
        """Store a label; a missing lang or direction is kept as ''."""
        self.labels.append((label, lang or '', direction or ''))

    def add_point(self, nav_point):
        """Append a child navigation point."""
        self.nav_point.append(nav_point)

    def as_xml_element(self):
        """Return an xml dom Element node."""
        factory = minidom.Document()
        element = factory.createElement('navPoint')

        # Attributes are written only when they hold a truthy value.
        optional_attributes = (
            ('id', self.identifier),
            ('class', self.class_name),
            ('playOrder', self.play_order),
        )
        for attr_name, attr_value in optional_attributes:
            if attr_value:
                element.setAttribute(attr_name, attr_value)

        # One <navLabel> per stored label.
        for text, lang, direction in self.labels:
            nav_label = factory.createElement('navLabel')
            nav_label.appendChild(_create_xml_element_text(text))
            if lang:
                nav_label.setAttribute('xml:lang', lang)
            if direction:
                nav_label.setAttribute('dir', direction)
            element.appendChild(nav_label)

        # <content> is always emitted and carries the source.
        target = factory.createElement('content')
        target.setAttribute('src', self.src)
        element.appendChild(target)

        # Child points are serialized recursively, after <content>.
        for child in self.nav_point:
            element.appendChild(child.as_xml_element())

        return element


class PageList(object):
    """Model of a pageList element: labels, infos and page targets."""

    def __init__(self):
        self.identifier = None
        self.class_name = None
        self.page_target = []
        # Labels and infos are stored as (text, lang, direction) tuples.
        self.labels = []
        self.infos = []

    def add_label(self, label, lang=None, direction=None):
        """Store a label; a missing lang or direction is kept as ''."""
        self.labels.append((label, lang or '', direction or ''))

    def add_info(self, label, lang=None, direction=None):
        """Store an info; a missing lang or direction is kept as ''."""
        self.infos.append((label, lang or '', direction or ''))

    def add_target(self, page_target):
        """Append a page target to this list."""
        self.page_target.append(page_target)

    def as_xml_element(self):
        """Return an xml dom Element node."""
        factory = minidom.Document()
        element = factory.createElement('pageList')

        # Attributes are written only when they hold a truthy value.
        for attr_name, attr_value in (('id', self.identifier),
                                      ('class', self.class_name)):
            if attr_value:
                element.setAttribute(attr_name, attr_value)

        # Every <navLabel> comes first, then every <navInfo>.
        groups = (('navLabel', self.labels), ('navInfo', self.infos))
        for tag_name, entries in groups:
            for text, lang, direction in entries:
                node = factory.createElement(tag_name)
                node.appendChild(_create_xml_element_text(text))
                if lang:
                    node.setAttribute('xml:lang', lang)
                if direction:
                    node.setAttribute('dir', direction)
                element.appendChild(node)

        # The page targets close the element.
        for target in self.page_target:
            element.appendChild(target.as_xml_element())

        return element


class PageTarget(object):
    """Model of a pageTarget element: attributes, labels and a content."""

    def __init__(self):
        self.identifier = None
        self.value = None
        # Written as the "type" attribute of the element.
        self.target_type = None
        self.class_name = None
        self.play_order = None
        self.src = None
        # Each label is stored as a (text, lang, direction) tuple.
        self.labels = []

    def add_label(self, label, lang=None, direction=None):
        """Store a label; a missing lang or direction is kept as ''."""
        self.labels.append((label, lang or '', direction or ''))

    def as_xml_element(self):
        """Return an xml dom Element node."""
        factory = minidom.Document()
        element = factory.createElement('pageTarget')

        # Attributes are written only when they hold a truthy value.
        optional_attributes = (
            ('id', self.identifier),
            ('value', self.value),
            ('type', self.target_type),
            ('class', self.class_name),
            ('playOrder', self.play_order),
        )
        for attr_name, attr_value in optional_attributes:
            if attr_value:
                element.setAttribute(attr_name, attr_value)

        # One <navLabel> per stored label.
        for text, lang, direction in self.labels:
            nav_label = factory.createElement('navLabel')
            nav_label.appendChild(_create_xml_element_text(text))
            if lang:
                nav_label.setAttribute('xml:lang', lang)
            if direction:
                nav_label.setAttribute('dir', direction)
            element.appendChild(nav_label)

        # <content> is always emitted and carries the source.
        target = factory.createElement('content')
        target.setAttribute('src', self.src)
        element.appendChild(target)

        return element


class NavList(object):
    """Model of a navList element: labels, infos and navigation targets."""

    def __init__(self):
        self.identifier = None
        self.class_name = None
        self.nav_target = []
        # Labels and infos are stored as (text, lang, direction) tuples.
        self.labels = []
        self.infos = []

    def add_label(self, label, lang=None, direction=None):
        """Store a label; a missing lang or direction is kept as ''."""
        self.labels.append((label, lang or '', direction or ''))

    def add_info(self, label, lang=None, direction=None):
        """Store an info; a missing lang or direction is kept as ''."""
        self.infos.append((label, lang or '', direction or ''))

    def add_target(self, nav_target):
        """Append a navigation target to this list."""
        self.nav_target.append(nav_target)

    def as_xml_element(self):
        """Return an xml dom Element node."""
        factory = minidom.Document()
        element = factory.createElement('navList')

        # Attributes are written only when they hold a truthy value.
        for attr_name, attr_value in (('id', self.identifier),
                                      ('class', self.class_name)):
            if attr_value:
                element.setAttribute(attr_name, attr_value)

        # Every <navLabel> comes first, then every <navInfo>.
        groups = (('navLabel', self.labels), ('navInfo', self.infos))
        for tag_name, entries in groups:
            for text, lang, direction in entries:
                node = factory.createElement(tag_name)
                node.appendChild(_create_xml_element_text(text))
                if lang:
                    node.setAttribute('xml:lang', lang)
                if direction:
                    node.setAttribute('dir', direction)
                element.appendChild(node)

        # The navigation targets close the element.
        for target in self.nav_target:
            element.appendChild(target.as_xml_element())

        return element


class NavTarget(object):
    """Model of a navTarget element: attributes, labels and a content."""

    def __init__(self):
        self.identifier = None
        self.class_name = None
        self.value = None
        self.play_order = None
        # Each label is stored as a (text, lang, direction) tuple.
        self.labels = []
        self.src = None

    def add_label(self, label, lang=None, direction=None):
        """Store a label; a missing lang or direction is kept as ''."""
        self.labels.append((label, lang or '', direction or ''))

    def as_xml_element(self):
        """Return an xml dom Element node."""
        factory = minidom.Document()
        element = factory.createElement('navTarget')

        # Attributes are written only when they hold a truthy value.
        optional_attributes = (
            ('id', self.identifier),
            ('class', self.class_name),
            ('value', self.value),
            ('playOrder', self.play_order),
        )
        for attr_name, attr_value in optional_attributes:
            if attr_value:
                element.setAttribute(attr_name, attr_value)

        # One <navLabel> per stored label.
        for text, lang, direction in self.labels:
            nav_label = factory.createElement('navLabel')
            nav_label.appendChild(_create_xml_element_text(text))
            if lang:
                nav_label.setAttribute('xml:lang', lang)
            if direction:
                nav_label.setAttribute('dir', direction)
            element.appendChild(nav_label)

        # <content> is always emitted and carries the source.
        target = factory.createElement('content')
        target.setAttribute('src', self.src)
        element.appendChild(target)

        return element

Added epub/opf.py.















































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
# -*- coding: utf-8 -*-
from __future__ import unicode_literals


"""
Python lib for reading OPF formatted files for epub.

Since the "Tour" element is deprecated in Epub 2, it is not supported by this
library.

OPF epub : http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm
"""


from xml.dom import minidom


try:
    # Only for Python 2.7+
    from collections import OrderedDict
except ImportError:
    try:
        # For Python 2.6
        from ordereddict import OrderedDict
    except ImportError:
        raise ImportError(
            'You should use Python 2.7 or install `ordereddict` from pypi.')


from epub.utils import get_node_text


XMLNS_DC = 'http://purl.org/dc/elements/1.1/'
XMLNS_OPF = 'http://www.idpf.org/2007/opf'


def parse_opf(xml_string):
    """Parse the content of an OPF file and return the matching Opf object.

    xml_string -- the OPF document, handed as-is to minidom.parseString.

    The <metadata>, <manifest> and <spine> children of the root element are
    always parsed; <guide> is parsed only when it is present.

    The `unique-identifier` and `version` attributes of the root element are
    carried over to the returned Opf, so the version declared by the file is
    kept instead of being replaced by the Opf default.
    """
    package = minidom.parseString(xml_string).documentElement

    # Get Uid
    uid_id = package.getAttribute('unique-identifier')

    # Get the declared version: getAttribute() gives '' when the attribute
    # is missing, and Opf falls back to its own default for a falsy version.
    version = package.getAttribute('version')

    # Store each child nodes into a dict (metadata, manifest, spine, guide)
    data = {'metadata': None,
            'manifest': None,
            'spine': None,
            'guide': None}
    elements = [e for e in package.childNodes if e.nodeType == e.ELEMENT_NODE]
    for node in elements:
        tag = node.tagName.lower()
        # Children may carry the "opf:" prefix: index them by their bare name.
        if tag.startswith('opf:'):
            tag = tag[4:]
        data[tag] = node

    # Inspect metadata
    metadata = _parse_xml_metadata(data['metadata'])

    # Inspect manifest
    manifest = _parse_xml_manifest(data['manifest'])

    # Inspect spine
    spine = _parse_xml_spine(data['spine'])

    # Inspect guide if exist
    if data['guide'] is None:
        guide = None
    else:
        guide = _parse_xml_guide(data['guide'])

    opf = Opf(uid_id=uid_id,
              version=version,
              metadata=metadata,
              manifest=manifest,
              spine=spine,
              guide=guide)
    return opf


def _parse_xml_metadata(element):
    """Build a Metadata object from an xml.dom.Element (ELEMENT_NODE).

    The "<metadata>" tag holds many values about the epub; each known tag is
    looked up by name and stored into the matching attribute or list of the
    returned object (like "title" or "creator").
    """
    metadata = Metadata()
    find = element.getElementsByTagName

    def keep_last(tag_name, attribute):
        # Single-valued fields: every occurrence overwrites the previous one.
        for found in find(tag_name):
            setattr(metadata, attribute, get_node_text(found))

    for found in find('dc:title'):
        metadata.add_title(get_node_text(found),
                           found.getAttribute('xml:lang'))

    for found in find('dc:creator'):
        metadata.add_creator(get_node_text(found),
                             found.getAttribute('opf:role'),
                             found.getAttribute('opf:file-as'))

    for found in find('dc:subject'):
        metadata.add_subject(get_node_text(found))

    keep_last('dc:description', 'description')
    keep_last('dc:publisher', 'publisher')

    for found in find('dc:contributor'):
        metadata.add_contributor(get_node_text(found),
                                 found.getAttribute('opf:role'),
                                 found.getAttribute('opf:file-as'))

    for found in find('dc:date'):
        metadata.add_date(get_node_text(found),
                          found.getAttribute('opf:event'))

    keep_last('dc:type', 'dc_type')
    keep_last('dc:format', 'format')

    for found in find('dc:identifier'):
        metadata.add_identifier(get_node_text(found),
                                found.getAttribute('id'),
                                found.getAttribute('opf:scheme'))

    keep_last('dc:source', 'source')

    for found in find('dc:language'):
        metadata.add_language(get_node_text(found))

    keep_last('dc:relation', 'relation')
    keep_last('dc:coverage', 'coverage')
    keep_last('dc:rights', 'right')

    for found in find('meta'):
        metadata.add_meta(found.getAttribute('name'),
                          found.getAttribute('content'))

    return metadata


def _parse_xml_manifest(element):
    """Build a Manifest from an xml.dom.Element <manifest>.

    Each <item> found under the element is registered through
    Manifest.add_item with its attributes."""

    # Attribute names, in the positional order expected by add_item.
    attribute_names = ('id',
                       'href',
                       'media-type',
                       'fallback',
                       'required-namespace',
                       'required-modules',
                       'fallback-style')
    manifest = Manifest()
    for item in element.getElementsByTagName('item'):
        values = [item.getAttribute(attr) for attr in attribute_names]
        manifest.add_item(*values)
    return manifest


def _parse_xml_spine(element):
    """Build a Spine object from an xml.dom.Element <spine>.

    The "toc" attribute is copied over, and each <itemref> is registered
    with its "idref"; it is linear unless its "linear" attribute is "no"
    (compared case-insensitively)."""

    spine = Spine()
    spine.toc = element.getAttribute('toc')
    for itemref in element.getElementsByTagName('itemref'):
        idref = itemref.getAttribute('idref')
        is_linear = not (itemref.getAttribute('linear').lower() == 'no')
        spine.add_itemref(idref, is_linear)
    return spine


def _parse_xml_guide(element):
    """Build a Guide object from an xml.dom.Element <guide>.

    Each <reference> found under the element is registered with its
    "href", "type" and "title" attributes, in that order."""

    guide = Guide()
    for reference in element.getElementsByTagName('reference'):
        values = [reference.getAttribute(attr)
                  for attr in ('href', 'type', 'title')]
        guide.add_reference(*values)
    return guide


class Opf(object):
    """Model of an OPF formatted file.

    OPF is an xml formatted file, used in the epub spec."""

    def __init__(self, uid_id=None, version=None, xmlns=None,
                 metadata=None, manifest=None, spine=None, guide=None):
        self.uid_id = uid_id
        # A falsy version or namespace is replaced by the default value.
        self.version = version or '2.0'
        self.xmlns = xmlns or XMLNS_OPF

        # Missing sections are replaced by fresh, empty ones.
        self.metadata = Metadata() if metadata is None else metadata
        self.manifest = Manifest() if manifest is None else manifest
        self.spine = Spine() if spine is None else spine
        self.guide = Guide() if guide is None else guide

    def as_xml_document(self):
        """Return an xml dom Document whose root is the <package> element."""
        document = minidom.Document()
        root = document.createElement('package')
        root.setAttribute('version', self.version)
        root.setAttribute('unique-identifier', self.uid_id)
        root.setAttribute('xmlns', self.xmlns)
        # Sections are serialized in a fixed order.
        sections = (self.metadata, self.manifest, self.spine, self.guide)
        for section in sections:
            root.appendChild(section.as_xml_element())
        document.appendChild(root)
        return document


class Metadata(object):
    """Hold the set of metadata of an epub.

    See http://idpf.org/epub/20/spec/OPF_2.0.1_draft.htm#Section2.2"""

    def __init__(self):
        # List attributes hold repeatable entries (tuples when the entry
        # carries attributes); the others hold a single value or None.
        self.titles = []
        self.creators = []
        self.subjects = []
        self.description = None
        self.publisher = None
        self.contributors = []
        self.dates = []
        self.dc_type = None
        self.format = None
        self.identifiers = []
        self.source = None
        self.languages = []
        self.relation = None
        self.coverage = None
        self.right = None
        self.metas = []

    def add_title(self, title, lang=None):
        """Store a (title, lang) tuple; a missing lang is kept as ''."""
        self.titles.append((title, lang or ''))

    def add_creator(self, name, role=None, file_as=None):
        """Store a (name, role, file_as) tuple; missing values become ''."""
        self.creators.append((name, role or '', file_as or ''))

    def add_subject(self, subject):
        """Store a subject."""
        self.subjects.append(subject)

    def add_contributor(self, name, role=None, file_as=None):
        """Store a (name, role, file_as) tuple; missing values become ''."""
        self.contributors.append((name, role or '', file_as or ''))

    def add_date(self, date, event=None):
        """Store a (date, event) tuple; a missing event is kept as ''."""
        self.dates.append((date, event or ''))

    def add_identifier(self, content, identifier=None, scheme=None):
        """Store a (content, identifier, scheme) tuple; missing values
        become ''."""
        self.identifiers.append((content, identifier or '', scheme or ''))

    def add_language(self, lang):
        """Store a language."""
        self.languages.append(lang)

    def add_meta(self, name, content):
        """Store a (name, content) tuple."""
        self.metas.append((name, content))

    def get_isbn(self):
        """Return the content of the first identifier whose scheme is
        "isbn" (compared case-insensitively), or None without any."""
        matches = [entry[0] for entry in self.identifiers
                   if entry[2].lower() == 'isbn']
        return matches[0] if matches else None

    def as_xml_element(self):
        """Return an xml dom Element node."""
        factory = minidom.Document()
        root = factory.createElement('metadata')
        root.setAttribute('xmlns:dc', XMLNS_DC)
        root.setAttribute('xmlns:opf', XMLNS_OPF)

        def emit(tag_name, text, attributes=()):
            # Append <tag_name>text</tag_name>; attributes holding a falsy
            # value are left out.
            node = factory.createElement(tag_name)
            for attr_name, attr_value in attributes:
                if attr_value:
                    node.setAttribute(attr_name, attr_value)
            node.appendChild(factory.createTextNode(text))
            root.appendChild(node)

        def emit_if_set(tag_name, text):
            # Single-valued fields are written only when they are truthy.
            if text:
                emit(tag_name, text)

        for text, lang in self.titles:
            emit('dc:title', text, (('xml:lang', lang),))

        for name, role, file_as in self.creators:
            emit('dc:creator', name,
                 (('opf:role', role), ('opf:file-as', file_as)))

        for text in self.subjects:
            emit('dc:subject', text)

        emit_if_set('dc:description', self.description)
        emit_if_set('dc:publisher', self.publisher)

        for name, role, file_as in self.contributors:
            emit('dc:contributor', name,
                 (('opf:role', role), ('opf:file-as', file_as)))

        for text, event in self.dates:
            emit('dc:date', text, (('opf:event', event),))

        emit_if_set('dc:type', self.dc_type)
        emit_if_set('dc:format', self.format)

        for text, identifier, scheme in self.identifiers:
            emit('dc:identifier', text,
                 (('id', identifier), ('opf:scheme', scheme)))

        emit_if_set('dc:source', self.source)

        for text in self.languages:
            emit('dc:language', text)

        emit_if_set('dc:relation', self.relation)
        emit_if_set('dc:coverage', self.coverage)
        emit_if_set('dc:rights', self.right)

        # <meta> entries always carry both attributes and have no text.
        for name, content in self.metas:
            meta = factory.createElement('meta')
            meta.setAttribute('name', name)
            meta.setAttribute('content', content)
            root.appendChild(meta)

        return root


class Manifest(OrderedDict):
    """Ordered mapping of manifest items, indexed by their identifier."""

    def __contains__(self, item):
        """Accept either a key or an object exposing `identifier`."""
        key = item.identifier if hasattr(item, 'identifier') else item
        return super(Manifest, self).__contains__(key)

    def __setitem__(self, key, value):
        """Store `value` under `key`.

        A ValueError is raised when `value` lacks an `identifier` or `href`
        attribute, or when its identifier is not equal to `key`."""
        if not (hasattr(value, 'identifier') and hasattr(value, 'href')):
            requierements = 'id and href attributes'
            msg = 'Value does not fit the requirement (%s).' % requierements
            raise ValueError(msg)
        if not value.identifier == key:
            raise ValueError('Value\'s id is different from insert key.')
        super(Manifest, self).__setitem__(key, value)

    def add_item(self, identifier, href, media_type=None, fallback=None,
                 required_namespace=None, required_modules=None,
                 fallback_style=None):
        """Create a ManifestItem from the arguments and append it."""
        self.append(ManifestItem(identifier, href, media_type,
                                 fallback, required_namespace,
                                 required_modules, fallback_style))

    def append(self, item):
        """Store `item` under its own identifier.

        A ValueError is raised when `item` lacks one of `identifier`,
        `href` or `as_xml_element`."""
        for required in ('identifier', 'href', 'as_xml_element'):
            if not hasattr(item, required):
                raise ValueError('Manifest item must have [identifier, href, '
                                 'as_xml_element()] attributes and method.')
        self.__setitem__(item.identifier, item)

    def as_xml_element(self):
        """Return an xml dom Element node."""
        element = minidom.Document().createElement('manifest')
        # Items are serialized in the order of the mapping's values.
        for entry in self.values():
            element.appendChild(entry.as_xml_element())
        return element


class ManifestItem(object):
    """
    A single `item` entry of the epub's manifest.

    Only `identifier` and `href` are always written out; the remaining
    attributes are emitted only when they hold a truthy value.
    """

    def __init__(self, identifier, href, media_type=None, fallback=None,
                 required_namespace=None, required_modules=None,
                 fallback_style=None):
        # Always-serialized attributes.
        self.identifier = identifier
        self.href = href
        # Optional attributes.
        self.media_type = media_type
        self.fallback = fallback
        self.fallback_style = fallback_style
        self.required_namespace = required_namespace
        self.required_modules = required_modules

    def as_xml_element(self):
        """Return an xml dom Element node."""
        element = minidom.Document().createElement("item")
        element.setAttribute('id', self.identifier)
        element.setAttribute('href', self.href)

        # (xml attribute name, value) pairs, in serialization order.
        optional = (
            ('media-type', self.media_type),
            ('fallback', self.fallback),
            ('required-namespace', self.required_namespace),
            ('required-modules', self.required_modules),
            ('fallback-style', self.fallback_style),
        )
        for attr_name, attr_value in optional:
            if attr_value:
                element.setAttribute(attr_name, attr_value)

        return element


class Spine(object):
    """
    Represent the epub's spine: an ordered list of item references.

    `toc` is the value written to the spine's `toc` attribute;
    `itemrefs` is a list of `(idref, linear)` tuples.
    """

    def __init__(self, toc=None, itemrefs=None):
        self.toc = toc
        # A fresh list per instance; never share a default list.
        if itemrefs is None:
            self.itemrefs = []
        else:
            self.itemrefs = itemrefs

    def add_itemref(self, idref, linear=True):
        """Append a reference to the manifest item `idref`."""
        self.append((idref, linear))

    def append(self, itemref):
        """Append an `(idref, linear)` tuple to the spine."""
        self.itemrefs.append(itemref)

    def as_xml_element(self):
        """Return an xml dom Element node."""
        doc = minidom.Document()
        spine = doc.createElement('spine')
        # `toc` defaults to None; writing None as an attribute value yields
        # an element that cannot be serialized, so only emit it when set.
        if self.toc:
            spine.setAttribute('toc', self.toc)

        for idref, linear in self.itemrefs:
            itemref = doc.createElement('itemref')
            itemref.setAttribute('idref', idref)
            # Only the non-linear case is written out explicitly.
            if not linear:
                itemref.setAttribute('linear', 'no')
            spine.appendChild(itemref)

        return spine


class Guide(object):
    """
    Represent the epub's guide: a list of references.

    Each reference is stored as a `(href, ref_type, title)` tuple.
    """

    def __init__(self):
        self.references = []

    def add_reference(self, href, ref_type=None, title=None):
        """Append a reference built from the given values."""
        self.append((href, ref_type, title))

    def append(self, reference):
        """Append a `(href, ref_type, title)` tuple to the guide."""
        self.references.append(reference)

    def as_xml_element(self):
        """Return an xml dom Element node."""
        doc = minidom.Document()
        guide = doc.createElement('guide')

        for href, ref_type, title in self.references:
            reference = doc.createElement('reference')
            # Test the reference's own type, not the `type` builtin (which
            # is always truthy and let a None value through).
            if ref_type:
                reference.setAttribute('type', ref_type)
            if title:
                reference.setAttribute('title', title)
            if href:
                reference.setAttribute('href', href)
            guide.appendChild(reference)

        return guide

Added epub/utils.py.













































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# -*- coding: utf-8 -*-
from __future__ import unicode_literals


def get_node_text(node):
    """
    Return the text content of an xml.dom Element Node.

    Only the first child of the (normalized) node is considered; its data
    is returned with surrounding whitespace stripped.

    If node does not have content, or if its first child carries no
    character data (e.g. it is an element), this function return an empty
    string.
    """
    # Merge adjacent text nodes so the first child holds the whole run.
    node.normalize()

    # Element children have no `data` attribute; treat that as "no text"
    # instead of raising AttributeError.
    data = getattr(node.firstChild, 'data', None)
    if not data:
        return ''

    return data.strip()


def get_urlpath_part(urlpath):
    """
    Split a path into its href and its url fragment.

    This function allow to use path from references and NCX file to read
    item from Manifest with a correct href (without losing the fragment part).

    The path is cut at the first `#`; the fragment is returned without
    that leading `#`, and is `None` when the path contains no `#` at all.

    eg.:

        url = 'text/chapter1.xhtml#part2'
        href, fragment = get_urlpath_part(url)
        print href # 'text/chapter1.xhtml'
        print fragment # 'part2'
    """
    if '#' not in urlpath:
        return (urlpath, None)

    # partition() cuts at the first '#' only, so a fragment that itself
    # contains '#' no longer breaks the two-value unpacking.
    href, _, fragment = urlpath.partition('#')
    return (href, fragment)

Added get.py.





















































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#
# get.py
#	Mixin to implement HTML GET operations
#
# Structure of paths in the server:
#  /
#	Main UI.  Lists the top-level document categories.
#  /<prefix>/...
#	For each configured prefix of files, its contents are served
#	by way of its path under here.
import pdb
import os, stat, urllib
import epub

# The GET part of our handling
class GET_Mixin(object):
    # Mixin supplying the GET side of the ePub reader.
    #
    # The host handler class must supply self.server, self.path,
    #  self.dispatchers, self.build_header(), self.build_tailer()
    #  and self.send_result(); none of them are defined here.

    # Configure our ePub reader GET treatment
    def __init__(self):

        # GET handlers
        self.dispatchers.append( ("GET", self.open_path) )

    # "/"; main UI
    # Returns whatever self.send_result() returns.
    def send_top(self):
        app = self.server
        webroot = app.approot
        buf = self.build_header("EPUB categories")

        # Provide a list of top-level document categories
        buf += "<ul>\n"
        for f,cfg in webroot.config["files"]:
            buf += ' <li><a href="/%s/">%s</a></li>\n' % (f,f)
        buf += "</ul>\n"

        buf = self.build_tailer(buf)

        return self.send_result(buf, "text/html")

    # Access something along a path
    # It can be an actual epub file, or a path to a directory
    #  where we'll list what's available
    #
    # Recognized shapes of self.path, below a configured /<prefix>/:
    #  .../<book>.epub/<n>   chapter <n> of the book, counted from 1
    #  .../<book>.epub       index of the book's chapters
    #  .../<dir>             listing of .epub files and sub-folders
    #
    # Returns (True, <send_result() value>) when the path was served,
    #  and (False, None) when it was not.
    def open_path(self):
        app = self.server
        webroot = app.approot
        for f,cfg in webroot.config["files"]:
            section = f
            f = '/%s/' % (f,)
            if self.path.startswith(f):
                break
        else:
            # Not a known path
            return False,None

        # We have the "cfg" for this prefix, point at the
        #  root of those files
        path = cfg["path"]
        rest = self.path.replace(f, '', 1)

        # Deflect nonsense
        # Split into *every* path component; os.path.split() only
        #  peels off the last one, which let ".." hide in the head
        #  and escape the configured root.
        parts = [p for p in rest.split('/') if p]
        if ".." in parts:
            return False,None

        # Is this an active book?
        if (len(parts) > 1) and parts[-2].endswith(".epub"):
            try:
                chapnum = int(parts[-1])
            except ValueError:
                return False,None

            # Chapters are numbered from 1, matching the links
            #  generated by the book index below
            if chapnum < 1:
                return False,None

            # Access the book file
            try:
                doc = epub.open_epub(
                 os.path.join(path, *(parts[:-1])), "r")
                book = epub.Book(doc)
                chap = book.chapters[chapnum-1]
            except Exception:
                # Unreadable book, or no such chapter
                return False,None

            # Here's your chapter
            nm = book.titles[0] if book.titles else parts[-2]
            head = "%s chapter %d" % (nm, chapnum)
            buf = self.build_header(head)
            buf += chap.read()
            buf = self.build_tailer(buf)

            return True,self.send_result(buf, "text/html")

        # Index of book?
        if parts and parts[-1].endswith(".epub"):
            try:
                doc = epub.open_epub(
                 os.path.join(path, *parts), "r")
                book = epub.Book(doc)
            except Exception:
                return False,None
            buf = self.build_header("<h2>Chapters</h2><p>\n")
            buf += "<ul>\n"
            for chnum in range(1, len(book.chapters) + 1):
                tpath = [section] + parts + [ "%d" % (chnum,) ]
                tpath = os.path.join(*tpath)
                buf += ' <li><a href="/%s">%d</a></li>\n' % \
                    (tpath, chnum)
            buf += "</ul>\n"

            # Your index
            buf = self.build_tailer(buf)
            return True,self.send_result(buf, "text/html")

        # List a folder/directory
        dirpath = os.path.join(path, *parts)
        try:
            st = os.stat(dirpath)
        except OSError:
            return False,None
        if not stat.S_ISDIR(st.st_mode):
            return False,None

        # Listing; files, then sub-folders
        # Only the top level of the walk is wanted
        _top,dirs,files = next(os.walk(dirpath), (dirpath, [], []))
        books = [f for f in files if f.endswith(".epub")]
        if (not books) and (not dirs):
            return False,None
        label = parts[-1] if parts else section
        buf = self.build_header("<h2>%s</h2><p>\n" % (label,))

        # List book files (if any)
        if books:
            buf += "<p><h3>Books</h3>\n<ul>\n"
            for f in books:
                # Label is the file name less its ".epub"
                buf += ' <li><a href="%s">%s</a></li>\n' % \
                    (os.path.join(self.path, f), f[:-5])
            buf += "</ul>\n"

        # List sub-folders (if any)
        if dirs:
            buf += "<p><h3>Sub-Folders</h3>\n<ul>\n"
            for d in dirs:
                buf += ' <li><a href="%s">%s</a></li>\n' % \
                    (os.path.join(self.path, d), d)
            buf += "</ul>\n"

        buf = self.build_tailer(buf)
        return True,self.send_result(buf, "text/html")

Added main.py.















































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#
# main.py
#	Main driver for the web ePub reader
#
import sys, time
import chore
from get import GET_Mixin

# Tie our various handlers together
class App_Handler(chore.handlers.Chore_Handler, GET_Mixin):
    # The mixin initializers are handed to Chore_Handler as a tuple
    #  (presumably it calls each of them on this instance; confirm
    #  against chore.handlers).
    def __init__(self, conn, tup, approot):
        mixin_inits = (GET_Mixin.__init__,)
        base = chore.handlers.Chore_Handler
        base.__init__(self, conn, tup, approot, mixin_inits)

# Load our configuration file
#
# This includes configuring our config file elements,
#  then processing the supplied file.
def load_cfg(fn):
    # Returns whatever the configurator's load_cfg() produces
    #  for the file named by "fn".

    # A configurator, which the web network side gets to extend
    #  with its own config entries first
    conf = chore.config.Config()
    chore.www.add_config(conf)

    # Here's how our config file looks:
    #  "files" entries, each with a "path" below it
    files_key = ("files",)
    conf.onearg.add(files_key)
    conf.mults.add(files_key)
    conf.onearg.add(files_key + ("path",))

    # Parse the input
    parsed = conf.load_cfg(fn)
    return parsed

# Root of our app server
class App(chore.server.Server):

    # All we add to the Chore server is our choice of
    #  handler class; Chore handles most things
    def __init__(self, config):
        server_base = chore.server.Server
        server_base.__init__(self, config, App_Handler)

if __name__ == "__main__":
    # Exactly one argument is expected: the config file
    argv = sys.argv
    if len(argv) != 2:
        usage = "Usage is: %s <config-file>\n" % (argv[0],)
        sys.stderr.write(usage)
        sys.exit(1)

    # Create the server with config
    cfg_path = argv[1]
    app = App(load_cfg(cfg_path))

    # It's an HTTP service
    app.start_http()

    # HTTP servers each get their own thread.
    sys.exit(0)