#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
#
# Mirror of https://github.com/noDRM/DeDRM_tools.git
# (synced 2025-10-23 23:07:47 -04:00; 721 lines, 25 KiB, Python)

class Unbuffered:
    """Stream wrapper that flushes the underlying stream after every write.

    Any attribute other than ``write`` is looked up on the wrapped stream,
    so the wrapper can stand in for the stream it wraps.
    """

    def __init__(self, stream):
        # stream: the object that receives the writes and flushes.
        self.stream = stream

    def write(self, data):
        """Write data to the wrapped stream and flush it immediately."""
        self.stream.write(data)
        self.stream.flush()

    def __getattr__(self, attr):
        # Only reached for attributes not found on the wrapper itself.
        return getattr(self.stream, attr)

import sys
# Replace stdout with a wrapper that flushes after each write.
sys.stdout = Unbuffered(sys.stdout)

import csv
import os
import getopt
from struct import pack
from struct import unpack

class TpzDRMError(Exception):
    """Error raised by this module, e.g. by Dictionary.lookup on a bad index."""
    pass

# local support routines
# The helper modules are imported from the calibre plugin package when the
# 'calibre' module is already loaded, and as plain top-level modules otherwise.
inCalibre = 'calibre' in sys.modules

if inCalibre:
    from calibre_plugins.dedrm import convert2xml
    from calibre_plugins.dedrm import flatxml2html
    from calibre_plugins.dedrm import flatxml2svg
    from calibre_plugins.dedrm import stylexml2css
else:
    import convert2xml
    import flatxml2html
    import flatxml2svg
    import stylexml2css

# global switch: when True, generateBook also writes XML dumps into <bookDir>/xml
buildXML = False

# Get a 7 bit encoded number from a file
def readEncodedNumber(file):
    """Read one variable-length integer from ``file``.

    Encoding, as decoded here: an optional leading 0xFF byte marks the
    number as negative; the magnitude follows as 7-bit groups, most
    significant first, where every byte except the last has its high bit
    (0x80) set.

    file: object with a ``read(n)`` method returning a byte string.
    Returns the decoded int, or None if the data ends before the number
    is complete.
    """
    c = file.read(1)
    if len(c) == 0:
        return None
    byte = ord(c)

    negative = (byte == 0xFF)
    if negative:
        # 0xFF is only a sign marker; the magnitude starts at the next byte.
        c = file.read(1)
        if len(c) == 0:
            return None
        byte = ord(c)

    value = byte & 0x7F
    while byte >= 0x80:
        # High bit set: another 7-bit group follows.
        c = file.read(1)
        if len(c) == 0:
            return None
        byte = ord(c)
        value = (value << 7) + (byte & 0x7F)

    if negative:
        return -value
    return value

# Encode a number in the 7 bit format that readEncodedNumber decodes
def encodeNumber(number):
    """Return ``number`` as a byte string in the format readEncodedNumber reads.

    The magnitude is emitted as 7-bit groups, most significant first, with
    the high bit set on every byte except the last.  Negative numbers are
    prefixed with a 0xFF marker byte.
    """
    negative = number < 0
    if negative:
        number = -number

    # Build least-significant group first, then reverse.
    groups = [number & 0x7F]
    number >>= 7
    while number:
        groups.append((number & 0x7F) | 0x80)
        number >>= 7

    if groups[-1] == 0xFF:
        # A leading 0xFF would be read back as the negative marker, so
        # put an empty continuation group in front of it.
        groups.append(0x80)
    if negative:
        groups.append(0xFF)

    groups.reverse()
    return bytes(bytearray(groups))


# Build a length prefixed string
def lengthPrefixString(data):
    """Return byte string ``data`` preceded by its encoded length."""
    return encodeNumber(len(data)) + data

# Get a length prefixed string from the file
def readString(file):
    """Read a length-prefixed byte string from ``file``.

    The length is read with readEncodedNumber, then that many bytes are read.

    Returns None if no length could be read, "" if fewer bytes than the
    announced length were available, otherwise the bytes read.
    """
    stringLength = readEncodedNumber(file)
    if stringLength is None:
        return None
    sv = file.read(stringLength)
    if len(sv) != stringLength:
        return ""
    # The bytes read are already the string; no unpack step is needed.
    return sv

def getMetaArray(metaFile):
    """Parse the metadata file at path ``metaFile`` into a dict.

    The file holds an encoded entry count followed by that many
    (tag, value) pairs of length-prefixed strings.

    Returns a dict mapping tag to value.  An empty file gives an empty
    dict, and reading stops at the first tag that cannot be read.
    """
    result = {}
    with open(metaFile, 'rb') as fo:
        size = readEncodedNumber(fo)
        # readEncodedNumber returns None on an empty file: treat as no entries.
        for _ in range(size or 0):
            tag = readString(fo)
            if tag is None:
                # Data ended before the announced number of entries.
                break
            value = readString(fo)
            result[tag] = value
    return result


# dictionary of all text strings by index value
class Dictionary(object):
    """String table loaded from a dictionary file and looked up by index.

    The file holds an encoded entry count followed by that many
    length-prefixed strings.  Each string is markup-escaped once when it
    is loaded.
    """

    def __init__(self, dictFile):
        """Load every string from the file at path ``dictFile``."""
        self.filename = dictFile
        self.size = 0
        self.stable = []
        self.pos = 0
        self.fo = open(dictFile, 'rb')
        try:
            count = readEncodedNumber(self.fo)
            # count is None when the file is empty.
            for _ in range(count or 0):
                entry = readString(self.fo)
                if entry is None:
                    # Data ended before the announced number of strings.
                    break
                self.stable.append(self.escapestr(entry))
        finally:
            # Everything has been read; do not keep the handle open.
            self.fo.close()
        # Bound lookups by what was actually loaded.
        self.size = len(self.stable)

    def escapestr(self, str):
        """Return ``str`` with &, <, > and = replaced by character references."""
        # '&' must be replaced first so the references added below are
        # not escaped a second time.
        str = str.replace('&', '&amp;')
        str = str.replace('<', '&lt;')
        str = str.replace('>', '&gt;')
        str = str.replace('=', '&#61;')
        return str

    def lookup(self, val):
        """Return the string at index ``val`` and remember it as the position.

        Raises TpzDRMError (after printing a message) if ``val`` is outside
        the table.
        """
        if (val >= 0) and (val < self.size):
            self.pos = val
            return self.stable[self.pos]
        print("Error: %d outside of string table limits" % val)
        raise TpzDRMError('outside or string table limits')

    def getSize(self):
        """Return the number of strings in the table."""
        return self.size

    def getPos(self):
        """Return the index of the last successful lookup (0 initially)."""
        return self.pos


class PageDimParser(object):
    """Finds the page height and width entries in a flat-xml document.

    The document is a string of newline-separated items, each either
    ``name`` or ``name=value``.
    """

    def __init__(self, flatxml):
        self.flatdoc = flatxml.split('\n')

    # find tag if within pos to end inclusive
    def findinDoc(self, tagpath, pos, end):
        """Find the first item in [pos, end) whose name ends with ``tagpath``.

        ``end`` of -1 means "to the end of the document"; larger values are
        clipped to the document length.

        Returns (index, value) for the match, where value is the text after
        the first '=' ('' if the item has none), or (-1, None) if no item
        matches.
        """
        docList = self.flatdoc
        cnt = len(docList)
        if end == -1:
            end = cnt
        else:
            end = min(cnt, end)
        for j in range(pos, end):
            # Split on the first '=' only, so a value may contain '='.
            name, _sep, argres = docList[j].partition('=')
            if name.endswith(tagpath):
                return j, argres
        return -1, None

    def process(self):
        """Return (height, width) as strings; '-1' stands for a missing entry."""
        (pos, sph) = self.findinDoc('page.h', 0, -1)
        (pos, spw) = self.findinDoc('page.w', 0, -1)
        if sph is None:
            sph = '-1'
        if spw is None:
            spw = '-1'
        return sph, spw

def getPageDim(flatxml):
    """Return (height, width) strings found in ``flatxml`` by PageDimParser.

    A dimension that is not present is returned as '-1'.
    """
    # create a document parser
    dp = PageDimParser(flatxml)
    (ph, pw) = dp.process()
    return ph, pw

class GParser(object):
    """Parses glyph outline data from a flat-xml document and builds SVG paths.

    The document is a string of newline-separated ``name=v1|v2|...`` items.
    The items read here are info.glyph.{h,w,use,vtx,len,dpi}, info.vtx.{x,y}
    and info.len.n; each is stored as a list of ints, or None when absent.
    """

    def __init__(self, flatxml):
        self.flatdoc = flatxml.split('\n')
        # Resolution the glyph coordinates are scaled to.
        self.dpi = 1440
        self.gh = self.getData('info.glyph.h')
        self.gw = self.getData('info.glyph.w')
        self.guse = self.getData('info.glyph.use')
        if self.guse:
            self.count = len(self.guse)
        else:
            self.count = 0
        self.gvtx = self.getData('info.glyph.vtx')
        self.glen = self.getData('info.glyph.len')
        self.gdpi = self.getData('info.glyph.dpi')
        self.vx = self.getData('info.vtx.x')
        self.vy = self.getData('info.vtx.y')
        self.vlen = self.getData('info.len.n')
        # Append an end marker to each offset table so that entries
        # [gly] and [gly+1] always delimit glyph gly's slice.
        self._closeOffsets(self.glen, self.vlen)
        self._closeOffsets(self.gvtx, self.vx)

    @staticmethod
    def _closeOffsets(offsets, values):
        """Append the end offset (len(values), or 0) to the ``offsets`` list."""
        if values:
            # An absent offsets table (None) is left alone instead of crashing.
            if offsets is not None:
                offsets.append(len(values))
        elif offsets:
            offsets.append(0)

    def getData(self, path):
        """Return the values of the first item named exactly ``path``.

        Returns a list of ints ([] if the item has no '='), or None if no
        item has that name.  Raises ValueError if a value of the matching
        item is not an integer.
        """
        for item in self.flatdoc:
            # Split on the first '=' only, so unrelated items containing
            # several '=' cannot break the scan.
            name, sep, argt = item.partition('=')
            if name == path:
                if not sep:
                    return []
                # Only the matching item's values are converted.
                return [int(arg) for arg in argt.split('|')]
        return None

    def getGlyphDim(self, gly):
        """Return (height, width) of glyph ``gly`` scaled to self.dpi.

        Returns (0, 0) when the glyph's own dpi is 0.
        """
        gdpi = self.gdpi[gly]
        if gdpi == 0:
            return 0, 0
        # '//' keeps the scaling in integers.
        maxh = (self.gh[gly] * self.dpi) // gdpi
        maxw = (self.gw[gly] * self.dpi) // gdpi
        return maxh, maxw

    def getPath(self, gly):
        """Return the SVG path data string for glyph ``gly``.

        Returns '' when ``gly`` is out of range or the glyph's dpi is 0
        (the coordinates cannot be scaled then).
        """
        path = ''
        if (gly < 0) or (gly >= self.count):
            return path
        gdpi = self.gdpi[gly]
        if gdpi == 0:
            return path
        dpi = self.dpi

        def scale(coord):
            # Integer scaling from the glyph's dpi to self.dpi.
            return coord * dpi // gdpi

        # Vertices that belong to this glyph.
        tx = self.vx[self.gvtx[gly]:self.gvtx[gly + 1]]
        ty = self.vy[self.gvtx[gly]:self.gvtx[gly + 1]]

        parts = []
        firstk = self.glen[gly]
        for k in range(firstk, self.glen[gly + 1]):
            # self.vlen[k] is the index of the last vertex of contour k;
            # the first contour starts at vertex 0.
            if k == firstk:
                lo = 0
            else:
                lo = self.vlen[k - 1] + 1
            hi = self.vlen[k] + 1
            zx = tx[lo:hi]
            zy = ty[lo:hi]
            n = len(zx)
            j = 0
            while j < n:
                if j == 0:
                    # Start Position.
                    parts.append('M %d %d ' % (scale(zx[0]), scale(zy[0])))
                elif j <= n - 3:
                    # Cubic Bezier Curve
                    parts.append('C %d %d %d %d %d %d ' % (
                        scale(zx[j]), scale(zy[j]),
                        scale(zx[j + 1]), scale(zy[j + 1]),
                        scale(zx[j + 2]), scale(zy[j + 2])))
                    j += 2
                elif j == n - 2:
                    # Cubic Bezier Curve to Start Position
                    parts.append('C %d %d %d %d %d %d ' % (
                        scale(zx[j]), scale(zy[j]),
                        scale(zx[j + 1]), scale(zy[j + 1]),
                        scale(zx[0]), scale(zy[0])))
                    j += 1
                else:
                    # j == n - 1: Quadratic Bezier Curve to Start Position
                    parts.append('Q %d %d %d %d ' % (
                        scale(zx[j]), scale(zy[j]),
                        scale(zx[0]), scale(zy[0])))
                j += 1
        parts.append('z')
        return ''.join(parts)



# dictionary of glyph path strings, keyed by 'id="gl<N>"'
class GlyphDict(object):
    """Maps glyph id strings of the form ``id="gl<N>"`` to stored paths."""

    def __init__(self):
        self.gdict = {}

    def lookup(self, id):
        """Return the path stored under key ``id``, or None if absent.

        ``id`` is the full key string, e.g. 'id="gl12"', not the number.
        """
        return self.gdict.get(id)

    def addGlyph(self, val, path):
        """Store ``path`` under the key built from glyph number ``val``."""
        key = 'id="gl%d"' % val
        self.gdict[key] = path


def _readFile(path):
    """Return the whole content of the file at ``path``, closing it afterwards."""
    with open(path, 'rb') as fh:
        return fh.read()


def _writeFile(path, data, mode='wb'):
    """Write ``data`` to the file at ``path`` (replacing it) and close it."""
    with open(path, mode) as fh:
        fh.write(data)


def _escapeMarkup(text):
    """Return ``text`` with &, < and > replaced by XML character entities."""
    # '&' first, so the entities added afterwards are not escaped again.
    return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')


def _headMetaLines(meta_array):
    """Return the <meta> lines (Author, Title, optional ASIN and GUID) that
    every generated (x)html head contains."""
    lines = [
        '<meta name="Author" content="' + meta_array['Authors'] + '" />\n',
        '<meta name="Title" content="' + meta_array['Title'] + '" />\n',
    ]
    for key in ('ASIN', 'GUID'):
        if key in meta_array:
            lines.append('<meta name="' + key + '" content="' + meta_array[key] + '" />\n')
    return lines


def generateBook(bookDir, raw, fixedimage):
    """Generate html, svg, css and opf output from an extracted book directory.

    bookDir:    directory holding dict0000.dat, metadata0000.dat,
                other0000.dat and the page, img and glyphs sub-directories.
    raw:        if true, per-page images are written as .svg files,
                otherwise as .xhtml files.
    fixedimage: passed through unchanged to flatxml2html.convert2HTML.

    Output is written below bookDir: style.css, book.html, book.opf,
    index_svg.xhtml, cover.jpg (when img/img0000.jpg exists), the svg
    directory, and the xml directory when the module switch buildXML is set.

    Returns 1 (after printing a message) if a required input is missing,
    0 on completion.
    """
    # sanity check Topaz file extraction
    if not os.path.exists(bookDir):
        print("Can not find directory with unencrypted book")
        return 1

    dictFile = os.path.join(bookDir, 'dict0000.dat')
    if not os.path.exists(dictFile):
        print("Can not find dict0000.dat file")
        return 1

    pageDir = os.path.join(bookDir, 'page')
    if not os.path.exists(pageDir):
        print("Can not find page directory in unencrypted book")
        return 1

    imgDir = os.path.join(bookDir, 'img')
    if not os.path.exists(imgDir):
        print("Can not find image directory in unencrypted book")
        return 1

    glyphsDir = os.path.join(bookDir, 'glyphs')
    if not os.path.exists(glyphsDir):
        print("Can not find glyphs directory in unencrypted book")
        return 1

    metaFile = os.path.join(bookDir, 'metadata0000.dat')
    if not os.path.exists(metaFile):
        print("Can not find metadata0000.dat in unencrypted book")
        return 1

    svgDir = os.path.join(bookDir, 'svg')
    if not os.path.exists(svgDir):
        os.makedirs(svgDir)

    if buildXML:
        xmlDir = os.path.join(bookDir, 'xml')
        if not os.path.exists(xmlDir):
            os.makedirs(xmlDir)

    otherFile = os.path.join(bookDir, 'other0000.dat')
    if not os.path.exists(otherFile):
        print("Can not find other0000.dat in unencrypted book")
        return 1

    print("Updating to color images if available")
    spath = os.path.join(bookDir, 'color_img')
    # "if available": a book without a color_img directory is not an error.
    if os.path.isdir(spath):
        for filename in sorted(os.listdir(spath)):
            imgname = filename.replace('color', 'img')
            imgdata = _readFile(os.path.join(spath, filename))
            _writeFile(os.path.join(imgDir, imgname), imgdata)

    print("Creating cover.jpg")
    isCover = False
    cpath = os.path.join(imgDir, 'img0000.jpg')
    if os.path.isfile(cpath):
        _writeFile(os.path.join(bookDir, 'cover.jpg'), _readFile(cpath))
        isCover = True

    print('Processing Dictionary')
    textdict = Dictionary(dictFile)

    print('Processing Meta Data and creating OPF')
    meta_array = getMetaArray(metaFile)

    # replace special chars in title and authors like & < >
    meta_array['Title'] = _escapeMarkup(meta_array.get('Title', 'No Title Provided'))
    meta_array['Authors'] = _escapeMarkup(meta_array.get('Authors', 'No Authors Provided'))

    if buildXML:
        metastr = "".join(
            '<meta name="' + key + '" content="' + meta_array[key] + '" />\n'
            for key in meta_array)
        _writeFile(os.path.join(xmlDir, 'metadata.xml'), metastr)

    print('Processing StyleSheet')

    # get some scaling info from metadata to use while processing styles
    # and first page info
    fontsize = meta_array.get('fontSize', '135')

    # also get the size of a normal text page
    # get the total number of pages unpacked as a safety check
    numfiles = len(os.listdir(pageDir))

    pnum = int(meta_array.get('firstTextPage', '1'))
    if pnum >= numfiles or pnum < 0:
        # metadata is wrong so just select a page near the front
        # 10% of the book to get a normal text page
        pnum = int(0.10 * numfiles)

    # get page height and width from first text page for use in stylesheet scaling
    # (never ask for a negative page number when pnum is 0)
    pname = 'page%04d.dat' % max(pnum - 1, 0)
    fname = os.path.join(pageDir, pname)
    flat_xml = convert2xml.fromData(textdict, fname)

    (ph, pw) = getPageDim(flat_xml)
    if ph in ('-1', '0'):
        ph = '11000'
    if pw in ('-1', '0'):
        pw = '8500'
    meta_array['pageHeight'] = ph
    meta_array['pageWidth'] = pw
    if 'fontSize' not in meta_array:
        meta_array['fontSize'] = fontsize

    # process other.dat for css info and for map of page files to svg images
    # this map is needed because some pages actually are made up of multiple
    # pageXXXX.xml files
    flat_xml = convert2xml.fromData(textdict, otherFile)

    # extract info.original.pid to get original page information
    pageidnums = stylexml2css.getpageIDMap(flat_xml)
    if len(pageidnums) == 0:
        # no page id information: number the pages by file position
        pageidnums.extend(range(len(os.listdir(pageDir))))

    # create a map from page ids to list of page file nums to process for that page
    pageIDMap = {}
    for filenum, pid in enumerate(pageidnums):
        pageIDMap.setdefault(pid, []).append(filenum)

    # now get the css info
    cssstr, classlst = stylexml2css.convert2CSS(flat_xml, fontsize, ph, pw)
    _writeFile(os.path.join(bookDir, 'style.css'), cssstr)
    if buildXML:
        _writeFile(os.path.join(xmlDir, 'other0000.xml'),
                   convert2xml.getXML(textdict, otherFile))

    print('Processing Glyphs')
    gd = GlyphDict()
    glyfname = os.path.join(svgDir, 'glyphs.svg')
    with open(glyfname, 'w') as glyfile:
        glyfile.write('<?xml version="1.0" standalone="no"?>\n')
        glyfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
        glyfile.write('<svg width="512" height="512" viewBox="0 0 511 511" xmlns="http://www.w3.org/2000/svg" version="1.1">\n')
        glyfile.write('<title>Glyphs for %s</title>\n' % meta_array['Title'])
        glyfile.write('<defs>\n')
        for counter, filename in enumerate(sorted(os.listdir(glyphsDir))):
            # progress indicator: one dot per glyph file
            sys.stdout.write('. ')
            fname = os.path.join(glyphsDir, filename)
            flat_xml = convert2xml.fromData(textdict, fname)

            if buildXML:
                xname = os.path.join(xmlDir, filename.replace('.dat', '.xml'))
                _writeFile(xname, convert2xml.getXML(textdict, fname))

            gp = GParser(flat_xml)
            for i in range(gp.count):
                path = gp.getPath(i)
                maxh, maxw = gp.getGlyphDim(i)
                # glyph ids are numbered as 256 per glyph file
                glyphnum = counter * 256 + i
                fullpath = '<path id="gl%d" d="%s" fill="black" /><!-- width=%d height=%d -->\n' % (glyphnum, path, maxw, maxh)
                glyfile.write(fullpath)
                gd.addGlyph(glyphnum, fullpath)
        glyfile.write('</defs>\n')
        glyfile.write('</svg>\n')
    print(" ")

    # start up the html
    # also build up tocentries while processing html
    htmlFileName = "book.html"
    hlst = [
        '<?xml version="1.0" encoding="utf-8"?>\n',
        '<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.1 Strict//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11-strict.dtd">\n',
        '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">\n',
        '<head>\n',
        '<meta http-equiv="content-type" content="text/html; charset=utf-8"/>\n',
        '<title>' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '</title>\n',
    ]
    hlst.extend(_headMetaLines(meta_array))
    hlst.append('<link href="style.css" rel="stylesheet" type="text/css" />\n')
    hlst.append('</head>\n<body>\n')

    print('Processing Pages')
    # Scale passed to flatxml2svg.convert2SVG below.
    # NOTE(review): an earlier comment here said the output is rendered at
    # twice the books' 1440 DPI, but the value passed is 1440.0 - confirm.
    scaledpi = 1440.0

    xmllst = []
    elst = []

    for filename in sorted(os.listdir(pageDir)):
        # progress indicator: one dot per page file
        sys.stdout.write('. ')
        fname = os.path.join(pageDir, filename)
        flat_xml = convert2xml.fromData(textdict, fname)

        # keep flat_xml for later svg processing
        xmllst.append(flat_xml)

        if buildXML:
            xname = os.path.join(xmlDir, filename.replace('.dat', '.xml'))
            _writeFile(xname, convert2xml.getXML(textdict, fname))

        # first get the html
        pagehtml, tocinfo = flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir, gd, fixedimage)
        elst.append(tocinfo)
        hlst.append(pagehtml)

    # finish up the html string and output it
    hlst.append('</body>\n</html>\n')
    _writeFile(os.path.join(bookDir, htmlFileName), "".join(hlst))
    hlst = None

    print(" ")
    print('Extracting Table of Contents from Amazon OCR')

    # per-page svg output is either raw .svg or .xhtml
    if raw:
        pageext = '.svg'
    else:
        pageext = '.xhtml'

    # first create a table of contents file for the svg images
    tlst = [
        '<?xml version="1.0" encoding="utf-8"?>\n',
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n',
        '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >',
        '<head>\n',
        '<title>' + meta_array['Title'] + '</title>\n',
    ]
    tlst.extend(_headMetaLines(meta_array))
    tlst.append('</head>\n')
    tlst.append('<body>\n')

    tlst.append('<h2>Table of Contents</h2>\n')
    startname = 'page%04d%s' % (pageidnums[0], pageext)
    tlst.append('<h3><a href="' + startname + '">Start of Book</a></h3>\n')

    # build up a table of contents for the svg xhtml output
    # each entry is "title|pagenum"; the text after the last newline is dropped
    toclst = "".join(elst).split('\n')
    elst = None
    toclst.pop()
    for entry in toclst:
        print(entry)
        # split on the last '|' so a title may itself contain '|'
        toctitle, pagenum = entry.rsplit('|', 1)
        fname = 'page%04d%s' % (pageidnums[int(pagenum)], pageext)
        tlst.append('<h3><a href="' + fname + '">' + toctitle + '</a></h3>\n')
    tlst.append('</body>\n')
    tlst.append('</html>\n')
    _writeFile(os.path.join(svgDir, 'toc.xhtml'), "".join(tlst))

    # now create index_svg.xhtml that points to all required files
    slst = [
        '<?xml version="1.0" encoding="utf-8"?>\n',
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n',
        '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >',
        '<head>\n',
        '<title>' + meta_array['Title'] + '</title>\n',
    ]
    slst.extend(_headMetaLines(meta_array))
    slst.append('</head>\n')
    slst.append('<body>\n')

    print("Building svg images of each book page")
    slst.append('<h2>List of Pages</h2>\n')
    slst.append('<div>\n')
    idlst = sorted(pageIDMap.keys())
    lastidx = len(idlst) - 1
    previd = None
    for j, pageid in enumerate(idlst):
        if j < lastidx:
            nextid = idlst[j + 1]
        else:
            nextid = None
        # progress indicator: one dot per page id
        sys.stdout.write('. ')
        # a page id can be made up of several page files
        flat_svg = "".join([xmllst[page] for page in pageIDMap[pageid]])
        svgxml = flatxml2svg.convert2SVG(gd, flat_svg, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi)
        pagename = 'page%04d%s' % (pageid, pageext)
        _writeFile(os.path.join(svgDir, pagename), svgxml, 'w')
        slst.append('<a href="svg/%s">Page %d</a>\n' % (pagename, pageid))
        previd = pageid
    slst.append('</div>\n')
    slst.append('<h2><a href="svg/toc.xhtml">Table of Contents</a></h2>\n')
    slst.append('</body>\n</html>\n')
    _writeFile(os.path.join(bookDir, 'index_svg.xhtml'), "".join(slst))
    slst = None

    print(" ")

    # build the opf file
    opfname = os.path.join(bookDir, 'book.opf')
    olst = []
    olst.append('<?xml version="1.0" encoding="utf-8"?>\n')
    olst.append('<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="guid_id">\n')
    # adding metadata
    olst.append('   <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n')
    if 'GUID' in meta_array:
        olst.append('      <dc:identifier opf:scheme="GUID" id="guid_id">' + meta_array['GUID'] + '</dc:identifier>\n')
    if 'ASIN' in meta_array:
        olst.append('      <dc:identifier opf:scheme="ASIN">' + meta_array['ASIN'] + '</dc:identifier>\n')
    if 'oASIN' in meta_array:
        olst.append('      <dc:identifier opf:scheme="oASIN">' + meta_array['oASIN'] + '</dc:identifier>\n')
    olst.append('      <dc:title>' + meta_array['Title'] + '</dc:title>\n')
    olst.append('      <dc:creator opf:role="aut">' + meta_array['Authors'] + '</dc:creator>\n')
    olst.append('      <dc:language>en</dc:language>\n')
    # the date is only written when the metadata provides one
    if 'UpdateTime' in meta_array:
        olst.append('      <dc:date>' + meta_array['UpdateTime'] + '</dc:date>\n')
    if isCover:
        olst.append('      <meta name="cover" content="bookcover"/>\n')
    olst.append('   </metadata>\n')
    olst.append('<manifest>\n')
    olst.append('   <item id="book" href="book.html" media-type="application/xhtml+xml"/>\n')
    olst.append('   <item id="stylesheet" href="style.css" media-type="text/css"/>\n')
    # adding image files to manifest
    subtypes = {'.jpg': 'jpeg', '.svg': 'svg+xml'}
    for filename in sorted(os.listdir(imgDir)):
        imgname, imgext = os.path.splitext(filename)
        # any other extension is used as the subtype without its leading dot
        subtype = subtypes.get(imgext, imgext.lstrip('.'))
        olst.append('   <item id="' + imgname + '" href="img/' + filename + '" media-type="image/' + subtype + '"/>\n')
    if isCover:
        olst.append('   <item id="bookcover" href="cover.jpg" media-type="image/jpeg" />\n')
    olst.append('</manifest>\n')
    # adding spine
    olst.append('<spine>\n   <itemref idref="book" />\n</spine>\n')
    if isCover:
        olst.append('   <guide>\n')
        olst.append('      <reference href="cover.jpg" type="cover" title="Cover"/>\n')
        olst.append('   </guide>\n')
    olst.append('</package>\n')
    _writeFile(opfname, "".join(olst))
    olst = None

    print('Processing Complete')

    return 0

def usage():
    """Print the command line help text to stdout."""
    print("genbook.py generates a book from the extracted Topaz Files")
    print("Usage:")
    print("    genbook.py [-r] [-h] [--fixed-image] <bookDir>  ")
    print("  ")
    print("Options:")
    print("  -h            :  help - print this usage message")
    print("  -r            :  generate raw svg files (not wrapped in xhtml)")
    print("  --fixed-image :  generate any Fixed Area as an svg image in the html")
    print("  ")


def main(argv):
    """Command line entry point.

    argv: argument list including the program name at index 0; an empty
          value means "use sys.argv".

    Returns the exit status: 0 after -h or a successful generateBook run,
    1 on a usage error, otherwise whatever generateBook returns.
    """
    if len(argv) == 0:
        argv = sys.argv

    try:
        # -r and -h are plain flags; neither takes an argument.
        opts, args = getopt.getopt(argv[1:], "rh", ["fixed-image"])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        return 1

    if len(opts) == 0 and len(args) == 0:
        usage()
        return 1

    raw = 0
    # NOTE(review): fixedimage already defaults to True, so --fixed-image
    # currently changes nothing; the default is kept to preserve output.
    fixedimage = True
    for o, a in opts:
        if o == "-h":
            usage()
            return 0
        if o == "-r":
            raw = 1
        if o == "--fixed-image":
            fixedimage = True

    if len(args) == 0:
        # options were given but no book directory
        usage()
        return 1
    bookDir = args[0]

    rv = generateBook(bookDir, raw, fixedimage)
    return rv


if __name__ == '__main__':
    # An empty argument makes main() fall back to sys.argv.
    sys.exit(main(''))
