# -*- coding: utf-8 -*-
"""Bookkeeping and shell-command helpers for converting uploaded documents.

A source document is registered in the ``convert`` model, fetched into the
local upload directory, converted to PDF with LibreOffice and then split
into HTML pages with pdf2htmlEX.
"""
from demeter.core import *
from datetime import *
import uuid
import os
import os.path

try:
    from shlex import quote as _shell_quote
except ImportError:  # Python 2 has no shlex.quote
    from pipes import quote as _shell_quote


class Convert(object):
    """Registers source documents and drives their PDF/HTML conversion."""

    def getKey(self, site_key, file):
        """Return the ``Demeter.md5`` digest of ``"<site_key>_<file>"``."""
        return Demeter.md5(str(site_key) + '_' + file)

    def get(self, site, site_key, file):
        """Fetch the ``convert`` record for ``file`` on ``site``.

        Returns whatever ``select(type='fetchone')`` yields for the record
        matching ``site`` and the key derived from ``site_key`` and ``file``.
        """
        convert = Demeter.model('convert')
        convert.site_id = site
        convert.key = self.getKey(site_key, file)
        return convert.select(type='fetchone')

    def update(self, site, site_key, file):
        """Ensure a ``convert`` record exists for ``file`` and describe it.

        Returns the dict built by ``getFile`` extended with ``id`` and
        ``status``. For an existing record both come from the stored row;
        for a freshly inserted one ``status`` is reported as 1.
        """
        info = self.getFile(site_key, file)
        convert = Demeter.model('convert')
        convert.site_id = site
        convert.key = info['key']
        existing = convert.select(type='fetchone')
        if existing:
            info['id'] = existing['id']
            info['status'] = existing['status']
            return info

        convert.site_id = site
        # Same assignment order as the individual statements this replaces.
        for field in ('file', 'key', 'name', 'ext', 'local',
                      'path', 'pdf', 'html', 'url'):
            setattr(convert, field, info[field])
        record_id = convert.insert()
        info['status'] = 1
        info['id'] = record_id
        return info

    def getFile(self, site_key, file):
        """Build the descriptive dict for ``file``.

        The dict holds the original ``file``, its ``key``, base ``name`` and
        ``ext`` (including the leading dot), plus the local paths added by
        ``getLocalFile``.
        """
        name, ext = os.path.splitext(os.path.basename(file))
        info = {
            'file': file,
            'key': self.getKey(site_key, file),
            'ext': ext,
            'name': name,
        }
        return self.getLocalFile(site_key, file, info)

    def getLocalFile(self, site_key, file, info):
        """Add the local storage paths for a document to ``info``.

        Files live under ``<Demeter.path>/runtime/upload/<site_key>/<Y>/<M>/<D>``
        and are named after ``info['key']``. Nothing is downloaded here;
        ``handle`` fetches the source file when it is missing.

        ``file`` is unused but kept for interface compatibility.
        Returns the same ``info`` dict, mutated in place.
        """
        day_parts = str(date.today()).split('-')
        relative_dir = '/'.join([str(site_key)] + day_parts)
        upload_dir = File.mkdirs(
            os.path.join(Demeter.path, 'runtime', 'upload', relative_dir))
        base = upload_dir + '/' + info['key']

        info['local'] = base + info['ext']
        # NOTE(review): the pdf/html outputs live in a "<key>/" directory
        # that is not created here - confirm something creates it before
        # `command` does `cd` into it.
        info['pdf'] = base + '/' + info['key'] + '.pdf'
        # TODO: access control is needed here; the html must not be
        # directly accessible.
        info['html'] = base + '/' + info['key'] + '.html'
        info['url'] = info['html'].replace(Demeter.path + 'runtime', '')
        info['path'] = base + '/'
        return info

    def download(self, file, local):
        """Copy ``file`` (an http(s) URL or a local path) to ``local``.

        Returns True when ``local`` exists afterwards, False otherwise.
        A non-success HTTP response is not written to disk, so an error
        page is never mistaken for the document. Network and filesystem
        exceptions propagate to the caller.
        """
        # Only a real URL scheme selects the HTTP path; a local path that
        # merely contains "http" is copied like any other file.
        if file.lower().startswith(('http://', 'https://')):
            import requests
            response = requests.get(file, stream=True)
            try:
                if not response.ok:
                    return False
                with open(local, 'wb') as target:
                    for chunk in response.iter_content(chunk_size=1024):
                        if chunk:
                            target.write(chunk)
            finally:
                response.close()
        else:
            import shutil
            shutil.copyfile(file, local)
        return bool(File.exists(local))

    def command(self, info):
        """Return the shell command converting ``info['local']`` to HTML.

        The command changes into ``info['path']``, converts the source to
        PDF with LibreOffice, then runs pdf2htmlEX on ``info['pdf']`` with
        one ``%d.page`` file per page written to ``info['path']``.

        Paths are shell-quoted because the extension comes from the
        caller-supplied file name; plain paths are left unchanged.
        """
        path = _shell_quote(info['path'])
        local = _shell_quote(info['local'])
        pdf = _shell_quote(info['pdf'])
        return ''.join([
            'cd ', path, ' && ',
            'libreoffice --invisible --convert-to pdf ', local,
            ' && ',
            'pdf2htmlEX --zoom 1.3 --no-drm 1 --split-pages 1 ',
            '--embed-css 1 --embed-javascript 0 --embed-image 0 '
            '--embed-font 1 --process-outline 0 ',
            '--embed-external-font 0 --dest-dir ', path,
            ' --page-filename %d.page ', pdf,
        ])

    def total(self, path):
        """Count the ``.page`` files found anywhere below ``path``."""
        return sum(
            1
            for _root, _dirs, filenames in os.walk(path)
            for filename in filenames
            if os.path.splitext(filename)[1] == '.page'
        )

    def handle(self, id):
        """Run the conversion for the ``convert`` record ``id``.

        Only records whose status is 1 or 4 are processed; a missing
        record or any other status is left untouched. The status is set
        to 2 before work starts, to 3 (with the page count) once the HTML
        output exists, and to 4 when it does not.
        """
        model = Demeter.model('convert')
        model.id = id
        info = model.select(type='fetchone')
        if not info or info['status'] not in (1, 4):
            return

        model.id = id
        model.update({'status': 2})

        if not File.exists(info['local']):
            self.download(info['file'], info['local'])
        if not File.exists(info['html']):
            Shell.popen(self.command(info))

        model.id = id
        if File.exists(info['html']):
            # Record how many pages were produced.
            model.update({'page': self.total(info['path']), 'status': 3})
            return
        model.update({'status': 4})

    def string_switch(self, x, y, z, s=1):
        """Replace ``y`` with ``z`` inside the text file ``x``, in place.

        With ``s == 1`` (default) only the first line containing ``y`` is
        rewritten (every occurrence on that line); with ``s == 'g'`` all
        lines are rewritten.

        Raises ValueError for any other ``s``. The check happens before
        the file is opened for writing, so an invalid mode can no longer
        truncate the file.
        """
        if s != 1 and s != 'g':
            raise ValueError("s must be 1 or 'g', got %r" % (s,))

        with open(x, "r", encoding="utf-8") as source:
            lines = source.readlines()

        first_only = (s == 1)
        replaced = False
        rewritten = []
        for line in lines:
            if y in line and not (first_only and replaced):
                line = line.replace(y, z)
                replaced = True
            rewritten.append(line)

        with open(x, "w", encoding="utf-8") as target:
            target.writelines(rewritten)