123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219 |
- # -*- coding: utf-8 -*-
- from .__load__ import *
class Convert(object):
    """Register source documents and convert them to PDF and HTML pages.

    Records live in the ``convert`` model. The ``status`` values written by
    this class are:

    * ``1`` -- record just inserted by :meth:`update`;
    * ``2`` -- :meth:`handle` has started working on the record;
    * ``3`` -- the HTML output exists and the page count was stored;
    * ``4`` -- the HTML output was not found after running the converter.

    ``Demeter``, ``File``, ``Shell``, ``Image``, ``os`` and ``date`` are
    expected to be provided by the module-level star import.
    """

    def getKey(self, site_key, file):
        """Return ``Demeter.md5`` of ``"<site_key>_<file>"``."""
        return Demeter.md5(str(site_key) + '_' + file)

    def get(self, site, site_key, file):
        """Fetch the ``convert`` record of ``file`` for site ``site``.

        Returns whatever ``select(type='fetchone')`` yields for the
        ``site_id`` / ``key`` pair.
        """
        convert = Demeter.model('convert')
        convert.site_id = site
        convert.key = self.getKey(site_key, file)
        return convert.select(type='fetchone')

    def update(self, site, site_key, file):
        """Return the file info for ``file``, inserting a record if needed.

        The returned dict is the one built by :meth:`getFile`, extended with
        ``id`` and ``status``. A freshly inserted record gets status ``1``;
        an existing record contributes its stored ``id`` and ``status``.
        """
        info = self.getFile(site_key, file)
        convert = Demeter.model('convert')
        convert.site_id = site
        convert.key = info['key']
        data = convert.select(type='fetchone')
        if data:
            info['id'] = data['id']
            info['status'] = data['status']
            return info

        # The lookup fields are assigned again before the insert, exactly as
        # the original code did, in the original order.
        convert.site_id = site
        convert.file = info['file']
        convert.key = info['key']
        convert.name = info['name']
        convert.ext = info['ext']
        convert.local = info['local']
        convert.path = info['path']
        convert.pdf = info['pdf']
        convert.html = info['html']
        convert.url = info['url']
        record_id = convert.insert()
        info['status'] = 1
        info['id'] = record_id
        return info

    def getFile(self, site_key, file):
        """Build the info dict (``file``, ``key``, ``ext``, ``name`` and the
        local paths added by :meth:`getLocalFile`) for ``file``.
        """
        name, extension = os.path.splitext(os.path.basename(file))
        info = {
            'file': file,
            'key': self.getKey(site_key, file),
            'ext': extension,
            'name': name,
        }
        return self.getLocalFile(site_key, file, info)

    def getLocalFile(self, site_key, file, info):
        """Add ``local``, ``pdf``, ``html``, ``url`` and ``path`` to ``info``.

        Paths are rooted at
        ``<Demeter.path>/runtime/upload/<site_key>/<YYYY>/<MM>/<DD>/<key>``.
        Nothing is downloaded here; :meth:`handle` fetches the source file
        when it is missing. ``file`` is accepted for interface
        compatibility and is not used.
        """
        year, month, day = str(date.today()).split('-')[:3]
        filename = info['key']
        dated_dir = str(site_key) + '/' + year + '/' + month + '/' + day
        upload_dir = File.mkdirs(
            os.path.join(Demeter.path, 'runtime', 'upload', dated_dir))
        filepath = upload_dir + '/' + filename

        info['local'] = filepath + info['ext']
        info['pdf'] = filepath + '/' + filename + '.pdf'
        # TODO: add access control here; the HTML must not be directly
        # accessible.
        info['html'] = filepath + '/' + filename + '.html'
        info['url'] = info['html'].replace(Demeter.path + 'runtime', '')
        info['path'] = filepath + '/'
        return info

    def download(self, file, local):
        """Copy ``file`` to ``local`` and report whether ``local`` exists.

        When ``file`` contains ``'http'`` it is fetched with ``requests`` in
        1024-byte chunks; otherwise it is copied with ``shutil.copyfile``.
        Returns ``True`` if ``File.exists(local)`` afterwards, else ``False``.
        """
        # NOTE(review): this is a substring test, so a local path that merely
        # contains "http" is treated as a URL. Kept for compatibility.
        if 'http' in file:
            import requests
            response = requests.get(file, stream=True)
            with open(local, 'wb') as target:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        target.write(chunk)
        else:
            import shutil
            shutil.copyfile(file, local)
        return bool(File.exists(local))

    def command(self, info):
        """Return the shell command line that converts ``info`` to HTML.

        The command changes into ``info['path']``, converts the source to
        PDF with libreoffice unless it already is a ``.pdf``, then runs
        pdf2htmlEX on the PDF. For ``.pdf`` sources ``info['pdf']`` is
        re-pointed at ``info['local']`` (callers rely on this mutation).

        Every path is shell-quoted because ``info['ext']`` derives from the
        caller-supplied file name; ordinary paths are left unchanged by
        the quoting.
        """
        try:
            from shlex import quote
        except ImportError:  # Python 2 fallback
            from pipes import quote

        File.mkdir(info['path'])
        dest_dir = quote(info['path'])
        steps = ['cd ' + dest_dir]
        if info['ext'] != '.pdf':
            steps.append(
                'libreoffice --invisible --convert-to pdf '
                + quote(info['local']))
        else:
            info['pdf'] = info['local']
        steps.append(
            'pdf2htmlEX --zoom 1.3 --no-drm 1 --split-pages 1 '
            '--embed-css 1 --embed-javascript 0 --embed-image 0 '
            '--embed-font 1 --process-outline 0 '
            '--embed-external-font 0 --dest-dir ' + dest_dir
            + ' --page-filename %d.page ' + quote(info['pdf']))
        return ' && '.join(steps)

    def total(self, path):
        """Count the files with extension ``.page`` anywhere under ``path``."""
        return sum(
            1
            for _parent, _dirs, filenames in os.walk(path)
            for filename in filenames
            if os.path.splitext(filename)[1] == '.page'
        )

    def handle(self, id):
        """Convert the record ``id`` if its status is ``1`` or ``4``.

        Marks the record as status ``2``, downloads the source when the
        local copy is missing, runs the conversion command, and then stores
        either the page count with status ``3`` (HTML exists) or status
        ``4`` (HTML missing). Records that do not exist or have any other
        status are left untouched.
        """
        model = Demeter.model('convert')
        model.id = id
        info = model.select(type='fetchone')
        if not info:
            return
        if not (info['status'] == 1 or info['status'] == 4):
            return

        model.id = id
        model.update({'status': 2})

        if not File.exists(info['local']):
            self.download(info['file'], info['local'])

        Shell.popen(self.command(info))

        if File.exists(info['html']):
            # Take a snapshot image of the first page.
            self.cut(info)
            # Count how many pages were produced.
            page = self.total(info['path'])
            model.id = id
            update = {}
            update['page'] = page
            update['status'] = 3
            model.update(update)
            return

        model.id = id
        model.update({'status': 4})

    def cut(self, info):
        """Write the first page of ``info['pdf']`` as ``<html>.jpg``.

        The PDF is opened at resolution 50 and converted to JPEG; only the
        first entry of the resulting sequence is saved. Nothing is written
        when the sequence is empty.
        """
        pdf = Image(filename=info['pdf'], resolution=50)
        jpg = pdf.convert('jpg')
        for frame in jpg.sequence:
            blob = Image(image=frame).make_blob('jpg')
            with open(info['html'] + '.jpg', 'wb') as out:
                out.write(blob)
            # Only the first page is needed.
            break

    def string_switch(self, x, y, z, s=1):
        """Replace ``y`` with ``z`` in the UTF-8 text file ``x``, in place.

        ``s == 1`` (default) rewrites only the first line that contains
        ``y`` (every occurrence within that line); ``s == 'g'`` rewrites
        every line. Any other ``s`` leaves the file untouched -- the file is
        only reopened for writing once the mode is known to be valid, so an
        unknown mode can no longer truncate it.
        """
        with open(x, "r", encoding="utf-8") as f:
            lines = f.readlines()

        if s == 1:
            for index, line in enumerate(lines):
                if y in line:
                    lines[index] = line.replace(y, z)
                    break
        elif s == 'g':
            lines = [line.replace(y, z) if y in line else line
                     for line in lines]
        else:
            return

        with open(x, "w", encoding="utf-8") as f_w:
            f_w.writelines(lines)
|