convert.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. # -*- coding: utf-8 -*-
  2. from .__load__ import *
  3. class Convert(object):
  4. def getKey(self, site_key, file):
  5. return Demeter.md5(str(site_key) + '_' + file)
  6. def get(self, site, site_key, file):
  7. convert = Demeter.model('convert')
  8. convert.site_id = site
  9. convert.key = self.getKey(site_key, file)
  10. data = convert.select(type='fetchone')
  11. return data
  12. def update(self, site, site_key, file, file_id):
  13. info = self.getFile(site_key, file)
  14. convert = Demeter.model('convert')
  15. convert.site_id = site
  16. convert.key = info['key']
  17. data = convert.select(type='fetchone')
  18. if not data:
  19. convert.site_id = site
  20. convert.file_id = file_id
  21. convert.file = info['file']
  22. convert.key = info['key']
  23. convert.name = info['name']
  24. convert.ext = info['ext']
  25. convert.local = info['local']
  26. convert.path = info['path']
  27. convert.pdf = info['pdf']
  28. convert.html = info['html']
  29. convert.url = info['url']
  30. id = convert.insert()
  31. info['status'] = 1
  32. info['id'] = id
  33. else:
  34. info['id'] = data['id']
  35. info['status'] = data['status']
  36. return info
  37. def getFile(self, site_key, file):
  38. info = {}
  39. (filepath,temp) = os.path.split(file)
  40. (filename,extension) = os.path.splitext(temp)
  41. info['file'] = file
  42. info['key'] = self.getKey(site_key, file)
  43. info['ext'] = extension
  44. info['name'] = filename
  45. info = self.getLocalFile(site_key, file, info)
  46. return info
  47. def getLocalFile(self, site_key, file, info):
  48. day = str(date.today())
  49. day = day.split('-')
  50. #filename = Demeter.md5(str(uuid.uuid5(uuid.uuid1(), info['key'])))
  51. filename = info['key']
  52. filepath = str(site_key) + '/' + day[0] + '/' + day[1] + '/' + day[2]
  53. filepath = File.mkdirs(os.path.join(Demeter.path, 'runtime','upload', filepath)) + '/' + filename
  54. local = filepath + info['ext']
  55. info['local'] = local
  56. info['pdf'] = filepath + '/' + filename + '.pdf'
  57. # 这里要增加权限控制 html不能直接访问
  58. info['html'] = filepath + '/' + filename + '.html'
  59. info['url'] = info['html'].replace(Demeter.path + 'runtime', '')
  60. info['path'] = filepath + '/'
  61. return info
  62. if File.exists(local):
  63. return info
  64. else:
  65. self.download(file, local);
  66. return info
  67. def download(self, file, local):
  68. if 'http' in file:
  69. import requests
  70. r = requests.get(file, stream=True)
  71. with open(local, 'wb') as up:
  72. for chunk in r.iter_content(chunk_size=1024):
  73. if chunk:
  74. up.write(chunk)
  75. else:
  76. import shutil
  77. shutil.copyfile(file, local)
  78. if File.exists(local):
  79. return True
  80. return False
  81. def command(self, info):
  82. File.mkdir(info['path'])
  83. convert = 'cd ' + info['path'] + ' && '
  84. if info['ext'] != '.pdf':
  85. convert = convert + 'libreoffice --invisible --convert-to pdf ' + info['local']
  86. convert = convert + ' && '
  87. else:
  88. info['pdf'] = info['local']
  89. convert = convert + 'pdf2htmlEX --zoom 1.3 --no-drm 1 --split-pages 1 '
  90. convert = convert + '--embed-css 1 --embed-javascript 0 --embed-image 0 --embed-font 1 --process-outline 0 '
  91. convert = convert + '--embed-external-font 0 --dest-dir '+info['path']+' --page-filename %d.page ' + info['pdf']
  92. return convert
  93. def total(self, path):
  94. page = 0
  95. for parentdir,dirname,filenames in os.walk(path):
  96. for filename in filenames:
  97. if os.path.splitext(filename)[1]=='.page':
  98. page = page + 1
  99. return page
  100. def handle(self, id):
  101. model = Demeter.model('convert')
  102. model.id = id
  103. info = model.select(type='fetchone')
  104. if not info:
  105. return
  106. siteModel = Demeter.model('site')
  107. siteModel.id = info['site_id']
  108. site = siteModel.select(type='fetchone')
  109. status = True
  110. if info['status'] == 1 or info['status'] == 4:
  111. status = False
  112. if info and status == False:
  113. model.id = id
  114. update = {}
  115. update['status'] = 2
  116. model.update(update)
  117. if not File.exists(info['local']):
  118. self.download(info['file'], info['local'])
  119. if not File.exists(info['html']):
  120. #if info:
  121. handle = self.command(info)
  122. Shell.popen(handle)
  123. if File.exists(info['html']):
  124. # 截屏
  125. self.cut(info)
  126. #self.string_switch(info['html'], "taste", "tasting")
  127. # 获取有多少页
  128. page = self.total(info['path'])
  129. model.id = id
  130. update = {}
  131. update['page'] = page
  132. update['status'] = 3
  133. model.update(update)
  134. # 通知接口 通知应用成功转换
  135. self.api(info, site)
  136. return
  137. model.id = id
  138. update = {}
  139. update['status'] = 4
  140. model.update(update)
  141. def api(self, info, site):
  142. if 'file_id' in info and info['file_id']:
  143. api = site['api']
  144. key = site['key']
  145. token = site['token']
  146. file = info['file']
  147. file_id = info['file_id']
  148. url = info['url']
  149. page = info['page']
  150. ext = info['ext']
  151. nonce = Demeter.time()
  152. sign = Demeter.md5(key + '&' + token + '&' + nonce + '&' + file_id + '&' + url);
  153. param = {}
  154. param['signature'] = sign
  155. param['key'] = key
  156. param['nonce'] = nonce
  157. param['file_id'] = file_id
  158. param['url'] = url
  159. param['page'] = page
  160. param['ext'] = ext
  161. Demeter.curl(api, param, 'post')
  162. def cut(self, info):
  163. pdf = Image(filename=info['pdf'], resolution=50)
  164. jpg = pdf.convert('jpg')
  165. req_image = []
  166. i = 0
  167. for img in jpg.sequence:
  168. if i == 0:
  169. img_page = Image(image=img)
  170. req_image.append(img_page.make_blob('jpg'))
  171. i = i+1
  172. for img in req_image:
  173. ff = open(info['html'] + '.jpg','wb')
  174. ff.write(img)
  175. ff.close()
  176. def string_switch(self, x,y,z,s=1):
  177. with open(x, "r", encoding="utf-8") as f:
  178. #readlines以列表的形式将文件读出
  179. lines = f.readlines()
  180. with open(x, "w", encoding="utf-8") as f_w:
  181. #定义一个数字,用来记录在读取文件时在列表中的位置
  182. n = 0
  183. #默认选项,只替换第一次匹配到的行中的字符串
  184. if s == 1:
  185. for line in lines:
  186. if y in line:
  187. line = line.replace(y,z)
  188. f_w.write(line)
  189. n += 1
  190. break
  191. f_w.write(line)
  192. n += 1
  193. #将剩余的文本内容继续输出
  194. for i in range(n,len(lines)):
  195. f_w.write(lines[i])
  196. #全局匹配替换
  197. elif s == 'g':
  198. for line in lines:
  199. if y in line:
  200. line = line.replace(y,z)
  201. f_w.write(line)