convert.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. # -*- coding: utf-8 -*-
  2. from .__load__ import *
  3. class Convert(object):
  4. def getKey(self, site_key, file):
  5. return Demeter.sha1(str(site_key) + '_' + file)
  6. def get(self, site, site_key, file):
  7. convert = Demeter.model('convert')
  8. convert.site_id = site
  9. #convert.key = self.getKey(site_key, file)
  10. convert.key = file
  11. data = convert.select(type='fetchone')
  12. return data
  13. def update(self, site, site_key, file, file_id, file_type, uid):
  14. info = self.getFile(site_key, file)
  15. convert = Demeter.model('convert')
  16. convert.site_id = site
  17. convert.key = info['key']
  18. data = convert.select(type='fetchone')
  19. if not data:
  20. convert.site_id = site
  21. convert.uid = uid
  22. convert.file_id = file_id
  23. convert.file_type = file_type
  24. convert.file_size = 0
  25. convert.file = info['file']
  26. convert.key = info['key']
  27. convert.name = info['name']
  28. convert.ext = info['ext']
  29. convert.local = info['local']
  30. convert.path = info['path']
  31. convert.pdf = info['pdf']
  32. convert.html = info['html']
  33. convert.url = info['url']
  34. id = convert.insert()
  35. info['status'] = 1
  36. info['id'] = id
  37. else:
  38. info['id'] = data['id']
  39. info['status'] = data['status']
  40. if uid:
  41. self.auth(site, uid, info['id'], 1)
  42. return info
  43. def getAuth(self, site, uid, convert_id):
  44. user = Demeter.model('user')
  45. user.uid = uid
  46. user.site_id = site
  47. user.convert_id = convert_id
  48. data = user.select(type='fetchone')
  49. return data
  50. def auth(self, site, uid, convert_id, status):
  51. user = Demeter.model('user')
  52. user.uid = uid
  53. user.site_id = site
  54. user.convert_id = convert_id
  55. data = user.select(type='fetchone')
  56. if not data:
  57. user.site_id = site
  58. user.uid = uid
  59. user.convert_id = convert_id
  60. user.status = status
  61. user.insert()
  62. elif data['status'] != status:
  63. # 适用于文档转让
  64. user.id = data['id']
  65. update = {}
  66. update['status'] = status
  67. user.update(update)
  68. return True
  69. def getFile(self, site_key, file):
  70. info = {}
  71. (filepath,temp) = os.path.split(file)
  72. (filename,extension) = os.path.splitext(temp)
  73. info['file'] = file
  74. info['key'] = self.getKey(site_key, file)
  75. info['ext'] = extension
  76. info['name'] = filename
  77. info = self.getLocalFile(site_key, file, info)
  78. return info
  79. def getLocalFile(self, site_key, file, info):
  80. day = str(date.today())
  81. day = day.split('-')
  82. #filename = Demeter.md5(str(uuid.uuid5(uuid.uuid1(), info['key'])))
  83. filename = info['key']
  84. filepath = str(site_key) + '/' + day[0] + '/' + day[1] + '/' + day[2]
  85. filepath = File.mkdirs(os.path.join(Demeter.path, 'runtime','files', filepath)) + '/' + filename
  86. local = filepath + info['ext']
  87. info['local'] = local
  88. info['pdf'] = filepath + '/' + filename + '.pdf'
  89. # 这里要增加权限控制 html不能直接访问
  90. info['html'] = filepath + '/' + filename + '.html'
  91. info['url'] = info['html'].replace(Demeter.path + 'runtime', '')
  92. info['path'] = filepath + '/'
  93. return info
  94. if File.exists(local):
  95. return info
  96. else:
  97. self.download(file, local);
  98. return info
  99. def download(self, file, local):
  100. if 'http' in file:
  101. import requests
  102. r = requests.get(file, stream=True)
  103. with open(local, 'wb') as up:
  104. for chunk in r.iter_content(chunk_size=1024):
  105. if chunk:
  106. up.write(chunk)
  107. else:
  108. import shutil
  109. shutil.copyfile(file, local)
  110. if File.exists(local):
  111. return True
  112. return False
  113. def command(self, info):
  114. File.mkdir(info['path'])
  115. convert = 'cd ' + info['path'] + ' && '
  116. if info['ext'] != '.pdf':
  117. convert = convert + 'libreoffice --invisible --convert-to pdf ' + info['local']
  118. convert = convert + ' && '
  119. else:
  120. info['pdf'] = info['local']
  121. convert = convert + 'pdf2htmlEX --zoom 1.3 --no-drm 1 --split-pages 1 '
  122. convert = convert + '--embed-css 1 --embed-javascript 0 --embed-image 0 --embed-font 1 --process-outline 0 '
  123. convert = convert + '--embed-external-font 0 --dest-dir '+info['path']+' --page-filename %d.page ' + info['pdf']
  124. return convert
  125. def total(self, path):
  126. page = 0
  127. for parentdir,dirname,filenames in os.walk(path):
  128. for filename in filenames:
  129. if os.path.splitext(filename)[1]=='.page':
  130. page = page + 1
  131. return page
  132. def handle(self, id):
  133. model = Demeter.model('convert')
  134. model.id = id
  135. info = model.select(type='fetchone')
  136. if not info:
  137. return
  138. siteModel = Demeter.model('site')
  139. siteModel.id = info['site_id']
  140. site = siteModel.select(type='fetchone')
  141. status = True
  142. if info['status'] == 1 or info['status'] == 4:
  143. status = False
  144. if info and status == False:
  145. model.id = id
  146. update = {}
  147. update['status'] = 2
  148. model.update(update)
  149. if not File.exists(info['local']):
  150. self.download(info['file'], info['local'])
  151. if not File.exists(info['html']):
  152. #if info:
  153. handle = self.command(info)
  154. Shell.popen(handle)
  155. if File.exists(info['html']):
  156. # 截屏
  157. self.cut(info)
  158. #self.string_switch(info['html'], "taste", "tasting")
  159. # 获取有多少页
  160. page = self.total(info['path'])
  161. model.id = id
  162. size = os.path.getsize(info['local'])
  163. update = {}
  164. update['file_size'] = size
  165. update['page'] = page
  166. update['status'] = 3
  167. model.update(update)
  168. # 通知接口 通知应用成功转换
  169. info['page'] = page
  170. info['file_size'] = size
  171. self.api(info, site)
  172. return
  173. model.id = id
  174. update = {}
  175. update['status'] = 4
  176. model.update(update)
  177. def api(self, info, site):
  178. if 'file_id' in info and info['file_id']:
  179. api = site['api']
  180. key = site['key']
  181. token = site['token']
  182. file = info['key']
  183. file_id = info['file_id']
  184. uid = info['uid']
  185. file_size = info['file_size']
  186. url = 'main/view'
  187. page = info['page']
  188. ext = info['ext']
  189. nonce = Demeter.time()
  190. sign = Demeter.sha1(key + '&' + token + '&' + str(nonce) + '&' + file + '&' + str(uid));
  191. param = {}
  192. param['signature'] = sign
  193. param['key'] = key
  194. param['nonce'] = nonce
  195. param['file'] = file
  196. param['file_id'] = file_id
  197. param['uid'] = uid
  198. param['url'] = url
  199. param['img'] = img
  200. param['page'] = page
  201. param['ext'] = ext
  202. param['file_size'] = file_size
  203. Demeter.curl(api, param, 'post')
  204. def cut(self, info):
  205. pdf = Image(filename=info['pdf'], resolution=50)
  206. jpg = pdf.convert('jpg')
  207. req_image = []
  208. i = 0
  209. for img in jpg.sequence:
  210. if i == 0:
  211. img_page = Image(image=img)
  212. req_image.append(img_page.make_blob('jpg'))
  213. i = i+1
  214. dest = info['html'].replace('/files/', '/upload/')
  215. for img in req_image:
  216. ff = open(dest + '.jpg','wb')
  217. ff.write(img)
  218. ff.close()
  219. def string_switch(self, x,y,z,s=1):
  220. with open(x, "r", encoding="utf-8") as f:
  221. #readlines以列表的形式将文件读出
  222. lines = f.readlines()
  223. with open(x, "w", encoding="utf-8") as f_w:
  224. #定义一个数字,用来记录在读取文件时在列表中的位置
  225. n = 0
  226. #默认选项,只替换第一次匹配到的行中的字符串
  227. if s == 1:
  228. for line in lines:
  229. if y in line:
  230. line = line.replace(y,z)
  231. f_w.write(line)
  232. n += 1
  233. break
  234. f_w.write(line)
  235. n += 1
  236. #将剩余的文本内容继续输出
  237. for i in range(n,len(lines)):
  238. f_w.write(lines[i])
  239. #全局匹配替换
  240. elif s == 'g':
  241. for line in lines:
  242. if y in line:
  243. line = line.replace(y,z)
  244. f_w.write(line)