convert.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219
  1. # -*- coding: utf-8 -*-
  2. from .__load__ import *
  3. class Convert(object):
  def getKey(self, site_key, file):
      """Return the lookup key for ``file`` within the site ``site_key``.

      The key is ``Demeter.md5`` applied to the string
      ``"<site_key>_<file>"``.
      """
      raw_key = '_'.join((str(site_key), file))
      return Demeter.md5(raw_key)
  def get(self, site, site_key, file):
      """Look up the stored convert record for ``file`` on site ``site``.

      The record is matched on the site id and on the key derived from
      ``site_key`` and ``file``; the result of
      ``select(type='fetchone')`` is returned as-is.
      """
      record = Demeter.model('convert')
      record.site_id = site
      record.key = self.getKey(site_key, file)
      return record.select(type='fetchone')
  def update(self, site, site_key, file):
      """Register ``file`` for conversion unless a record already exists.

      Builds the file description with ``getFile`` and looks for a
      ``convert`` row with the same site id and key.  When one exists its
      ``id`` and ``status`` are copied into the description; otherwise a
      new row is inserted and the description gets the new id and
      status ``1``.

      Returns the description dict.
      """
      info = self.getFile(site_key, file)

      record = Demeter.model('convert')
      record.site_id = site
      record.key = info['key']
      existing = record.select(type='fetchone')

      if existing:
          info['id'] = existing['id']
          info['status'] = existing['status']
          return info

      # No row yet: populate every column from the description and insert.
      record.site_id = site
      columns = ('file', 'key', 'name', 'ext', 'local',
                 'path', 'pdf', 'html', 'url')
      for column in columns:
          setattr(record, column, info[column])
      new_id = record.insert()
      info['status'] = 1
      info['id'] = new_id
      return info
  def getFile(self, site_key, file):
      """Describe ``file``: its key, base name, extension and local paths.

      Returns the dict produced by ``getLocalFile`` after it has been
      seeded with ``file``, ``key``, ``ext`` (extension including the dot)
      and ``name`` (base name without the extension).
      """
      base_name = os.path.basename(file)
      stem, suffix = os.path.splitext(base_name)
      info = {
          'file': file,
          'key': self.getKey(site_key, file),
          'ext': suffix,
          'name': stem,
      }
      return self.getLocalFile(site_key, file, info)
  def getLocalFile(self, site_key, file, info):
      """Add the local storage paths for a file to ``info`` and return it.

      A dated directory ``<Demeter.path>/runtime/upload/<site_key>/<Y>/<M>/<D>``
      is created through ``File.mkdirs``.  Using ``info['key']`` as the
      file stem, the following entries are set:

      * ``local`` - ``<dir>/<key><ext>``, where the source file is stored
      * ``pdf``   - ``<dir>/<key>/<key>.pdf``
      * ``html``  - ``<dir>/<key>/<key>.html``
      * ``url``   - ``html`` with ``Demeter.path + 'runtime'`` removed
      * ``path``  - ``<dir>/<key>/``

      ``file`` is accepted for interface compatibility but is not used:
      this method does not fetch the file.  (The original carried a
      download branch after an unconditional ``return``; it could never
      run and has been removed.  ``handle`` downloads the file when the
      local copy is missing.)
      """
      date_parts = str(date.today()).split('-')
      relative_dir = '/'.join(
          (str(site_key), date_parts[0], date_parts[1], date_parts[2])
      )
      upload_dir = File.mkdirs(
          os.path.join(Demeter.path, 'runtime', 'upload', relative_dir)
      )

      key = info['key']
      base = upload_dir + '/' + key

      info['local'] = base + info['ext']
      info['pdf'] = base + '/' + key + '.pdf'
      # TODO: access control still has to be added here; the generated
      # html must not be directly accessible.
      info['html'] = base + '/' + key + '.html'
      info['url'] = info['html'].replace(Demeter.path + 'runtime', '')
      info['path'] = base + '/'
      return info
  def download(self, file, local):
      """Copy ``file`` to the local path ``local``.

      ``file`` is fetched with ``requests`` when it starts with
      ``http://`` or ``https://`` (case-insensitive) and copied with
      ``shutil.copyfile`` otherwise.  The original tested ``'http' in
      file``, which also matched local paths that merely contain "http".

      Returns True when ``File.exists(local)`` reports the file afterwards
      and False otherwise.  An HTTP error status (4xx/5xx) also yields
      False and nothing is written, so an error page is never stored as
      the document.

      NOTE(review): the HTTP request still has no timeout; consider adding
      one once callers are prepared for the resulting exception.
      """
      if file.lower().startswith(('http://', 'https://')):
          import requests
          r = requests.get(file, stream=True)
          try:
              if not r.ok:
                  return False
              with open(local, 'wb') as up:
                  for chunk in r.iter_content(chunk_size=1024):
                      if chunk:
                          up.write(chunk)
          finally:
              # Release the streamed connection even if writing fails.
              r.close()
      else:
          import shutil
          shutil.copyfile(file, local)
      if File.exists(local):
          return True
      return False
  def command(self, info):
      """Build the shell command that converts ``info['local']`` to HTML.

      The command changes into ``info['path']`` (created first through
      ``File.mkdir``), converts the source to PDF with ``libreoffice``
      unless ``info['ext']`` is already ``'.pdf'``, and then runs
      ``pdf2htmlEX`` on the PDF with one ``%d.page`` file per page.

      Side effect: when the source already is a PDF, ``info['pdf']`` is
      overwritten with ``info['local']``.

      All paths are shell-quoted because the extension in
      ``info['local']`` originates from the caller-supplied file name or
      URL.  Quoting leaves ordinary paths (letters, digits and
      ``@%+=:,./-``) unchanged, so the command is identical to the
      unquoted form for them.

      Returns the command line as a single string.
      """
      try:
          from shlex import quote
      except ImportError:  # Python 2
          from pipes import quote

      File.mkdir(info['path'])
      work_dir = quote(info['path'])

      steps = ['cd ' + work_dir]
      if info['ext'] != '.pdf':
          steps.append(
              'libreoffice --invisible --convert-to pdf ' + quote(info['local'])
          )
      else:
          info['pdf'] = info['local']
      steps.append(
          'pdf2htmlEX --zoom 1.3 --no-drm 1 --split-pages 1 '
          '--embed-css 1 --embed-javascript 0 --embed-image 0 --embed-font 1 --process-outline 0 '
          '--embed-external-font 0 --dest-dir ' + work_dir
          + ' --page-filename %d.page ' + quote(info['pdf'])
      )
      return ' && '.join(steps)
  def total(self, path):
      """Count the files with a ``.page`` extension anywhere under ``path``."""
      return sum(
          1
          for _root, _dirs, names in os.walk(path)
          for name in names
          if os.path.splitext(name)[1] == '.page'
      )
  def handle(self, id):
      """Run the conversion for the ``convert`` record ``id``.

      Nothing happens when the record does not exist or when its status
      is neither 1 nor 4.  Otherwise the record is marked with status 2,
      the source file is downloaded if the local copy is missing, and the
      command from ``command`` is run through ``Shell.popen``.  If the
      HTML output exists afterwards, a thumbnail is cut, the page count
      is stored and the status becomes 3; if not, the status becomes 4.

      NOTE(review): the nesting was reconstructed from a source whose
      indentation was lost; the status-4 write is assumed to apply only
      to records processed by this call - confirm against the original.
      Presumably 1 means "pending" and 4 "failed" - confirm.
      """
      model = Demeter.model('convert')
      model.id = id
      info = model.select(type='fetchone')
      if not info:
          return
      if info['status'] not in (1, 4):
          return

      # Mark the record as being processed.
      model.id = id
      model.update({'status': 2})

      if not File.exists(info['local']):
          self.download(info['file'], info['local'])

      Shell.popen(self.command(info))

      if not File.exists(info['html']):
          # Conversion produced no HTML output.
          model.id = id
          model.update({'status': 4})
          return

      # Take the screenshot of the first page.
      self.cut(info)
      # Work out how many pages were generated.
      page_count = self.total(info['path'])
      model.id = id
      model.update({'page': page_count, 'status': 3})
  def cut(self, info):
      """Save a JPEG rendering of the first page of ``info['pdf']``.

      The PDF is opened at resolution 50, converted to ``jpg`` and the
      first page is written to ``info['html'] + '.jpg'``.  Nothing is
      written when the converted image has no pages.

      The image objects and the output file are closed through ``with``
      blocks, and iteration stops after the first page instead of walking
      the whole document.

      NOTE(review): ``Image`` is presumably ``wand.image.Image`` (the
      calls match its API), which supports the ``with`` statement -
      confirm against the project's imports.
      """
      first_page_blob = None
      with Image(filename=info['pdf'], resolution=50) as pdf:
          with pdf.convert('jpg') as jpg:
              for frame in jpg.sequence:
                  with Image(image=frame) as first_page:
                      first_page_blob = first_page.make_blob('jpg')
                  # Only the first page is needed.
                  break

      if first_page_blob is None:
          return
      with open(info['html'] + '.jpg', 'wb') as out:
          out.write(first_page_blob)
  def string_switch(self, x, y, z, s=1):
      """Replace the string ``y`` with ``z`` in the UTF-8 text file ``x``.

      ``s`` selects the mode:

      * ``1`` (default) - only the first line containing ``y`` is changed
        (every occurrence within that line is replaced);
      * ``'g'`` - every line containing ``y`` is changed.

      The file is read completely and then rewritten in place.

      Raises ValueError for any other ``s``.  The check happens before
      the file is opened for writing; previously an unknown mode
      silently truncated the file to zero length.
      """
      if s != 1 and s != 'g':
          raise ValueError("s must be 1 or 'g', got %r" % (s,))

      with open(x, "r", encoding="utf-8") as f:
          lines = f.readlines()

      replace_all = s == 'g'
      # True while a matching line may still be rewritten.
      pending = True
      with open(x, "w", encoding="utf-8") as f_w:
          for line in lines:
              if pending and y in line:
                  line = line.replace(y, z)
                  # In single mode, stop replacing after the first hit.
                  pending = replace_all
              f_w.write(line)
  174. f_w.write(line)