# convert.py 5.0 KB
# -*- coding: utf-8 -*-
# The two star imports stay first and in their original relative order so
# that any name shadowing between them is unchanged.
from demeter.core import *
from datetime import *
import io
import os
import os.path
import shutil
import uuid

import requests

try:
    from shlex import quote as shell_quote
except ImportError:  # Python 2 has no shlex.quote
    from pipes import quote as shell_quote
  9. class Convert(object):
  10. def getKey(self, site_key, file):
  11. return Demeter.md5(str(site_key) + '_' + file)
  12. def get(self, site, site_key, file):
  13. convert = Demeter.model('convert')
  14. convert.site_id = site
  15. convert.key = self.getKey(site_key, file)
  16. data = convert.select(type='fetchone')
  17. return data
  18. def update(self, site, site_key, file):
  19. info = self.getFile(site_key, file)
  20. convert = Demeter.model('convert')
  21. convert.site_id = site
  22. convert.key = info['key']
  23. data = convert.select(type='fetchone')
  24. if not data:
  25. convert.site_id = site
  26. convert.file = info['file']
  27. convert.key = info['key']
  28. convert.name = info['name']
  29. convert.ext = info['ext']
  30. convert.local = info['local']
  31. convert.path = info['path']
  32. convert.pdf = info['pdf']
  33. convert.html = info['html']
  34. convert.url = info['url']
  35. id = convert.insert()
  36. info['status'] = 1
  37. info['id'] = id
  38. else:
  39. info['id'] = data['id']
  40. info['status'] = data['status']
  41. return info
  42. def getFile(self, site_key, file):
  43. info = {}
  44. (filepath,temp) = os.path.split(file)
  45. (filename,extension) = os.path.splitext(temp)
  46. info['file'] = file
  47. info['key'] = self.getKey(site_key, file)
  48. info['ext'] = extension
  49. info['name'] = filename
  50. info = self.getLocalFile(site_key, file, info)
  51. return info
  52. def getLocalFile(self, site_key, file, info):
  53. day = str(date.today())
  54. day = day.split('-')
  55. #filename = Demeter.md5(str(uuid.uuid5(uuid.uuid1(), info['key'])))
  56. filename = info['key']
  57. filepath = str(site_key) + '/' + day[0] + '/' + day[1] + '/' + day[2]
  58. filepath = File.mkdirs(os.path.join(Demeter.path, 'runtime','upload', filepath)) + '/' + filename
  59. local = filepath + info['ext']
  60. info['local'] = local
  61. info['pdf'] = filepath + '/' + filename + '.pdf'
  62. # 这里要增加权限控制 html不能直接访问
  63. info['html'] = filepath + '/' + filename + '.html'
  64. info['url'] = info['html'].replace(Demeter.path + 'runtime', '')
  65. info['path'] = filepath + '/'
  66. return info
  67. if File.exists(local):
  68. return info
  69. else:
  70. self.download(file, local);
  71. return info
  72. def download(self, file, local):
  73. if 'http' in file:
  74. r = requests.get(file, stream=True)
  75. with open(local, 'wb') as up:
  76. for chunk in r.iter_content(chunk_size=1024):
  77. if chunk:
  78. up.write(chunk)
  79. else:
  80. shutil.copyfile(file, local)
  81. if File.exists(local):
  82. return True
  83. return False
  84. def command(self, info):
  85. convert = 'cd ' + info['path'] + ' && '
  86. convert = convert + 'libreoffice --invisible --convert-to pdf ' + info['local']
  87. convert = convert + ' && '
  88. convert = convert + 'pdf2htmlEX --zoom 1.3 --no-drm 1 --split-pages 1 '
  89. convert = convert + '--embed-css 1 --embed-javascript 0 --embed-image 0 --embed-font 1 '
  90. convert = convert + '--embed-external-font 0 --dest-dir '+info['path']+' --page-filename %d.page ' + info['pdf']
  91. return convert
  92. def total(self, path):
  93. page = 0
  94. for parentdir,dirname,filenames in os.walk(path):
  95. for filename in filenames:
  96. if os.path.splitext(filename)[1]=='.page':
  97. page = page + 1
  98. return page
  99. def handle(self, id):
  100. model = Demeter.model('convert')
  101. model.id = id
  102. info = model.select(type='fetchone')
  103. if not info:
  104. return
  105. status = True
  106. if info['status'] == 1 or info['status'] == 4:
  107. status = False
  108. if info and status == False:
  109. model.id = id
  110. update = {}
  111. update['status'] = 2
  112. #model.update(update)
  113. if not File.exists(info['local']):
  114. self.download(info['file'], info['local'])
  115. if not File.exists(info['html']):
  116. handle = self.command(info)
  117. print handle
  118. return
  119. Shell.popen(handle)
  120. if File.exists(info['html']):
  121. self.string_switch(info['html'], "taste", "tasting")
  122. # 获取有多少页
  123. page = self.total(info['path'])
  124. model.id = id
  125. update = {}
  126. update['page'] = page
  127. update['status'] = 3
  128. model.update(update)
  129. return
  130. model.id = id
  131. update = {}
  132. update['status'] = 4
  133. model.update(update)
  134. def string_switch(self, x,y,z,s=1):
  135. with open(x, "r", encoding="utf-8") as f:
  136. #readlines以列表的形式将文件读出
  137. lines = f.readlines()
  138. with open(x, "w", encoding="utf-8") as f_w:
  139. #定义一个数字,用来记录在读取文件时在列表中的位置
  140. n = 0
  141. #默认选项,只替换第一次匹配到的行中的字符串
  142. if s == 1:
  143. for line in lines:
  144. if y in line:
  145. line = line.replace(y,z)
  146. f_w.write(line)
  147. n += 1
  148. break
  149. f_w.write(line)
  150. n += 1
  151. #将剩余的文本内容继续输出
  152. for i in range(n,len(lines)):
  153. f_w.write(lines[i])
  154. #全局匹配替换
  155. elif s == 'g':
  156. for line in lines:
  157. if y in line:
  158. line = line.replace(y,z)
  159. f_w.write(line)