parser.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. # -*- coding: utf-8 -*-
  2. from .__load__ import *
  3. # 提取器
  class Parser(object):
      """Register source files for extraction and run the extraction on them.

      Records are kept in the ``extract`` model and extracted content in
      ``extract_content``. Status values written by this class:

      * 1 -- a new record has just been stored (``get``)
      * 2 -- ``handle`` has started working on the record
      * 3 -- the loader returned content whose ``total`` is positive
      * 4 -- the loader returned nothing usable
      """

      # Seconds ``requests`` may wait on the remote server before giving up.
      download_timeout = 30
      # Bytes requested per chunk while streaming a remote file to disk.
      download_chunk_size = 8192

      def get(self, host='', id=0, site_id=0, uid=0, source_id=0, source='',
              audio='', notify='', sync=True, method='json', **kwargs):
          """Find or create the extract record for a source, optionally process it.

          The record is selected by ``id`` when ``int(id)`` is positive, otherwise
          by the key derived from ``site_id`` and ``source``. If nothing is found,
          a record is built from ``source`` (see ``getFile``), completed with
          ``uid``, ``source_id``, ``audio``, ``notify`` and status 1, and stored.

          Args:
              host: Stored on a new record and later passed to the loader.
              id: Record id; takes precedence over ``site_id``/``source``.
              site_id: Site the source belongs to.
              uid: Stored on a new record.
              source_id: Stored on a new record and echoed in the callback.
              source: URL or local path of the file to extract.
              audio: Stored on a new record and later passed to the loader.
              notify: Stored on a new record.
              sync: When true, run ``handle`` immediately.
              method: Name of the loader method ``handle`` should call.
              **kwargs: Accepted and ignored.

          Returns:
              ``'error'`` when no record exists and ``source`` is empty, the
              result of ``handle`` when ``sync`` is true, otherwise the record id.
          """
          extract = Demeter.model('extract')
          if int(id) > 0:
              extract.id = id
          else:
              extract.key = self.getKey(site_id, source)

          info = extract.select(type='fetchone')
          if not info:
              if not source:
                  return 'error'
              info = self.getFile(site_id, source, host)
              info['uid'] = uid
              info['source_id'] = source_id
              info['audio'] = audio
              info['notify'] = notify
              info['status'] = 1
              info['id'] = Demeter.service('common').update('extract', False, info)

          if sync:
              return self.handle(info, method)
          return info['id']

      def getFile(self, site_id, file, host):
          """Build the record dict for ``file`` and make sure a local copy exists.

          Returns:
              A dict with ``site_id``, ``source``, ``key``, ``ext``, ``name``,
              ``host``, ``file``, ``path`` and ``size``; ``file``/``path``/``host``
              are finalised by ``getLocal``.

          Raises:
              OSError: from ``os.path.getsize`` if the local copy is missing.
          """
          directory, basename = os.path.split(file)
          name, extension = os.path.splitext(basename)
          info = {
              'site_id': site_id,
              'source': file,
              'key': self.getKey(site_id, file),
              'ext': extension,
              'name': name,
              'host': host,
              'file': file,
              'path': directory,
          }
          info = self.getLocal(info)
          info['size'] = os.path.getsize(info['file'])
          return info

      def getKey(self, site_id, file):
          """Return the lookup key: ``Demeter.md5`` of ``"<site_id>_<file>"``."""
          return Demeter.md5(str(site_id) + '_' + str(file))

      @staticmethod
      def _is_remote(source):
          """Return True when ``source`` is an http(s) URL rather than a local path.

          Only the scheme prefix is examined, so a local path that merely
          contains the text ``http`` is not mistaken for a URL.
          """
          return source.lstrip().lower().startswith(('http://', 'https://'))

      def getLocal(self, info):
          """Fill in ``info['file']`` / ``info['path']`` and fetch the file if absent.

          Remote sources are stored under
          ``<Demeter.path>/runtime/files/<site_id>/<date parts>/`` and named after
          ``info['key']``; local sources keep their own directory and name and
          have ``info['host']`` cleared. ``info['path']`` becomes the file path
          without extension plus a trailing ``/``.

          Returns:
              The same ``info`` dict, updated in place.
          """
          if self._is_remote(info['source']):
              # Remote: one directory per site and per day, file named by key.
              date_parts = str(date.today()).split('-')
              relative = '/'.join([str(info['site_id'])] + date_parts[:3])
              filename = info['key']
              directory = File.mkdirs(
                  os.path.join(Demeter.path, 'runtime', 'files', relative))
          else:
              # Local: reuse the source's own directory and base name.
              filename = info['name']
              directory = info['path']
              info['host'] = ''

          base = File.mkdirs(directory) + '/' + filename
          info['file'] = base + info['ext']
          info['path'] = base + '/'

          if not File.exists(info['file']):
              self.download(info['source'], info['file'])
          return info

      def download(self, file, local):
          """Copy ``file`` (http(s) URL or local path) to the path ``local``.

          Remote files are streamed in chunks. A non-success HTTP status raises
          instead of saving the error page, and a transfer that fails midway
          removes the partial file so later existence checks do not mistake it
          for a complete download.

          Returns:
              True if ``local`` exists afterwards, else False.

          Raises:
              requests.RequestException: on timeout, connection or HTTP errors.
              OSError: when the local copy or write fails.
          """
          if self._is_remote(file):
              import requests
              response = requests.get(
                  file, stream=True, timeout=self.download_timeout)
              try:
                  response.raise_for_status()
                  try:
                      with open(local, 'wb') as target:
                          for chunk in response.iter_content(
                                  chunk_size=self.download_chunk_size):
                              if chunk:
                                  target.write(chunk)
                  except Exception:
                      # Do not leave a truncated file behind.
                      if os.path.exists(local):
                          os.remove(local)
                      raise
              finally:
                  response.close()
          else:
              import shutil
              shutil.copyfile(file, local)
          return bool(File.exists(local))

      def handle(self, info, method):
          """Run the loader on a record, store its output and send the callback.

          Args:
              info: Record mapping; reads ``id``, ``site_id``, ``source_id``,
                  ``source``, ``file``, ``path``, ``host`` and ``audio``.
              method: Name of the loader method to call (``'json'`` is stored as
                  method code 1, anything else as 2).

          Returns:
              The callback payload: ``method``, ``page``, ``status``,
              ``source_id``, ``source`` and, on success, ``content``.
          """
          param = {
              'method': 'extract',
              'page': 0,
              'status': 4,
              'source_id': info['source_id'],
              'source': info['source'],
          }
          if not info:
              return param

          Demeter.service('common').update('extract', info['id'], {'status': 2})
          if not File.exists(info['file']):
              self.download(info['source'], info['file'])
          File.mkdir(info['path'])

          loader = Demeter.service('loader', 'extract').get(
              info['file'],
              {'path': info['path'], 'host': info['host'], 'audio': info['audio']})
          result = getattr(loader, method)()

          # NOTE(review): content is assumed to be persisted only when the
          # loader produced a positive 'total' -- confirm against the original
          # layout of this method.
          if result and 'total' in result and result['total'] > 0:
              param['content'] = result
              param['status'] = 3
              self._store_content(info['id'], method, result)

          Demeter.service('common').update(
              'extract', info['id'],
              {'status': param['status'], 'page': param['page']})
          Demeter.service('callback').send(info['site_id'], 'extract', param)
          return param

      def _store_content(self, extract_id, method, result):
          """Save ``result`` as JSON in ``extract_content`` unless a row exists."""
          row = {'extract_id': extract_id, 'method': 1 if method == 'json' else 2}
          if not Demeter.service('common').one('extract_content', **row):
              row['content'] = json.dumps(result, ensure_ascii=False)
              Demeter.service('common').update('extract_content', False, row)