# -*- coding: utf-8 -*-
from .__load__ import *


# Extractor
class Parser(object):
    """Register source files for extraction and run the extraction loader.

    The names ``Demeter``, ``File``, ``os``, ``date`` and ``json`` used below
    are not defined in this module; they are expected to come from the
    ``.__load__`` star import.
    """

    def get(self, host='', id=0, site_id=0, uid=0, source_id=0, source='',
            audio='', notify='', sync=True, method='json', **kwargs):
        """Find or create the extract record for a source, then process it.

        The record is looked up by ``id`` when ``int(id) > 0``; otherwise by
        the key derived from ``site_id`` and ``source``.  When no record is
        found, the source is brought to local storage via :meth:`getFile` and
        a new record is stored with status 1.

        Args:
            host: Stored on the record and later handed to the loader.
            id: Existing extract record id; values <= 0 mean "look up by key".
            site_id: Site identifier, part of the lookup key.
            uid: Stored on a newly created record.
            source_id: Stored on a newly created record.
            source: Path or URL of the file to extract.
            audio: Stored on the record and later handed to the loader.
            notify: Stored on a newly created record.
            sync: When true, run :meth:`handle` immediately.
            method: Name of the loader method to call (``'json'`` by default).
            **kwargs: Accepted and ignored.

        Returns:
            ``'error'`` when no record exists and ``source`` is empty; the
            dict returned by :meth:`handle` when ``sync`` is true; otherwise
            the record id.
        """
        extract = Demeter.model('extract')
        if int(id) > 0:
            extract.id = id
        else:
            extract.key = self.getKey(site_id, source)
        info = extract.select(type='fetchone')

        if not info:
            if not source:
                return 'error'
            info = self.getFile(site_id, source, host)
            info['uid'] = uid
            info['source_id'] = source_id
            info['audio'] = audio
            info['notify'] = notify
            info['status'] = 1
            info['id'] = Demeter.service('common').update('extract', False, info)

        if sync:
            return self.handle(info, method)
        return info['id']

    def getFile(self, site_id, file, host):
        """Build the record dict for ``file`` and make sure a local copy exists.

        Returns:
            A dict with the keys ``site_id``, ``source``, ``key``, ``ext``,
            ``name``, ``host``, ``file``, ``path`` (the last three as adjusted
            by :meth:`getLocal`) and ``size`` (byte size of the local file).

        Raises:
            OSError: If the local file does not exist when its size is read.
        """
        filepath, basename = os.path.split(file)
        filename, extension = os.path.splitext(basename)
        info = {
            'site_id': site_id,
            'source': file,
            'key': self.getKey(site_id, file),
            'ext': extension,
            'name': filename,
            'host': host,
            'file': file,
            'path': filepath,
        }
        info = self.getLocal(info)
        info['size'] = os.path.getsize(info['file'])
        return info

    def getKey(self, site_id, file):
        """Return the MD5-based lookup key for a ``site_id`` / ``file`` pair."""
        return Demeter.md5(str(site_id) + '_' + str(file))

    def _is_remote(self, source):
        """Return True when ``source`` is an http(s) URL.

        A scheme-prefix check is used instead of a substring test so that a
        local path which merely contains "http" is not treated as a URL.
        Leading whitespace is ignored, since requests strips it from URLs.
        """
        return source.lstrip().lower().startswith(('http://', 'https://'))

    def getLocal(self, info):
        """Fill in the local ``file`` / ``path`` entries and fetch the file.

        Remote sources are stored under
        ``<Demeter.path>/runtime/files/<site_id>/<YYYY>/<MM>/<DD>/<key><ext>``;
        local sources keep their own directory and name, and ``host`` is
        cleared.  ``info['path']`` becomes the file path without extension
        plus a trailing slash.  The file is downloaded or copied only when it
        is not already present.

        Returns:
            The same ``info`` dict, updated in place.
        """
        if self._is_remote(info['source']):
            # Remote source: store under a per-site, per-day directory.
            year, month, day = str(date.today()).split('-')
            filename = info['key']
            filepath = str(info['site_id']) + '/' + year + '/' + month + '/' + day
            filepath = File.mkdirs(os.path.join(Demeter.path, 'runtime', 'files', filepath))
        else:
            # Local source: keep the original directory and base name.
            filename = info['name']
            filepath = info['path']
            info['host'] = ''

        # File.mkdirs is called again here for both branches, as in the
        # original code; its return value is used as the directory path.
        filepath = File.mkdirs(filepath) + '/' + filename
        info['file'] = filepath + info['ext']
        info['path'] = filepath + '/'

        if not File.exists(info['file']):
            self.download(info['source'], info['file'])
        return info

    def download(self, file, local):
        """Fetch ``file`` (http(s) URL or local path) into ``local``.

        Args:
            file: Source URL or source path.
            local: Destination path.

        Returns:
            True when ``local`` exists afterwards, else False.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
                Without this check the error page would be written to disk
                and later be treated as a valid cached copy.
            Other exceptions raised by ``requests``, ``open`` or
            ``shutil.copyfile`` propagate unchanged.
        """
        if self._is_remote(file):
            import requests
            from contextlib import closing

            # NOTE(review): no timeout is passed, so a stalled server can
            # block this call indefinitely -- consider adding one.
            # closing() releases the streamed connection on every exit path.
            with closing(requests.get(file, stream=True)) as response:
                response.raise_for_status()
                try:
                    with open(local, 'wb') as target:
                        for chunk in response.iter_content(chunk_size=1024):
                            if chunk:
                                target.write(chunk)
                except Exception:
                    # Do not leave a truncated file behind: callers treat an
                    # existing file as a complete cached copy.
                    if os.path.exists(local):
                        os.remove(local)
                    raise
        else:
            import shutil
            shutil.copyfile(file, local)

        return bool(File.exists(local))

    def handle(self, info, method):
        """Run the extraction loader for a record and report the outcome.

        The record is set to status 2 before the loader runs.  Afterwards it
        is set to status 3 when the loader result reports ``total > 0``, and
        to status 4 otherwise.  A result with ``total > 0`` is stored as JSON
        in ``extract_content`` unless a row for the same record and method
        already exists.  Finally the ``extract`` callback is sent.

        Args:
            info: Extract record; must provide ``id``, ``site_id``,
                ``source_id``, ``source``, ``file``, ``path``, ``host``,
                ``audio`` and ``status``.
            method: Name of the loader method to call; ``'json'`` is stored
                as method code 1, anything else as 2.

        Returns:
            A dict with ``method``, ``page``, ``status``, ``source_id`` and
            ``source``, plus ``content`` when the loader produced results.
        """
        param = {
            'method': 'extract',
            'page': 0,
            'status': 4,
            'source_id': info['source_id'],
            'source': info['source'],
        }
        if not info:
            return param

        common = Demeter.service('common')
        common.update('extract', info['id'], {'status': 2})

        if not File.exists(info['file']):
            self.download(info['source'], info['file'])

        File.mkdir(info['path'])
        loader = Demeter.service('loader', 'extract').get(
            info['file'],
            {'path': info['path'], 'host': info['host'], 'audio': info['audio']}
        )
        result = getattr(loader, method)()

        if result and 'total' in result and result['total'] > 0:
            param['content'] = result
            param['status'] = 3
            method_code = 1 if method == 'json' else 2
            data = {'extract_id': info['id'], 'method': method_code}
            # Store the content only once per record and method.
            if not common.one('extract_content', **data):
                data['content'] = json.dumps(result, ensure_ascii=False)
                common.update('extract_content', False, data)

        common.update('extract', info['id'], {'status': param['status'], 'page': param['page']})
        Demeter.service('callback').send(info['site_id'], 'extract', param)
        return param