123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122 |
- # -*- coding: utf-8 -*-
- from .__load__ import *
- # 提取器
class Parser(object):
    """Extractor: localize a source file, run an extraction loader on it, record the outcome.

    The class relies on names that are not defined in this block (``Demeter``,
    ``File``, ``os``, ``date``, ``json``); presumably they are provided by the
    package's ``__load__`` star import -- confirm against that module.
    """

    # Values this class writes to the ``status`` field of an ``extract`` record.
    STATUS_NEW = 1         # assigned by get() to a freshly created record
    STATUS_PROCESSING = 2  # written by handle() before extraction starts
    STATUS_DONE = 3        # written by handle() when the loader reported page > 0
    STATUS_FAILED = 4      # written by handle() when the loader reported no pages

    # Values this class writes to the ``method`` field of ``extract_content``.
    METHOD_JSON = 1        # used when the requested method is 'json'
    METHOD_OTHER = 2       # used for every other requested method

    # Source prefixes that are fetched over the network instead of copied.
    REMOTE_PREFIXES = ('http://', 'https://')
    # Seconds to wait for the remote server (connect, and between received bytes).
    DOWNLOAD_TIMEOUT = 60
    # Number of bytes written per streamed chunk.
    DOWNLOAD_CHUNK_SIZE = 1024

    @classmethod
    def _is_remote(cls, source):
        """Return True when ``source`` starts with an http(s) URL scheme.

        A prefix test is used instead of a substring test so that local paths
        which merely contain the text ``http`` are not mistaken for URLs.
        """
        return source.lower().startswith(cls.REMOTE_PREFIXES)

    @staticmethod
    def _strip_url_suffix(name):
        """Return ``name`` without a trailing ``?query`` or ``#fragment`` part."""
        return name.split('#', 1)[0].split('?', 1)[0]

    def get(self, id=0, site_id=0, uid=0, source_id=0, source='', notify='', sync=True, method='json'):
        """Look up (or create) the extract record for a source and optionally process it.

        The record is selected by ``id`` when ``int(id) > 0``, otherwise by the
        key derived from ``site_id`` and ``source``. When nothing is found and
        ``source`` is non-empty, the file is localized via getFile() and a new
        record is stored.

        Args:
            id: Existing extract record id; values <= 0 select by key instead.
            site_id: Site identifier, part of the lookup key.
            uid: Stored on a newly created record.
            source_id: Stored on a newly created record and echoed in the callback.
            source: URL or local path of the file to extract.
            notify: Stored on a newly created record.
            sync: When true, run handle() immediately and return its result.
            method: Name of the loader method that handle() invokes.

        Returns:
            ``source`` unchanged when no record exists and ``source`` is empty;
            the dict returned by handle() when ``sync`` is true; otherwise the
            record's ``id``.
        """
        extract = Demeter.model('extract')
        if int(id) > 0:
            extract.id = id
        else:
            extract.key = self.getKey(site_id, source)
        data = extract.select(type='fetchone')
        if data:
            info = data
        else:
            if not source:
                # Nothing stored and nothing to fetch: hand the empty value back.
                return source
            info = self.getFile(site_id, source)
            info['uid'] = uid
            info['source_id'] = source_id
            info['notify'] = notify
            info['status'] = self.STATUS_NEW
            # The value returned by update() is kept as the record id.
            info['id'] = Demeter.service('common').update('extract', False, info)
        if sync:
            return self.handle(info, method)
        return info['id']

    def getFile(self, site_id, file):
        """Build the record dict for ``file`` and make sure a local copy exists.

        Args:
            site_id: Site identifier the file belongs to.
            file: URL or local path of the source file.

        Returns:
            dict with the keys ``site_id``, ``source``, ``key``, ``ext``,
            ``name``, ``file``, ``path`` and ``size``; ``file`` and ``path``
            are the values computed by getLocal().

        Raises:
            OSError: from ``os.path.getsize`` when the local copy is missing.
        """
        filepath, basename = os.path.split(file)
        if self._is_remote(file):
            # Keep '?query' / '#fragment' out of the stored name and extension.
            basename = self._strip_url_suffix(basename)
        filename, extension = os.path.splitext(basename)
        info = {}
        info['site_id'] = site_id
        info['source'] = file
        info['key'] = self.getKey(site_id, file)
        info['ext'] = extension
        info['name'] = filename
        info['file'] = file
        info['path'] = filepath
        info = self.getLocal(info)
        info['size'] = os.path.getsize(info['file'])
        return info

    def getKey(self, site_id, file):
        """Return ``Demeter.md5`` of ``"<site_id>_<file>"``, used as the record key."""
        return Demeter.md5(str(site_id) + '_' + str(file))

    def getLocal(self, info):
        """Compute the local file and output paths for ``info`` and fetch the file if absent.

        Remote sources are stored as ``<site_id>/<YYYY>/<MM>/<DD>/<key><ext>``;
        other sources keep their own directory and base name. The directory is
        joined onto ``Demeter.config['setting']['save']`` when that setting
        exists, otherwise onto ``<Demeter.path>/runtime/files``.

        Args:
            info: Record dict providing ``source``, ``key``, ``site_id``,
                ``name``, ``path`` and ``ext``.

        Returns:
            The same dict with ``file`` (local file path) and ``path`` (the
            local path without extension plus a trailing ``/``) updated.
        """
        if self._is_remote(info['source']):
            filename = info['key']
            # str(date) is 'YYYY-MM-DD'; turn it into nested directories.
            subdir = str(info['site_id']) + '/' + str(date.today()).replace('-', '/')
        else:
            filename = info['name']
            subdir = info['path']
        setting = Demeter.config['setting']
        if 'save' in setting:
            root = os.path.join(setting['save'], subdir)
        else:
            root = os.path.join(Demeter.path, 'runtime', 'files', subdir)
        base = File.mkdirs(root) + '/' + filename
        info['file'] = base + info['ext']
        info['path'] = base + '/'
        if not File.exists(info['file']):
            self.download(info['source'], info['file'])
        return info

    def download(self, file, local):
        """Fetch ``file`` to the path ``local``.

        http(s) sources are streamed with ``requests``; anything else is copied
        with ``shutil.copyfile``.

        Args:
            file: URL or local path to read from.
            local: Destination path.

        Returns:
            True when ``File.exists(local)`` is truthy afterwards, else False.

        Raises:
            requests.RequestException: on connection errors, timeouts, or an
                HTTP error status (so an error page is never saved as the file).
            IOError/OSError: when the local copy or the write fails.
        """
        if self._is_remote(file):
            import requests
            response = requests.get(file, stream=True, timeout=self.DOWNLOAD_TIMEOUT)
            try:
                response.raise_for_status()
                with open(local, 'wb') as out:
                    for chunk in response.iter_content(chunk_size=self.DOWNLOAD_CHUNK_SIZE):
                        if chunk:
                            out.write(chunk)
            finally:
                # A streamed response holds its connection until it is closed.
                response.close()
        else:
            import shutil
            shutil.copyfile(file, local)
        return bool(File.exists(local))

    def handle(self, info, method):
        """Run the extraction for ``info`` and send the result to the callback service.

        Args:
            info: Extract record (dict-like) with at least ``id``, ``site_id``,
                ``source_id``, ``source``, ``file`` and ``path``.
            method: Name of the loader method to invoke, e.g. ``'json'``.

        Returns:
            dict with ``method`` (always ``'extract'``), ``page``, ``status``
            and ``source_id``; ``content`` is added when pages were extracted.
        """
        param = {}
        param['method'] = 'extract'
        param['page'] = 0
        param['status'] = self.STATUS_FAILED
        param['source_id'] = info['source_id']
        if info:
            self._extract(info, method, param)
        Demeter.service('callback').send(info['site_id'], param)
        return param

    def _extract(self, info, method, param):
        """Invoke the loader for ``info``, persist the outcome, and fill ``param`` in place."""
        Demeter.service('common').update('extract', info['id'], {'status': self.STATUS_PROCESSING})
        if not File.exists(info['file']):
            self.download(info['source'], info['file'])
        File.mkdir(info['path'])
        loader = Demeter.service('loader', 'extract').get(info['file'], {'path': info['path']})
        # NOTE(review): ``method`` selects which loader attribute is called;
        # confirm that callers only pass trusted method names.
        result = getattr(loader, method)()
        if result and 'page' in result and result['page'] > 0:
            param['content'] = result['content']
            param['page'] = result['page']
            param['status'] = self.STATUS_DONE
        method_id = self.METHOD_JSON if method == 'json' else self.METHOD_OTHER
        data = {'extract_id': info['id'], 'method': method_id}
        content = Demeter.service('common').one('extract_content', **data)
        if not content:
            # Content is stored only once per (extract_id, method) pair.
            data['content'] = json.dumps(result, ensure_ascii=False)
            Demeter.service('common').update('extract_content', False, data)
        Demeter.service('common').update('extract', info['id'], {'status': param['status'], 'page': param['page']})
|