shemic
/
diviner


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
							# -*- coding: utf-8 -*-
from .__load__ import *

class Extract(object):
    """Register source documents, fetch them locally and convert them.

    Rows of the ``extract`` model carry a ``status`` that this class drives:
    ``1`` is reported for a freshly registered file, ``2`` is written when
    conversion starts, ``3`` when it succeeded and ``4`` when it failed.
    Files in status ``1`` or ``4`` are the ones :meth:`handle` will process.

    NOTE(review): ``self.command`` and ``self.signature`` are called below
    but are not defined in this class body; they are presumably provided
    elsewhere (mixin / subclass) -- confirm.
    """

    def update(self, site, appid, source_file, source_id, source_type, uid):
        """Register ``source_file`` for ``site`` and return its descriptor.

        A new ``extract`` row is inserted when no row with the same site and
        key exists; otherwise the existing row's id and status are reused.
        When ``uid`` is truthy the user is granted access (auth status 1).

        Returns:
            The dict built by :meth:`getFile`, extended with ``id`` and
            ``status``.
        """
        info = self.getFile(appid, source_file)

        extract = Demeter.model('extract')
        extract.site_id = site
        extract.key = info['key']

        data = extract.select(type='fetchone')
        if not data:
            # site_id/key are assigned again on purpose: presumably the model
            # resets its fields after select() -- TODO confirm before removing.
            extract.site_id = site
            extract.uid = uid
            extract.key = info['key']
            extract.name = info['name']
            extract.source_id = source_id
            extract.source_type = source_type
            extract.source_size = 0
            extract.source_file = info['source_file']
            extract.local_file = info['local_file']
            extract.local_path = info['local_path']
            new_id = extract.insert()
            info['status'] = 1
            info['id'] = new_id
        else:
            info['id'] = data['id']
            info['status'] = data['status']

        if uid:
            self.auth(site, uid, info['id'], 1)

        return info

    def getAuth(self, site, uid, extract_id):
        """Return the ``extract_auth`` row for this site/user/extract, if any."""
        auth = Demeter.model('extract_auth')
        auth.uid = uid
        auth.site_id = site
        auth.extract_id = extract_id
        return auth.select(type='fetchone')

    def auth(self, site, uid, extract_id, status):
        """Create or update the access record of ``uid`` on ``extract_id``.

        Inserts a record when none exists; when one exists with a different
        status, only its status is updated. Always returns ``True``.
        """
        auth = Demeter.model('extract_auth')
        auth.uid = uid
        auth.site_id = site
        auth.extract_id = extract_id
        data = auth.select(type='fetchone')
        if not data:
            # Fields are assigned again on purpose: presumably the model
            # resets its fields after select() -- TODO confirm before removing.
            auth.site_id = site
            auth.uid = uid
            auth.extract_id = extract_id
            auth.status = status
            auth.insert()
        elif data['status'] != status:
            # Used when a document is transferred to another user.
            auth.id = data['id']
            auth.update({'status': status})

        return True

    def get(self, site, appid, file):
        """Return the ``extract`` row of ``site`` whose key equals ``file``.

        ``file`` is used as the stored key as-is (it is not hashed here);
        ``appid`` is accepted but not used.
        """
        extract = Demeter.model('extract')
        extract.site_id = site
        extract.key = file
        return extract.select(type='fetchone')

    def getKey(self, appid, file):
        """Return the SHA-1 key derived from ``"<appid>_<file>"``."""
        return Demeter.sha1(str(appid) + '_' + str(file))

    def getFile(self, appid, file):
        """Build the descriptor dict for a source file.

        The result holds ``source_file``, ``key``, ``ext`` (extension with
        its leading dot), ``name`` (base name without extension) and the
        ``local_file`` / ``local_path`` entries added by
        :meth:`getLocalFile`.
        """
        basename = os.path.split(file)[1]
        filename, extension = os.path.splitext(basename)

        info = {
            'source_file': file,
            'key': self.getKey(appid, file),
            'ext': extension,
            'name': filename,
        }
        return self.getLocalFile(appid, file, info)

    def getLocalFile(self, appid, file, info):
        """Fill in ``info['local_file']`` and ``info['local_path']``.

        The storage directory is ``<root>/<appid>/<YYYY>/<MM>/<DD>`` where
        ``<root>`` is the configured ``setting.save`` value, or
        ``<Demeter.path>/runtime/files`` when that setting is absent. The
        directory is created through ``File.mkdirs``.

        Nothing is downloaded here; :meth:`handle` fetches the file when the
        local copy is missing. (The original had unreachable download code
        after the return statement; it has been removed.)
        """
        year, month, day = str(date.today()).split('-')
        relative = str(appid) + '/' + year + '/' + month + '/' + day

        if 'save' in Demeter.config['setting']:
            root = os.path.join(Demeter.config['setting']['save'], relative)
        else:
            root = os.path.join(Demeter.path, 'runtime', 'files', relative)

        # The key doubles as the local base file name.
        filepath = File.mkdirs(root) + '/' + info['key']

        info['local_file'] = filepath + info['ext']
        info['local_path'] = filepath + '/'

        return info

    def download(self, file, local):
        """Copy ``file`` (an http(s) URL or a local path) to ``local``.

        Returns:
            ``True`` when ``local`` exists afterwards, ``False`` otherwise
            (including when the HTTP server answered with an error status).
        """
        # Only a real URL scheme selects the HTTP branch; a plain substring
        # test would misroute local paths that merely contain "http".
        if file.lower().startswith(('http://', 'https://')):
            import requests
            r = requests.get(file, stream=True)
            try:
                if not r.ok:
                    # Do not store an error page as if it were the document.
                    return False
                with open(local, 'wb') as up:
                    for chunk in r.iter_content(chunk_size=1024):
                        if chunk:
                            up.write(chunk)
            finally:
                # Streaming responses keep the connection until closed.
                r.close()
        else:
            import shutil
            shutil.copyfile(file, local)

        if File.exists(local):
            return True
        return False

    def total(self, path):
        """Count the ``.page`` files found anywhere below ``path``."""
        page = 0
        for _parent, _dirs, filenames in os.walk(path):
            page += sum(
                1 for name in filenames
                if os.path.splitext(name)[1] == '.page'
            )
        return page

    def handle(self, id):
        """Convert the ``extract`` row ``id`` and report the outcome.

        Does nothing when the row is missing or its status is not ``1`` or
        ``4``. Otherwise: marks the row ``2``, makes sure the local file
        exists, runs the conversion command and then either stores
        page count / file size with status ``3`` and notifies success, or
        stores status ``4`` and notifies failure.
        """
        model = Demeter.model('extract')
        model.id = id
        info = model.select(type='fetchone')
        if not info:
            return

        siteModel = Demeter.model('site')
        siteModel.id = info['site_id']
        site = siteModel.select(type='fetchone')

        if info['status'] not in (1, 4):
            return

        # Mark as "in progress".
        model.id = id
        model.update({'status': 2})

        if not File.exists(info['local_file']):
            self.download(info['source_file'], info['local_file'])

        File.mkdir(info['local_path'])

        # NOTE(review): presumably this fills in the 'html', 'path', 'local',
        # 'extract' and 'url' entries read further down -- confirm.
        Demeter.service(info['ext'], 'docs').load(info)

        if 'txt' in info['ext']:
            self._txtToUtf8(info['local_file'])

        # NOTE(review): the original had an `if 'pdf' in info['ext']:` line
        # with no body here (a syntax error); its intended content is
        # unknown, so no PDF-specific step is performed.

        handle = self.command(info)
        Shell.popen(handle)
        if File.exists(info['html']):
            # Post-process the generated images.
            self.pic(info)
            # Count the generated pages.
            page = self.total(info['path'])
            model.id = id
            size = os.path.getsize(info['local'])
            model.update({'file_size': size, 'page': page, 'status': 3})

            # Notify the application that the conversion succeeded.
            info['page'] = page
            info['file_size'] = size
            info['extract_status'] = 1
            self.api(info, site)
            return

        model.id = id
        model.update({'status': 4})
        # Notify the application that the conversion failed.
        info['page'] = 0
        info['file_size'] = 0
        info['extract_status'] = 2
        self.api(info, site)

    def _txtToUtf8(self, file_path):
        """Re-encode the text file at ``file_path`` to UTF-8 in place.

        The file is left untouched when its encoding cannot be detected or
        it is already reported as UTF-8.
        """
        import chardet
        with open(file_path, 'rb') as fp:
            file_data = fp.read()

        encoding = chardet.detect(file_data)['encoding']
        if not encoding or encoding.lower() == 'utf-8':
            return

        # Write bytes so the result is UTF-8 regardless of the locale's
        # default encoding and without newline translation.
        with open(file_path, 'wb') as fp:
            fp.write(file_data.decode(encoding).encode('utf-8'))

    def api(self, info, site):
        """POST the conversion result to the site's callback API.

        Nothing is sent unless ``info`` carries a truthy ``file_id``.
        """
        if 'file_id' in info and info['file_id']:
            api = site['api']

            appid = site['appid']
            appsecret = site['appsecret']
            timestamp = Demeter.time()
            nonce = Demeter.hash()
            file = info['key']
            file_id = info['file_id']
            uid = info['uid']

            param = self.signature(site['id'], appid, appsecret, timestamp, nonce, file, file_id, uid)

            param['url'] = 'main/view'
            param['img'] = info['url'] + '.jpg'
            param['page'] = info['page']
            param['ext'] = info['ext']
            param['file_size'] = info['file_size']
            param['status'] = info['extract_status']

            Demeter.curl(api, param, 'post')

    def pic(self, info):
        """Shrink the converted images and generate the preview pictures.

        Every file in ``info['path']`` whose name contains ``jpg`` or ``png``
        is limited to 800px width. Then pages "0,1" of ``info['extract']``
        are rendered to JPEG; the first is stored as ``<html>.jpg`` and later
        ones as ``<html>.jpg_<n>.jpg``, each limited to the same width.
        """
        import shutil
        from extract2jpg import extract2jpg
        width = 800

        # Scale the images produced by the conversion.
        files = File.getFiles(info['path'])
        if files:
            for file in files:
                if 'jpg' in file or 'png' in file:
                    self._shrink(info['path'] + file, width)

        # Render the preview pictures.
        dest = info['html'] + '.photo'
        result = extract2jpg.extract_extract2jpg(info['extract'], dest, pages="0,1")

        for index, source in enumerate(result[0]['output_jpgfiles']):
            if index == 0:
                dest = info['html'] + '.jpg'
            else:
                dest = info['html'] + '.jpg_' + str(index) + '.jpg'
            # Move without going through a shell: safe for paths containing
            # spaces or shell metacharacters, and finished before the resize.
            shutil.move(source, dest)
            self._shrink(dest, width)

    def _shrink(self, filename, width):
        """Resample the image at ``filename`` in place to at most ``width``."""
        from wand.image import Image
        with Image(filename=filename) as img:
            target_width, target_height = self.getSize(width, img.width, img.height)
            img.sample(target_width, target_height)
            img.save(filename=filename)

    def getSize(self, target_width, img_width, img_height):
        """Return the ``(width, height)`` an image should be resized to.

        Images wider than ``target_width`` are scaled down proportionally
        (height truncated to an int); other images keep their own size.
        """
        if img_width > target_width:
            ratio = target_width / img_width
            return target_width, int(ratio * img_height)
        return img_width, img_height

    def string_switch(self, x, y, z, s=1):
        """Replace ``y`` with ``z`` inside the UTF-8 text file ``x``.

        Args:
            x: Path of the file, rewritten in place.
            y: Text to look for.
            z: Replacement text.
            s: ``1`` (default) rewrites only the first line containing ``y``;
                ``'g'`` rewrites every such line. Any other value leaves the
                content unchanged (the original emptied the file in that
                case).
        """
        with open(x, "r", encoding="utf-8") as f:
            lines = f.readlines()

        if s == 1:
            for index, line in enumerate(lines):
                if y in line:
                    lines[index] = line.replace(y, z)
                    break
        elif s == 'g':
            lines = [line.replace(y, z) if y in line else line for line in lines]

        # Opened for writing only after the new content is ready, so an
        # unsupported mode can no longer truncate the file.
        with open(x, "w", encoding="utf-8") as f_w:
            f_w.writelines(lines)