123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125 |
- # -*- coding: utf-8 -*-
- from demeter.core import *
# Manual test for PDF conversion. Usage: python convert.py -f file.doc
param = {'file': 'f'}
Demeter.getopt(param)

# The parsed option is currently not used (lookup left commented out below);
# a fixed sample document and audio URL are used instead.
#file = Demeter.option['file']
file = '/data/dm/container/web/diviner/data/test.pdf'
audio = 'http://s.fxjk.vip/f4/af8bf6ebcf837c717f2aeabe6fd618.mp3'

# PDF extraction.
# Option 1 (disabled): call the extract service directly.
#result = Demeter.service('loader', 'extract').get(file, {'aud1io':audio}).json()
# Option 2 (active): use the generic loader entry point -- extract synchronously
# and record what has been extracted, so the extracted content can be used
# directly next time.
loader = Demeter.service('loader')
result = loader.get(
    obj='parser',
    module='extract',
    sync=True,
    site_id=1,
    uid=1,
    source_id=1,
    source=file,
    audio=audio,
    method='json',
)
print(result)
# Disabled reference sample: Aliyun recording-file transcription ("nls-filetrans").
# It is kept inside a bare string literal so that none of it executes when this
# script runs. To use it, move the code out of the string.
#
# SECURITY: credentials must never be written into this file. The sample reads
# them from the environment; any access key that was ever committed here should
# be treated as compromised and rotated.
'''
import json
import os
import time
from aliyunsdkcore.acs_exception.exceptions import ClientException
from aliyunsdkcore.acs_exception.exceptions import ServerException
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.request import CommonRequest


def fileTrans(akId, akSecret, appKey, fileLink):
    """Submit an audio file for transcription and poll until the task ends.

    Args:
        akId: Aliyun AccessKey ID.
        akSecret: Aliyun AccessKey secret.
        appKey: App key of the speech project.
        fileLink: URL of the audio file to transcribe.

    Returns:
        The "Result" field of the final response when the task status is
        SUCCESS; None when the submit request fails or the task ends in any
        other status.
    """
    # Region ID, fixed value.
    REGION_ID = "cn-beijing"
    PRODUCT = "nls-filetrans"
    DOMAIN = "filetrans.cn-beijing.aliyuncs.com"
    API_VERSION = "2018-08-17"
    POST_REQUEST_ACTION = "SubmitTask"
    GET_REQUEST_ACTION = "GetTaskResult"
    # Request parameters.
    KEY_APP_KEY = "appkey"
    KEY_FILE_LINK = "file_link"
    KEY_VERSION = "version"
    KEY_ENABLE_WORDS = "enable_words"
    # Whether to enable automatic track splitting.
    KEY_AUTO_SPLIT = "auto_split"
    # Response parameters.
    KEY_TASK = "Task"
    KEY_TASK_ID = "TaskId"
    KEY_STATUS_TEXT = "StatusText"
    KEY_RESULT = "Result"
    # Status values.
    STATUS_SUCCESS = "SUCCESS"
    STATUS_RUNNING = "RUNNING"
    STATUS_QUEUEING = "QUEUEING"
    # Seconds to wait between two polls of the task result.
    POLL_INTERVAL = 10

    # Create the AcsClient instance.
    client = AcsClient(akId, akSecret, REGION_ID)

    # Build the transcription submit request.
    postRequest = CommonRequest()
    postRequest.set_domain(DOMAIN)
    postRequest.set_version(API_VERSION)
    postRequest.set_product(PRODUCT)
    postRequest.set_action_name(POST_REQUEST_ACTION)
    postRequest.set_method('POST')
    # New integrations should use version 4.0; existing integrations (default
    # 2.0) that want to keep their current behaviour should drop this parameter.
    # enable_words controls word-level output; it defaults to false and
    # requires version 4.0 when switched on.
    task = {KEY_APP_KEY: appKey, KEY_FILE_LINK: fileLink, KEY_VERSION: "4.0", KEY_ENABLE_WORDS: False}
    # To enable automatic track splitting, set KEY_AUTO_SPLIT to True in the task:
    # task = {KEY_APP_KEY: appKey, KEY_FILE_LINK: fileLink, KEY_VERSION: "4.0", KEY_ENABLE_WORDS: False, KEY_AUTO_SPLIT: True}
    task = json.dumps(task)
    print(task)
    postRequest.add_body_params(KEY_TASK, task)

    try:
        postResponse = json.loads(client.do_action_with_exception(postRequest))
    except (ServerException, ClientException) as e:
        # Without a task id there is nothing to poll, so stop here instead of
        # querying the service with an empty TaskId.
        print(e)
        return None
    print(postResponse)
    if postResponse[KEY_STATUS_TEXT] != STATUS_SUCCESS:
        print("录音文件识别请求失败!")
        return None
    print("录音文件识别请求成功响应!")
    taskId = postResponse[KEY_TASK_ID]

    # Build the result query request for this task id.
    getRequest = CommonRequest()
    getRequest.set_domain(DOMAIN)
    getRequest.set_version(API_VERSION)
    getRequest.set_product(PRODUCT)
    getRequest.set_action_name(GET_REQUEST_ACTION)
    getRequest.set_method('GET')
    getRequest.add_query_param(KEY_TASK_ID, taskId)

    # Poll for the result until the returned status is no longer RUNNING or
    # QUEUEING (i.e. "SUCCESS", "SUCCESS_WITH_NO_VALID_FRAGMENT" or an error
    # description).
    while True:
        try:
            getResponse = json.loads(client.do_action_with_exception(getRequest))
        except (ServerException, ClientException) as e:
            # Back off before retrying so a failing endpoint is not hammered
            # in a tight loop.
            print(e)
            time.sleep(POLL_INTERVAL)
            continue
        print(getResponse)
        statusText = getResponse[KEY_STATUS_TEXT]
        if statusText not in (STATUS_RUNNING, STATUS_QUEUEING):
            break
        time.sleep(POLL_INTERVAL)

    if statusText == STATUS_SUCCESS:
        print("录音文件识别成功!")
        return getResponse.get(KEY_RESULT)
    print("录音文件识别失败!")
    return None


# Credentials come from the environment; never hard-code them here.
accessKeyId = os.environ['ALIYUN_AK_ID']
accessKeySecret = os.environ['ALIYUN_AK_SECRET']
appKey = os.environ['NLS_APP_KEY']
fileLink = "http://s.fxjk.vip/f4/af8bf6ebcf837c717f2aeabe6fd618.mp3"
# Run the recording-file transcription.
fileTrans(accessKeyId, accessKeySecret, appKey, fileLink)
'''
|