snowboydecoder.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
  1. # -*- coding: utf-8 -*-
  2. from __future__ import division
  3. from .__load__ import *
  4. import collections
  5. import pyaudio
  6. from . import snowboydetect
  7. from ctypes import CFUNCTYPE, c_char_p, c_int, cdll
  8. from contextlib import contextmanager
  9. logger = Log.init('snowboy')
  10. TOP_DIR = os.path.dirname(os.path.abspath(__file__))
  11. RESOURCE_FILE = os.path.join(TOP_DIR, "resources/common.res")
  12. DETECT_DING = os.path.join(TOP_DIR, "resources/ding.wav")
  13. DETECT_DONG = os.path.join(TOP_DIR, "resources/dong.wav")
  14. def py_error_handler(filename, line, function, err, fmt):
  15. pass
  16. ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int, c_char_p, c_int, c_char_p)
  17. c_error_handler = ERROR_HANDLER_FUNC(py_error_handler)
  18. @contextmanager
  19. def no_alsa_error():
  20. try:
  21. asound = cdll.LoadLibrary('libasound.so')
  22. asound.snd_lib_error_set_handler(c_error_handler)
  23. yield
  24. asound.snd_lib_error_set_handler(None)
  25. except:
  26. yield
  27. pass
  28. class RingBuffer(object):
  29. """Ring buffer to hold audio from PortAudio"""
  30. def __init__(self, size=4096):
  31. self._buf = collections.deque(maxlen=size)
  32. def extend(self, data):
  33. """Adds data to the end of buffer"""
  34. self._buf.extend(data)
  35. def get(self):
  36. """Retrieves data from the beginning of buffer and clears it"""
  37. tmp = bytes(bytearray(self._buf))
  38. self._buf.clear()
  39. return tmp
  40. def play_audio_file(fname=DETECT_DING):
  41. """Simple callback function to play a wave file. By default it plays
  42. a Ding sound.
  43. :param str fname: wave file name
  44. :return: None
  45. """
  46. ding_wav = wave.open(fname, 'rb')
  47. ding_data = ding_wav.readframes(ding_wav.getnframes())
  48. with no_alsa_error():
  49. audio = pyaudio.PyAudio()
  50. stream_out = audio.open(
  51. format=audio.get_format_from_width(ding_wav.getsampwidth()),
  52. channels=ding_wav.getnchannels(),
  53. rate=ding_wav.getframerate(), input=False, output=True)
  54. stream_out.start_stream()
  55. stream_out.write(ding_data)
  56. time.sleep(0.2)
  57. stream_out.stop_stream()
  58. stream_out.close()
  59. audio.terminate()
  60. class ActiveListener(object):
  61. """ Active Listening with VAD """
  62. def __init__(self, decoder_model,
  63. resource=RESOURCE_FILE,
  64. outname = 'output',
  65. temp = ''):
  66. logger.debug("activeListen __init__()")
  67. self.recordedData = []
  68. model_str = ",".join(decoder_model)
  69. self.detector = snowboydetect.SnowboyDetect(
  70. resource_filename=resource.encode(), model_str=model_str.encode())
  71. self.ring_buffer = RingBuffer(
  72. self.detector.NumChannels() * self.detector.SampleRate() * 5)
  73. def listen(self, interrupt_check=lambda: False, sleep_time=0.03, silent_count_threshold=15, recording_timeout=100):
  74. """
  75. :param interrupt_check: a function that returns True if the main loop
  76. needs to stop.
  77. :param silent_count_threshold: indicates how long silence must be heard
  78. to mark the end of a phrase that is
  79. being recorded.
  80. :param float sleep_time: how much time in second every loop waits.
  81. :param recording_timeout: limits the maximum length of a recording.
  82. :return: recorded file path
  83. """
  84. logger.debug("activeListen listen()")
  85. self._running = True
  86. def audio_callback(in_data, frame_count, time_info, status):
  87. self.ring_buffer.extend(in_data)
  88. play_data = chr(0) * len(in_data)
  89. return play_data, pyaudio.paContinue
  90. with no_alsa_error():
  91. self.audio = pyaudio.PyAudio()
  92. logger.debug('opening audio stream')
  93. try:
  94. self.stream_in = self.audio.open(
  95. input=True, output=False,
  96. format=self.audio.get_format_from_width(
  97. self.detector.BitsPerSample() / 8),
  98. channels=self.detector.NumChannels(),
  99. rate=self.detector.SampleRate(),
  100. frames_per_buffer=2048,
  101. stream_callback=audio_callback)
  102. except Exception as e:
  103. logger.critical(e)
  104. return
  105. logger.debug('audio stream opened')
  106. if interrupt_check():
  107. logger.debug("detect voice return")
  108. return
  109. silentCount = 0
  110. recordingCount = 0
  111. logger.debug("begin activeListen loop")
  112. while self._running is True:
  113. if interrupt_check():
  114. logger.debug("detect voice break")
  115. break
  116. data = self.ring_buffer.get()
  117. if len(data) == 0:
  118. time.sleep(sleep_time)
  119. continue
  120. status = self.detector.RunDetection(data)
  121. if status == -1:
  122. logger.warning("Error initializing streams or reading audio data")
  123. stopRecording = False
  124. if recordingCount > recording_timeout:
  125. stopRecording = True
  126. elif status == -2: #silence found
  127. if silentCount > silent_count_threshold:
  128. stopRecording = True
  129. else:
  130. silentCount = silentCount + 1
  131. elif status == 0: #voice found
  132. silentCount = 0
  133. if stopRecording == True:
  134. return self.saveMessage()
  135. recordingCount = recordingCount + 1
  136. self.recordedData.append(data)
  137. logger.debug("finished.")
  138. def saveMessage(self):
  139. """
  140. Save the message stored in self.recordedData to a timestamped file.
  141. """
  142. filename = os.path.join(self.temp, self.outname + str(int(time.time())) + '.wav')
  143. data = b''.join(self.recordedData)
  144. #use wave to save data
  145. wf = wave.open(filename, 'wb')
  146. wf.setnchannels(self.detector.NumChannels())
  147. wf.setsampwidth(self.audio.get_sample_size(
  148. self.audio.get_format_from_width(self.detector.BitsPerSample() / 8)))
  149. wf.setframerate(self.detector.SampleRate())
  150. wf.writeframes(data)
  151. wf.close()
  152. logger.debug("finished saving: " + filename)
  153. self.stream_in.stop_stream()
  154. self.stream_in.close()
  155. self.audio.terminate()
  156. return filename
  157. class HotwordDetector(object):
  158. """
  159. Snowboy decoder to detect whether a keyword specified by `decoder_model`
  160. exists in a microphone input stream.
  161. :param decoder_model: decoder model file path, a string or a list of strings
  162. :param resource: resource file path.
  163. :param sensitivity: decoder sensitivity, a float of a list of floats.
  164. The bigger the value, the more senstive the
  165. decoder. If an empty list is provided, then the
  166. default sensitivity in the model will be used.
  167. :param audio_gain: multiply input volume by this factor.
  168. :param apply_frontend: applies the frontend processing algorithm if True.
  169. """
  170. def __init__(self, decoder_model,
  171. resource=RESOURCE_FILE,
  172. sensitivity=[],
  173. audio_gain=1,
  174. apply_frontend=False,
  175. outname='output',
  176. temp=''):
  177. self._running = False
  178. tm = type(decoder_model)
  179. ts = type(sensitivity)
  180. if tm is not list:
  181. decoder_model = [decoder_model]
  182. if ts is not list:
  183. sensitivity = [sensitivity]
  184. model_str = ",".join(decoder_model)
  185. self.detector = snowboydetect.SnowboyDetect(
  186. resource_filename=resource.encode(), model_str=model_str.encode())
  187. self.detector.SetAudioGain(audio_gain)
  188. self.detector.ApplyFrontend(apply_frontend)
  189. self.num_hotwords = self.detector.NumHotwords()
  190. if len(decoder_model) > 1 and len(sensitivity) == 1:
  191. sensitivity = sensitivity * self.num_hotwords
  192. if len(sensitivity) != 0:
  193. assert self.num_hotwords == len(sensitivity), \
  194. "number of hotwords in decoder_model (%d) and sensitivity " \
  195. "(%d) does not match" % (self.num_hotwords, len(sensitivity))
  196. sensitivity_str = ",".join([str(t) for t in sensitivity])
  197. if len(sensitivity) != 0:
  198. self.detector.SetSensitivity(sensitivity_str.encode())
  199. self.ring_buffer = RingBuffer(
  200. self.detector.NumChannels() * self.detector.SampleRate() * 5)
  201. def start(self, detected_callback=play_audio_file,
  202. interrupt_check=lambda: False,
  203. sleep_time=0.03,
  204. audio_recorder_callback=None,
  205. silent_count_threshold=15,
  206. recording_timeout=100):
  207. """
  208. Start the voice detector. For every `sleep_time` second it checks the
  209. audio buffer for triggering keywords. If detected, then call
  210. corresponding function in `detected_callback`, which can be a single
  211. function (single model) or a list of callback functions (multiple
  212. models). Every loop it also calls `interrupt_check` -- if it returns
  213. True, then breaks from the loop and return.
  214. :param detected_callback: a function or list of functions. The number of
  215. items must match the number of models in
  216. `decoder_model`.
  217. :param interrupt_check: a function that returns True if the main loop
  218. needs to stop.
  219. :param float sleep_time: how much time in second every loop waits.
  220. :param audio_recorder_callback: if specified, this will be called after
  221. a keyword has been spoken and after the
  222. phrase immediately after the keyword has
  223. been recorded. The function will be
  224. passed the name of the file where the
  225. phrase was recorded.
  226. :param silent_count_threshold: indicates how long silence must be heard
  227. to mark the end of a phrase that is
  228. being recorded.
  229. :param recording_timeout: limits the maximum length of a recording.
  230. :return: None
  231. """
  232. self._running = True
  233. def audio_callback(in_data, frame_count, time_info, status):
  234. self.ring_buffer.extend(in_data)
  235. play_data = chr(0) * len(in_data)
  236. return play_data, pyaudio.paContinue
  237. with no_alsa_error():
  238. self.audio = pyaudio.PyAudio()
  239. self.stream_in = self.audio.open(
  240. input=True, output=False,
  241. format=self.audio.get_format_from_width(
  242. self.detector.BitsPerSample() / 8),
  243. channels=self.detector.NumChannels(),
  244. rate=self.detector.SampleRate(),
  245. frames_per_buffer=2048,
  246. stream_callback=audio_callback)
  247. if interrupt_check():
  248. logger.debug("detect voice return")
  249. return
  250. tc = type(detected_callback)
  251. if tc is not list:
  252. detected_callback = [detected_callback]
  253. if len(detected_callback) == 1 and self.num_hotwords > 1:
  254. detected_callback *= self.num_hotwords
  255. assert self.num_hotwords == len(detected_callback), \
  256. "Error: hotwords in your models (%d) do not match the number of " \
  257. "callbacks (%d)" % (self.num_hotwords, len(detected_callback))
  258. logger.debug("detecting...")
  259. state = "PASSIVE"
  260. while self._running is True:
  261. if interrupt_check():
  262. logger.debug("detect voice break")
  263. break
  264. data = self.ring_buffer.get()
  265. if len(data) == 0:
  266. time.sleep(sleep_time)
  267. continue
  268. status = self.detector.RunDetection(data)
  269. if status == -1:
  270. logger.warning("Error initializing streams or reading audio data")
  271. #small state machine to handle recording of phrase after keyword
  272. if state == "PASSIVE":
  273. if status > 0: #key word found
  274. self.recordedData = []
  275. self.recordedData.append(data)
  276. silentCount = 0
  277. recordingCount = 0
  278. message = "Keyword " + str(status) + " detected at time: "
  279. message += time.strftime("%Y-%m-%d %H:%M:%S",
  280. time.localtime(time.time()))
  281. logger.info(message)
  282. callback = detected_callback[status-1]
  283. if callback is not None:
  284. callback()
  285. if audio_recorder_callback is not None and status == 1 and utils.is_proper_time():
  286. state = "ACTIVE"
  287. continue
  288. elif state == "ACTIVE":
  289. stopRecording = False
  290. if recordingCount > recording_timeout:
  291. stopRecording = True
  292. elif status == -2: #silence found
  293. if silentCount > silent_count_threshold:
  294. stopRecording = True
  295. else:
  296. silentCount = silentCount + 1
  297. elif status == 0: #voice found
  298. silentCount = 0
  299. if stopRecording == True:
  300. fname = self.saveMessage()
  301. audio_recorder_callback(fname)
  302. state = "PASSIVE"
  303. continue
  304. recordingCount = recordingCount + 1
  305. self.recordedData.append(data)
  306. logger.debug("finished.")
  307. def saveMessage(self):
  308. """
  309. Save the message stored in self.recordedData to a timestamped file.
  310. """
  311. filename = os.path.join(self.temp, self.outname + str(int(time.time())) + '.wav')
  312. data = b''.join(self.recordedData)
  313. #use wave to save data
  314. wf = wave.open(filename, 'wb')
  315. wf.setnchannels(self.detector.NumChannels())
  316. wf.setsampwidth(self.audio.get_sample_size(
  317. self.audio.get_format_from_width(
  318. self.detector.BitsPerSample() / 8)))
  319. wf.setframerate(self.detector.SampleRate())
  320. wf.writeframes(data)
  321. wf.close()
  322. logger.debug("finished saving: " + filename)
  323. return filename
  324. def terminate(self):
  325. """
  326. Terminate audio stream. Users can call start() again to detect.
  327. :return: None
  328. """
  329. if self._running:
  330. self.stream_in.stop_stream()
  331. self.stream_in.close()
  332. self.audio.terminate()
  333. self._running = False