# snowboydecoder.py
  1. # -*- coding: utf-8 -*-
  2. from __future__ import division
  3. from ..__load__ import *
  4. import collections
  5. import pyaudio
  6. from . import snowboydetect
  7. from ctypes import CFUNCTYPE, c_char_p, c_int, cdll
  8. from contextlib import contextmanager
  9. logger = Log.init('snowboy')
  10. TOP_DIR = os.path.dirname(os.path.abspath(__file__))
  11. RESOURCE_FILE = os.path.join(TOP_DIR, "resources/common.res")
  12. DETECT_DING = os.path.join(TOP_DIR, "resources/ding.wav")
  13. DETECT_DONG = os.path.join(TOP_DIR, "resources/dong.wav")
  14. def py_error_handler(filename, line, function, err, fmt):
  15. pass
  16. ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int, c_char_p, c_int, c_char_p)
  17. c_error_handler = ERROR_HANDLER_FUNC(py_error_handler)
  18. @contextmanager
  19. def no_alsa_error():
  20. try:
  21. asound = cdll.LoadLibrary('libasound.so')
  22. asound.snd_lib_error_set_handler(c_error_handler)
  23. yield
  24. asound.snd_lib_error_set_handler(None)
  25. except:
  26. yield
  27. pass
  28. class RingBuffer(object):
  29. """Ring buffer to hold audio from PortAudio"""
  30. def __init__(self, size=4096):
  31. self._buf = collections.deque(maxlen=size)
  32. def extend(self, data):
  33. """Adds data to the end of buffer"""
  34. self._buf.extend(data)
  35. def get(self):
  36. """Retrieves data from the beginning of buffer and clears it"""
  37. tmp = bytes(bytearray(self._buf))
  38. self._buf.clear()
  39. return tmp
  40. def play_audio_file(fname=DETECT_DING):
  41. """Simple callback function to play a wave file. By default it plays
  42. a Ding sound.
  43. :param str fname: wave file name
  44. :return: None
  45. """
  46. ding_wav = wave.open(fname, 'rb')
  47. ding_data = ding_wav.readframes(ding_wav.getnframes())
  48. with no_alsa_error():
  49. audio = pyaudio.PyAudio()
  50. stream_out = audio.open(
  51. format=audio.get_format_from_width(ding_wav.getsampwidth()),
  52. channels=ding_wav.getnchannels(),
  53. rate=ding_wav.getframerate(), input=False, output=True)
  54. stream_out.start_stream()
  55. stream_out.write(ding_data)
  56. time.sleep(0.2)
  57. stream_out.stop_stream()
  58. stream_out.close()
  59. audio.terminate()
  60. class ActiveListener(object):
  61. """ Active Listening with VAD """
  62. def __init__(self, decoder_model,
  63. resource=RESOURCE_FILE,
  64. outname = 'output',
  65. temp = ''):
  66. self.outname = outname
  67. self.temp = temp
  68. logger.debug("activeListen __init__()")
  69. self.recordedData = []
  70. model_str = ",".join(decoder_model)
  71. self.detector = snowboydetect.SnowboyDetect(
  72. resource_filename=resource.encode(), model_str=model_str.encode())
  73. self.ring_buffer = RingBuffer(
  74. self.detector.NumChannels() * self.detector.SampleRate() * 5)
  75. def listen(self, interrupt_check=lambda: False, sleep_time=0.03, silent_count_threshold=15, recording_timeout=100):
  76. """
  77. :param interrupt_check: a function that returns True if the main loop
  78. needs to stop.
  79. :param silent_count_threshold: indicates how long silence must be heard
  80. to mark the end of a phrase that is
  81. being recorded.
  82. :param float sleep_time: how much time in second every loop waits.
  83. :param recording_timeout: limits the maximum length of a recording.
  84. :return: recorded file path
  85. """
  86. logger.debug("activeListen listen()")
  87. self._running = True
  88. def audio_callback(in_data, frame_count, time_info, status):
  89. self.ring_buffer.extend(in_data)
  90. play_data = chr(0) * len(in_data)
  91. return play_data, pyaudio.paContinue
  92. with no_alsa_error():
  93. self.audio = pyaudio.PyAudio()
  94. logger.debug('opening audio stream')
  95. try:
  96. self.stream_in = self.audio.open(
  97. input=True, output=False,
  98. format=self.audio.get_format_from_width(
  99. self.detector.BitsPerSample() / 8),
  100. channels=self.detector.NumChannels(),
  101. rate=self.detector.SampleRate(),
  102. frames_per_buffer=2048,
  103. stream_callback=audio_callback)
  104. except Exception as e:
  105. logger.critical(e)
  106. return
  107. logger.debug('audio stream opened')
  108. if interrupt_check():
  109. logger.debug("detect voice return")
  110. return
  111. silentCount = 0
  112. recordingCount = 0
  113. logger.debug("begin activeListen loop")
  114. while self._running is True:
  115. if interrupt_check():
  116. logger.debug("detect voice break")
  117. break
  118. data = self.ring_buffer.get()
  119. if len(data) == 0:
  120. time.sleep(sleep_time)
  121. continue
  122. status = self.detector.RunDetection(data)
  123. if status == -1:
  124. logger.warning("Error initializing streams or reading audio data")
  125. stopRecording = False
  126. if recordingCount > recording_timeout:
  127. stopRecording = True
  128. elif status == -2: #silence found
  129. if silentCount > silent_count_threshold:
  130. stopRecording = True
  131. else:
  132. silentCount = silentCount + 1
  133. elif status == 0: #voice found
  134. silentCount = 0
  135. if stopRecording == True:
  136. return self.saveMessage()
  137. recordingCount = recordingCount + 1
  138. self.recordedData.append(data)
  139. logger.debug("finished.")
  140. def saveMessage(self):
  141. """
  142. Save the message stored in self.recordedData to a timestamped file.
  143. """
  144. filename = os.path.join(self.temp, self.outname + str(int(time.time())) + '.wav')
  145. data = b''.join(self.recordedData)
  146. #use wave to save data
  147. wf = wave.open(filename, 'wb')
  148. wf.setnchannels(self.detector.NumChannels())
  149. wf.setsampwidth(self.audio.get_sample_size(
  150. self.audio.get_format_from_width(self.detector.BitsPerSample() / 8)))
  151. wf.setframerate(self.detector.SampleRate())
  152. wf.writeframes(data)
  153. wf.close()
  154. logger.debug("finished saving: " + filename)
  155. self.stream_in.stop_stream()
  156. self.stream_in.close()
  157. self.audio.terminate()
  158. return filename
  159. class HotwordDetector(object):
  160. """
  161. Snowboy decoder to detect whether a keyword specified by `decoder_model`
  162. exists in a microphone input stream.
  163. :param decoder_model: decoder model file path, a string or a list of strings
  164. :param resource: resource file path.
  165. :param sensitivity: decoder sensitivity, a float of a list of floats.
  166. The bigger the value, the more senstive the
  167. decoder. If an empty list is provided, then the
  168. default sensitivity in the model will be used.
  169. :param audio_gain: multiply input volume by this factor.
  170. :param apply_frontend: applies the frontend processing algorithm if True.
  171. """
  172. def __init__(self, decoder_model,
  173. resource=RESOURCE_FILE,
  174. sensitivity=[],
  175. audio_gain=1,
  176. apply_frontend=False,
  177. outname='output',
  178. temp='',
  179. robot=False
  180. ):
  181. self._running = False
  182. self.robot = robot
  183. self.outname = outname
  184. self.temp = temp
  185. tm = type(decoder_model)
  186. ts = type(sensitivity)
  187. if tm is not list:
  188. decoder_model = [decoder_model]
  189. if ts is not list:
  190. sensitivity = [sensitivity]
  191. model_str = ",".join(decoder_model)
  192. self.detector = snowboydetect.SnowboyDetect(
  193. resource_filename=resource.encode(), model_str=model_str.encode())
  194. self.detector.SetAudioGain(audio_gain)
  195. self.detector.ApplyFrontend(apply_frontend)
  196. self.num_hotwords = self.detector.NumHotwords()
  197. if len(decoder_model) > 1 and len(sensitivity) == 1:
  198. sensitivity = sensitivity * self.num_hotwords
  199. if len(sensitivity) != 0:
  200. assert self.num_hotwords == len(sensitivity), \
  201. "number of hotwords in decoder_model (%d) and sensitivity " \
  202. "(%d) does not match" % (self.num_hotwords, len(sensitivity))
  203. sensitivity_str = ",".join([str(t) for t in sensitivity])
  204. if len(sensitivity) != 0:
  205. self.detector.SetSensitivity(sensitivity_str.encode())
  206. self.ring_buffer = RingBuffer(
  207. self.detector.NumChannels() * self.detector.SampleRate() * 5)
  208. def start(self, detected_callback=play_audio_file,
  209. interrupt_check=lambda: False,
  210. sleep_time=0.03,
  211. audio_recorder_callback=None,
  212. silent_count_threshold=15,
  213. recording_timeout=100):
  214. """
  215. Start the voice detector. For every `sleep_time` second it checks the
  216. audio buffer for triggering keywords. If detected, then call
  217. corresponding function in `detected_callback`, which can be a single
  218. function (single model) or a list of callback functions (multiple
  219. models). Every loop it also calls `interrupt_check` -- if it returns
  220. True, then breaks from the loop and return.
  221. :param detected_callback: a function or list of functions. The number of
  222. items must match the number of models in
  223. `decoder_model`.
  224. :param interrupt_check: a function that returns True if the main loop
  225. needs to stop.
  226. :param float sleep_time: how much time in second every loop waits.
  227. :param audio_recorder_callback: if specified, this will be called after
  228. a keyword has been spoken and after the
  229. phrase immediately after the keyword has
  230. been recorded. The function will be
  231. passed the name of the file where the
  232. phrase was recorded.
  233. :param silent_count_threshold: indicates how long silence must be heard
  234. to mark the end of a phrase that is
  235. being recorded.
  236. :param recording_timeout: limits the maximum length of a recording.
  237. :return: None
  238. """
  239. self._running = True
  240. def audio_callback(in_data, frame_count, time_info, status):
  241. self.ring_buffer.extend(in_data)
  242. play_data = chr(0) * len(in_data)
  243. return play_data, pyaudio.paContinue
  244. with no_alsa_error():
  245. self.audio = pyaudio.PyAudio()
  246. self.stream_in = self.audio.open(
  247. input=True, output=False,
  248. format=self.audio.get_format_from_width(
  249. self.detector.BitsPerSample() / 8),
  250. channels=self.detector.NumChannels(),
  251. rate=self.detector.SampleRate(),
  252. frames_per_buffer=2048,
  253. stream_callback=audio_callback)
  254. if interrupt_check():
  255. logger.debug("detect voice return")
  256. return
  257. tc = type(detected_callback)
  258. if tc is not list:
  259. detected_callback = [detected_callback]
  260. if len(detected_callback) == 1 and self.num_hotwords > 1:
  261. detected_callback *= self.num_hotwords
  262. assert self.num_hotwords == len(detected_callback), \
  263. "Error: hotwords in your models (%d) do not match the number of " \
  264. "callbacks (%d)" % (self.num_hotwords, len(detected_callback))
  265. logger.debug("detecting...")
  266. state = "PASSIVE"
  267. while self._running is True:
  268. if interrupt_check():
  269. logger.debug("detect voice break")
  270. break
  271. data = self.ring_buffer.get()
  272. if len(data) == 0:
  273. time.sleep(sleep_time)
  274. continue
  275. status = self.detector.RunDetection(data)
  276. if status == -1:
  277. logger.warning("Error initializing streams or reading audio data")
  278. #small state machine to handle recording of phrase after keyword
  279. if state == "PASSIVE":
  280. if status > 0: #key word found
  281. self.recordedData = []
  282. self.recordedData.append(data)
  283. silentCount = 0
  284. recordingCount = 0
  285. message = "Keyword " + str(status) + " detected at time: "
  286. message += time.strftime("%Y-%m-%d %H:%M:%S",
  287. time.localtime(time.time()))
  288. logger.info(message)
  289. callback = detected_callback[status-1]
  290. if callback is not None:
  291. callback()
  292. if audio_recorder_callback is not None and status == 1 and self.robot.ear.isProperTime():
  293. state = "ACTIVE"
  294. continue
  295. elif state == "ACTIVE":
  296. stopRecording = False
  297. if recordingCount > recording_timeout:
  298. stopRecording = True
  299. elif status == -2: #silence found
  300. if silentCount > silent_count_threshold:
  301. stopRecording = True
  302. else:
  303. silentCount = silentCount + 1
  304. elif status == 0: #voice found
  305. silentCount = 0
  306. if stopRecording == True:
  307. fname = self.saveMessage()
  308. audio_recorder_callback(fname)
  309. state = "PASSIVE"
  310. continue
  311. recordingCount = recordingCount + 1
  312. self.recordedData.append(data)
  313. logger.debug("finished.")
  314. def saveMessage(self):
  315. """
  316. Save the message stored in self.recordedData to a timestamped file.
  317. """
  318. filename = os.path.join(self.temp, self.outname + str(int(time.time())) + '.wav')
  319. data = b''.join(self.recordedData)
  320. #use wave to save data
  321. wf = wave.open(filename, 'wb')
  322. wf.setnchannels(self.detector.NumChannels())
  323. wf.setsampwidth(self.audio.get_sample_size(
  324. self.audio.get_format_from_width(
  325. self.detector.BitsPerSample() / 8)))
  326. wf.setframerate(self.detector.SampleRate())
  327. wf.writeframes(data)
  328. wf.close()
  329. logger.debug("finished saving: " + filename)
  330. return filename
  331. def terminate(self):
  332. """
  333. Terminate audio stream. Users can call start() again to detect.
  334. :return: None
  335. """
  336. if self._running:
  337. self.stream_in.stop_stream()
  338. self.stream_in.close()
  339. self.audio.terminate()
  340. self._running = False