Add speechtotext script

2022-10-17 10:36:48 +01:00 · 2022-10-17 10:36:48 +01:00 · 0cb068107b
parent 962a44f6e9
commit 0cb068107b
1 changed files with 74 additions and 0 deletions
--- a/test_microphone.py
+++ b/test_microphone.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+
+import json
+import os
+import sys
+import asyncio
+import websockets
+import logging
+import sounddevice as sd
+import argparse
+
+def int_or_str(text):
+    """Return *text* as an ``int`` when it parses as one, else unchanged.
+
+    Intended as an argparse ``type=`` converter, so an option can be given
+    either as a number or as an arbitrary string.
+    """
+    try:
+        converted = int(text)
+    except ValueError:
+        # Not an integer literal: hand the original value back untouched.
+        converted = text
+    return converted
+
+def callback(indata, frames, time, status):
+    """Hand one captured audio block over to the asyncio side.
+
+    This is called (from a separate thread) for each audio block, so the
+    block is not put on ``audio_queue`` directly: the ``put_nowait`` call is
+    scheduled on the event loop stored in the module global ``loop``.
+
+    Args:
+        indata: Buffer holding the captured audio; copied with ``bytes()``
+            before it is queued.
+        frames: Unused.
+        time: Unused.
+        status: Stream status for this block.  A truthy value is logged as a
+            warning instead of being dropped silently
+            (NOTE(review): per the sounddevice docs this signals conditions
+            such as input overflow -- confirm against the installed version).
+    """
+    if status:
+        logging.warning("audio input status: %s", status)
+    loop.call_soon_threadsafe(audio_queue.put_nowait, bytes(indata))
+
+async def run_test():
+    """Stream microphone audio to the server and print what it recognises.
+
+    Opens a mono, 16-bit raw input stream (4000-frame blocks) on
+    ``args.device`` at ``args.samplerate``, connects to ``args.uri`` and
+    sends the stream's sample rate as a JSON config message.  Then, for every
+    audio block taken from ``audio_queue``, it sends the block, waits for one
+    reply and -- when the reply contains both "text" and "result" -- prints
+    the text followed by the mean of the per-word "conf" values.
+
+    The loop has no natural end: it stops when the task is cancelled (for
+    example by Ctrl-C under ``asyncio.run``) or when a websocket call raises.
+    On cancellation the end-of-stream message is sent and the server's reply
+    is printed before the cancellation is re-raised.
+
+    Relies on the module globals ``args`` and ``audio_queue``.
+    """
+
+    with sd.RawInputStream(samplerate=args.samplerate, blocksize=4000, device=args.device,
+                           dtype='int16', channels=1, callback=callback) as device:
+
+        async with websockets.connect(args.uri) as websocket:
+            await websocket.send('{ "config" : { "sample_rate" : %d } }' % (device.samplerate))
+
+            try:
+                while True:
+                    data = await audio_queue.get()
+                    await websocket.send(data)
+                    result = await websocket.recv()
+                    j = json.loads(result)
+                    if "text" in j and "result" in j:
+                        print(j["text"])
+                        words = j["result"]
+                        # An empty word list has no mean; skip it rather than
+                        # divide by zero.
+                        if words:
+                            # float(), not int(): int() would truncate any
+                            # fractional confidence before averaging
+                            # (NOTE(review): assumes "conf" is a number such
+                            # as a 0..1 score -- confirm with the server).
+                            print(sum(float(word["conf"]) for word in words) / len(words))
+            except asyncio.CancelledError:
+                # Cancellation is the only orderly way out of the loop, so
+                # this is where the end-of-stream handshake has to happen.
+                await websocket.send('{"eof" : 1}')
+                print(await websocket.recv())
+                raise
+
+async def main():
+    """Parse the command line, publish shared state and run the test.
+
+    Sets three module globals that the other functions read: ``args`` (the
+    parsed options), ``loop`` (the running event loop) and ``audio_queue``
+    (an ``asyncio.Queue`` of captured audio blocks).
+    """
+    global args, loop, audio_queue
+
+    # Stage 1: look only for -l/--list-devices so the device list can be
+    # printed without the remaining options being parsed.
+    list_parser = argparse.ArgumentParser(add_help=False)
+    list_parser.add_argument(
+        '-l', '--list-devices', action='store_true',
+        help='show list of audio devices and exit')
+    args, leftover = list_parser.parse_known_args()
+    if args.list_devices:
+        print(sd.query_devices())
+        list_parser.exit(0)
+
+    # Stage 2: the full parser inherits the stage-1 option and parses
+    # whatever stage 1 did not consume.
+    full_parser = argparse.ArgumentParser(
+        description="ASR Server",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        parents=[list_parser])
+    full_parser.add_argument(
+        '-u', '--uri', type=str, metavar='URL',
+        help='Server URL', default='ws://localhost:2700')
+    full_parser.add_argument(
+        '-d', '--device', type=int_or_str,
+        help='input device (numeric ID or substring)')
+    full_parser.add_argument(
+        '-r', '--samplerate', type=int, help='sampling rate', default=16000)
+    args = full_parser.parse_args(leftover)
+
+    # Shared state must exist before run_test() opens the audio stream.
+    loop = asyncio.get_running_loop()
+    audio_queue = asyncio.Queue()
+
+    logging.basicConfig(level=logging.INFO)
+    await run_test()
+
+# Script entry point: run main() on a fresh event loop, but only when this
+# file is executed directly rather than imported.
+if __name__ == "__main__":
+    asyncio.run(main())