speechtoimage/pi/test_microphone.py

#!/usr/bin/env python3
import json
import asyncio
import websockets
import logging
import sounddevice as sd
import argparse
from termcolor import colored
import aiohttp
import aiofiles
import vlc
from time import sleep

def int_or_str(text):
    """Convert a command-line value to ``int`` when it is numeric.

    Used as an argparse ``type=`` callable so an option can be given either
    as a numeric ID or as a free-form string.

    Args:
        text: Raw argument string.

    Returns:
        ``int(text)`` if that conversion succeeds, otherwise ``text`` itself.
    """
    try:
        parsed = int(text)
    except ValueError:
        # Not a number: hand the original string back untouched.
        parsed = text
    return parsed

def callback(indata, frames, time, status):
    """Hand one captured audio block over to the asyncio side.

    This is called (from a separate thread) for each audio block, so the
    bytes are pushed onto ``audio_queue`` through
    ``loop.call_soon_threadsafe`` instead of touching the queue directly.

    Args:
        indata: Buffer with the samples captured for this block.
        frames: Number of frames in the block (unused).
        time: Timing information supplied by the stream (unused).
        status: Status flags supplied by the stream; per the sounddevice
            documentation it is truthy when a problem such as an input
            overflow occurred.
    """
    if status:
        # Previously dropped silently; surface it so lost audio is visible.
        logging.warning("Audio input status: %s", status)
    loop.call_soon_threadsafe(audio_queue.put_nowait, bytes(indata))

# Endpoint of the text-to-image service queried for each recognised phrase.
_IMAGE_SERVER_URL = 'http://192.168.1.95:8000'
# File the downloaded image is written to and that the VLC player displays.
_IMAGE_PATH = 'image.png'


def _confidence_color(conf):
    """Return the terminal colour used to print a word with confidence *conf*."""
    if conf > 0.7:
        return "green"
    if conf > 0.5:
        return "yellow"
    return "red"


def _print_words(words):
    """Print each word coloured by confidence, followed by the mean confidence.

    *words* must be non-empty; the caller filters out empty results.
    """
    total = 0.0
    for word in words:
        conf = float(word["conf"])  # convert once, reuse for sum and colour
        total += conf
        print(colored(word["word"], _confidence_color(conf)), end=" ")
    print(total / len(words))


async def _fetch_image(session, text):
    """Request an image for *text* and store it at ``_IMAGE_PATH``.

    Returns:
        True when the server answered 200 and the file was written,
        False otherwise.
    """
    # ``params`` lets aiohttp percent-encode the text (spaces still become
    # "+"), so characters such as "&" or "#" no longer corrupt the query.
    async with session.get(_IMAGE_SERVER_URL, params={"text": text}) as resp:
        print(resp.status)
        if resp.status != 200:
            return False
        payload = await resp.read()
    # Context manager guarantees the file is closed even if the write fails.
    async with aiofiles.open(_IMAGE_PATH, mode='wb') as f:
        await f.write(payload)
    return True


async def _show_image(player):
    """Restart playback of the player's media, run it for a second, then pause."""
    player.stop()
    player.play()
    # Non-blocking sleep: the event loop keeps servicing the websocket and
    # the audio queue while the image is being shown.
    await asyncio.sleep(1)
    player.pause()


async def run_test():
    """Stream microphone audio to the ASR server and show an image per phrase.

    Reads the module globals ``args`` (``samplerate``, ``device``, ``uri``)
    and ``audio_queue``, which ``main`` sets up before calling this coroutine.
    Each audio block taken from the queue is sent over the websocket and the
    server's JSON reply is inspected. When the reply carries both ``"text"``
    and a non-empty ``"result"`` word list, the words are printed coloured by
    confidence. For phrases of more than two words an image is requested from
    the image server, saved to ``image.png`` and displayed through VLC.

    The loop has no exit condition; the coroutine ends only through an
    exception or cancellation.
    """
    vlc_instance = vlc.Instance()
    player = vlc_instance.media_player_new()
    player.set_media(vlc_instance.media_new(_IMAGE_PATH))

    with sd.RawInputStream(samplerate=args.samplerate, blocksize=4000,
                           device=args.device, dtype='int16',
                           channels=1, callback=callback) as device:

        # A single HTTP session is reused for every image request instead of
        # opening a new one per phrase.
        async with websockets.connect(args.uri) as websocket, \
                aiohttp.ClientSession() as session:
            await websocket.send('{ "config" : { "sample_rate" : %d } }' % (device.samplerate))

            while True:
                data = await audio_queue.get()
                await websocket.send(data)
                j = json.loads(await websocket.recv())

                if "text" not in j or "result" not in j:
                    continue
                words = j["result"]
                if not words:
                    # Nothing to average; avoids a ZeroDivisionError.
                    continue

                _print_words(words)
                if len(words) <= 2:
                    continue

                print("Generating Image")
                if not await _fetch_image(session, j["text"]):
                    # No new image was written, so leave the display alone.
                    continue
                print("Image generated")
                await _show_image(player)

            # NOTE(review): unreachable while the loop above has no exit
            # condition; kept so the EOF handshake is in place once one exists.
            await websocket.send('{"eof" : 1}')
            print(await websocket.recv())

async def main():
    """Parse the command line, publish shared state and start the test.

    Arguments are parsed in two stages. A help-less parser first handles only
    ``--list-devices`` so the device list can be printed and the program can
    exit before the remaining options are parsed. The full parser then
    inherits that option and adds ``--uri``, ``--device`` and
    ``--samplerate``.

    The parsed namespace, the running event loop and a fresh audio queue are
    stored in the module globals ``args``, ``loop`` and ``audio_queue``, which
    ``callback`` and ``run_test`` read.
    """
    global args
    global loop
    global audio_queue

    # Stage 1: only look for -l/--list-devices, ignore everything else.
    listing_parser = argparse.ArgumentParser(add_help=False)
    listing_parser.add_argument(
        '-l', '--list-devices',
        action='store_true',
        help='show list of audio devices and exit',
    )
    args, leftover = listing_parser.parse_known_args()
    if args.list_devices:
        print(sd.query_devices())
        listing_parser.exit(0)

    # Stage 2: full option set, inheriting the stage-1 flag for --help output.
    full_parser = argparse.ArgumentParser(
        description="ASR Server",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        parents=[listing_parser],
    )
    full_parser.add_argument(
        '-u', '--uri',
        type=str,
        metavar='URL',
        help='Server URL',
        default='ws://192.168.1.95:2700',
    )
    full_parser.add_argument(
        '-d', '--device',
        type=int_or_str,
        help='input device (numeric ID or substring)',
    )
    full_parser.add_argument(
        '-r', '--samplerate',
        type=int,
        help='sampling rate',
        default=16000,
    )
    args = full_parser.parse_args(leftover)

    # Shared state consumed by the audio callback and by run_test().
    audio_queue = asyncio.Queue()
    loop = asyncio.get_running_loop()

    logging.basicConfig(level=logging.INFO)
    await run_test()

# Start the asyncio entry point only when run as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())
Add speechtotext script 2022-10-17 09:36:48 +00:00			`#!/usr/bin/env python3`
			`import json`
			`import asyncio`
			`import websockets`
			`import logging`
			`import sounddevice as sd`
			`import argparse`
Request image and display 2022-11-01 00:22:04 +00:00			`from termcolor import colored`
			`import aiohttp`
			`import aiofiles`
			`import vlc`
			`from time import sleep`
Add speechtotext script 2022-10-17 09:36:48 +00:00
			`def int_or_str(text):`
			`"""Helper function for argument parsing."""`
			`try:`
			`return int(text)`
			`except ValueError:`
			`return text`

			`def callback(indata, frames, time, status):`
			`"""This is called (from a separate thread) for each audio block."""`
			`loop.call_soon_threadsafe(audio_queue.put_nowait, bytes(indata))`

			`async def run_test():`

Request image and display 2022-11-01 00:22:04 +00:00			`vlc_instance = vlc.Instance()`
			`player = vlc_instance.media_player_new()`
			`media = vlc_instance.media_new("image.png")`
			`player.set_media(media)`

Add speechtotext script 2022-10-17 09:36:48 +00:00			`with sd.RawInputStream(samplerate=args.samplerate, blocksize = 4000, device=args.device, dtype='int16',`
			`channels=1, callback=callback) as device:`

			`async with websockets.connect(args.uri) as websocket:`
			`await websocket.send('{ "config" : { "sample_rate" : %d } }' % (device.samplerate))`

			`while True:`
			`data = await audio_queue.get()`
			`await websocket.send(data)`
			`result = await websocket.recv()`
			`j = json.loads(result)`
			`if "text" in j and "result" in j:`
			`n = 0`
			`for word in j["result"]:`
Cast to float 2022-11-17 06:37:16 +00:00			`n += float(word["conf"])`
			`if float(word["conf"]) > 0.7:`
Request image and display 2022-11-01 00:22:04 +00:00			`print(colored(word["word"], "green"), end=" ")`
Cast to float 2022-11-17 06:37:16 +00:00			`elif float(word["conf"]) > 0.5:`
Request image and display 2022-11-01 00:22:04 +00:00			`print(colored(word["word"], "yellow"), end=" ")`
			`else:`
			`print(colored(word["word"], "red"), end=" ")`
Add speechtotext script 2022-10-17 09:36:48 +00:00			`print(n/len(j["result"]))`
Request image and display 2022-11-01 00:22:04 +00:00			`print("Generating Image")`
			`if len(j["result"]) > 2:`
			`async with aiohttp.ClientSession() as session:`
			`url = f'http://192.168.1.95:8000?text={j["text"].replace(" ", "+")}'`
			`async with session.get(url) as resp:`
			`print(resp.status)`
			`if resp.status == 200:`
			`f = await aiofiles.open('image.png', mode='wb')`
			`await f.write(await resp.read())`
			`await f.close()`
			`print("Image generated")`
			`player.stop()`
			`player.play()`
			`sleep(1)`
			`player.pause()`


Add speechtotext script 2022-10-17 09:36:48 +00:00			`await websocket.send('{"eof" : 1}')`
			`print (await websocket.recv())`

Request image and display 2022-11-01 00:22:04 +00:00			`async def main():`
Add speechtotext script 2022-10-17 09:36:48 +00:00
			`global args`
			`global loop`
			`global audio_queue`

			`parser = argparse.ArgumentParser(add_help=False)`
			`parser.add_argument('-l', '--list-devices', action='store_true',`
			`help='show list of audio devices and exit')`
			`args, remaining = parser.parse_known_args()`
			`if args.list_devices:`
			`print(sd.query_devices())`
			`parser.exit(0)`
			`parser = argparse.ArgumentParser(description="ASR Server",`
			`formatter_class=argparse.RawDescriptionHelpFormatter,`
			`parents=[parser])`
			`parser.add_argument('-u', '--uri', type=str, metavar='URL',`
Request image and display 2022-11-01 00:22:04 +00:00			`help='Server URL', default='ws://192.168.1.95:2700')`
Add speechtotext script 2022-10-17 09:36:48 +00:00			`parser.add_argument('-d', '--device', type=int_or_str,`
			`help='input device (numeric ID or substring)')`
			`parser.add_argument('-r', '--samplerate', type=int, help='sampling rate', default=16000)`
			`args = parser.parse_args(remaining)`
			`loop = asyncio.get_running_loop()`
			`audio_queue = asyncio.Queue()`

			`logging.basicConfig(level=logging.INFO)`
			`await run_test()`

			`if __name__ == '__main__':`
			`asyncio.run(main())`