[Esperienza seriale di prova della scheda di sviluppo Jiandian Atom i.MX93] Integrazione della registrazione e del riconoscimento delle parole chiave

2024-07-12

Questo articolo è stato pubblicato per la prima volta sull'Electronic Enthusiast Forum: [Nuovo promemoria] [Esperienza seriale di prova della scheda di sviluppo Jiandian Atom i.MX93] Controllo vocale locale basato sull'apprendimento profondo - Gruppo di apprendimento atomico puntuale - Forum sulla tecnologia elettronica - Forum di elettronica professionale popolare (elecfans.com)

Successivamente, voglio integrare la registrazione e il riconoscimento delle parole chiave in un unico programma.

La prima cosa che viene in mente quando si lavora con l'audio in Python è PyAudio. Tuttavia, ho riscontrato qualche problema nell'installare PyAudio sulla scheda. Nel repository dei pacchetti Python non esiste un pacchetto già pronto per questa scheda, quindi il pacchetto deve essere compilato e generato direttamente sulla scheda. PyAudio si basa su PortAudio e PortAudio non è stato ancora portato sulla scheda, quindi per il momento PyAudio non può essere utilizzato; in seguito troveremo un modo per risolvere questo problema.

Il metodo temporaneo che ho adottato è modificare lo script di shell per il test dell'audio menzionato in precedenza, in modo che registri 1 secondo di parlato e poi chiami il programma Python per il riconoscimento delle parole chiave. Se il risultato è YES, viene acceso il LED sulla scheda di sviluppo; se è NO, il LED viene spento. Dopo l'accensione o lo spegnimento del LED viene riprodotto un messaggio vocale corrispondente. Per facilitare il debug, la registrazione viene riprodotta automaticamente subito dopo essere stata effettuata, così da poter verificare che sia corretta.


#!/bin/bash

# Flag file tested by check_initialized to tell whether the audio device
# has already been initialized.
INIT_FLAG='/home/root/shell/audio/.initialized_audio_device'
# WAV file each recording is written to (relative to the working directory).
RECORD_FILE='test.wav'
# Audio clip played by the playback-only action.
PLAY_FILE='/home/root/shell/audio/short.mp3'
# Startup script that add_command appends to.
RC_LOCAL_FILE='/etc/rc.local'
# Exact line appended to the startup script; it removes the flag file.
DELETE_COMMAND="rm -f ${INIT_FLAG}"
 
# Add the command that deletes the audio-initialization flag file to the
# startup script, unless that exact line is already there.
# Globals: DELETE_COMMAND (read), RC_LOCAL_FILE (read; file is appended to)
add_command() {
    # -F: fixed string, -x: whole-line match, so the line is added only once.
    grep -qFx "$DELETE_COMMAND" "$RC_LOCAL_FILE" && return 0
    printf '%s\n' "$DELETE_COMMAND" >> "$RC_LOCAL_FILE"
    sync
}
   
# Check whether a command exists.
# Arguments: $1 - command name to look up
# Returns:   the status of `command -v` (0 when found); prints nothing
check_command() {
    local tool=$1
    command -v "$tool" &> /dev/null
}
 
# Initialize the audio device by setting a fixed list of mixer controls
# through `amixer cset`, one call per control, in the order listed.
# Returns: the status of the last amixer call
init_audio_device() {
    # Each entry is "<control name>=<value>"; control names contain no '='.
    local -a controls=(
        'PCM Volume=192'
        'Mono Mux=Stereo'
        'Playback De-emphasis=2'
        'Capture Digital Volume=192'
        'Capture Mute=on'
        'Capture Polarity=Normal'
        '3D Mode=No 3D'
        'ALC Capture Attack Time=5'
        'ALC Capture Decay Time=2'
        'ALC Capture Function=Stereo'
        'ALC Capture Hold Time=2'
        'ALC Capture Max PGA=3'
        'ALC Capture Min PGA=6'
        'ALC Capture NG Switch=on'
        'ALC Capture NG Threshold=9'
        'ALC Capture NG Type=Mute ADC Output'
        'ALC Capture Target Volume=15'
        'ALC Capture ZC Switch=on'
        'Left Channel Capture Volume=100%'
        'Right Channel Capture Volume=100%'
        'Left Mixer Left Bypass Volume=100%'
        'Right Mixer Right Bypass Volume=100%'
        'Output 1 Playback Volume=100%'
        'Output 2 Playback Volume=100%'
        'ZC Timeout Switch=on'
        'Left PGA Mux=DifferentialL'
        'Right PGA Mux=DifferentialR'
    )
    local entry
    for entry in "${controls[@]}"; do
        # Split on the first '=': name on the left, value on the right.
        amixer cset "name=${entry%%=*}" "${entry#*=}"
    done
    # Creating the flag file is currently disabled:
    # touch "$INIT_FLAG"
}
 
# Check whether initialization has already been done.
# Globals: INIT_FLAG (read)
# Returns: 0 when the flag file exists, non-zero otherwise
check_initialized() {
    [[ -e "$INIT_FLAG" ]]
}
 
# Initialize the on-board microphone: set the differential mux and the
# left/right line muxes to their 'Line 2' inputs (quietly, via amixer -q).
# Returns: check_command's status when amixer is unavailable, otherwise the
#          status of the last amixer call
init_board_mic() {
    # Without amixer there is nothing to configure.
    check_command amixer || return
    local setting
    for setting in \
        'Differential Mux=Line 2' \
        'Left Line Mux=Line 2L' \
        'Right Line Mux=Line 2R'; do
        amixer -q cset "name=${setting%%=*}" "${setting#*=}"
    done
}
 
# Initialize the headphone microphone: set the differential mux and the left
# line mux to their 'Line 1' inputs and set the right line mux to 'NC'.
# Unlike init_board_mic, amixer is run without -q here.
# Returns: check_command's status when amixer is unavailable, otherwise the
#          status of the last amixer call
init_headphone_mic() {
    # Without amixer there is nothing to configure.
    check_command amixer || return
    local setting
    for setting in \
        'Differential Mux=Line 1' \
        'Left Line Mux=Line 1L' \
        'Right Line Mux=NC'; do
        amixer cset "name=${setting%%=*}" "${setting#*=}"
    done
}
 
# Clean up and exit: print a farewell message, restore the terminal, and
# terminate the script with status 0. Installed as the SIGINT handler.
# Outputs: a blank line followed by the message on stdout
cleanup() {
    # The format string previously read "n...n" (backslashes lost), which
    # printed literal 'n' characters instead of newlines.
    printf '\n清理并退出...\n'
    stty sane  # restore the terminal state
    exit 0
}
 
# Switch the mixer between record and playback modes.
# Arguments: $1 - 1: record mode  (bypass switches on,  playback switches off)
#                 2: playback mode (bypass switches off, playback switches on)
#                 3: both off, and both line muxes set to 'NC'
#                 anything else: no-op
# Returns:   0 for an unknown mode; check_command's status when amixer is
#            unavailable; otherwise the status of the last amixer call
switch_mode() {
    # Each entry is "<control name>=<value>", applied in order.
    local -a settings
    case "$1" in
        1)
            settings=(
                'Left Mixer Left Bypass Switch=on'
                'Right Mixer Right Bypass Switch=on'
                'Left Mixer Left Playback Switch=off'
                'Right Mixer Right Playback Switch=off'
            )
            ;;
        2)
            settings=(
                'Left Mixer Left Bypass Switch=off'
                'Right Mixer Right Bypass Switch=off'
                'Left Mixer Left Playback Switch=on'
                'Right Mixer Right Playback Switch=on'
            )
            ;;
        3)
            settings=(
                'Left Mixer Left Bypass Switch=off'
                'Right Mixer Right Bypass Switch=off'
                'Left Mixer Left Playback Switch=off'
                'Right Mixer Right Playback Switch=off'
                'Left Line Mux=NC'
                'Right Line Mux=NC'
            )
            ;;
        *)
            return 0
            ;;
    esac

    check_command amixer || return
    local entry
    for entry in "${settings[@]}"; do
        amixer -q cset "name=${entry%%=*}" "${entry#*=}"
    done
}
 
# Apply the microphone configuration: put the mixer in record mode and
# initialize the selected microphone.
# The selection is currently hard-coded to 2 (on-board microphone); the
# interactive prompt (1 = headphone microphone, 2 = on-board microphone) is
# kept below, commented out.
# Outputs: a blank line followed by the applied option on stdout
# Returns: the status of the microphone initialization function
apply_config() {
    # Kept local so the selection does not leak into the rest of the script.
    local choice

    # printf "\n可选麦克风测试项目:\n"
    # printf "1. 耳机麦克风\n"
    # printf "2. 板载麦克风\n"

    # while true; do
    #     read -r -p "请输入您的选择: " choice

    #     if [[ "$choice" == "1" || "$choice" == "2" ]]; then
    #         break
    #     else
    #         printf "无效输入。请输入1或2。\n"
    #     fi
    # done
    choice=2

    # The format strings previously had "n" where "\n" was intended, which
    # printed literal 'n' characters instead of newlines.
    printf '\n应用麦克风配置项 %s\n' "$choice"
    case "$choice" in
        1)
            switch_mode 1
            init_headphone_mic
            ;;
        2)
            switch_mode 1
            init_board_mic
            ;;
        *)
            printf '无效选项\n'
            ;;
    esac
}
 
# Catch Ctrl+C (SIGINT) and call cleanup, which restores the terminal and exits.
trap cleanup SIGINT

# The "initialize only when not yet initialized" check is disabled, so the
# audio device is initialized on every run.
# if ! check_initialized; then
#     printf "第一次运行，执行音频设备初始化...\n"
    init_audio_device
#     add_command
# fi

# Action performed on every iteration (hard-coded):
#   1 = record, play the recording back, run keyword recognition, drive the LED
#   2 = play PLAY_FILE
#   anything else = clean up and exit
command=1

# sysfs directory of the LED that is switched by the recognition result.
led_dir=/sys/class/leds/sys-led

# Repeats until interrupted with Ctrl+C; cleanup exits the script. The former
# outer loop around this one only contained an unreachable `break`, so it has
# been removed. All printf format strings below previously had "n" where "\n"
# was intended.
while true; do
    case "$command" in
        1)
            apply_config
            printf '\n开始录音...\n'
            #sleep 1
            # -d 1: record for 1 second; -r 16000 overrides the sample rate
            # implied by the "cd" format preset.
            check_command arecord && arecord -f cd -d 1 -r 16000 "$RECORD_FILE"

            # Play the recording back so it can be checked by ear.
            switch_mode 2
            printf '\n播放录音...\n'
            check_command aplay && aplay "$RECORD_FILE"
            switch_mode 3

            # Run the Python recognizer on the recording and capture its
            # output. The file name now comes from RECORD_FILE instead of a
            # second hard-coded "test.wav".
            output=$(python3 simple_audio.py --input="$RECORD_FILE")
            printf '%s\n' "$output"

            # Branch on whether the output contains ">>> YES" or ">>> NO".
            if [[ "$output" == *">>> YES"* ]]; then
                echo "Python程序输出YES，执行相应代码..."
                # LED on, with the heartbeat trigger.
                echo 1 > "$led_dir/brightness"
                echo heartbeat > "$led_dir/trigger"
                # Play the audio prompt for this result.
                switch_mode 2
                #gst-play-1.0 haodeyiweinindakai.mp3
                aplay haodeyiweinindakai.wav
                switch_mode 3
            elif [[ "$output" == *">>> NO"* ]]; then
                echo "Python程序输出NO，执行其他代码..."
                # Remove the trigger, then switch the LED off.
                echo none > "$led_dir/trigger"
                echo 0 > "$led_dir/brightness"
                # Play the audio prompt for this result.
                switch_mode 2
                #gst-play-1.0 haodeyiweininguanbi.mp3
                aplay haodeyiweininguanbi.wav
                switch_mode 3
            else
                echo "Python程序输出未知结果，或者没有输出结果。"
                # Handling for unknown output could be added here.
            fi
            ;;
        2)
            switch_mode 2
            printf '\n开始播音，按 Ctrl+C 可退出播音\n'
            gst-play-1.0 --audiosink="alsasink" "$PLAY_FILE"
            switch_mode 3
            ;;
        *)
            cleanup
            ;;
    esac
done

Per il programma completo, vedere il pacchetto compresso in allegato (*Allegato: sì-no-test.zip). Il codice dello script principale è quello riportato sopra.

A giudicare dal video qui sotto, l'effetto desiderato è sostanzialmente raggiunto. Inizialmente temevo che la qualità di registrazione del microfono della scheda potesse compromettere il riconoscimento, ma al momento non sembra essere un grosso problema. Poiché il metodo consiste nel registrare prima su file e la durata è di solo 1 secondo, l'uso risulta piuttosto scomodo: a volte, se si parla un po' lentamente, la parola non viene registrata per intero. Per risolvere questo problema occorrerà ottimizzare in seguito l'elaborazione dell'audio in Python.

[Prova della scheda di sviluppo Jiandian Atomic i.MX93] Usa la voce per dire SÌ o NO per controllare le luci a LED

Inoltre, il modello attualmente utilizzato è pre-addestrato; in seguito verrà addestrato su parole chiave in cinese, per renderne più comodo l'uso.

Condivisione della tecnologia

[Esperienza seriale di prova della scheda di sviluppo Jiandian Atom i.MX93] Integrazione della registrazione e del riconoscimento delle parole chiave

Profilo personale

le mie informazioni di contatto