Skip to main content
deleted 25 characters in body
Source Link
Robert
  • 9.2k
  • 62
  • 207
  • 221
  • mono

    mono
  • 16-bit PCM

    16-bit PCM
  • 16 kHz

    16 kHz
  • about 7–8 seconds long

    about 7–8 seconds long
  • it becomes only about 2–4 seconds long

    it becomes only about 2–4 seconds long
  • the output audio contains mostly noise/glitches instead of speech

    the output audio contains mostly noise/glitches instead of speech
Total samples in file: 130672 Filtered samples received: 72021 
Total samples in file: 130672 Filtered samples received: 72021 

 
uint8_t rx_char; uint8_t buffer[50]; uint8_t index = 0; int16_t x0 = 0; int16_t x1 = 0; int16_t x2 = 0; int16_t x3 = 0; HAL_UART_Receive(&huart2, &rx_char, 1, HAL_MAX_DELAY); if(rx_char == '\r' || rx_char == '\n') { buffer[index] = '\0'; int16_t value = (int16_t)atoi((char*)buffer); x3 = x2; x2 = x1; x1 = x0; x0 = value; int32_t sum = (2*(int32_t)x0 + (int32_t)x1 + (int32_t)x2); int16_t result = sum / 4; char msg[50]; sprintf(msg,"\r\nResult: %d\r\n\r\n",result); HAL_UART_Transmit(&huart2,(uint8_t*)msg,strlen(msg),HAL_MAX_DELAY); memset(buffer,0,sizeof(buffer)); index = 0; } else { if(index < 49) buffer[index++] = rx_char; } 

uint8_t rx_char; uint8_t buffer[50]; uint8_t index = 0; int16_t x0 = 0; int16_t x1 = 0; int16_t x2 = 0; int16_t x3 = 0; HAL_UART_Receive(&huart2, &rx_char, 1, HAL_MAX_DELAY); if(rx_char == '\r' || rx_char == '\n') { buffer[index] = '\0'; int16_t value = (int16_t)atoi((char*)buffer); x3 = x2; x2 = x1; x1 = x0; x0 = value; int32_t sum = (2*(int32_t)x0 + (int32_t)x1 + (int32_t)x2); int16_t result = sum / 4; char msg[50]; sprintf(msg,"\r\nResult: %d\r\n\r\n",result); HAL_UART_Transmit(&huart2,(uint8_t*)msg,strlen(msg),HAL_MAX_DELAY); memset(buffer,0,sizeof(buffer)); index = 0; } else { if(index < 49) buffer[index++] = rx_char; } 
import wave import numpy as np import serial import time ser = serial.Serial('COM9',115200,timeout=1) filtered_samples = [] file_path = r"recordings/raw_enrollment_16k/user_1/sample_1.wav" with wave.open(file_path,'rb') as w: frames = w.readframes(w.getnframes()) samples = np.frombuffer(frames,dtype=np.int16) print("Total samples in file:", len(samples)) for s in samples: ser.write((str(int(s)) + "\n").encode()) time.sleep(0.0005) while ser.in_waiting: response = ser.readline().decode(errors='ignore').strip() if response: try: value = int(response.split()[-1]) filtered_samples.append(value) except: pass print("Filtered samples received:", len(filtered_samples)) 
import wave import numpy as np import serial import time ser = serial.Serial('COM9',115200,timeout=1) filtered_samples = [] file_path = r"recordings/raw_enrollment_16k/user_1/sample_1.wav" with wave.open(file_path,'rb') as w: frames = w.readframes(w.getnframes()) samples = np.frombuffer(frames,dtype=np.int16) print("Total samples in file:", len(samples)) for s in samples: ser.write((str(int(s)) + "\n").encode()) time.sleep(0.0005) while ser.in_waiting: response = ser.readline().decode(errors='ignore').strip() if response: try: value = int(response.split()[-1]) filtered_samples.append(value) except: pass print("Filtered samples received:", len(filtered_samples)) 
  • mono

  • 16-bit PCM

  • 16 kHz

  • about 7–8 seconds long

  • it becomes only about 2–4 seconds long

  • the output audio contains mostly noise/glitches instead of speech

Total samples in file: 130672 Filtered samples received: 72021 

 
uint8_t rx_char; uint8_t buffer[50]; uint8_t index = 0; int16_t x0 = 0; int16_t x1 = 0; int16_t x2 = 0; int16_t x3 = 0; HAL_UART_Receive(&huart2, &rx_char, 1, HAL_MAX_DELAY); if(rx_char == '\r' || rx_char == '\n') { buffer[index] = '\0'; int16_t value = (int16_t)atoi((char*)buffer); x3 = x2; x2 = x1; x1 = x0; x0 = value; int32_t sum = (2*(int32_t)x0 + (int32_t)x1 + (int32_t)x2); int16_t result = sum / 4; char msg[50]; sprintf(msg,"\r\nResult: %d\r\n\r\n",result); HAL_UART_Transmit(&huart2,(uint8_t*)msg,strlen(msg),HAL_MAX_DELAY); memset(buffer,0,sizeof(buffer)); index = 0; } else { if(index < 49) buffer[index++] = rx_char; } 

import wave import numpy as np import serial import time ser = serial.Serial('COM9',115200,timeout=1) filtered_samples = [] file_path = r"recordings/raw_enrollment_16k/user_1/sample_1.wav" with wave.open(file_path,'rb') as w: frames = w.readframes(w.getnframes()) samples = np.frombuffer(frames,dtype=np.int16) print("Total samples in file:", len(samples)) for s in samples: ser.write((str(int(s)) + "\n").encode()) time.sleep(0.0005) while ser.in_waiting: response = ser.readline().decode(errors='ignore').strip() if response: try: value = int(response.split()[-1]) filtered_samples.append(value) except: pass print("Filtered samples received:", len(filtered_samples)) 
  • mono
  • 16-bit PCM
  • 16 kHz
  • about 7–8 seconds long
  • it becomes only about 2–4 seconds long
  • the output audio contains mostly noise/glitches instead of speech
Total samples in file: 130672 Filtered samples received: 72021 
uint8_t rx_char; uint8_t buffer[50]; uint8_t index = 0; int16_t x0 = 0; int16_t x1 = 0; int16_t x2 = 0; int16_t x3 = 0; HAL_UART_Receive(&huart2, &rx_char, 1, HAL_MAX_DELAY); if(rx_char == '\r' || rx_char == '\n') { buffer[index] = '\0'; int16_t value = (int16_t)atoi((char*)buffer); x3 = x2; x2 = x1; x1 = x0; x0 = value; int32_t sum = (2*(int32_t)x0 + (int32_t)x1 + (int32_t)x2); int16_t result = sum / 4; char msg[50]; sprintf(msg,"\r\nResult: %d\r\n\r\n",result); HAL_UART_Transmit(&huart2,(uint8_t*)msg,strlen(msg),HAL_MAX_DELAY); memset(buffer,0,sizeof(buffer)); index = 0; } else { if(index < 49) buffer[index++] = rx_char; } 
import wave import numpy as np import serial import time ser = serial.Serial('COM9',115200,timeout=1) filtered_samples = [] file_path = r"recordings/raw_enrollment_16k/user_1/sample_1.wav" with wave.open(file_path,'rb') as w: frames = w.readframes(w.getnframes()) samples = np.frombuffer(frames,dtype=np.int16) print("Total samples in file:", len(samples)) for s in samples: ser.write((str(int(s)) + "\n").encode()) time.sleep(0.0005) while ser.in_waiting: response = ser.readline().decode(errors='ignore').strip() if response: try: value = int(response.split()[-1]) filtered_samples.append(value) except: pass print("Filtered samples received:", len(filtered_samples)) 
Source Link

STM32F401RE UART audio processing losing samples when sending WAV data from Python

I am trying to perform simple audio denoising using an STM32F401RE Nucleo board.

I have .wav audio files that are:

  • mono

  • 16-bit PCM

  • 16 kHz

  • about 7–8 seconds long

Python reads the audio samples and sends them one by one over UART to the STM32.
The STM32 applies a simple moving-average filter and sends the processed samples back to Python.
Python then reconstructs the filtered samples into a new .wav file.

However the reconstructed audio file is incorrect:

  • it becomes only about 2–4 seconds long

  • the output audio contains mostly noise/glitches instead of speech

Example output from Python:

Total samples in file: 130672 Filtered samples received: 72021 

So it seems that many samples are lost during UART transmission or processing.

I want to understand how to correctly stream audio samples over UART to STM32 without losing data.


STM32 Code (filter + UART processing)

uint8_t rx_char; uint8_t buffer[50]; uint8_t index = 0; int16_t x0 = 0; int16_t x1 = 0; int16_t x2 = 0; int16_t x3 = 0; HAL_UART_Receive(&huart2, &rx_char, 1, HAL_MAX_DELAY); if(rx_char == '\r' || rx_char == '\n') { buffer[index] = '\0'; int16_t value = (int16_t)atoi((char*)buffer); x3 = x2; x2 = x1; x1 = x0; x0 = value; int32_t sum = (2*(int32_t)x0 + (int32_t)x1 + (int32_t)x2); int16_t result = sum / 4; char msg[50]; sprintf(msg,"\r\nResult: %d\r\n\r\n",result); HAL_UART_Transmit(&huart2,(uint8_t*)msg,strlen(msg),HAL_MAX_DELAY); memset(buffer,0,sizeof(buffer)); index = 0; } else { if(index < 49) buffer[index++] = rx_char; } 

Python Code

import wave import numpy as np import serial import time ser = serial.Serial('COM9',115200,timeout=1) filtered_samples = [] file_path = r"recordings/raw_enrollment_16k/user_1/sample_1.wav" with wave.open(file_path,'rb') as w: frames = w.readframes(w.getnframes()) samples = np.frombuffer(frames,dtype=np.int16) print("Total samples in file:", len(samples)) for s in samples: ser.write((str(int(s)) + "\n").encode()) time.sleep(0.0005) while ser.in_waiting: response = ser.readline().decode(errors='ignore').strip() if response: try: value = int(response.split()[-1]) filtered_samples.append(value) except: pass print("Filtered samples received:", len(filtered_samples)) 
created from staging ground