Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import logging
import warnings
import os
import time
from aws_sdk_bedrock_runtime.client import (
BedrockRuntimeClient,
InvokeModelWithBidirectionalStreamOperationInput,
Expand All @@ -24,15 +23,10 @@
InvokeModelWithBidirectionalStreamInputChunk,
BidirectionalInputPayloadPart,
)
from aws_sdk_bedrock_runtime.config import (
Config,
HTTPAuthSchemeResolver,
SigV4AuthScheme,
)
from smithy_aws_core.credentials_resolvers.environment import (
EnvironmentCredentialsResolver,
)
from smithy_aws_core.credentials_resolvers.container import ContainerCredentialsResolver
from aws_sdk_bedrock_runtime.config import Config
from smithy_aws_core.auth.sigv4 import SigV4AuthScheme
from smithy_aws_core.identity.environment import EnvironmentCredentialsResolver
from smithy_aws_core.identity.container import ContainerCredentialsResolver
from smithy_http.aio.aiohttp import AIOHTTPClient, AIOHTTPClientConfig

# Configure logging
Expand Down Expand Up @@ -100,8 +94,7 @@ def initialize_client(self):
endpoint_uri=f"https://bedrock-runtime.{self.region}.amazonaws.com",
region=self.region,
aws_credentials_identity_resolver=resolver,
http_auth_scheme_resolver=HTTPAuthSchemeResolver(),
http_auth_schemes={"aws.auth#sigv4": SigV4AuthScheme()},
auth_schemes={"aws.auth#sigv4": SigV4AuthScheme(service="bedrock")},
)
self.bedrock_client = BedrockRuntimeClient(config=config)
logger.info(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,40 @@ async def queue_audio(self, prompt_name: str, content_name: str, audio_base64: s
)
return

# Add audio validation to detect silence
try:
import base64
audio_bytes = base64.b64decode(audio_base64)

# Check if audio is silence (all zeros or very low values)
if len(audio_bytes) > 0:
# Convert bytes to 16-bit signed integers for analysis
import struct
num_samples = len(audio_bytes) // 2 # 16-bit samples

if num_samples > 0:
# Unpack first few samples to check for silence
sample_check_count = min(100, num_samples)
samples = struct.unpack(f'{sample_check_count}h', audio_bytes[:sample_check_count * 2])

# Calculate max absolute value
max_val = max(abs(s) for s in samples)

# Threshold for silence detection (16-bit audio range is -32768 to 32767)
# Only warn on truly silent audio (complete zeros)
if max_val == 0:
self.logger.warning(
"Detected completely silent audio chunk (all zeros). "
"Frontend may not be sending real microphone audio."
)
elif max_val < 50:
# Very low audio - only log at debug level
self.logger.debug(f"Very low audio level - max amplitude: {max_val}")
# Normal audio - no logging needed (reduces verbosity)

except Exception as e:
self.logger.error(f"Error validating audio: {e}")

# Use put_nowait() like original code to avoid blocking/timing delays
self.audio_input_queue.put_nowait(
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,12 @@ async def handle_connection(self, websocket):
# Start audio processor
await self.audio_processor.start(self.bedrock_client, self.stream)

# Start response processing task
self.response_task = asyncio.create_task(self._process_responses(websocket))

# Register message handlers
self._register_message_handlers()

# Start response processing task AFTER setup is complete
self.response_task = asyncio.create_task(self._process_responses(websocket))

# Main message processing loop
async for message in websocket:
await self._handle_message(websocket, message)
Expand Down Expand Up @@ -235,14 +235,42 @@ async def _handle_message(self, websocket, message):
list(data.get("event", {}).keys())[0] if "event" in data else None
)

# Store session information
if event_type == "promptStart":
self.prompt_name = data["event"]["promptStart"]["promptName"]
elif (
event_type == "contentStart"
and data["event"]["contentStart"].get("type") == "AUDIO"
):
self.audio_content_name = data["event"]["contentStart"]["contentName"]
# Event sequence tracking (reduced verbosity)
if event_type:
if event_type == "sessionStart":
self.logger.info("Session started")
elif event_type == "promptStart":
self.prompt_name = data["event"]["promptStart"]["promptName"]
self.logger.debug("Prompt started")
elif event_type == "contentStart":
content_type = data["event"]["contentStart"].get("type")
role = data["event"]["contentStart"].get("role", "UNSPECIFIED")
content_name = data["event"]["contentStart"].get("contentName")

# Only log at debug level for routine content
self.logger.debug(f"Content start: type={content_type}, role={role}")

# Validate first content block has SYSTEM role
if not hasattr(self, '_first_content_received'):
self._first_content_received = True
if role != "SYSTEM":
self.logger.error(f"First content block must have SYSTEM role, received {role}")
await websocket.send(json.dumps({
"type": "error",
"message": f"First content block must have SYSTEM role, received {role}"
}))

# Store audio content name
if content_type == "AUDIO":
self.audio_content_name = content_name
if role != "USER":
self.logger.warning(f"Audio content should have USER role, received {role}")

elif event_type == "sessionEnd":
self.logger.info("Session ended")
# Other events logged at debug level only
elif event_type in ["textInput", "contentEnd", "promptEnd"]:
self.logger.debug(f"Event: {event_type}")

# Send event to Bedrock
await self.bedrock_client.send_event(self.stream, data)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ def content_start_audio(
"promptName": prompt_name,
"contentName": content_name,
"type": "AUDIO",
"role": "USER", # Required by Nova Sonic API
"interactive": True,
"audioInputConfiguration": audio_input_config,
}
Expand Down
4 changes: 2 additions & 2 deletions samples/speech-to-speech/backend/python_app/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Core dependencies
aws_sdk_bedrock_runtime==0.0.2
smithy-aws-core==0.0.3
aws_sdk_bedrock_runtime==0.1.0
smithy-aws-core==0.1.0
python-dotenv>=1.1.0
aiohttp>=3.8.0

Expand Down