From 1664fbc2b1ac08cb3da7e90415ab0bac6edb4837 Mon Sep 17 00:00:00 2001 From: Jiaqi Liu Date: Thu, 5 Mar 2026 17:06:29 -0800 Subject: [PATCH 1/6] Update streaming_transcription.py to allow longer audio input per stream --- dialogflow/streaming_transcription.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/dialogflow/streaming_transcription.py b/dialogflow/streaming_transcription.py index 6395a30b3a8..7391524b6f4 100644 --- a/dialogflow/streaming_transcription.py +++ b/dialogflow/streaming_transcription.py @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC +# Copyright 2026 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -34,7 +34,7 @@ import re import sys -from google.api_core.exceptions import DeadlineExceeded +from google.api_core.exceptions import DeadlineExceeded, OutOfRange import pyaudio @@ -51,6 +51,7 @@ CHUNK_SIZE = int(SAMPLE_RATE / 10) # 100ms RESTART_TIMEOUT = 160 # seconds MAX_LOOKBACK = 3 # seconds +HALF_CLOSE_DURATION_MS = 90 * 1000 # milliseconds YELLOW = "\033[0;33m" @@ -214,7 +215,8 @@ def main(): ) transcript = response.recognition_result.transcript # Half-close the stream with gRPC (in Python just stop yielding requests) - stream.is_final = True + if stream.is_final_offset > HALF_CLOSE_DURATION_MS: + stream.is_final = True # Exit recognition if any of the transcribed phrase could be # one of our keywords. 
if re.search(r"\b(exit|quit)\b", transcript, re.I): @@ -223,6 +225,8 @@ def main(): terminate = True stream.closed = True break + except OutOfRange: + print("Maximum audio duration exceeded in the stream, restarting.") except DeadlineExceeded: print("Deadline Exceeded, restarting.") From d7b66fe03c45672fca421299f97faa13c27b45d7 Mon Sep 17 00:00:00 2001 From: Jiaqi Liu Date: Thu, 5 Mar 2026 17:37:17 -0800 Subject: [PATCH 2/6] Configure output_multiple_utterances=true for long audio streams --- dialogflow/participant_management.py | 5 ++++- dialogflow/requirements.txt | 2 +- dialogflow/streaming_transcription.py | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/dialogflow/participant_management.py b/dialogflow/participant_management.py index e2f9a486c1a..7a21bd2a8d2 100644 --- a/dialogflow/participant_management.py +++ b/dialogflow/participant_management.py @@ -196,6 +196,7 @@ def analyze_content_audio_stream( timeout: int, language_code: str, single_utterance=False, + output_multiple_utterances=True, ): import google.auth from google.cloud import dialogflow_v2beta1 as dialogflow @@ -231,7 +232,9 @@ def gen_requests(participant_name, audio_config, stream): """Generates requests for streaming.""" audio_generator = stream.generator() yield dialogflow.types.participant.StreamingAnalyzeContentRequest( - participant=participant_name, audio_config=audio_config + participant=participant_name, + audio_config=audio_config, + output_multiple_utterances=output_multiple_utterances ) for content in audio_generator: yield dialogflow.types.participant.StreamingAnalyzeContentRequest( diff --git a/dialogflow/requirements.txt b/dialogflow/requirements.txt index 4c7d355eb45..ed176a19af0 100644 --- a/dialogflow/requirements.txt +++ b/dialogflow/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-dialogflow==2.36.0 +google-cloud-dialogflow==2.46.0 Flask==3.0.3 pyaudio==0.2.14 termcolor==3.0.0 diff --git a/dialogflow/streaming_transcription.py 
b/dialogflow/streaming_transcription.py index 7391524b6f4..03629bf6097 100644 --- a/dialogflow/streaming_transcription.py +++ b/dialogflow/streaming_transcription.py @@ -199,6 +199,7 @@ def main(): timeout=RESTART_TIMEOUT, language_code="en-US", single_utterance=False, + output_multiple_utterances=True, ) # Now, print the final transcription responses to user. From 80f3fb2b1c8ed5326d3bac1d625765ef23ecca4a Mon Sep 17 00:00:00 2001 From: Jiaqi Liu Date: Thu, 5 Mar 2026 17:44:05 -0800 Subject: [PATCH 3/6] Update comments per gemini suggestion. --- dialogflow/streaming_transcription.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dialogflow/streaming_transcription.py b/dialogflow/streaming_transcription.py index 03629bf6097..bd433ec9f79 100644 --- a/dialogflow/streaming_transcription.py +++ b/dialogflow/streaming_transcription.py @@ -215,7 +215,8 @@ def main(): offset.seconds * 1000 + offset.microseconds / 1000 ) transcript = response.recognition_result.transcript - # Half-close the stream with gRPC (in Python just stop yielding requests) + # Half-close upon final results for better streaming experiences + # (in Python just stop yielding requests) if stream.is_final_offset > HALF_CLOSE_DURATION_MS: stream.is_final = True # Exit recognition if any of the transcribed phrase could be From 69c0e548da9a371a3480c2694097ee62e962efe5 Mon Sep 17 00:00:00 2001 From: Jiaqi Liu Date: Thu, 5 Mar 2026 17:48:06 -0800 Subject: [PATCH 4/6] Update lint. 
--- dialogflow/streaming_transcription.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dialogflow/streaming_transcription.py b/dialogflow/streaming_transcription.py index bd433ec9f79..e06102feaf5 100644 --- a/dialogflow/streaming_transcription.py +++ b/dialogflow/streaming_transcription.py @@ -51,7 +51,7 @@ CHUNK_SIZE = int(SAMPLE_RATE / 10) # 100ms RESTART_TIMEOUT = 160 # seconds MAX_LOOKBACK = 3 # seconds -HALF_CLOSE_DURATION_MS = 90 * 1000 # milliseconds +HALF_CLOSE_DURATION_MS = 90 * 1000 # milliseconds YELLOW = "\033[0;33m" From de891c8539953249b13ab8237357b06f06babc28 Mon Sep 17 00:00:00 2001 From: Jiaqi Liu Date: Thu, 5 Mar 2026 18:21:34 -0800 Subject: [PATCH 5/6] Disable output_multiple_utterances by default. --- dialogflow/participant_management.py | 2 +- dialogflow/streaming_transcription.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dialogflow/participant_management.py b/dialogflow/participant_management.py index 7a21bd2a8d2..d0bfa9decf3 100644 --- a/dialogflow/participant_management.py +++ b/dialogflow/participant_management.py @@ -196,7 +196,7 @@ def analyze_content_audio_stream( timeout: int, language_code: str, single_utterance=False, - output_multiple_utterances=True, + output_multiple_utterances=False, ): import google.auth from google.cloud import dialogflow_v2beta1 as dialogflow diff --git a/dialogflow/streaming_transcription.py b/dialogflow/streaming_transcription.py index e06102feaf5..014e4ee0729 100644 --- a/dialogflow/streaming_transcription.py +++ b/dialogflow/streaming_transcription.py @@ -199,7 +199,9 @@ def main(): timeout=RESTART_TIMEOUT, language_code="en-US", single_utterance=False, - output_multiple_utterances=True, + # Uncomment to process multiple utterances detected in the audio stream + # individually instead of stitching together to form a single utterance. + # output_multiple_utterances=True, ) # Now, print the final transcription responses to user. 
From c605d93b155b7dbc15faca44de8c9fc1e4d09f6c Mon Sep 17 00:00:00 2001 From: Jiaqi Liu Date: Fri, 6 Mar 2026 21:27:05 -0800 Subject: [PATCH 6/6] Update lint and revert license header change. --- dialogflow/detect_intent_texts_with_location.py | 2 +- dialogflow/streaming_transcription.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dialogflow/detect_intent_texts_with_location.py b/dialogflow/detect_intent_texts_with_location.py index d52ac178dd7..99e2eabb2f3 100644 --- a/dialogflow/detect_intent_texts_with_location.py +++ b/dialogflow/detect_intent_texts_with_location.py @@ -58,7 +58,7 @@ def detect_intent_texts_with_location( print("=" * 20) print(f"Query text: {response.query_result.query_text}") print( - f"Detected intent: {response.query_result.intent.display_name} (confidence: {response.query_result.intent_detection_confidence,})\n" + f"Detected intent: {response.query_result.intent.display_name} (confidence: {response.query_result.intent_detection_confidence})\n" ) print(f"Fulfillment text: {response.query_result.fulfillment_text}\n") diff --git a/dialogflow/streaming_transcription.py b/dialogflow/streaming_transcription.py index 014e4ee0729..fe88afb581f 100644 --- a/dialogflow/streaming_transcription.py +++ b/dialogflow/streaming_transcription.py @@ -1,4 +1,4 @@ -# Copyright 2026 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.