@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import logging
 from typing import Literal
 
 from typing_extensions import Unpack
@@ -65,16 +66,31 @@ def recognize(
     parser = argparse.ArgumentParser()
     parser.add_argument("audio_file")
     parser.add_argument(
-        "--model", choices=get_args(WhisperModel), default="whisper-1"
+        "-m", "--model", choices=get_args(WhisperModel), default="whisper-1"
     )
     parser.add_argument("-l", "--language")
+    parser.add_argument("-p", "--prompt")
+    parser.add_argument("-v", "--verbose", action="store_true")
     args = parser.parse_args()
 
+    if args.verbose:
+        speech_recognition_logger = logging.getLogger("speech_recognition")
+        speech_recognition_logger.setLevel(logging.DEBUG)
+
+        console_handler = logging.StreamHandler()
+        console_formatter = logging.Formatter(
+            "%(asctime)s | %(levelname)s | %(name)s:%(funcName)s:%(lineno)d - %(message)s"
+        )
+        console_handler.setFormatter(console_formatter)
+        speech_recognition_logger.addHandler(console_handler)
+
     audio_data = sr.AudioData.from_file(args.audio_file)
+
+    recognize_args = {"model": args.model}
     if args.language:
-        transcription = recognize(
-            None, audio_data, model=args.model, language=args.language
-        )
-    else:
-        transcription = recognize(None, audio_data, model=args.model)
+        recognize_args["language"] = args.language
+    if args.prompt:
+        recognize_args["prompt"] = args.prompt
+
+    transcription = recognize(None, audio_data, **recognize_args)
     print(transcription)
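For reference, a minimal sketch of exercising the same wiring programmatically rather than through the CLI. The `recognize(None, audio_data, ...)` call, `sr.AudioData.from_file`, and the `"speech_recognition"` logger name are taken directly from the diff above; the import path, audio file name, and the sample `language`/`prompt` values are placeholders chosen for illustration.

```python
import logging

import speech_recognition as sr

# Assumed module path for the recognize() shown in the diff; adjust to the actual location.
from speech_recognition.recognizers.whisper_api.openai import recognize

# Equivalent of the new --verbose flag: send the library's own logger to stderr at DEBUG.
logger = logging.getLogger("speech_recognition")
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler())

audio_data = sr.AudioData.from_file("speech.wav")  # placeholder file name

# Same kwargs-dict pattern as the updated script: only forward options that were supplied.
recognize_args = {"model": "whisper-1"}
language = "en"                    # stand-in for --language
prompt = "technical vocabulary"    # stand-in for --prompt
if language:
    recognize_args["language"] = language
if prompt:
    recognize_args["prompt"] = prompt

print(recognize(None, audio_data, **recognize_args))
```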