From 4bc7e9054daf1271729fbeefe2c77fcc002d0ab0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julien=20Fastr=C3=A9?= Date: Wed, 26 Mar 2025 22:53:10 +0100 Subject: [PATCH] Initial commit --- .gitignore | 164 ++++++++++++++++++ .idea/.gitignore | 8 + .idea/audio-transcript.iml | 10 ++ .../inspectionProfiles/profiles_settings.xml | 6 + .idea/misc.xml | 6 + .idea/modules.xml | 8 + .idea/vcs.xml | 6 + README.md | 74 ++++++++ requirements.txt | 2 + transcribe.py | 23 +++ 10 files changed, 307 insertions(+) create mode 100644 .gitignore create mode 100644 .idea/.gitignore create mode 100644 .idea/audio-transcript.iml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 README.md create mode 100644 requirements.txt create mode 100644 transcribe.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..dda4f8f --- /dev/null +++ b/.gitignore @@ -0,0 +1,164 @@ +### Python template +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/audio-transcript.iml b/.idea/audio-transcript.iml new file mode 100644 index 0000000..dbc317c --- /dev/null +++ b/.idea/audio-transcript.iml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..e60c841 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,6 @@ + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 
index 0000000..b1024bc --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..2420a52 --- /dev/null +++ b/README.md @@ -0,0 +1,74 @@ +# Project README + +## Setting Up the Project Environment + +Follow the steps below to set up your project environment: + +### 1. Create a Virtual Environment + +To isolate the project dependencies and prevent conflicts, create a virtual environment using the following commands: + +```bash +# Create the virtual environment +python3 -m venv venv + +# Activate the virtual environment: +source venv/bin/activate +``` + +### 2. Install Project Dependencies + +Once the virtual environment is activated, install the required dependencies using `pip`: + +```bash +pip install -r requirements.txt +``` + +Make sure you have a `requirements.txt` file in the project directory with the list of all required dependencies. + +## Running the Script + +To run the Python script `transcribe.py`, you need to provide an audio file as a parameter. Use the following command: + +```bash +python transcribe.py <audio_file_path> +``` + +- Replace `<audio_file_path>` with the path to your audio file. +- Example: + +```bash +python transcribe.py sample_audio.wav +``` + +## Where is the Whisper model downloaded? + +When using the `openai-whisper` package, the AI Whisper model is downloaded and stored in a local cache directory. By +default, it is stored under the user's home directory in the following path: + +```plaintext +~/.cache/whisper/ +``` + +Here: + +- `~` refers to the user's home directory. +- `.cache/whisper/` is the folder where the models are cached. + +The cache directory contains the downloaded model files, which are reused in subsequent runs to avoid re-downloading +them. 
Specifically: + +- Whisper downloads the model files when they are first used, based on the requested model size (e.g., `base`, `medium`, + or `large`). + +If you need to relocate the cache directory, set the `XDG_CACHE_HOME` environment variable (models are then stored in +`$XDG_CACHE_HOME/whisper/`), or pass a custom `download_root` to `whisper.load_model()` in `transcribe.py`. + + +## Notes + +- Ensure that your virtual environment is activated before running the script. +- If you encounter any missing dependencies, double-check your `requirements.txt` file and re-run the installation + command. + +Happy coding! \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..b0d705f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +openai-whisper==20240930 +torch==2.6.0 \ No newline at end of file diff --git a/transcribe.py b/transcribe.py new file mode 100644 index 0000000..e950101 --- /dev/null +++ b/transcribe.py @@ -0,0 +1,23 @@ +import sys +import whisper + +def transcribe_audio(audio_path): + # Load the Whisper model + model = whisper.load_model("base") + + # Transcribe the audio file + print(f"Transcribing: {audio_path} ...") + result = model.transcribe(audio_path, language="fr") + + # Print and return transcription + transcription = result["text"] + print("\nTranscription:\n") + print(transcription) + return transcription + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("Usage: python transcribe.py path/to/audiofile") + else: + audio_file = sys.argv[1] + transcribe_audio(audio_file)