Initial commit

2025-03-26 22:53:10 +01:00
commit 4bc7e9054d
10 changed files with 307 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,164 @@
 ### Python template
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class
 # C extensions
 *.so
 # Distribution / packaging
 .Python
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 wheels/
 share/python-wheels/
 *.egg-info/
 .installed.cfg
 *.egg
 MANIFEST
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
 # Unit test / coverage reports
 htmlcov/
 .tox/
 .nox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *.cover
 *.py,cover
 .hypothesis/
 .pytest_cache/
 cover/
 # Translations
 *.mo
 *.pot
 # Django stuff:
 *.log
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
 # Flask stuff:
 instance/
 .webassets-cache
 # Scrapy stuff:
 .scrapy
 # Sphinx documentation
 docs/_build/
 # PyBuilder
 .pybuilder/
 target/
 # Jupyter Notebook
 .ipynb_checkpoints
 # IPython
 profile_default/
 ipython_config.py
 # pyenv
 #   For a library or package, you might want to ignore these files since the code is
 #   intended to run in multiple environments; otherwise, check them in:
 # .python-version
 # pipenv
 #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 #   install all needed dependencies.
 #Pipfile.lock
 # poetry
 #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 #   This is especially recommended for binary packages to ensure reproducibility, and is more
 #   commonly ignored for libraries.
 #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
 #poetry.lock
 # pdm
 #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
 #pdm.lock
 #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
 #   in version control.
 #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
 .pdm.toml
 .pdm-python
 .pdm-build/
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
 __pypackages__/
 # Celery stuff
 celerybeat-schedule
 celerybeat.pid
 # SageMath parsed files
 *.sage.py
 # Environments
 .env
 .venv
 env/
 venv/
 ENV/
 env.bak/
 venv.bak/
 # Spyder project settings
 .spyderproject
 .spyproject
 # Rope project settings
 .ropeproject
 # mkdocs documentation
 /site
 # mypy
 .mypy_cache/
 .dmypy.json
 dmypy.json
 # Pyre type checker
 .pyre/
 # pytype static type analyzer
 .pytype/
 # Cython debug symbols
 cython_debug/
 # PyCharm
 #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
 # Default ignored files
 /shelf/
 /workspace.xml
 # Editor-based HTTP Client requests
 /httpRequests/
 # Datasource local storage ignored files
 /dataSources/
 /dataSources.local.xml
--- a/.idea/audio-transcript.iml
+++ b/.idea/audio-transcript.iml
@@ -0,0 +1,10 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$">
      <excludeFolder url="file://$MODULE_DIR$/.venv" />
    </content>
    <orderEntry type="jdk" jdkName="Python 3.10 (audio-transcript)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
 </module>
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
 <component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
 </component>
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -0,0 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="Black">
    <option name="sdkName" value="Python 3.10 (audio-transcript)" />
  </component>
 </project>
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/audio-transcript.iml" filepath="$PROJECT_DIR$/.idea/audio-transcript.iml" />
    </modules>
  </component>
 </project>
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
 </project>
--- a/README.md
+++ b/README.md
@@ -0,0 +1,74 @@
 # Project README
 ## Setting Up the Project Environment
 Follow the steps below to set up your project environment:
 ### 1. Create a Virtual Environment
 To isolate the project dependencies and prevent conflicts, create a virtual environment using the following commands:
 ```bash
 # Create the virtual environment
 python3 -m venv venv
 # Activate the virtual environment:
 source venv/bin/activate
 ```
 ### 2. Install Project Dependencies
 Once the virtual environment is activated, install the required dependencies using `pip`:
 ```bash
 pip install -r requirements.txt
 ```
 Make sure you have a `requirements.txt` file in the project directory with the list of all required dependencies.
 ## Running the Script
 To run the Python script `transcribe.py`, you need to provide an audio file as a parameter. Use the following command:
 ```bash
 python transcribe.py <audio_file>
 ```
 - Replace `<audio_file>` with the path to your audio file.
 - Example:
 ```bash
 python transcribe.py sample_audio.wav
 ```
 ## Where is the Whisper model downloaded?
 When using the `openai-whisper` package, the AI Whisper model is downloaded and stored in a local cache directory. By
 default, it is stored under the user's home directory in the following path:
 ```plaintext
 ~/.cache/whisper/
 ```
 Here:
 - `~` refers to the user's home directory.
 - `.cache/whisper/` is the folder where the models are cached.
 The cache directory contains the downloaded model files, which are reused in subsequent runs to avoid re-downloading
 them. Specifically:
 - Whisper downloads the model files when they are first used, based on the requested model size (e.g., `base`, `medium`,
  or `large`).
 If you need to relocate the cache directory, pass `download_root=<path>` to `whisper.load_model()`, or set the
 `XDG_CACHE_HOME` environment variable (the models are then stored under `$XDG_CACHE_HOME/whisper/`).
 ## Notes
 - Ensure that your virtual environment is activated before running the script.
 - If you encounter any missing dependencies, double-check your `requirements.txt` file and re-run the installation
  command.
 Happy coding!
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,2 @@
 openai-whisper==20240930
 torch==2.6.0
--- a/transcribe.py
+++ b/transcribe.py
@@ -0,0 +1,23 @@
 import sys
 import whisper
 def transcribe_audio(audio_path, model_name="base", language="fr"):
    """Transcribe an audio file with Whisper, print the text and return it.

    Args:
        audio_path: Path to the audio file to transcribe.
        model_name: Name of the Whisper model to load. Defaults to
            ``"base"``, the model this script has always used.
        language: Language code of the speech in the recording. Defaults
            to ``"fr"`` to keep the historical behaviour; per the Whisper
            documentation, ``None`` lets the model detect the language.

    Returns:
        The transcribed text (the ``"text"`` entry of Whisper's result).
    """
    # Load the requested Whisper model
    model = whisper.load_model(model_name)
    # Transcribe the audio file
    print(f"Transcribing: {audio_path} ...")
    result = model.transcribe(audio_path, language=language)
    # Print and return transcription
    transcription = result["text"]
    print("\nTranscription:\n")
    print(transcription)
    return transcription
 if __name__ == "__main__":
    # A missing argument is a usage error: sys.exit() with a string writes
    # the message to stderr and terminates with a non-zero exit status, so
    # calling shells and scripts can detect the failure.
    if len(sys.argv) < 2:
        sys.exit("Usage: python transcribe.py path/to/audiofile")
    # The first command-line argument is the audio file to transcribe.
    transcribe_audio(sys.argv[1])