205 lines
8.3 KiB
Python
205 lines
8.3 KiB
Python
import json
import os
from pathlib import Path
|
|
|
|
# In-memory nbformat-4 description of the WhisperJAV Colab notebook.
#
# Layout: one markdown intro cell followed by two Colab "form" code cells
# (Step 1 installs dependencies, Step 2 configures and runs transcription and
# optional LLM translation). Every entry of a cell's "source" list is one line
# of that cell, newline included, so the embedded Python must carry its own
# indentation inside the string literals.
notebook_content = {
    "cells": [
        # --- Intro / usage instructions -----------------------------------
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "<a href=\"https://colab.research.google.com/github/meizhong986/WhisperJAV/blob/main/notebook/WhisperJAV_colab_edition.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n",
                "\n",
                "# 🎌 WhisperJAV - Colab Edition\n",
                "\n",
                "**The easiest way to generate subtitles for Japanese video content.**\n",
                "\n",
                "### ✨ Features\n",
                "- **One-Click Simple:** Just select your settings and run.\n",
                "- **Japanese Optimized:** Uses the `kotoba-faster-whisper` model for superior accuracy.\n",
                "- **Ensemble Mode:** Combines multiple models for maximum precision.\n",
                "- **Auto-Translation:** Optional AI translation to English.\n",
                "\n",
                "### 🚀 How to Use\n",
                "1. **Run Step 1** to install dependencies (takes ~2 mins).\n",
                "2. **Configure Step 2** with your preferences.\n",
                "3. **Press Play** on Step 2 and wait for your subtitles!"
            ]
        },
        # --- Step 1: environment installation -----------------------------
        {
            "cell_type": "code",
            "execution_count": None,
            "metadata": {},
            "outputs": [],
            "source": [
                "#@title 📦 Step 1: Initialize Environment\n",
                "#@markdown Click the **Play** button to install WhisperJAV and dependencies.\n",
                "\n",
                "import os\n",
                "import sys\n",
                "import subprocess\n",
                "import time\n",
                "from IPython.display import display, HTML, clear_output\n",
                "\n",
                "def show_status(message, status=\"info\"):\n",
                "    colors = {\"success\": \"#2ecc71\", \"error\": \"#e74c3c\", \"info\": \"#3498db\"}\n",
                "    color = colors.get(status, \"#333\")\n",
                "    display(HTML(f'<div style=\"padding:8px;border-left:4px solid {color}\"><b>{message}</b></div>'))\n",
                "\n",
                "def run_cmd(cmd):\n",
                "    try:\n",
                "        subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)\n",
                "        return True\n",
                "    except subprocess.CalledProcessError as exc:\n",
                "        # Show the installer output so the failure can be diagnosed\n",
                "        print(exc.stderr or exc.stdout or \"\")\n",
                "        return False\n",
                "\n",
                "print(\"🚀 Initializing environment...\")\n",
                "start_time = time.time()\n",
                "\n",
                "# Install dependencies\n",
                "commands = [\n",
                "    (\"apt-get update -qq && apt-get install -y -qq ffmpeg portaudio19-dev\", \"System packages\"),\n",
                "    (\"pip install -q tqdm numba tiktoken ffmpeg-python soundfile auditok numpy scipy pysrt srt aiofiles jsonschema Pillow colorama librosa matplotlib pyloudnorm requests faster-whisper transformers optimum accelerate huggingface-hub pydantic\", \"Python dependencies\"),\n",
                "    (\"pip install -q --no-deps git+https://github.com/openai/whisper.git@main\", \"OpenAI Whisper\"),\n",
                "    (\"pip install -q --no-deps git+https://github.com/meizhong986/stable-ts-fix-setup.git@main\", \"Stable-TS\"),\n",
                "    (\"pip install -q git+https://github.com/meizhong986/WhisperJAV.git@main\", \"WhisperJAV\")\n",
                "]\n",
                "\n",
                "for cmd, name in commands:\n",
                "    show_status(f\"Installing {name}...\", \"info\")\n",
                "    if not run_cmd(cmd):\n",
                "        show_status(f\"Failed to install {name}\", \"error\")\n",
                "        sys.exit(1)\n",
                "\n",
                "clear_output()\n",
                "elapsed = time.time() - start_time\n",
                "display(HTML(f'<div style=\"background:#d4edda;padding:16px;border-radius:8px\"><h3 style=\"color:#155724;margin:0\">✅ Ready to go! ({elapsed:.0f}s)</h3></div>'))"
            ]
        },
        # --- Step 2: configuration form, transcription, translation -------
        {
            "cell_type": "code",
            "execution_count": None,
            "metadata": {},
            "outputs": [],
            "source": [
                "#@title 🎬 Step 2: Configure & Run\n",
                "#@markdown Select your options and press Play.\n",
                "\n",
                "#@markdown ### 📁 Input Source\n",
                "source_type = \"Google Drive\" #@param [\"Google Drive\", \"Upload File\"]\n",
                "drive_folder = \"WhisperJAV\" #@param {type:\"string\"}\n",
                "\n",
                "#@markdown ### ⚙️ Processing Mode\n",
                "preset = \"Japanese Expert (Recommended)\" #@param [\"Japanese Expert (Recommended)\", \"Quick Scan\", \"Maximum Quality (Ensemble)\"]\n",
                "\n",
                "#@markdown ### 🗣️ Output Language\n",
                "output_language = \"Japanese (Native)\" #@param [\"Japanese (Native)\", \"English (Whisper Translate)\", \"English (LLM Translate)\"]\n",
                "\n",
                "#@markdown ### 🤖 LLM Translation (Only if LLM Translate selected)\n",
                "llm_provider = \"deepseek\" #@param [\"deepseek\", \"openrouter\", \"gemini\", \"claude\", \"gpt\"]\n",
                "llm_api_key = \"\" #@param {type:\"string\"}\n",
                "\n",
                "#@markdown ---\n",
                "import os\n",
                "import sys\n",
                "import shlex\n",
                "import subprocess\n",
                "from pathlib import Path\n",
                "from google.colab import drive, files\n",
                "from IPython.display import display, HTML\n",
                "\n",
                "# 1. Setup Input\n",
                "if source_type == \"Google Drive\":\n",
                "    drive.mount('/content/drive')\n",
                "    input_path = Path(f'/content/drive/MyDrive/{drive_folder}')\n",
                "    input_path.mkdir(parents=True, exist_ok=True)\n",
                "    print(f\"📂 Using Google Drive folder: {input_path}\")\n",
                "else:\n",
                "    print(\"📤 Upload your video file:\")\n",
                "    uploaded = files.upload()\n",
                "    if not uploaded: sys.exit(1)\n",
                "    input_path = Path('/content/' + list(uploaded.keys())[0])\n",
                "\n",
                "output_path = input_path if source_type == \"Google Drive\" else Path('/content/output')\n",
                "output_path.mkdir(exist_ok=True)\n",
                "\n",
                "# 2. Configure Command\n",
                "cmd = ['whisperjav', str(input_path), '--output-dir', str(output_path)]\n",
                "\n",
                "# Preset Logic\n",
                "if preset == \"Quick Scan\":\n",
                "    cmd.extend(['--mode', 'faster', '--sensitivity', 'balanced'])\n",
                "elif preset == \"Maximum Quality (Ensemble)\":\n",
                "    cmd.extend(['--ensemble',\n",
                "                '--pass1-pipeline', 'kotoba-faster-whisper', '--pass1-sensitivity', 'aggressive',\n",
                "                '--pass2-pipeline', 'balanced', '--pass2-sensitivity', 'balanced',\n",
                "                '--merge-strategy', 'smart_merge'])\n",
                "else: # Japanese Expert\n",
                "    cmd.extend(['--mode', 'kotoba-faster-whisper', '--sensitivity', 'aggressive'])\n",
                "\n",
                "# Language Logic\n",
                "if output_language == \"English (Whisper Translate)\":\n",
                "    cmd.extend(['--subs-language', 'direct-to-english'])\n",
                "else:\n",
                "    cmd.extend(['--subs-language', 'native'])\n",
                "\n",
                "# 3. Run Transcription\n",
                "print(f\"\\n🚀 Starting Transcription ({preset})...\")\n",
                "full_cmd = shlex.join(cmd)\n",
                "!{full_cmd}\n",
                "\n",
                "# 4. Run Translation (if requested)\n",
                "if output_language == \"English (LLM Translate)\":\n",
                "    if not llm_api_key:\n",
                "        print(\"⚠️ LLM Translation skipped: No API Key provided.\")\n",
                "    else:\n",
                "        print(\"\\n🤖 Starting LLM Translation...\")\n",
                "        # Set Env Var\n",
                "        env_map = {\"deepseek\": \"DEEPSEEK_API_KEY\", \"openrouter\": \"OPENROUTER_API_KEY\",\n",
                "                   \"gemini\": \"GEMINI_API_KEY\", \"claude\": \"ANTHROPIC_API_KEY\", \"gpt\": \"OPENAI_API_KEY\"}\n",
                "        os.environ[env_map.get(llm_provider, \"API_KEY\")] = llm_api_key\n",
                "\n",
                "        # Find SRTs\n",
                "        srt_files = list(output_path.glob(\"*.srt\"))\n",
                "        for srt in srt_files:\n",
                "            if \"_en\" not in srt.name: # Avoid re-translating\n",
                "                print(f\"Translating {srt.name}...\")\n",
                "                # Quote every argument so unusual file names cannot break the shell command\n",
                "                translate_cmd = shlex.join(['whisperjav-translate', '-i', str(srt), '--provider', llm_provider, '--target', 'english', '--tone', 'standard'])\n",
                "                !{translate_cmd}\n",
                "\n",
                "print(\"\\n✅ All tasks complete!\")"
            ]
        }
    ],
    # Notebook-level metadata: request a T4 GPU runtime on Colab and declare
    # a plain Python 3 kernel.
    "metadata": {
        "accelerator": "GPU",
        "colab": {
            "gpuType": "T4",
            "provenance": []
        },
        "kernelspec": {
            "display_name": "Python 3",
            "language": "python",
            "name": "python3"
        },
        "language_info": {
            "codemirror_mode": {
                "name": "ipython",
                "version": 3
            },
            "file_extension": ".py",
            "mimetype": "text/x-python",
            "name": "python",
            "nbconvert_exporter": "python",
            "pygments_lexer": "ipython3",
            "version": "3.10.12"
        }
    },
    "nbformat": 4,
    "nbformat_minor": 0
}
|
|
|
|
# Destination of the generated notebook. The original checkout location stays
# the default; set the WHISPERJAV_NOTEBOOK_PATH environment variable to write
# somewhere else (another machine, CI, a scratch directory).
default_target = Path(r"c:\BIN\git\WhisperJav_V1_Minami_Edition\notebook\WhisperJAV_colab_edition.ipynb")
target_path = Path(os.environ.get("WHISPERJAV_NOTEBOOK_PATH") or default_target)

# Create the destination folder up front so open() cannot fail on a missing directory.
target_path.parent.mkdir(parents=True, exist_ok=True)

with open(target_path, "w", encoding="utf-8") as f:
    # ensure_ascii=False keeps emoji and other non-ASCII text readable in the
    # UTF-8 file instead of emitting \uXXXX escapes.
    json.dump(notebook_content, f, indent=1, ensure_ascii=False)
    # End the file with a newline.
    f.write("\n")

print(f"Successfully wrote valid JSON to {target_path}")
|