diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..43c1dccdf4ab226f48af4bc6d80fb4bda8deb251
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,87 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyTorch
+*.pth
+*.pt
+*.ckpt
+weights/
+checkpoints/
+
+# Gradio
+gradio_cached_examples/
+flagged/
+
+# Temporary files
+*.tmp
+*.temp
+/tmp/
+temp/
+
+# Logs
+*.log
+logs/
+
+# Environment
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# OS
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+
+# Model files (too large for git)
+*.safetensors
+model.pth
+*.bin
+
+# Audio/Video files
+*.wav
+*.mp3
+*.mp4
+*.avi
+*.mov
+
+# Jupyter
+.ipynb_checkpoints/
+
+# Cache
+.cache/
+*.cache
diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md
new file mode 100644
index 0000000000000000000000000000000000000000..bfc064d7a1fdbd6eebe733c12bf18c9d9162826b
--- /dev/null
+++ b/DEPLOYMENT.md
@@ -0,0 +1,103 @@
+# 🚀 Hugging Face Space Deployment Guide
+
+## 📁 File Structure
+
+Make sure your HF Space contains the following files:
+
+```
+Acfoley/
+├── README.md           # Space configuration and description
+├── app.py              # Main application file
+├── requirements.txt    # Python dependencies
+├── packages.txt        # System dependencies
+├── hf_AC/              # hf_AC model code
+└── .gitignore          # Git ignore rules
+```
+
+## 🔧 Deployment Steps
+
+### 1. Upload the Code to Your HF Space
+
+Upload all files to your Hugging Face Space repository:
+
+```bash
+git add .
+git commit -m "Add hf_AC audio generation demo"
+git push
+```
+
+### 2. Model Weights Download
+
+The model is downloaded automatically from the following locations:
+- Main model: `https://huggingface.co/FF2416/AC-Foley/resolve/main/model.pth`
+- Other components are downloaded automatically as needed
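+
+If the automatic download fails, you can pre-fetch the checkpoint with `huggingface_hub`. A minimal sketch, assuming the standard `hf_hub_download` API; the `local_dir` below is an assumption, so point it wherever the app expects `model.pth` to live:
+
+```python
+from huggingface_hub import hf_hub_download
+
+# Repo ID and filename are taken from the URL above; local_dir is an assumption.
+weights_path = hf_hub_download(
+    repo_id="FF2416/AC-Foley",
+    filename="model.pth",
+    local_dir="weights",
+)
+print(f"Checkpoint saved to {weights_path}")
+```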
+
+### 3. Environment Setup
+
+The HF Space automatically:
+- Installs the Python packages listed in `requirements.txt`
+- Installs the system dependencies listed in `packages.txt`
+- Runs `app.py` to launch the Gradio interface
+
+## 📋 README.md Configuration
+
+Make sure the top of README.md contains the correct YAML configuration:
+
+```yaml
+---
+title: hf_AC Audio Foley Generator
+emoji: 🎵
+colorFrom: blue
+colorTo: green
+sdk: gradio
+sdk_version: 5.42.0
+app_file: app.py
+pinned: false
+license: mit
+---
+```
+
+## 🔍 Troubleshooting
+
+### Common Issues
+
+1. **Model download fails**
+   - Check the network connection
+   - Confirm the model URL is reachable
+
+2. **Dependency installation fails**
+   - Check the format of `requirements.txt`
+   - Confirm that package versions are compatible
+
+3. **Out of memory**
+   - The free HF Space tier has limited memory
+   - Consider optimizing the model or upgrading to a paid tier
+
+### Debugging
+
+1. Check the Space logs
+2. Run `test_setup.py` to verify the environment
+3. Check that the model files downloaded correctly
+
+## 🎯 Usage
+
+Once deployed, users can:
+
+1. Upload an MP4 video file
+2. Enter a text description of the desired audio
+3. Adjust the generation parameters
+4. Click the generate button
+5. Download the generated audio
+
+## 📊 Performance Notes
+
+- The first run downloads the model (a few GB)
+- Generation time depends on the video length and hardware
+- Keep videos to 15 seconds or less for best results
+
+## 🔗 Links
+
+- [hf_AC GitHub](https://github.com/ff2416/hf_AC)
+- [Model weights](https://huggingface.co/FF2416/AC-Foley)
+- [Gradio documentation](https://gradio.app/docs/)
+- [HF Spaces documentation](https://huggingface.co/docs/hub/spaces)
diff --git a/README.md b/README.md
index 2445e94f45a189aa0395116ae4cb5804c96664af..0e15e48ccd5fec33822205a9d1389ab10eaf11a8 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,50 @@
 ---
-title: Acfoley
-emoji: 💬
-colorFrom: yellow
-colorTo: purple
+title: hf_AC Audio Foley Generator
+emoji: 🎵
+colorFrom: blue
+colorTo: green
 sdk: gradio
 sdk_version: 5.42.0
 app_file: app.py
 pinned: false
-hf_oauth: true
-hf_oauth_scopes:
-- inference-api
+license: mit
 ---
 
-An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
+# 🎵 hf_AC Audio Foley Generator
+
+A Gradio demo for generating synchronized audio from videos using the hf_AC (Audio-Conditioned Foley) model. This application allows you to upload a video and generate matching audio content based on text descriptions.
+
+## Features
+
+- **Video-to-Audio Generation**: Upload a video and generate synchronized audio
+- **Text-Guided Generation**: Use text prompts to describe the desired audio
+- **Customizable Parameters**: Adjust duration, CFG strength, and other generation parameters
+- **Real-time Processing**: Generate audio in real-time with GPU acceleration
+
+## How to Use
+
+1. **Load Model**: The model will automatically load when you start the app
+2. **Upload Video**: Choose a video file (MP4 format recommended)
+3. **Describe Audio**: Write a text description of the audio you want to generate
+4. **Generate**: Click the generate button and wait for the audio to be created
+5. **Download**: Listen to and download the generated audio
+
+## Example Prompts
+
+- "Crackling fireplace with gentle flames"
+- "Ocean waves crashing on rocky shore"
+- "Busy city street with car horns and chatter"
+- "Forest ambience with bird songs and rustling leaves"
+- "Keyboard typing in a quiet office"
+
+## Model Information
+
+This demo uses the hf_AC model, which is designed for audio-visual synchronization and generation. The model can generate high-quality audio that matches the visual content and text descriptions.
+
+## Technical Details
+
+- **Framework**: PyTorch, Gradio
+- **Model**: hf_AC (Audio-Conditioned Foley)
+- **Audio Format**: WAV, 44.1kHz
+- **Video Support**: MP4, various resolutions
+- **Processing**: GPU-accelerated when available
diff --git a/app.py b/app.py
index bec204167857e75cae4ce3a9355e169be2409d68..72d817d3dd731b2c38d5e20ca582ae7a5cd24d4b 100644
--- a/app.py
+++ b/app.py
@@ -1,70 +1,360 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
+import torch
+import torchaudio
+import logging
+import tempfile
+import os
+import sys
+from pathlib import Path
+import numpy as np
+from typing import Optional, Tuple
+import time
+import traceback
 
+# Add hf_AC to path
+current_dir = Path(__file__).parent
+hf_ac_path = current_dir / "hf_AC"
+if hf_ac_path.exists():
+    sys.path.insert(0, str(hf_ac_path))
 
-def respond(
-    message,
-    history: list[dict[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    hf_token: gr.OAuthToken,
-):
-    """
-    For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-    """
-    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
 
+# Configuration for HF Space
+EXAMPLE_PROMPTS = [
+    "Crackling fireplace with gentle flames",
+    "Ocean waves crashing on rocky shore",
+    "Forest ambience with bird songs",
+    "Keyboard typing sounds",
+    "Footsteps on wooden floor",
+    "Rain on metal roof"
+]
 
-    messages = [{"role": "system", "content": system_message}]
 
+USAGE_TIPS = """
+### 💡 Usage Tips
 
-    messages.extend(history)
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        choices = message.choices
-        token = ""
-        if len(choices) and choices[0].delta.content:
-            token = choices[0].delta.content
 
+1. **Video quality**: Use clear, well-lit footage
+2. **Prompts**: Describe the desired audio as specifically as possible
+3. **Duration**: 1-15 seconds works best (the slider caps duration at 15 s)
+4. **CFG strength**: Higher values follow the prompt more closely, but may reduce quality
+"""
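+
+# Note on cfg_strength (exposed as a slider below): classifier-free guidance
+# typically blends the unconditional and text-conditioned predictions at each
+# sampling step, roughly pred = uncond + cfg_strength * (cond - uncond).
+# This is a generic sketch; the exact formulation inside hf_AC's FlowMatching
+# may differ.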
 
-        response += token
-        yield response
 
+# Import hf_AC modules with error handling
+try:
+    from hf_AC.mmaudio.eval_utils import (ModelConfig, all_model_cfg, generate, load_video,
+                                          setup_eval_logging)
+    from hf_AC.mmaudio.model.flow_matching import FlowMatching
+    from hf_AC.mmaudio.model.networks import MMAudio, get_my_mmaudio
+    from hf_AC.mmaudio.model.utils.features_utils import FeaturesUtils
+
+    # Setup logging
+    setup_eval_logging()
+    log = logging.getLogger()
+    HF_AC_AVAILABLE = True
+except ImportError as e:
+    print(f"Warning: hf_AC modules not available: {e}")
+    log = logging.getLogger()
+    HF_AC_AVAILABLE = False
 
+class AudioFoleyModel:
+    def __init__(self):
+        # Prefer CUDA, then Apple MPS, falling back to CPU
+        self.device = 'cpu'
+        if torch.cuda.is_available():
+            self.device = 'cuda'
+        elif torch.backends.mps.is_available():
+            self.device = 'mps'
+
+        self.dtype = torch.bfloat16
+        self.model = None
+        self.net = None
+        self.fm = None
+        self.feature_utils = None
+
+    def load_model(self, variant='large_44k', model_path=None):
+        """Load the hf_AC model"""
+        try:
+            if not HF_AC_AVAILABLE:
+                return "❌ hf_AC modules not available. Please install the hf_AC package."
+
+            if variant not in all_model_cfg:
+                available_variants = list(all_model_cfg.keys()) if all_model_cfg else []
+                return f"❌ Unknown model variant: {variant}. Available: {available_variants}"
+
+            log.info(f"Loading model variant: {variant}")
+            self.model: ModelConfig = all_model_cfg[variant]
+
+            # Download model components if needed
+            try:
+                self.model.download_if_needed()
+            except Exception as e:
+                log.warning(f"Could not download model components: {e}")
+
+            # Set custom model path if provided
+            if model_path and os.path.exists(model_path):
+                self.model.model_path = Path(model_path)
+                log.info(f"Using custom model path: {model_path}")
+
+            # Load network
+            self.net: MMAudio = get_my_mmaudio(self.model.model_name).to(self.device, self.dtype).eval()
+
+            # Load weights
+            if hasattr(self.model, 'model_path') and self.model.model_path and Path(self.model.model_path).exists():
+                try:
+                    weights = torch.load(self.model.model_path, map_location=self.device, weights_only=True)
+                    self.net.load_weights(weights['weights'])
+                    log.info(f'✅ Loaded weights from {self.model.model_path}')
+                except Exception as e:
+                    log.error(f"Failed to load weights: {e}")
+                    return f"❌ Failed to load model weights: {e}"
+            else:
+                log.warning('⚠️ No model weights found, using default initialization')
+                return "⚠️ Model loaded but no weights found. Download model.pth from HuggingFace."
+
+            # Initialize flow matching
+            self.fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=25)
+
+            # Initialize feature utils
+            try:
+                self.feature_utils = FeaturesUtils(
+                    tod_vae_ckpt=self.model.vae_path,
+                    synchformer_ckpt=self.model.synchformer_ckpt,
+                    enable_conditions=True,
+                    mode=self.model.mode,
+                    bigvgan_vocoder_ckpt=self.model.bigvgan_16k_path,
+                    need_vae_encoder=True
+                )
+                self.feature_utils = self.feature_utils.to(self.device, self.dtype).eval()
+            except Exception as e:
+                log.error(f"Failed to initialize feature utils: {e}")
+                return f"❌ Failed to initialize feature utilities: {e}"
+
+            return "✅ Model loaded successfully!"
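+        # Anything that fails above lands in the catch-all handler below and is
+        # returned as a readable error string for the Space UI.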
+ + except Exception as e: + error_msg = f"❌ Error loading model: {str(e)}\n{traceback.format_exc()}" + log.error(error_msg) + return error_msg + + def generate_audio(self, video_file, prompt: str, negative_prompt: str = "", + duration: float = 8.0, cfg_strength: float = 4.5, + seed: int = 42) -> Tuple[Optional[str], str]: + """Generate audio from video and text prompt""" + try: + # Validation checks + if not HF_AC_AVAILABLE: + return None, "❌ hf_AC modules not available." + + if self.net is None or self.feature_utils is None: + return None, "❌ Model not loaded. Please load the model first." + + if video_file is None: + return None, "❌ Please upload a video file." + + if not prompt.strip(): + return None, "❌ Please provide a text prompt describing the desired audio." + + log.info(f'🎬 Processing video: {video_file}') + log.info(f'📝 Prompt: "{prompt}"') + + # Load and process video + try: + video_path = Path(video_file) + if not video_path.exists(): + return None, f"❌ Video file not found: {video_file}" + + video_info = load_video(video_path, duration) + clip_frames = video_info.clip_frames + sync_frames = video_info.sync_frames + duration_sec = video_info.duration_sec + + log.info(f'📹 Video loaded: {duration_sec:.2f}s duration') + + except Exception as e: + return None, f"❌ Failed to load video: {str(e)}" + + # Prepare frames + clip_frames = clip_frames.unsqueeze(0) if clip_frames is not None else None + sync_frames = sync_frames.unsqueeze(0) + + # Update model sequence configuration + try: + self.model.seq_cfg.duration = duration_sec + self.model.seq_cfg.audio_num_sample = 89088 # Default for 44kHz + self.net.update_seq_lengths( + self.model.seq_cfg.latent_seq_len, + self.model.seq_cfg.clip_seq_len, + self.model.seq_cfg.sync_seq_len, + self.model.seq_cfg.audio_seq_len + ) + except Exception as e: + return None, f"❌ Failed to configure model: {str(e)}" + + # Generate audio + try: + log.info('🎵 Generating audio...') + start_time = time.time() + + with torch.inference_mode(): + audios = generate( + clip_frames, + sync_frames, + [prompt], + None, # No reference audio + negative_text=[negative_prompt] if negative_prompt.strip() else None, + feature_utils=self.feature_utils, + net=self.net, + fm=self.fm, + rng=torch.Generator(device=self.device).manual_seed(seed), + cfg_strength=cfg_strength + ) + + generation_time = time.time() - start_time + log.info(f'⏱️ Generation completed in {generation_time:.2f}s') + + except Exception as e: + return None, f"❌ Audio generation failed: {str(e)}" + + # Save generated audio + try: + audio = audios.float().cpu()[0] + + # Create output filename with timestamp + timestamp = int(time.time()) + output_filename = f"generated_audio_{timestamp}.wav" + permanent_path = f"/tmp/{output_filename}" + + # Save audio file + torchaudio.save(permanent_path, audio, self.model.seq_cfg.sampling_rate) + + # Verify file was created + if not os.path.exists(permanent_path): + return None, "❌ Failed to save audio file" + + file_size = os.path.getsize(permanent_path) / 1024 # KB + success_msg = f"✅ Audio generated successfully!\n" + success_msg += f"📊 Duration: {duration_sec:.2f}s | " + success_msg += f"Size: {file_size:.1f}KB | " + success_msg += f"Time: {generation_time:.2f}s" + + return permanent_path, success_msg + + except Exception as e: + return None, f"❌ Failed to save audio: {str(e)}" + + except Exception as e: + error_msg = f"❌ Unexpected error: {str(e)}\n{traceback.format_exc()}" + log.error(error_msg) + return None, error_msg -""" -For information on how to customize the 
ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-chatbot = gr.ChatInterface(
-    respond,
-    type="messages",
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
+# Initialize model
-with gr.Blocks() as demo:
-    with gr.Sidebar():
-        gr.LoginButton()
-    chatbot.render()
+audio_model = AudioFoleyModel()
 
+def generate_audio_interface(video_file, prompt, duration, cfg_strength):
+    """Interface function for generating audio"""
+    # Use fixed seed for consistency in HF Space
+    seed = 42
+    negative_prompt = ""  # Simplified interface
+
+    audio_path, message = audio_model.generate_audio(
+        video_file, prompt, negative_prompt, duration, cfg_strength, seed
+    )
+    return audio_path, message
 
+# Create Gradio interface
+with gr.Blocks(title="hf_AC Audio Foley Generator", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("""
+    # 🎵 hf_AC Audio Foley Generator
+
+    An AI-powered video-to-audio generation tool. Upload a video and provide a text description, and the model will generate matching audio content.
+
+    **Note**: The model is downloaded on first use; please be patient.
+    """)
+
+    # Model status display
+    model_status = gr.Textbox(
+        label="Model Status",
+        value="Initializing model...",
+        interactive=False
+    )
+
+    with gr.Row():
+        with gr.Column():
+            video_input = gr.Video(
+                label="Upload Video",
+                format="mp4"
+            )
+
+            prompt_input = gr.Textbox(
+                label="Audio Description",
+                placeholder="Describe the audio you want to generate (e.g. 'footsteps', 'bird calls', 'car engine')",
+                lines=3
+            )
+
+            with gr.Row():
+                duration_slider = gr.Slider(
+                    minimum=1.0,
+                    maximum=15.0,
+                    value=8.0,
+                    step=0.5,
+                    label="Duration (seconds)"
+                )
+
+                cfg_strength_slider = gr.Slider(
+                    minimum=1.0,
+                    maximum=8.0,
+                    value=4.5,
+                    step=0.1,
+                    label="CFG Strength"
+                )
+
+        with gr.Column():
+            # Example prompts
+            gr.Markdown("### 🎯 Example Prompts")
+            example_buttons = []
+            for prompt in EXAMPLE_PROMPTS[:6]:
+                btn = gr.Button(prompt, size="sm")
+                example_buttons.append(btn)
+                btn.click(
+                    fn=lambda p=prompt: p,
+                    outputs=prompt_input
+                )
+
+            generate_btn = gr.Button("🎵 Generate Audio", variant="primary", size="lg")
+
+            audio_output = gr.Audio(
+                label="Generated Audio",
+                type="filepath"
+            )
+
+            generation_status = gr.Textbox(label="Generation Status", interactive=False)
+
+    generate_btn.click(
+        fn=generate_audio_interface,
+        inputs=[
+            video_input, prompt_input, duration_slider, cfg_strength_slider
+        ],
+        outputs=[audio_output, generation_status]
+    )
+
+    with gr.Accordion("💡 Usage Tips", open=False):
+        gr.Markdown(USAGE_TIPS)
+
+    gr.Markdown("""
+    ### 🎬 More Example Prompts
+
+    - "Firewood crackling in a fireplace"
+    - "Waves breaking against rocks"
+    - "Cars and crowd noise on a busy street"
+    - "Birdsong and rustling leaves in a forest"
+    - "Keyboard clicks in a quiet office"
+    - "Stir-frying and chopping in a kitchen"
+    - "Raindrops hitting a metal roof"
+    - "Soft footsteps on a wooden floor"
+    """)
+
+    # Auto-load model on startup
+    demo.load(
+        fn=lambda: audio_model.load_model(),
+        outputs=[model_status]
+    )
 
 if __name__ == "__main__":
-    demo.launch()
+    # HF Space will handle the server configuration
+    demo.launch()
\ No newline at end of file
diff --git a/hf_AC/.gitignore b/hf_AC/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..b0a357cd8cfcc62e7bf7ac2f12214eb4f43151da
--- /dev/null
+++ b/hf_AC/.gitignore
@@ -0,0 +1,224 @@
+run_*.sh
+log/
+saves
+saves/
+weights/
+weights
+output/
+output
+pretrained/
+workspace
+workspace/
+ext_weights/
+ext_weights
+.checkpoints/
+.vscode/
+training/example_output/
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock +#poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. +# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +#pdm.lock +#pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +#pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs
+.abstra/
+
+# Visual Studio Code
+# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+# and can be added to the global gitignore or merged into this file. However, if you prefer,
+# you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
+
+# Cursor
+# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+# refer to https://docs.cursor.com/context/ignore-files
+.cursorignore
+.cursorindexingignore
+
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
diff --git a/hf_AC/README.md b/hf_AC/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..758ce15cb18da5e67b0439829bb962938e36c6ae
--- /dev/null
+++ b/hf_AC/README.md
@@ -0,0 +1,25 @@
+# hf_AC
+
+## Environment Setup
+- Python 3.9+
+- PyTorch **2.5.1+** with the matching torchvision/torchaudio builds (select the wheel for your CUDA version at https://pytorch.org/; installing via pip is recommended)
+
+```bash
+pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 --upgrade
+git clone https://github.com/ff2416/hf_AC.git
+cd hf_AC
+pip install -e .
+```
+
+## Model Weights
+Download the pretrained checkpoint from:
+https://huggingface.co/FF2416/AC-Foley/blob/main/model.pth
+
+## Inference
+```bash
+python inf.py \
+    --model_path <path/to/model.pth> \
+    --duration 8 \
+    --prompt <audio description> \
+    --video_dir <path/to/videos> \
+    --audio_path