Upload 11 files
- .gitignore +168 -0
- LICENSE +21 -0
- README.md +266 -10
- app.py +374 -0
- cog.yaml +35 -0
- inference.py +159 -0
- launcher.py +197 -0
- predict.py +214 -0
- quick_demo.ipynb +208 -0
- requirements3d.txt +21 -0
- webui.bat +17 -0
.gitignore
ADDED
@@ -0,0 +1,168 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

examples/results/*
gfpgan/*
checkpoints/
results/*
Dockerfile
start_docker.sh
start.sh
LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 Tencent AI Lab

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md
CHANGED
@@ -1,10 +1,266 @@
<div align="center">

<img src='https://user-images.githubusercontent.com/4397546/229094115-862c747e-7397-4b54-ba4a-bd368bfe2e0f.png' width='500px'/>

<!--<h2> 😭 SadTalker: <span style="font-size:12px">Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation </span> </h2> -->

<a href='https://arxiv.org/abs/2211.12194'><img src='https://img.shields.io/badge/ArXiv-PDF-red'></a> <a href='https://sadtalker.github.io'><img src='https://img.shields.io/badge/Project-Page-Green'></a> [](https://colab.research.google.com/github/Winfredy/SadTalker/blob/main/quick_demo.ipynb) [](https://huggingface.co/spaces/vinthony/SadTalker) [](https://colab.research.google.com/github/camenduru/stable-diffusion-webui-colab/blob/main/video/stable/stable_diffusion_1_5_video_webui_colab.ipynb) [](https://replicate.com/cjwbw/sadtalker)

<div>
<a target='_blank'>Wenxuan Zhang <sup>*,1,2</sup></a>
<a href='https://vinthony.github.io/' target='_blank'>Xiaodong Cun <sup>*,2</sup></a>
<a href='https://xuanwangvc.github.io/' target='_blank'>Xuan Wang <sup>3</sup></a>
<a href='https://yzhang2016.github.io/' target='_blank'>Yong Zhang <sup>2</sup></a>
<a href='https://xishen0220.github.io/' target='_blank'>Xi Shen <sup>2</sup></a> </br>
<a href='https://yuguo-xjtu.github.io/' target='_blank'>Yu Guo <sup>1</sup></a>
<a href='https://scholar.google.com/citations?hl=zh-CN&user=4oXBp9UAAAAJ' target='_blank'>Ying Shan <sup>2</sup></a>
<a target='_blank'>Fei Wang <sup>1</sup></a>
</div>
<br>
<div>
<sup>1</sup> Xi'an Jiaotong University   <sup>2</sup> Tencent AI Lab   <sup>3</sup> Ant Group
</div>
<br>
<i><strong><a href='https://arxiv.org/abs/2211.12194' target='_blank'>CVPR 2023</a></strong></i>
<br>
<br>



<b>TL;DR: single portrait image 🙎♂️ + audio 🎤 = talking head video 🎞.</b>

<br>

</div>

## 🔥 Highlight

- 🔥 The extension for [stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) is online. Check out more details [here](docs/webui_extension.md).

https://user-images.githubusercontent.com/4397546/231495639-5d4bb925-ea64-4a36-a519-6389917dac29.mp4

- 🔥 `full image mode` is online! Check out [here](https://github.com/Winfredy/SadTalker#full-bodyimage-generation) for more details.

| still + enhancer in v0.0.1 | still + enhancer in v0.0.2 | [input image @bagbag1815](https://twitter.com/bagbag1815/status/1642754319094108161) |
|:--------------------: |:--------------------: | :----: |
| <video src="https://user-images.githubusercontent.com/48216707/229484996-5d7be64f-2553-4c9e-a452-c5cf0b8ebafe.mp4" type="video/mp4"> </video> | <video src="https://user-images.githubusercontent.com/4397546/230717873-355b7bf3-d3de-49f9-a439-9220e623fce7.mp4" type="video/mp4"> </video> | <img src='./examples/source_image/full_body_2.png' width='380'> |

- 🔥 Several new modes, e.g. `still mode`, `reference mode`, and `resize mode`, are online for better and more customizable applications.

- 🔥 Happy to see more community demos on [bilibili](https://search.bilibili.com/all?keyword=sadtalker&from_source=webtop_search&spm_id_from=333.1007&search_source=3), [YouTube](https://www.youtube.com/results?search_query=sadtalker&sp=CAM%253D) and [Twitter #sadtalker](https://twitter.com/search?q=%23sadtalker&src=typed_query).

## 📋 Changelog (the previous changelog can be found [here](docs/changlelog.md))

- __[2023.04.15]__: Added the Automatic1111 Colab by @camenduru, thanks for this awesome Colab: [](https://colab.research.google.com/github/camenduru/stable-diffusion-webui-colab/blob/main/video/stable/stable_diffusion_1_5_video_webui_colab.ipynb).

- __[2023.04.12]__: Added a more detailed sd-webui installation document; fixed the reinstallation problem.

- __[2023.04.12]__: Fixed the sd-webui safety issues caused by third-party packages; optimized the output path in `sd-webui-extension`.

- __[2023.04.08]__: ❗️❗️❗️ In v0.0.2, we add a logo watermark to the generated video to prevent abuse, since it is very realistic.

- __[2023.04.08]__: v0.0.2: full image animation, added a Baidu drive for downloading checkpoints, and optimized the enhancer logic.

## 🚧 TODO

<details><summary> Previous TODOs </summary>

- [x] Generating 2D face from a single image.
- [x] Generating 3D face from audio.
- [x] Generating 4D free-view talking examples from audio and a single image.
- [x] Gradio/Colab demo.
- [x] Full body/image generation.
- [x] Integrate with stable-diffusion-webui. (stay tuned!)
</details>

- [ ] Audio-driven anime avatar.
- [ ] Training code for each component.

## If you have any problems, please read our [FAQ](docs/FAQ.md) before opening an issue.

## ⚙️ 1. Installation.

Tutorials from communities: [Chinese Windows tutorial (中文Windows教程)](https://www.bilibili.com/video/BV1Dc411W7V6/) | [Japanese course (日本語コース)](https://br-d.fanbox.cc/posts/5685086?utm_campaign=manage_post_page&utm_medium=share&utm_source=twitter)

### Linux:

1. Install [Anaconda](https://www.anaconda.com/), Python, and git.

2. Create the env and install the requirements.
```bash
git clone https://github.com/Winfredy/SadTalker.git

cd SadTalker

conda create -n sadtalker python=3.8

conda activate sadtalker

pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113

conda install ffmpeg

pip install -r requirements.txt

### tts is optional for the gradio demo.
### pip install TTS

```
### Windows ([Chinese Windows tutorial (中文Windows教程)](https://www.bilibili.com/video/BV1Dc411W7V6/)):

1. Install [Python 3.10.6](https://www.python.org/downloads/windows/), checking "Add Python to PATH".
2. Install [git](https://git-scm.com/download/win) manually (or `scoop install git` via [scoop](https://scoop.sh/)).
3. Install `ffmpeg`, following [this instruction](https://www.wikihow.com/Install-FFmpeg-on-Windows) (or using `scoop install ffmpeg` via [scoop](https://scoop.sh/)).
4. Download the SadTalker repository, for example by running `git clone https://github.com/Winfredy/SadTalker.git`.
5. Download the `checkpoint` and `gfpgan` models [below↓](https://github.com/Winfredy/SadTalker#-2-download-trained-models).
6. Run `start.bat` from Windows Explorer as a normal, non-administrator user; a gradio WebUI demo will be started.

### macOS:

More tips about installation on macOS and the Docker file can be found [here](docs/install.md).

## 📥 2. Download Trained Models.

You can run the following script to put all the models in the right place.

```bash
bash scripts/download_models.sh
```

Other alternatives:
> We also provide an offline patch (`gfpgan/`), so no model will be downloaded when generating.

**Google Drive**: download our pre-trained model from [this link (main checkpoints)](https://drive.google.com/drive/folders/1Wd88VDoLhVzYsQ30_qDVluQr_Xm46yHT?usp=sharing) and [gfpgan (offline patch)](https://drive.google.com/file/d/19AIBsmfcHW6BRJmeqSFlG5fL445Xmsyi?usp=sharing).

**GitHub release page**: download all the files from the [latest GitHub release page](https://github.com/Winfredy/SadTalker/releases), and then put them in `./checkpoints`.

**Baidu Netdisk (百度云盘)**: we provide the models in [checkpoints (extraction code: sadt)](https://pan.baidu.com/s/1nXuVNd0exUl37ISwWqbFGA?pwd=sadt) and [gfpgan (extraction code: sadt)](https://pan.baidu.com/s/1kb1BCPaLOWX1JJb9Czbn6w?pwd=sadt).

<details><summary>Model Details</summary>

The final folder will be shown as:

<img width="331" alt="image" src="https://user-images.githubusercontent.com/4397546/232511411-4ca75cbf-a434-48c5-9ae0-9009e8316484.png">

Model explanations:

| Model | Description
| :--- | :----------
|checkpoints/auido2exp_00300-model.pth | Pre-trained ExpNet in SadTalker.
|checkpoints/auido2pose_00140-model.pth | Pre-trained PoseVAE in SadTalker.
|checkpoints/mapping_00229-model.pth.tar | Pre-trained MappingNet in SadTalker.
|checkpoints/mapping_00109-model.pth.tar | Pre-trained MappingNet in SadTalker.
|checkpoints/facevid2vid_00189-model.pth.tar | Pre-trained face-vid2vid model from [the reproduction of face-vid2vid](https://github.com/zhanglonghao1992/One-Shot_Free-View_Neural_Talking_Head_Synthesis).
|checkpoints/epoch_20.pth | Pre-trained 3DMM extractor in [Deep3DFaceReconstruction](https://github.com/microsoft/Deep3DFaceReconstruction).
|checkpoints/wav2lip.pth | Highly accurate lip-sync model in [Wav2lip](https://github.com/Rudrabha/Wav2Lip).
|checkpoints/shape_predictor_68_face_landmarks.dat | Face landmark model used in [dlib](http://dlib.net/).
|checkpoints/BFM | 3DMM library file.
|checkpoints/hub | Face detection models used in [face alignment](https://github.com/1adrianb/face-alignment).
|gfpgan/weights | Face detection and enhancement models used in `facexlib` and `gfpgan`.

</details>

## 🔮 3. Quick Start ([Best Practice](docs/best_practice.md)).

### WebUI Demos:

**Online**: [Huggingface](https://huggingface.co/spaces/vinthony/SadTalker) | [SDWebUI-Colab](https://colab.research.google.com/github/camenduru/stable-diffusion-webui-colab/blob/main/video/stable/stable_diffusion_1_5_video_webui_colab.ipynb) | [Colab](https://colab.research.google.com/github/Winfredy/SadTalker/blob/main/quick_demo.ipynb)

**Local Automatic1111 stable-diffusion webui extension**: please refer to the [Automatic1111 stable-diffusion webui docs](docs/webui_extension.md).

**Local gradio demo**: similar to our [Hugging Face demo](https://huggingface.co/spaces/vinthony/SadTalker); it can be run by:

```bash
## you need to manually install TTS (https://github.com/coqui-ai/TTS) via `pip install tts` in advance.
python app.py
```

**Local Windows gradio demo**: just double-click `webui.bat`; the requirements will be installed automatically.

### Manual usage:

##### Animating a portrait image with the default config:
```bash
python inference.py --driven_audio <audio.wav> \
                    --source_image <video.mp4 or picture.png> \
                    --enhancer gfpgan
```
The results will be saved in `results/$SOME_TIMESTAMP/*.mp4`.

##### Full body/image generation:

Use `--still` to generate a natural full-body video. You can add `--enhancer` to improve the quality of the generated video.

```bash
python inference.py --driven_audio <audio.wav> \
                    --source_image <video.mp4 or picture.png> \
                    --result_dir <a folder to store results> \
                    --still \
                    --preprocess full \
                    --enhancer gfpgan
```

More examples, configuration options, and tips can be found in the [>>> best practice documents <<<](docs/best_practice.md).

## 🛎 Citation

If you find our work useful in your research, please consider citing:

```bibtex
@article{zhang2022sadtalker,
  title={SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation},
  author={Zhang, Wenxuan and Cun, Xiaodong and Wang, Xuan and Zhang, Yong and Shen, Xi and Guo, Yu and Shan, Ying and Wang, Fei},
  journal={arXiv preprint arXiv:2211.12194},
  year={2022}
}
```

## 💗 Acknowledgements

Facerender code borrows heavily from [zhanglonghao's reproduction of face-vid2vid](https://github.com/zhanglonghao1992/One-Shot_Free-View_Neural_Talking_Head_Synthesis) and [PIRender](https://github.com/RenYurui/PIRender). We thank the authors for sharing their wonderful code. In the training process, we also use models from [Deep3DFaceReconstruction](https://github.com/microsoft/Deep3DFaceReconstruction) and [Wav2lip](https://github.com/Rudrabha/Wav2Lip). We thank them for their wonderful work.

See also these wonderful third-party libraries we use:

- **Face Utils**: https://github.com/xinntao/facexlib
- **Face Enhancement**: https://github.com/TencentARC/GFPGAN
- **Image/Video Enhancement**: https://github.com/xinntao/Real-ESRGAN

## 🥂 Extensions:

- [SadTalker-Video-Lip-Sync](https://github.com/Zz-ww/SadTalker-Video-Lip-Sync) from [@Zz-ww](https://github.com/Zz-ww): SadTalker for video lip editing

## 🥂 Related Works
- [StyleHEAT: One-Shot High-Resolution Editable Talking Face Generation via Pre-trained StyleGAN (ECCV 2022)](https://github.com/FeiiYin/StyleHEAT)
- [CodeTalker: Speech-Driven 3D Facial Animation with Discrete Motion Prior (CVPR 2023)](https://github.com/Doubiiu/CodeTalker)
- [VideoReTalking: Audio-based Lip Synchronization for Talking Head Video Editing In the Wild (SIGGRAPH Asia 2022)](https://github.com/vinthony/video-retalking)
- [DPE: Disentanglement of Pose and Expression for General Video Portrait Editing (CVPR 2023)](https://github.com/Carlyx/DPE)
- [3D GAN Inversion with Facial Symmetry Prior (CVPR 2023)](https://github.com/FeiiYin/SPI/)
- [T2M-GPT: Generating Human Motion from Textual Descriptions with Discrete Representations (CVPR 2023)](https://github.com/Mael-zys/T2M-GPT)

## 📢 Disclaimer

This is not an official product of Tencent. This repository can only be used for personal/research/non-commercial purposes.

LOGO: color and font suggestion: [ChatGPT](ai.com); logo font: [Montserrat Alternates](https://fonts.google.com/specimen/Montserrat+Alternates?preview.text=SadTalker&preview.text_type=custom&query=mont).

All the demo images and audio are copyrighted by community users or generated by Stable Diffusion. Feel free to contact us if you feel uncomfortable.
app.py
ADDED
@@ -0,0 +1,374 @@
from flask import Flask, request, jsonify
import torch
import shutil
import os
import sys
from argparse import ArgumentParser
from time import strftime
from argparse import Namespace
from src.utils.preprocess import CropAndExtract
from src.test_audio2coeff import Audio2Coeff
from src.facerender.animate import AnimateFromCoeff
from src.generate_batch import get_data
from src.generate_facerender_batch import get_facerender_data
# from src.utils.init_path import init_path
import tempfile
from openai import OpenAI
import threading
import elevenlabs
from elevenlabs import set_api_key, generate, play, clone
# from flask_cors import CORS, cross_origin
# from flask_swagger_ui import get_swaggerui_blueprint
import uuid
import time

start_time = time.time()

class AnimationConfig:
    def __init__(self, driven_audio_path, source_image_path, result_folder, pose_style, expression_scale, enhancer, still, preprocess, ref_pose_video_path):
        self.driven_audio = driven_audio_path
        self.source_image = source_image_path
        self.ref_eyeblink = ref_pose_video_path
        self.ref_pose = ref_pose_video_path
        self.checkpoint_dir = './checkpoints'
        self.result_dir = result_folder
        self.pose_style = pose_style
        self.batch_size = 2
        self.expression_scale = expression_scale
        self.input_yaw = None
        self.input_pitch = None
        self.input_roll = None
        self.enhancer = enhancer
        self.background_enhancer = None
        self.cpu = False
        self.face3dvis = False
        self.still = still
        self.preprocess = preprocess
        self.verbose = False
        self.old_version = False
        self.net_recon = 'resnet50'
        self.init_path = None
        self.use_last_fc = False
        self.bfm_folder = './checkpoints/BFM_Fitting/'
        self.bfm_model = 'BFM_model_front.mat'
        self.focal = 1015.
        self.center = 112.
        self.camera_d = 10.
        self.z_near = 5.
        self.z_far = 15.
        self.device = 'cpu'


app = Flask(__name__)

TEMP_DIR = None

app.config['temp_response'] = None
app.config['generation_thread'] = None
app.config['text_prompt'] = None
app.config['final_video_path'] = None


def main(args):
    pic_path = args.source_image
    audio_path = args.driven_audio
    save_dir = args.result_dir
    pose_style = args.pose_style
    device = args.device
    batch_size = args.batch_size
    input_yaw_list = args.input_yaw
    input_pitch_list = args.input_pitch
    input_roll_list = args.input_roll
    ref_eyeblink = args.ref_eyeblink
    ref_pose = args.ref_pose
    preprocess = args.preprocess

    dir_path = os.path.dirname(os.path.realpath(__file__))
    current_root_path = dir_path
    print('current_root_path ', current_root_path)

    # sadtalker_paths = init_path(args.checkpoint_dir, os.path.join(current_root_path, 'src/config'), args.size, args.old_version, args.preprocess)

    path_of_lm_croper = os.path.join(current_root_path, args.checkpoint_dir, 'shape_predictor_68_face_landmarks.dat')
    path_of_net_recon_model = os.path.join(current_root_path, args.checkpoint_dir, 'epoch_20.pth')
    dir_of_BFM_fitting = os.path.join(current_root_path, args.checkpoint_dir, 'BFM_Fitting/BFM_Fitting')
    wav2lip_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'wav2lip.pth')

    audio2pose_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'auido2pose_00140-model.pth')
    audio2pose_yaml_path = os.path.join(current_root_path, 'src', 'config', 'auido2pose.yaml')

    audio2exp_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'auido2exp_00300-model.pth')
    audio2exp_yaml_path = os.path.join(current_root_path, 'src', 'config', 'auido2exp.yaml')

    free_view_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'facevid2vid_00189-model.pth.tar')

    if preprocess == 'full':
        mapping_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'mapping_00109-model.pth.tar')
        facerender_yaml_path = os.path.join(current_root_path, 'src', 'config', 'facerender_still.yaml')
    else:
        mapping_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'mapping_00229-model.pth.tar')
        facerender_yaml_path = os.path.join(current_root_path, 'src', 'config', 'facerender.yaml')

    # preprocess_model = CropAndExtract(sadtalker_paths, device)
    # init model
    print(path_of_net_recon_model)
    preprocess_model = CropAndExtract(path_of_lm_croper, path_of_net_recon_model, dir_of_BFM_fitting, device)

    # audio_to_coeff = Audio2Coeff(sadtalker_paths, device)
    audio_to_coeff = Audio2Coeff(audio2pose_checkpoint, audio2pose_yaml_path,
                                 audio2exp_checkpoint, audio2exp_yaml_path,
                                 wav2lip_checkpoint, device)
    # animate_from_coeff = AnimateFromCoeff(sadtalker_paths, device)
    animate_from_coeff = AnimateFromCoeff(free_view_checkpoint, mapping_checkpoint,
                                          facerender_yaml_path, device)

    first_frame_dir = os.path.join(save_dir, 'first_frame_dir')
    os.makedirs(first_frame_dir, exist_ok=True)
    # first_coeff_path, crop_pic_path, crop_info = preprocess_model.generate(pic_path, first_frame_dir, args.preprocess,\
    #     source_image_flag=True, pic_size=args.size)

    first_coeff_path, crop_pic_path, crop_info = preprocess_model.generate(pic_path, first_frame_dir, args.preprocess, source_image_flag=True)
    print('first_coeff_path ', first_coeff_path)
    print('crop_pic_path ', crop_pic_path)

    if first_coeff_path is None:
        print("Can't get the coeffs of the input")
        return

    if ref_eyeblink is not None:
        ref_eyeblink_videoname = os.path.splitext(os.path.split(ref_eyeblink)[-1])[0]
        ref_eyeblink_frame_dir = os.path.join(save_dir, ref_eyeblink_videoname)
        os.makedirs(ref_eyeblink_frame_dir, exist_ok=True)
        # ref_eyeblink_coeff_path, _, _ = preprocess_model.generate(ref_eyeblink, ref_eyeblink_frame_dir, args.preprocess, source_image_flag=False)
        ref_eyeblink_coeff_path, _, _ = preprocess_model.generate(ref_eyeblink, ref_eyeblink_frame_dir)
    else:
        ref_eyeblink_coeff_path = None
    print('ref_eyeblink_coeff_path', ref_eyeblink_coeff_path)

    if ref_pose is not None:
        if ref_pose == ref_eyeblink:
            ref_pose_coeff_path = ref_eyeblink_coeff_path
        else:
            ref_pose_videoname = os.path.splitext(os.path.split(ref_pose)[-1])[0]
            ref_pose_frame_dir = os.path.join(save_dir, ref_pose_videoname)
            os.makedirs(ref_pose_frame_dir, exist_ok=True)
            # ref_pose_coeff_path, _, _ = preprocess_model.generate(ref_pose, ref_pose_frame_dir, args.preprocess, source_image_flag=False)
            ref_pose_coeff_path, _, _ = preprocess_model.generate(ref_pose, ref_pose_frame_dir)
    else:
        ref_pose_coeff_path = None
    print('ref_eyeblink_coeff_path', ref_pose_coeff_path)

    batch = get_data(first_coeff_path, audio_path, device, ref_eyeblink_coeff_path, still=args.still)
    coeff_path = audio_to_coeff.generate(batch, save_dir, pose_style, ref_pose_coeff_path)

    if args.face3dvis:
        from src.face3d.visualize import gen_composed_video
        gen_composed_video(args, device, first_coeff_path, coeff_path, audio_path, os.path.join(save_dir, '3dface.mp4'))

    # data = get_facerender_data(coeff_path, crop_pic_path, first_coeff_path, audio_path,
    #                            batch_size, input_yaw_list, input_pitch_list, input_roll_list,
    #                            expression_scale=args.expression_scale, still_mode=args.still, preprocess=args.preprocess, size=args.size)

    data = get_facerender_data(coeff_path, crop_pic_path, first_coeff_path, audio_path,
                               batch_size, input_yaw_list, input_pitch_list, input_roll_list,
                               expression_scale=args.expression_scale, still_mode=args.still, preprocess=args.preprocess)

    # result, base64_video, temp_file_path = animate_from_coeff.generate(data, save_dir, pic_path, crop_info, \
    #     enhancer=args.enhancer, background_enhancer=args.background_enhancer, preprocess=args.preprocess, img_size=args.size)

    result, base64_video, temp_file_path = animate_from_coeff.generate(data, save_dir, pic_path, crop_info, \
        enhancer=args.enhancer, background_enhancer=args.background_enhancer, preprocess=args.preprocess)

    print('The generated video is named:')
    app.config['temp_response'] = base64_video
    app.config['final_video_path'] = temp_file_path
    return base64_video, temp_file_path

    # shutil.move(result, save_dir+'.mp4')

    if not args.verbose:
        shutil.rmtree(save_dir)

def create_temp_dir():
    return tempfile.TemporaryDirectory()

def save_uploaded_file(file, filename, TEMP_DIR):
    unique_filename = str(uuid.uuid4()) + "_" + filename
    file_path = os.path.join(TEMP_DIR.name, unique_filename)
    file.save(file_path)
    return file_path

client = OpenAI(api_key="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")

def translate_text(text_prompt, target_language):
    response = client.chat.completions.create(
        model="gpt-4-0125-preview",
        messages=[{"role": "system", "content": "You are a helpful language translator assistant."},
                  {"role": "user", "content": f"Translate completely without hallucination, end to end, and give the following text to {target_language} language and the text is: {text_prompt}"},
                  ],
        max_tokens=len(text_prompt) + 200  # Use the length of the input text
        # temperature=0.3,
        # stop=["Translate:", "Text:"]
    )
    return response


@app.route("/run", methods=['POST'])
async def generate_video():
    global TEMP_DIR
    TEMP_DIR = create_temp_dir()
    if request.method == 'POST':
        source_image = request.files['source_image']
        text_prompt = request.form['text_prompt']
        print('Input text prompt: ', text_prompt)
        voice_cloning = request.form.get('voice_cloning', 'no')
        target_language = request.form.get('target_language', 'original_text')
        print('target_language', target_language)
        pose_style = int(request.form.get('pose_style', 1))
        expression_scale = int(request.form.get('expression_scale', 1))
        enhancer = request.form.get('enhancer', None)
        voice_gender = request.form.get('voice_gender', 'male')
        still_str = request.form.get('still', 'False')
        still = still_str.lower() == 'true'
        print('still', still)
        preprocess = request.form.get('preprocess', 'crop')
        print('preprocess selected: ', preprocess)
        ref_pose_video = request.files.get('ref_pose', None)

        if target_language != 'original_text':
            response = translate_text(text_prompt, target_language)
            # response = await translate_text_async(text_prompt, target_language)
            text_prompt = response.choices[0].message.content.strip()

        app.config['text_prompt'] = text_prompt
        print('Final text prompt: ', text_prompt)

        source_image_path = save_uploaded_file(source_image, 'source_image.png', TEMP_DIR)
        print(source_image_path)

        # driven_audio_path = await voice_cloning_async(voice_cloning, voice_gender, text_prompt, user_voice)

        if voice_cloning == 'no':
            if voice_gender == 'male':
                voice = 'onyx'
            else:
                voice = 'nova'

            print('Entering Audio creation using whisper')
            response = client.audio.speech.create(model="tts-1-hd",
                                                  voice=voice,
                                                  input=text_prompt)

            print('Audio created using whisper')
            with tempfile.NamedTemporaryFile(suffix=".wav", prefix="text_to_speech_", dir=TEMP_DIR.name, delete=False) as temp_file:
                driven_audio_path = temp_file.name

            response.write_to_file(driven_audio_path)
            print('Audio file saved')

        elif voice_cloning == 'yes':
            user_voice = request.files['user_voice']

            with tempfile.NamedTemporaryFile(suffix=".wav", prefix="user_voice_", dir=TEMP_DIR.name, delete=False) as temp_file:
                user_voice_path = temp_file.name
                user_voice.save(user_voice_path)
            print('user_voice_path', user_voice_path)

            set_api_key("87792fce164425fbe1204e9fd1fe25cd")
            voice = clone(name="User Cloned Voice",
                          files=[user_voice_path])

            audio = generate(text=text_prompt, voice=voice, model="eleven_multilingual_v2", stream=True, latency=4)
            with tempfile.NamedTemporaryFile(suffix=".mp3", prefix="cloned_audio_", dir=TEMP_DIR.name, delete=False) as temp_file:
                for chunk in audio:
                    temp_file.write(chunk)
                driven_audio_path = temp_file.name
            print('driven_audio_path', driven_audio_path)

            # elevenlabs.save(audio, driven_audio_path)

        save_dir = tempfile.mkdtemp(dir=TEMP_DIR.name)
        result_folder = os.path.join(save_dir, "results")
        os.makedirs(result_folder, exist_ok=True)

        ref_pose_video_path = None
        if ref_pose_video:
            with tempfile.NamedTemporaryFile(suffix=".mp4", prefix="ref_pose_", dir=TEMP_DIR.name, delete=False) as temp_file:
                ref_pose_video_path = temp_file.name
                ref_pose_video.save(ref_pose_video_path)
            print('ref_pose_video_path', ref_pose_video_path)

        # Example of using the class with some hypothetical paths
        args = AnimationConfig(driven_audio_path=driven_audio_path, source_image_path=source_image_path, result_folder=result_folder, pose_style=pose_style, expression_scale=expression_scale, enhancer=enhancer, still=still, preprocess=preprocess, ref_pose_video_path=ref_pose_video_path)

        if torch.cuda.is_available() and not args.cpu:
            args.device = "cuda"
        else:
            args.device = "cpu"

        generation_thread = threading.Thread(target=main, args=(args,))
        app.config['generation_thread'] = generation_thread
        generation_thread.start()
        response_data = {"message": "Video generation started",
                         "process_id": generation_thread.ident}

        return jsonify(response_data)
        # base64_video = main(args)
        # return jsonify({"base64_video": base64_video})

    # else:
    #     return 'Unsupported HTTP method', 405

@app.route("/status", methods=["GET"])
def check_generation_status():
    global TEMP_DIR
    response = {"base64_video": "", "text_prompt": "", "status": ""}
    process_id = request.args.get('process_id', None)

    # process_id is required to check the status for that specific process
    if process_id:
        generation_thread = app.config.get('generation_thread')
        if generation_thread and generation_thread.ident == int(process_id) and generation_thread.is_alive():
            return jsonify({"status": "in_progress"}), 200
        elif app.config.get('temp_response'):
            # app.config['temp_response']['status'] = 'completed'
            final_response = app.config['temp_response']
            response["base64_video"] = final_response
            response["text_prompt"] = app.config.get('text_prompt')
            response["status"] = "completed"

            final_video_path = app.config['final_video_path']
            print('final_video_path', final_video_path)

            if final_video_path and os.path.exists(final_video_path):
                os.remove(final_video_path)
                print("Deleted video file:", final_video_path)

            TEMP_DIR.cleanup()
            # print("Temporary Directory:", TEMP_DIR.name)
            # if TEMP_DIR:
            #     print("Contents of Temporary Directory:")
            #     for filename in os.listdir(TEMP_DIR.name):
            #         print(filename)
            # else:
            #     print("Temporary Directory is None or already cleaned up.")
            end_time = time.time()
            total_time = round(end_time - start_time, 2)
            print("Total time taken for execution:", total_time, " seconds")
            return jsonify(response)
    return jsonify({"error": "No process id provided"})

@app.route("/health", methods=["GET"])
def health_status():
    response = {"online": "true"}
    return jsonify(response)

if __name__ == '__main__':
    app.run(debug=True)
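As a usage sketch (not part of this commit), the Flask API defined in app.py can be exercised with a small client like the one below. The host/port and file paths are assumptions; the form fields mirror what `/run` reads, and `/status` is polled with the returned `process_id` until the base64-encoded video is available.

```python
# Hypothetical client for the /run and /status endpoints in app.py,
# assuming the Flask app is running locally on port 5000.
import time
import requests

with open("face.png", "rb") as img:  # placeholder portrait image
    resp = requests.post(
        "http://127.0.0.1:5000/run",
        files={"source_image": img},
        data={
            "text_prompt": "Hello from SadTalker",
            "voice_cloning": "no",        # 'yes' would also require a 'user_voice' file upload
            "voice_gender": "male",
            "target_language": "original_text",
            "preprocess": "full",
            "still": "True",
        },
    )
process_id = resp.json()["process_id"]

# Poll /status until the background generation thread finishes.
while True:
    status = requests.get(
        "http://127.0.0.1:5000/status", params={"process_id": process_id}
    ).json()
    if status.get("status") == "completed":
        base64_video = status["base64_video"]
        break
    time.sleep(5)
```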
cog.yaml
ADDED
@@ -0,0 +1,35 @@
build:
  gpu: true
  cuda: "11.3"
  python_version: "3.8"
  system_packages:
    - "ffmpeg"
    - "libgl1-mesa-glx"
    - "libglib2.0-0"
  python_packages:
    - "torch==1.12.1"
    - "torchvision==0.13.1"
    - "torchaudio==0.12.1"
    - "joblib==1.1.0"
    - "scikit-image==0.19.3"
    - "basicsr==1.4.2"
    - "facexlib==0.3.0"
    - "resampy==0.3.1"
    - "pydub==0.25.1"
    - "scipy==1.10.1"
    - "kornia==0.6.8"
    - "face_alignment==1.3.5"
    - "imageio==2.19.3"
    - "imageio-ffmpeg==0.4.7"
    - "librosa==0.9.2"
    - "tqdm==4.65.0"
    - "yacs==0.1.8"
    - "gfpgan==1.3.8"
    - "dlib-bin==19.24.1"
    - "av==10.0.0"
    - "trimesh==3.9.20"
  run:
    - mkdir -p /root/.cache/torch/hub/checkpoints/ && wget --output-document "/root/.cache/torch/hub/checkpoints/s3fd-619a316812.pth" "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth"
    - mkdir -p /root/.cache/torch/hub/checkpoints/ && wget --output-document "/root/.cache/torch/hub/checkpoints/2DFAN4-cd938726ad.zip" "https://www.adrianbulat.com/downloads/python-fan/2DFAN4-cd938726ad.zip"

predict: "predict.py:Predictor"
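The `predict: "predict.py:Predictor"` entry points Cog at a predictor class in predict.py, which is part of this upload but not shown in this listing. Purely as a hypothetical sketch of the interface Cog expects (not the actual predict.py from this commit), a predictor looks roughly like:

```python
# Hypothetical skeleton of the Cog predictor interface referenced by cog.yaml;
# the real predict.py in this commit may differ.
from cog import BasePredictor, Input, Path


class Predictor(BasePredictor):
    def setup(self):
        # Load SadTalker checkpoints once per container start
        # (assumption: the same checkpoint paths as inference.py).
        ...

    def predict(
        self,
        source_image: Path = Input(description="Portrait image"),
        driven_audio: Path = Input(description="Driving audio (.wav)"),
    ) -> Path:
        # Run the same pipeline as inference.py and return the generated .mp4.
        raise NotImplementedError("sketch only")
```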
inference.py
ADDED
@@ -0,0 +1,159 @@
| 1 |
+
import torch
|
| 2 |
+
from time import strftime
|
| 3 |
+
import os, sys, time
|
| 4 |
+
from argparse import ArgumentParser
|
| 5 |
+
|
| 6 |
+
from src.utils.preprocess import CropAndExtract
|
| 7 |
+
from src.test_audio2coeff import Audio2Coeff
|
| 8 |
+
from src.facerender.animate import AnimateFromCoeff
|
| 9 |
+
from src.generate_batch import get_data
|
| 10 |
+
from src.generate_facerender_batch import get_facerender_data
|
| 11 |
+
|
| 12 |
+
def main(args):
|
| 13 |
+
#torch.backends.cudnn.enabled = False
|
| 14 |
+
|
| 15 |
+
pic_path = args.source_image
|
| 16 |
+
audio_path = args.driven_audio
|
| 17 |
+
save_dir = os.path.join(args.result_dir, strftime("%Y_%m_%d_%H.%M.%S"))
|
| 18 |
+
os.makedirs(save_dir, exist_ok=True)
|
| 19 |
+
pose_style = args.pose_style
|
| 20 |
+
device = args.device
|
| 21 |
+
batch_size = args.batch_size
|
| 22 |
+
input_yaw_list = args.input_yaw
|
| 23 |
+
input_pitch_list = args.input_pitch
|
| 24 |
+
input_roll_list = args.input_roll
|
| 25 |
+
ref_eyeblink = args.ref_eyeblink
|
| 26 |
+
ref_pose = args.ref_pose
|
| 27 |
+
|
| 28 |
+
current_code_path = sys.argv[0]
|
| 29 |
+
current_root_path = os.path.split(current_code_path)[0]
|
| 30 |
+
|
| 31 |
+
os.environ['TORCH_HOME']=os.path.join(current_root_path, args.checkpoint_dir)
|
| 32 |
+
|
| 33 |
+
path_of_lm_croper = os.path.join(current_root_path, args.checkpoint_dir, 'shape_predictor_68_face_landmarks.dat')
|
| 34 |
+
path_of_net_recon_model = os.path.join(current_root_path, args.checkpoint_dir, 'epoch_20.pth')
|
| 35 |
+
dir_of_BFM_fitting = os.path.join(current_root_path, args.checkpoint_dir, 'BFM_Fitting')
|
| 36 |
+
wav2lip_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'wav2lip.pth')
|
| 37 |
+
|
| 38 |
+
audio2pose_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'auido2pose_00140-model.pth')
|
| 39 |
+
audio2pose_yaml_path = os.path.join(current_root_path, 'src', 'config', 'auido2pose.yaml')
|
| 40 |
+
|
| 41 |
+
audio2exp_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'auido2exp_00300-model.pth')
|
| 42 |
+
audio2exp_yaml_path = os.path.join(current_root_path, 'src', 'config', 'auido2exp.yaml')
|
| 43 |
+
|
| 44 |
+
free_view_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'facevid2vid_00189-model.pth.tar')
|
| 45 |
+
|
| 46 |
+
if args.preprocess == 'full':
|
| 47 |
+
mapping_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'mapping_00109-model.pth.tar')
|
| 48 |
+
facerender_yaml_path = os.path.join(current_root_path, 'src', 'config', 'facerender_still.yaml')
|
| 49 |
+
else:
|
| 50 |
+
mapping_checkpoint = os.path.join(current_root_path, args.checkpoint_dir, 'mapping_00229-model.pth.tar')
|
| 51 |
+
facerender_yaml_path = os.path.join(current_root_path, 'src', 'config', 'facerender.yaml')
|
| 52 |
+
|
| 53 |
+
#init model
|
| 54 |
+
print(path_of_net_recon_model)
|
| 55 |
+
preprocess_model = CropAndExtract(path_of_lm_croper, path_of_net_recon_model, dir_of_BFM_fitting, device)
|
| 56 |
+
|
| 57 |
+
print(audio2pose_checkpoint)
|
| 58 |
+
print(audio2exp_checkpoint)
|
| 59 |
+
audio_to_coeff = Audio2Coeff(audio2pose_checkpoint, audio2pose_yaml_path,
|
| 60 |
+
audio2exp_checkpoint, audio2exp_yaml_path,
|
| 61 |
+
wav2lip_checkpoint, device)
|
| 62 |
+
|
| 63 |
+
print(free_view_checkpoint)
|
| 64 |
+
print(mapping_checkpoint)
|
| 65 |
+
animate_from_coeff = AnimateFromCoeff(free_view_checkpoint, mapping_checkpoint,
|
| 66 |
+
facerender_yaml_path, device)
|
| 67 |
+
|
| 68 |
+
#crop image and extract 3dmm from image
|
| 69 |
+
first_frame_dir = os.path.join(save_dir, 'first_frame_dir')
|
| 70 |
+
os.makedirs(first_frame_dir, exist_ok=True)
|
| 71 |
+
print('3DMM Extraction for source image')
|
| 72 |
+
first_coeff_path, crop_pic_path, crop_info = preprocess_model.generate(pic_path, first_frame_dir, args.preprocess, source_image_flag=True)
|
| 73 |
+
if first_coeff_path is None:
|
| 74 |
+
print("Can't get the coeffs of the input")
|
| 75 |
+
return
|
| 76 |
+
|
| 77 |
+
if ref_eyeblink is not None:
|
| 78 |
+
ref_eyeblink_videoname = os.path.splitext(os.path.split(ref_eyeblink)[-1])[0]
|
| 79 |
+
ref_eyeblink_frame_dir = os.path.join(save_dir, ref_eyeblink_videoname)
|
| 80 |
+
os.makedirs(ref_eyeblink_frame_dir, exist_ok=True)
|
| 81 |
+
print('3DMM Extraction for the reference video providing eye blinking')
|
| 82 |
+
+        ref_eyeblink_coeff_path, _, _ = preprocess_model.generate(ref_eyeblink, ref_eyeblink_frame_dir)
+    else:
+        ref_eyeblink_coeff_path = None
+
+    if ref_pose is not None:
+        if ref_pose == ref_eyeblink:
+            ref_pose_coeff_path = ref_eyeblink_coeff_path
+        else:
+            ref_pose_videoname = os.path.splitext(os.path.split(ref_pose)[-1])[0]
+            ref_pose_frame_dir = os.path.join(save_dir, ref_pose_videoname)
+            os.makedirs(ref_pose_frame_dir, exist_ok=True)
+            print('3DMM Extraction for the reference video providing pose')
+            ref_pose_coeff_path, _, _ = preprocess_model.generate(ref_pose, ref_pose_frame_dir)
+    else:
+        ref_pose_coeff_path = None
+
+    # audio2coeff
+    batch = get_data(first_coeff_path, audio_path, device, ref_eyeblink_coeff_path, still=args.still)
+    coeff_path = audio_to_coeff.generate(batch, save_dir, pose_style, ref_pose_coeff_path)
+
+    # 3d face render
+    if args.face3dvis:
+        from src.face3d.visualize import gen_composed_video
+        gen_composed_video(args, device, first_coeff_path, coeff_path, audio_path, os.path.join(save_dir, '3dface.mp4'))
+
+    # coeff2video
+    data = get_facerender_data(coeff_path, crop_pic_path, first_coeff_path, audio_path,
+                               batch_size, input_yaw_list, input_pitch_list, input_roll_list,
+                               expression_scale=args.expression_scale, still_mode=args.still, preprocess=args.preprocess)
+
+    animate_from_coeff.generate(data, save_dir, pic_path, crop_info,
+                                enhancer=args.enhancer, background_enhancer=args.background_enhancer, preprocess=args.preprocess)
+
+
+if __name__ == '__main__':
+
+    parser = ArgumentParser()
+    parser.add_argument("--driven_audio", default='./examples/driven_audio/bus_chinese.wav', help="path to driven audio")
+    parser.add_argument("--source_image", default='./examples/source_image/full_body_2.png', help="path to source image")
+    parser.add_argument("--ref_eyeblink", default=None, help="path to reference video providing eye blinking")
+    parser.add_argument("--ref_pose", default=None, help="path to reference video providing pose")
+    parser.add_argument("--checkpoint_dir", default='./checkpoints', help="path to the pretrained checkpoints")
+    parser.add_argument("--result_dir", default='./results', help="path to output")
+    parser.add_argument("--pose_style", type=int, default=0, help="input pose style from [0, 46)")
+    parser.add_argument("--batch_size", type=int, default=2, help="the batch size of facerender")
+    parser.add_argument("--expression_scale", type=float, default=1., help="the expression scale of facerender")
+    parser.add_argument('--input_yaw', nargs='+', type=int, default=None, help="the input yaw degree of the user")
+    parser.add_argument('--input_pitch', nargs='+', type=int, default=None, help="the input pitch degree of the user")
+    parser.add_argument('--input_roll', nargs='+', type=int, default=None, help="the input roll degree of the user")
+    parser.add_argument('--enhancer', type=str, default=None, help="Face enhancer, [gfpgan, RestoreFormer]")
+    parser.add_argument('--background_enhancer', type=str, default=None, help="background enhancer, [realesrgan]")
+    parser.add_argument("--cpu", dest="cpu", action="store_true")
+    parser.add_argument("--face3dvis", action="store_true", help="generate 3d face and 3d landmarks")
+    parser.add_argument("--still", action="store_true", help="can crop back to the original image for the full-body animation")
+    parser.add_argument("--preprocess", default='crop', choices=['crop', 'resize', 'full'], help="how to preprocess the images")
+
+    # net structure and parameters
+    parser.add_argument('--net_recon', type=str, default='resnet50', choices=['resnet18', 'resnet34', 'resnet50'], help='unused')
+    parser.add_argument('--init_path', type=str, default=None, help='unused')
+    parser.add_argument('--use_last_fc', default=False, help='zero initialize the last fc')
+    parser.add_argument('--bfm_folder', type=str, default='./checkpoints/BFM_Fitting/')
+    parser.add_argument('--bfm_model', type=str, default='BFM_model_front.mat', help='bfm model')
+
+    # default renderer parameters
+    parser.add_argument('--focal', type=float, default=1015.)
+    parser.add_argument('--center', type=float, default=112.)
+    parser.add_argument('--camera_d', type=float, default=10.)
+    parser.add_argument('--z_near', type=float, default=5.)
+    parser.add_argument('--z_far', type=float, default=15.)
+
+    args = parser.parse_args()
+
+    if torch.cuda.is_available() and not args.cpu:
+        args.device = "cuda"
+    else:
+        args.device = "cpu"
+
+    main(args)
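For reference, a minimal usage sketch (not part of the uploaded files): calling inference.py from Python with the flags defined by the parser above. The audio and image paths are simply the argparse defaults shown there, and the sketch assumes the pretrained weights have already been downloaded into ./checkpoints (e.g. via scripts/download_models.sh).

import subprocess
import sys

# Hypothetical driver script; the flags mirror the ArgumentParser defined in inference.py above.
cmd = [
    sys.executable, "inference.py",
    "--driven_audio", "./examples/driven_audio/bus_chinese.wav",
    "--source_image", "./examples/source_image/full_body_2.png",
    "--result_dir", "./results",
    "--preprocess", "full",      # one of: crop, resize, full
    "--still",                   # keep the original framing for full-body animation
    "--enhancer", "gfpgan",      # optional face enhancer
]
subprocess.run(cmd, check=True)  # the generated video is written under ./results/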
launcher.py
ADDED
@@ -0,0 +1,197 @@
+# this script installs the necessary requirements and launches the main program in webui.py
+# borrowed from: https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/master/launch.py
+import subprocess
+import os
+import sys
+import importlib.util
+import shlex
+import platform
+import json
+
+python = sys.executable
+git = os.environ.get('GIT', "git")
+index_url = os.environ.get('INDEX_URL', "")
+stored_commit_hash = None
+skip_install = False
+dir_repos = "repositories"
+script_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+
+if 'GRADIO_ANALYTICS_ENABLED' not in os.environ:
+    os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
+
+
+def check_python_version():
+    is_windows = platform.system() == "Windows"
+    major = sys.version_info.major
+    minor = sys.version_info.minor
+    micro = sys.version_info.micro
+
+    if is_windows:
+        supported_minors = [10]
+    else:
+        supported_minors = [7, 8, 9, 10, 11]
+
+    if not (major == 3 and minor in supported_minors):
+        # raise a proper exception instead of a bare string
+        raise RuntimeError(f"""
+INCOMPATIBLE PYTHON VERSION
+This program is tested with Python 3.10.6, but you have {major}.{minor}.{micro}.
+If you encounter an error with the "RuntimeError: Couldn't install torch." message,
+or any other error regarding unsuccessful package (library) installation,
+please downgrade (or upgrade) to the latest version of Python 3.10
+and delete the current Python and "venv" folder in the WebUI's directory.
+You can download Python 3.10 from here: https://www.python.org/downloads/release/python-3109/
+{"Alternatively, use a binary release of WebUI: https://github.com/AUTOMATIC1111/stable-diffusion-webui/releases" if is_windows else ""}
+Use --skip-python-version-check to suppress this warning.
+""")
+
+
+def commit_hash():
+    global stored_commit_hash
+
+    if stored_commit_hash is not None:
+        return stored_commit_hash
+
+    try:
+        stored_commit_hash = run(f"{git} rev-parse HEAD").strip()
+    except Exception:
+        stored_commit_hash = "<none>"
+
+    return stored_commit_hash
+
+
+def run(command, desc=None, errdesc=None, custom_env=None, live=False):
+    if desc is not None:
+        print(desc)
+
+    if live:
+        result = subprocess.run(command, shell=True, env=os.environ if custom_env is None else custom_env)
+        if result.returncode != 0:
+            raise RuntimeError(f"""{errdesc or 'Error running command'}.
+Command: {command}
+Error code: {result.returncode}""")
+
+        return ""
+
+    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, env=os.environ if custom_env is None else custom_env)
+
+    if result.returncode != 0:
+        message = f"""{errdesc or 'Error running command'}.
+Command: {command}
+Error code: {result.returncode}
+stdout: {result.stdout.decode(encoding="utf8", errors="ignore") if len(result.stdout)>0 else '<empty>'}
+stderr: {result.stderr.decode(encoding="utf8", errors="ignore") if len(result.stderr)>0 else '<empty>'}
+"""
+        raise RuntimeError(message)
+
+    return result.stdout.decode(encoding="utf8", errors="ignore")
+
+
+def check_run(command):
+    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
+    return result.returncode == 0
+
+
+def is_installed(package):
+    try:
+        spec = importlib.util.find_spec(package)
+    except ModuleNotFoundError:
+        return False
+
+    return spec is not None
+
+
+def repo_dir(name):
+    return os.path.join(script_path, dir_repos, name)
+
+
+def run_python(code, desc=None, errdesc=None):
+    return run(f'"{python}" -c "{code}"', desc, errdesc)
+
+
+def run_pip(args, desc=None):
+    if skip_install:
+        return
+
+    index_url_line = f' --index-url {index_url}' if index_url != '' else ''
+    return run(f'"{python}" -m pip {args} --prefer-binary{index_url_line}', desc=f"Installing {desc}", errdesc=f"Couldn't install {desc}")
+
+
+def check_run_python(code):
+    return check_run(f'"{python}" -c "{code}"')
+
+
+def git_clone(url, dir, name, commithash=None):
+    # TODO clone into temporary dir and move if successful
+
+    if os.path.exists(dir):
+        if commithash is None:
+            return
+
+        current_hash = run(f'"{git}" -C "{dir}" rev-parse HEAD', None, f"Couldn't determine {name}'s hash: {commithash}").strip()
+        if current_hash == commithash:
+            return
+
+        run(f'"{git}" -C "{dir}" fetch', f"Fetching updates for {name}...", f"Couldn't fetch {name}")
+        run(f'"{git}" -C "{dir}" checkout {commithash}', f"Checking out commit for {name} with hash: {commithash}...", f"Couldn't checkout commit {commithash} for {name}")
+        return
+
+    run(f'"{git}" clone "{url}" "{dir}"', f"Cloning {name} into {dir}...", f"Couldn't clone {name}")
+
+    if commithash is not None:
+        run(f'"{git}" -C "{dir}" checkout {commithash}', None, f"Couldn't checkout {name}'s hash: {commithash}")
+
+
+def git_pull_recursive(dir):
+    for subdir, _, _ in os.walk(dir):
+        if os.path.exists(os.path.join(subdir, '.git')):
+            try:
+                output = subprocess.check_output([git, '-C', subdir, 'pull', '--autostash'])
+                print(f"Pulled changes for repository in '{subdir}':\n{output.decode('utf-8').strip()}\n")
+            except subprocess.CalledProcessError as e:
+                print(f"Couldn't perform 'git pull' on repository in '{subdir}':\n{e.output.decode('utf-8').strip()}\n")
+
+
+def run_extension_installer(extension_dir):
+    path_installer = os.path.join(extension_dir, "install.py")
+    if not os.path.isfile(path_installer):
+        return
+
+    try:
+        env = os.environ.copy()
+        env['PYTHONPATH'] = os.path.abspath(".")
+
+        print(run(f'"{python}" "{path_installer}"', errdesc=f"Error running install.py for extension {extension_dir}", custom_env=env))
+    except Exception as e:
+        print(e, file=sys.stderr)
+
+
+def prepare_environment():
+    global skip_install
+
+    torch_command = os.environ.get('TORCH_COMMAND', "pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 --extra-index-url https://download.pytorch.org/whl/cu117")
+    requirements_file = os.environ.get('REQS_FILE', "requirements.txt")
+
+    commit = commit_hash()
+
+    print(f"Python {sys.version}")
+    print(f"Commit hash: {commit}")
+
+    if not is_installed("torch") or not is_installed("torchvision"):
+        run(f'"{python}" -m {torch_command}', "Installing torch and torchvision", "Couldn't install torch", live=True)
+
+    run_python("import torch; assert torch.cuda.is_available(), 'Torch is not able to use GPU; add --skip-torch-cuda-test to COMMANDLINE_ARGS variable to disable this check'")
+
+    run_pip(f"install -r \"{requirements_file}\"", "requirements for SadTalker WebUI (this may take a while the first time)")
+
+
+def start():
+    print("Launching SadTalker Web UI")
+    from app import sadtalker_demo
+    demo = sadtalker_demo()
+    demo.launch(share=True)
+
+
+if __name__ == "__main__":
+    prepare_environment()
+    start()
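prepare_environment() reads TORCH_COMMAND and REQS_FILE from the environment, so the torch build and the requirements file can be swapped without editing the script. A minimal sketch of driving the launcher that way (an illustration of intended use, not part of the committed files; the torch pins shown are just the script's own defaults):

import os
import subprocess
import sys

# Hypothetical wrapper: override the env vars read by prepare_environment(), then launch.
env = os.environ.copy()
env["TORCH_COMMAND"] = (
    "pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 "
    "--extra-index-url https://download.pytorch.org/whl/cu117"
)
env["REQS_FILE"] = "requirements.txt"  # point at an alternative requirements file if needed

subprocess.run([sys.executable, "launcher.py"], env=env, check=True)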
predict.py
ADDED
@@ -0,0 +1,214 @@
+"""run bash scripts/download_models.sh first to prepare the weights file"""
+import os
+import shutil
+from argparse import Namespace
+from src.utils.preprocess import CropAndExtract
+from src.test_audio2coeff import Audio2Coeff
+from src.facerender.animate import AnimateFromCoeff
+from src.generate_batch import get_data
+from src.generate_facerender_batch import get_facerender_data
+from cog import BasePredictor, Input, Path
+
+checkpoints = "checkpoints"
+
+
+class Predictor(BasePredictor):
+    def setup(self):
+        """Load the model into memory to make running multiple predictions efficient"""
+        device = "cuda"
+
+        path_of_lm_croper = os.path.join(
+            checkpoints, "shape_predictor_68_face_landmarks.dat"
+        )
+        path_of_net_recon_model = os.path.join(checkpoints, "epoch_20.pth")
+        dir_of_BFM_fitting = os.path.join(checkpoints, "BFM_Fitting")
+        wav2lip_checkpoint = os.path.join(checkpoints, "wav2lip.pth")
+
+        audio2pose_checkpoint = os.path.join(checkpoints, "auido2pose_00140-model.pth")
+        audio2pose_yaml_path = os.path.join("src", "config", "auido2pose.yaml")
+
+        audio2exp_checkpoint = os.path.join(checkpoints, "auido2exp_00300-model.pth")
+        audio2exp_yaml_path = os.path.join("src", "config", "auido2exp.yaml")
+
+        free_view_checkpoint = os.path.join(
+            checkpoints, "facevid2vid_00189-model.pth.tar"
+        )
+
+        # init model
+        self.preprocess_model = CropAndExtract(
+            path_of_lm_croper, path_of_net_recon_model, dir_of_BFM_fitting, device
+        )
+
+        self.audio_to_coeff = Audio2Coeff(
+            audio2pose_checkpoint,
+            audio2pose_yaml_path,
+            audio2exp_checkpoint,
+            audio2exp_yaml_path,
+            wav2lip_checkpoint,
+            device,
+        )
+
+        self.animate_from_coeff = {
+            "full": AnimateFromCoeff(
+                free_view_checkpoint,
+                os.path.join(checkpoints, "mapping_00109-model.pth.tar"),
+                os.path.join("src", "config", "facerender_still.yaml"),
+                device,
+            ),
+            "others": AnimateFromCoeff(
+                free_view_checkpoint,
+                os.path.join(checkpoints, "mapping_00229-model.pth.tar"),
+                os.path.join("src", "config", "facerender.yaml"),
+                device,
+            ),
+        }
+
+    def predict(
+        self,
+        source_image: Path = Input(
+            description="Upload the source image; it can be video.mp4 or picture.png",
+        ),
+        driven_audio: Path = Input(
+            description="Upload the driven audio; accepts .wav and .mp4 files",
+        ),
+        enhancer: str = Input(
+            description="Choose a face enhancer",
+            choices=["gfpgan", "RestoreFormer"],
+            default="gfpgan",
+        ),
+        preprocess: str = Input(
+            description="how to preprocess the images",
+            choices=["crop", "resize", "full"],
+            default="full",
+        ),
+        ref_eyeblink: Path = Input(
+            description="path to reference video providing eye blinking",
+            default=None,
+        ),
+        ref_pose: Path = Input(
+            description="path to reference video providing pose",
+            default=None,
+        ),
+        still: bool = Input(
+            description="can crop back to the original image for the full-body animation when preprocess is full",
+            default=True,
+        ),
+    ) -> Path:
+        """Run a single prediction on the model"""
+
+        animate_from_coeff = (
+            self.animate_from_coeff["full"]
+            if preprocess == "full"
+            else self.animate_from_coeff["others"]
+        )
+
+        args = load_default()
+        args.pic_path = str(source_image)
+        args.audio_path = str(driven_audio)
+        device = "cuda"
+        args.still = still
+        args.ref_eyeblink = None if ref_eyeblink is None else str(ref_eyeblink)
+        args.ref_pose = None if ref_pose is None else str(ref_pose)
+
+        # crop image and extract 3dmm from image
+        results_dir = "results"
+        if os.path.exists(results_dir):
+            shutil.rmtree(results_dir)
+        os.makedirs(results_dir)
+        first_frame_dir = os.path.join(results_dir, "first_frame_dir")
+        os.makedirs(first_frame_dir)
+
+        print("3DMM Extraction for source image")
+        first_coeff_path, crop_pic_path, crop_info = self.preprocess_model.generate(
+            args.pic_path, first_frame_dir, preprocess, source_image_flag=True
+        )
+        if first_coeff_path is None:
+            print("Can't get the coeffs of the input")
+            return
+
+        if ref_eyeblink is not None:
+            ref_eyeblink_videoname = os.path.splitext(os.path.split(ref_eyeblink)[-1])[0]
+            ref_eyeblink_frame_dir = os.path.join(results_dir, ref_eyeblink_videoname)
+            os.makedirs(ref_eyeblink_frame_dir, exist_ok=True)
+            print("3DMM Extraction for the reference video providing eye blinking")
+            ref_eyeblink_coeff_path, _, _ = self.preprocess_model.generate(
+                ref_eyeblink, ref_eyeblink_frame_dir
+            )
+        else:
+            ref_eyeblink_coeff_path = None
+
+        if ref_pose is not None:
+            if ref_pose == ref_eyeblink:
+                ref_pose_coeff_path = ref_eyeblink_coeff_path
+            else:
+                ref_pose_videoname = os.path.splitext(os.path.split(ref_pose)[-1])[0]
+                ref_pose_frame_dir = os.path.join(results_dir, ref_pose_videoname)
+                os.makedirs(ref_pose_frame_dir, exist_ok=True)
+                print("3DMM Extraction for the reference video providing pose")
+                ref_pose_coeff_path, _, _ = self.preprocess_model.generate(
+                    ref_pose, ref_pose_frame_dir
+                )
+        else:
+            ref_pose_coeff_path = None
+
+        # audio2coeff
+        batch = get_data(
+            first_coeff_path,
+            args.audio_path,
+            device,
+            ref_eyeblink_coeff_path,
+            still=still,
+        )
+        coeff_path = self.audio_to_coeff.generate(
+            batch, results_dir, args.pose_style, ref_pose_coeff_path
+        )
+        # coeff2video
+        print("coeff2video")
+        data = get_facerender_data(
+            coeff_path,
+            crop_pic_path,
+            first_coeff_path,
+            args.audio_path,
+            args.batch_size,
+            args.input_yaw,
+            args.input_pitch,
+            args.input_roll,
+            expression_scale=args.expression_scale,
+            still_mode=still,
+            preprocess=preprocess,
+        )
+        animate_from_coeff.generate(
+            data, results_dir, args.pic_path, crop_info,
+            enhancer=enhancer, background_enhancer=args.background_enhancer,
+            preprocess=preprocess)
+
+        output = "/tmp/out.mp4"
+        mp4_path = os.path.join(results_dir, [f for f in os.listdir(results_dir) if "enhanced.mp4" in f][0])
+        shutil.copy(mp4_path, output)
+
+        return Path(output)
+
+
+def load_default():
+    return Namespace(
+        pose_style=0,
+        batch_size=2,
+        expression_scale=1.0,
+        input_yaw=None,
+        input_pitch=None,
+        input_roll=None,
+        background_enhancer=None,
+        face3dvis=False,
+        net_recon="resnet50",
+        init_path=None,
+        use_last_fc=False,
+        bfm_folder="./checkpoints/BFM_Fitting/",
+        bfm_model="BFM_model_front.mat",
+        focal=1015.0,
+        center=112.0,
+        camera_d=10.0,
+        z_near=5.0,
+        z_far=15.0,
+    )
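Outside of Cog (which normally drives setup()/predict() itself, e.g. through `cog predict`), the predictor can also be exercised directly. A minimal sketch, assuming a CUDA GPU and a populated checkpoints/ directory; the example paths are only illustrative:

# Hypothetical local test of the Cog predictor defined above.
from predict import Predictor

predictor = Predictor()
predictor.setup()                 # loads all models once
video_path = predictor.predict(
    source_image="examples/source_image/full_body_2.png",
    driven_audio="examples/driven_audio/bus_chinese.wav",
    enhancer="gfpgan",
    preprocess="full",
    ref_eyeblink=None,
    ref_pose=None,
    still=True,
)
print(video_path)                 # the result is copied to /tmp/out.mp4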
quick_demo.ipynb
ADDED
@@ -0,0 +1,208 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "M74Gs_TjYl_B"
+   },
+   "source": [
+    "[](https://colab.research.google.com/github/Winfredy/SadTalker/blob/main/quick_demo.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "view-in-github"
+   },
+   "source": [
+    "### SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation\n",
+    "\n",
+    "[arxiv](https://arxiv.org/abs/2211.12194) | [project](https://sadtalker.github.io) | [Github](https://github.com/Winfredy/SadTalker)\n",
+    "\n",
+    "Wenxuan Zhang, Xiaodong Cun, Xuan Wang, Yong Zhang, Xi Shen, Yu Guo, Ying Shan, Fei Wang.\n",
+    "\n",
+    "Xi'an Jiaotong University, Tencent AI Lab, Ant Group\n",
+    "\n",
+    "CVPR 2023\n",
+    "\n",
+    "TL;DR: A realistic and stylized talking-head video generation method from a single image and audio\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "kA89DV-sKS4i"
+   },
+   "source": [
+    "Installation (around 5 mins)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "qJ4CplXsYl_E"
+   },
+   "outputs": [],
+   "source": [
+    "### make sure that CUDA is available in Edit -> Notebook settings -> GPU\n",
+    "!nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "Mdq6j4E5KQAR"
+   },
+   "outputs": [],
+   "source": [
+    "!update-alternatives --install /usr/local/bin/python3 python3 /usr/bin/python3.8 2\n",
+    "!update-alternatives --install /usr/local/bin/python3 python3 /usr/bin/python3.9 1\n",
+    "!python --version\n",
+    "!apt-get update\n",
+    "!apt install software-properties-common\n",
+    "!sudo dpkg --remove --force-remove-reinstreq python3-pip python3-setuptools python3-wheel\n",
+    "!apt-get install python3-pip\n",
+    "\n",
+    "print('Git clone project and install requirements...')\n",
+    "!git clone https://github.com/Winfredy/SadTalker &> /dev/null\n",
+    "%cd SadTalker\n",
+    "!export PYTHONPATH=/content/SadTalker:$PYTHONPATH\n",
+    "!python3.8 -m pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113\n",
+    "!apt update\n",
+    "!apt install ffmpeg &> /dev/null\n",
+    "!python3.8 -m pip install -r requirements.txt"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "DddcKB_nKsnk"
+   },
+   "source": [
+    "Download models (1 min)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "eDw3_UN8K2xa"
+   },
+   "outputs": [],
+   "source": [
+    "print('Download pre-trained models...')\n",
+    "!rm -rf checkpoints\n",
+    "!bash scripts/download_models.sh"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "kK7DYeo7Yl_H"
+   },
+   "outputs": [],
+   "source": [
+    "# borrowed from makeittalk\n",
+    "import ipywidgets as widgets\n",
+    "import glob\n",
+    "import matplotlib.pyplot as plt\n",
+    "print(\"Choose the image name to animate: (saved in folder 'examples/')\")\n",
+    "img_list = glob.glob1('examples/source_image', '*.png')\n",
+    "img_list.sort()\n",
+    "img_list = [item.split('.')[0] for item in img_list]\n",
+    "default_head_name = widgets.Dropdown(options=img_list, value='full3')\n",
+    "def on_change(change):\n",
+    "    if change['type'] == 'change' and change['name'] == 'value':\n",
+    "        plt.imshow(plt.imread('examples/source_image/{}.png'.format(default_head_name.value)))\n",
+    "        plt.axis('off')\n",
+    "        plt.show()\n",
+    "default_head_name.observe(on_change)\n",
+    "display(default_head_name)\n",
+    "plt.imshow(plt.imread('examples/source_image/{}.png'.format(default_head_name.value)))\n",
+    "plt.axis('off')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "-khNZcnGK4UK"
+   },
+   "source": [
+    "Animation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "ToBlDusjK5sS"
+   },
+   "outputs": [],
+   "source": [
+    "# selected audio from examples/driven_audio\n",
+    "img = 'examples/source_image/{}.png'.format(default_head_name.value)\n",
+    "print(img)\n",
+    "!python3.8 inference.py --driven_audio ./examples/driven_audio/RD_Radio31_000.wav \\\n",
+    "           --source_image {img} \\\n",
+    "           --result_dir ./results --still --preprocess full --enhancer gfpgan"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "fAjwGmKKYl_I"
+   },
+   "outputs": [],
+   "source": [
+    "# visualization code from makeittalk\n",
+    "from IPython.display import HTML\n",
+    "from base64 import b64encode\n",
+    "import os, sys\n",
+    "\n",
+    "# get the latest result from ./results\n",
+    "results = sorted(os.listdir('./results/'))\n",
+    "\n",
+    "mp4_name = glob.glob('./results/'+results[-1]+'/*.mp4')[0]\n",
+    "\n",
+    "mp4 = open('{}'.format(mp4_name), 'rb').read()\n",
+    "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
+    "\n",
+    "print('Display animation: {}'.format(mp4_name), file=sys.stderr)\n",
+    "display(HTML(\"\"\"\n",
+    "  <video width=256 controls>\n",
+    "    <source src=\"%s\" type=\"video/mp4\">\n",
+    "  </video>\n",
+    "  \"\"\" % data_url))\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.9.7"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "db5031b3636a3f037ea48eb287fd3d023feb9033aefc2a9652a92e470fb0851b"
+   }
+  },
+  "accelerator": "GPU",
+  "gpuClass": "standard"
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
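The last notebook cell base64-embeds the newest video under ./results for inline display in Colab. Outside a notebook, the same lookup can be done with a short standalone script; a sketch (not part of the committed files):

import glob
import os

# Pick the most recent run directory under ./results and grab its first .mp4.
runs = sorted(os.listdir("./results/"))
mp4_name = glob.glob(os.path.join("./results", runs[-1], "*.mp4"))[0]
print("Latest generated video:", mp4_name)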
requirements3d.txt
ADDED
@@ -0,0 +1,21 @@
+numpy==1.23.4
+face_alignment==1.3.5
+imageio==2.19.3
+imageio-ffmpeg==0.4.7
+librosa==0.9.2
+numba
+resampy==0.3.1
+pydub==0.25.1
+scipy==1.5.3
+kornia==0.6.8
+tqdm
+yacs==0.1.8
+pyyaml
+joblib==1.1.0
+scikit-image==0.19.3
+basicsr==1.4.2
+facexlib==0.2.5
+trimesh==3.9.20
+dlib-bin
+gradio
+gfpgan
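These extra pins (trimesh, dlib-bin, and friends) appear to back the 3D visualization path, i.e. the --face3dvis flag defined in inference.py. A hedged sketch of installing them and producing the 3dface.mp4 side output (the purpose of requirements3d.txt is an assumption, not stated in the commit itself):

import subprocess
import sys

# Assumption: requirements3d.txt provides the dependencies needed by --face3dvis.
subprocess.run([sys.executable, "-m", "pip", "install", "-r", "requirements3d.txt"], check=True)
subprocess.run([
    sys.executable, "inference.py",
    "--driven_audio", "./examples/driven_audio/bus_chinese.wav",
    "--source_image", "./examples/source_image/full_body_2.png",
    "--face3dvis",   # also writes 3dface.mp4 alongside the talking-head result
], check=True)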
webui.bat
ADDED
@@ -0,0 +1,17 @@
+@echo off
+
+IF NOT EXIST venv (
+    python -m venv venv
+) ELSE (
+    echo venv folder already exists, skipping creation...
+)
+call .\venv\Scripts\activate.bat
+
+set PYTHON="venv\Scripts\Python.exe"
+echo venv %PYTHON%
+
+%PYTHON% Launcher.py
+
+echo.
+echo Launch unsuccessful. Exiting.
+pause