"""Talking-head video generation demo using ModelScope's SadTalker pipeline.

Animates a source portrait image with a driving audio clip and writes the
resulting video to the configured output directory.

@Email: liaoxiju@inspur.com
"""
from modelscope.pipelines import pipeline

# Build the talking-head pipeline from a local SadTalker checkpoint.
# NOTE(review): fixed import — the module is `modelscope.pipelines`, not
# `modelscope.piplines`; the original line raised ImportError.
inference = pipeline('talking-head', model='./wwd123/sadtalker', model_revision='v1.0.0')

# Input assets: the face image to animate and the speech audio driving it.
source_image = "liao.jpg"
driven_audio = "xx_cn.wav"

# Directory where the generated video (and any intermediates) are written.
out_dir = "./results/"

# Inference options forwarded to the SadTalker pipeline.
kwargs = {
    'preprocess': 'full',    # Options are 'crop', 'resize', or 'full'
    'still_mode': True,      # presumably reduces head motion — verify against SadTalker docs
    'use_enhancer': False,   # skip the face enhancer pass
    'batch_size': 1,
    'size': 256,             # Image size can be either 256 or 512 pixels
    'pose_style': 0,
    'exp_scale': 1,          # presumably an expression intensity scale — TODO confirm
    'result_dir': out_dir,
}

# Run inference: animate the source image with the driving audio.
video_path = inference(source_image=source_image, driven_audio=driven_audio, **kwargs)

# Report where the generated video was saved.
print(f"==>> video_path: {video_path}")