介绍
Amazon Polly 是一项将文本转换为逼真语音的服务,借助它,您可以创建会说话的应用程序,并构建全新类别的具有语音功能的产品。
根据业务需求,我们对 AWS SDK 进行了封装。由于只有知语(Zhiyu)支持普通话,所以默认使用知语进行朗读。
另外,我们封装了朗读速度和音量的控制,定义了 ProsodyRate 和 ProsodyVolume 枚举类,其他用法请参考 AWSPollyClient 类中的方法。
添加pom依赖
<dependency>
<groupId>com.walltech</groupId>
<artifactId>walltech-awsclient</artifactId>
</dependency>
快速开始
import com.walltech.aws.polly.AWSPollyClient;
import com.walltech.aws.util.AWSUtils;
import org.junit.jupiter.api.Test;
import software.amazon.awssdk.services.polly.PollyClient;
import java.io.FileOutputStream;
/**
 * Quick-start example: synthesizes a mixed Chinese/English sentence with the Zhiyu voice
 * and writes the MP3 stream to {@code ./sample.mp3}.
 *
 * @author mori
 * @date 2022/9/8
 */
public class PollyTest {
    private static final String SAMPLE = "早上好,请先扫描再称重, please scan first and then weigh,thank you, 澳大利亚, AP-123.";

    /**
     * Synthesizes {@link #SAMPLE} and writes it to a local file.
     *
     * @throws Exception if the output file cannot be opened or closed; the failure is
     *                   propagated so the test fails instead of only printing a stack trace
     */
    @Test
    public void testHello() throws Exception {
        // Only the CN north-west client is currently available.
        PollyClient pollyClient = AWSUtils.getPollyCNNorthWestClient();
        try (FileOutputStream fileOutputStream = new FileOutputStream("./sample.mp3")) {
            // Synthesize the MP3 stream and write it to the output stream.
            AWSPollyClient.synthesizeMp3ByZhiyu(pollyClient, SAMPLE, fileOutputStream);
        }
    }
}
AWSPollyClient
import com.walltech.aws.polly.enums.ProsodyRate;
import com.walltech.aws.polly.enums.ProsodyVolume;
import com.walltech.aws.s3.AWSS3Client;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.services.polly.PollyClient;
import software.amazon.awssdk.services.polly.model.OutputFormat;
import software.amazon.awssdk.services.polly.model.SynthesizeSpeechRequest;
import software.amazon.awssdk.services.polly.model.TextType;
import software.amazon.awssdk.services.polly.model.VoiceId;
import software.amazon.awssdk.utils.IoUtils;
import java.io.InputStream;
import java.io.OutputStream;
/**
 * Static helpers that synthesize speech as MP3 through Amazon Polly using the Zhiyu voice.
 *
 * <p>Every overload wraps the given text in an SSML {@code <prosody>} element carrying the
 * requested volume and rate. The text is embedded verbatim: XML reserved characters such as
 * {@code &} and {@code <} are not escaped here, so callers passing plain text that may
 * contain them must escape it beforehand.
 *
 * <p>Failures are logged and never thrown: the {@link InputStream} overloads return
 * {@code null}, and the {@link OutputStream} overloads write nothing.
 *
 * @author mori
 * @date 2022/9/9
 */
public class AWSPollyClient {
    // Bound to this class so Polly errors are reported under the correct logger category.
    private static final Logger logger = LoggerFactory.getLogger(AWSPollyClient.class);

    /**
     * Synthesizes {@code text} with {@link ProsodyVolume#X_LOUD} and {@link ProsodyRate#X_FAST}
     * and copies the MP3 bytes to {@code outputStream}.
     *
     * @param polly        client used to call Polly
     * @param text         content placed inside the SSML prosody element
     * @param outputStream destination of the MP3 bytes; the caller remains responsible for closing it
     */
    public static void synthesizeMp3ByZhiyu(PollyClient polly, String text, OutputStream outputStream) {
        synthesizeMp3ByZhiyu(polly, ProsodyVolume.X_LOUD, ProsodyRate.X_FAST, text, outputStream);
    }

    /**
     * Synthesizes {@code text} with the given volume and rate and copies the MP3 bytes to
     * {@code outputStream}.
     *
     * @param polly        client used to call Polly
     * @param volume       prosody volume; its {@code toString()} value is used in the SSML
     * @param prosodyRate  prosody rate; its {@code toString()} value is used in the SSML
     * @param text         content placed inside the SSML prosody element
     * @param outputStream destination of the MP3 bytes; the caller remains responsible for closing it
     */
    public static void synthesizeMp3ByZhiyu(PollyClient polly, ProsodyVolume volume, ProsodyRate prosodyRate, String text, OutputStream outputStream) {
        synthesizeMp3ByZhiyu(polly, volume.toString(), prosodyRate.toString(), text, outputStream);
    }

    /**
     * Synthesizes {@code text} with raw SSML volume and rate attribute values and copies the
     * MP3 bytes to {@code outputStream}. Nothing is written when synthesis fails; copy errors
     * are logged and swallowed.
     *
     * @param polly        client used to call Polly
     * @param volume       value of the SSML {@code volume} attribute
     * @param rate         value of the SSML {@code rate} attribute
     * @param text         content placed inside the SSML prosody element
     * @param outputStream destination of the MP3 bytes; the caller remains responsible for closing it
     */
    public static void synthesizeMp3ByZhiyu(PollyClient polly, String volume, String rate, String text, OutputStream outputStream) {
        try (InputStream inputStream = synthesizeMp3ByZhiyu(polly, volume, rate, text)) {
            // A null stream means synthesis already failed and was logged.
            if (inputStream != null) {
                IoUtils.copy(inputStream, outputStream);
            }
        } catch (Exception e) {
            // The trailing throwable makes SLF4J log the stack trace as well as the message.
            logger.error("Synthesize mp3 failed. error: {}", e.getMessage(), e);
        }
    }

    /**
     * Synthesizes {@code text} with {@link ProsodyVolume#X_LOUD} and {@link ProsodyRate#X_FAST}.
     *
     * @param polly client used to call Polly
     * @param text  content placed inside the SSML prosody element
     * @return the MP3 stream, which the caller must close, or {@code null} if synthesis failed
     */
    public static InputStream synthesizeMp3ByZhiyu(PollyClient polly, String text) {
        return synthesizeMp3ByZhiyu(polly, ProsodyVolume.X_LOUD, ProsodyRate.X_FAST, text);
    }

    /**
     * Synthesizes {@code text} with the given volume and rate.
     *
     * @param polly  client used to call Polly
     * @param volume prosody volume; its {@code toString()} value is used in the SSML
     * @param rate   prosody rate; its {@code toString()} value is used in the SSML
     * @param text   content placed inside the SSML prosody element
     * @return the MP3 stream, which the caller must close, or {@code null} if synthesis failed
     */
    public static InputStream synthesizeMp3ByZhiyu(PollyClient polly, ProsodyVolume volume, ProsodyRate rate, String text) {
        return synthesizeMp3ByZhiyu(polly, volume.toString(), rate.toString(), text);
    }

    /**
     * Builds the SSML document and requests MP3 synthesis with the Zhiyu voice.
     *
     * @param polly  client used to call Polly
     * @param volume value of the SSML {@code volume} attribute
     * @param rate   value of the SSML {@code rate} attribute
     * @param text   content placed inside the SSML prosody element, inserted without XML escaping
     * @return the MP3 stream, which the caller must close, or {@code null} if synthesis failed
     */
    public static InputStream synthesizeMp3ByZhiyu(PollyClient polly, String volume, String rate, String text) {
        String ssml = "<speak><prosody volume=\"" + volume + "\" rate=\"" + rate + "\">" + text + " <break/></prosody></speak>";
        SynthesizeSpeechRequest synthReq = SynthesizeSpeechRequest.builder()
                .text(ssml)
                .textType(TextType.SSML)
                .voiceId(VoiceId.ZHIYU)
                .outputFormat(OutputFormat.MP3)
                .build();
        try {
            return polly.synthesizeSpeech(synthReq);
        } catch (Exception e) {
            // Failures are reported as a null return to keep the existing contract.
            logger.error("Synthesize mp3 failed. error: {}", e.getMessage(), e);
            return null;
        }
    }
}