介绍

Amazon Polly 是一项将文本转换为逼真语音的服务。借助它,您可以创建会说话的应用程序,并构建全新类别的语音功能产品。
根据业务需求,我们对 AWS SDK 进行了封装。由于只有知语(Zhiyu)支持普通话,所以默认使用知语进行朗读。
另外我们封装了朗读速度和音量的控制,定义了ProsodyRate和ProsodyVolume枚举类,其他请参考AWSPollyClient类中的方法。

添加pom依赖

  1. <dependency>
  2. <groupId>com.walltech</groupId>
  3. <artifactId>walltech-awsclient</artifactId>
  4. </dependency>

快速开始

  1. import com.walltech.aws.polly.AWSPollyClient;
  2. import com.walltech.aws.util.AWSUtils;
  3. import org.junit.jupiter.api.Test;
  4. import software.amazon.awssdk.services.polly.PollyClient;
  5. import java.io.FileOutputStream;
  6. /**
  7. * @author mori
  8. * @date 2022/9/8
  9. */
  10. public class PollyTest {
  11. private static final String SAMPLE = "早上好,请先扫描再称重, please scan first and then weigh,thank you, 澳大利亚, AP-123.";
  12. @Test
  13. public void testHello() {
  14. // 目前只有cn north west client可用
  15. PollyClient pollyClient = AWSUtils.getPollyCNNorthWestClient();
  16. try (FileOutputStream fileOutputStream = new FileOutputStream("./sample.mp3");) {
  17. // 合成mp3流 写入到output
  18. AWSPollyClient.synthesizeMp3ByZhiyu(pollyClient, SAMPLE, fileOutputStream);
  19. } catch (Exception e) {
  20. e.printStackTrace();
  21. }
  22. }
  23. }

AWSPollyClient

  1. import com.walltech.aws.polly.enums.ProsodyRate;
  2. import com.walltech.aws.polly.enums.ProsodyVolume;
  3. import com.walltech.aws.s3.AWSS3Client;
  4. import org.slf4j.Logger;
  5. import org.slf4j.LoggerFactory;
  6. import software.amazon.awssdk.services.polly.PollyClient;
  7. import software.amazon.awssdk.services.polly.model.OutputFormat;
  8. import software.amazon.awssdk.services.polly.model.SynthesizeSpeechRequest;
  9. import software.amazon.awssdk.services.polly.model.TextType;
  10. import software.amazon.awssdk.services.polly.model.VoiceId;
  11. import software.amazon.awssdk.utils.IoUtils;
  12. import java.io.InputStream;
  13. import java.io.OutputStream;
  14. /**
  15. * @author mori
  16. * @date 2022/9/9
  17. */
  18. public class AWSPollyClient {
  19. private static final Logger logger = LoggerFactory.getLogger(AWSS3Client.class);
  20. public static void synthesizeMp3ByZhiyu(PollyClient polly, String text, OutputStream outputStream) {
  21. synthesizeMp3ByZhiyu(polly, ProsodyVolume.X_LOUD.toString(), ProsodyRate.X_FAST.toString(), text, outputStream);
  22. }
  23. public static void synthesizeMp3ByZhiyu(PollyClient polly, ProsodyVolume volume, ProsodyRate prosodyRate, String text, OutputStream outputStream) {
  24. synthesizeMp3ByZhiyu(polly, volume.toString(), prosodyRate.toString(), text, outputStream);
  25. }
  26. public static void synthesizeMp3ByZhiyu(PollyClient polly, String volume, String rate, String text, OutputStream outputStream) {
  27. try (InputStream inputStream = synthesizeMp3ByZhiyu(polly, volume, rate, text)) {
  28. if (inputStream != null) {
  29. IoUtils.copy(inputStream, outputStream);
  30. }
  31. } catch (Exception e) {
  32. logger.error("Synthesize mp3 failed. error: {}", e.getMessage());
  33. }
  34. }
  35. public static InputStream synthesizeMp3ByZhiyu(PollyClient polly, String text) {
  36. return synthesizeMp3ByZhiyu(polly, ProsodyVolume.X_LOUD.toString(), ProsodyRate.X_FAST.toString(), text);
  37. }
  38. public static InputStream synthesizeMp3ByZhiyu(PollyClient polly, ProsodyVolume volume, ProsodyRate rate, String text) {
  39. return synthesizeMp3ByZhiyu(polly, volume.toString(), rate.toString(), text);
  40. }
  41. public static InputStream synthesizeMp3ByZhiyu(PollyClient polly, String volume, String rate, String text) {
  42. text = "<speak><prosody volume=\"" + volume + "\" rate=\"" + rate + "\">" + text + " <break/></prosody></speak>";
  43. SynthesizeSpeechRequest synthReq = SynthesizeSpeechRequest.builder()
  44. .text(text)
  45. .textType(TextType.SSML)
  46. .voiceId(VoiceId.ZHIYU)
  47. .outputFormat(OutputFormat.MP3)
  48. .build();
  49. try {
  50. return polly.synthesizeSpeech(synthReq);
  51. } catch (Exception e) {
  52. logger.error("Synthesize mp3 failed. error: {}", e.getMessage());
  53. }
  54. return null;
  55. }
  56. }