AWS - AWS polly 基础架构封装 - 《初梦 's 学习记录》

介绍
添加pom依赖
快速开始
AWSPollyClient

介绍

Amazon Polly 是一项将文本转换为逼真语音的服务，借助它，您可以创建会说话的应用程序，并构建全新类别的支持语音功能的产品。
根据业务需求，我们对 AWS SDK 进行了封装；由于只有知语（Zhiyu）这一发音人支持普通话，所以默认使用知语进行朗读。
另外我们封装了朗读速度和音量的控制，定义了ProsodyRate和ProsodyVolume枚举类，其他请参考AWSPollyClient类中的方法。

添加pom依赖

<dependency>
  <groupId>com.walltech</groupId>
  <artifactId>walltech-awsclient</artifactId>
</dependency>

快速开始

import com.walltech.aws.polly.AWSPollyClient;
import com.walltech.aws.util.AWSUtils;
import org.junit.jupiter.api.Test;
import software.amazon.awssdk.services.polly.PollyClient;
import java.io.FileOutputStream;
/**
 * Quick-start example: synthesizes {@link #SAMPLE} with the Zhiyu voice and
 * writes the resulting MP3 to {@code ./sample.mp3}.
 *
 * @author mori
 * @date 2022/9/8
 */
public class PollyTest {
    /** Mixed Chinese/English sample sentence used as the synthesis input. */
    private static final String SAMPLE = "早上好，请先扫描再称重, please scan first and then weigh,thank you, 澳大利亚, AP-123.";

    /**
     * Synthesizes the sample text into {@code ./sample.mp3}.
     *
     * @throws Exception if the output file cannot be opened, written or closed;
     *                   propagated so that the test fails instead of silently passing
     */
    @Test
    public void testHello() throws Exception {
        // Currently only the CN north-west client is usable.
        PollyClient pollyClient = AWSUtils.getPollyCNNorthWestClient();
        // No catch block: swallowing the exception (printStackTrace) would make
        // the test report success even when nothing was written.
        try (FileOutputStream fileOutputStream = new FileOutputStream("./sample.mp3")) {
            // Synthesize the MP3 stream and write it to the output.
            AWSPollyClient.synthesizeMp3ByZhiyu(pollyClient, SAMPLE, fileOutputStream);
        }
    }
}

AWSPollyClient

import com.walltech.aws.polly.enums.ProsodyRate;
import com.walltech.aws.polly.enums.ProsodyVolume;
import com.walltech.aws.s3.AWSS3Client;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.services.polly.PollyClient;
import software.amazon.awssdk.services.polly.model.OutputFormat;
import software.amazon.awssdk.services.polly.model.SynthesizeSpeechRequest;
import software.amazon.awssdk.services.polly.model.TextType;
import software.amazon.awssdk.services.polly.model.VoiceId;
import software.amazon.awssdk.utils.IoUtils;
import java.io.InputStream;
import java.io.OutputStream;
/**
 * Static helpers that synthesize MP3 speech through Amazon Polly using the
 * Zhiyu voice, wrapping the input text in an SSML {@code <prosody>} element
 * that carries the requested volume and rate.
 *
 * <p>Failures are logged and not rethrown: the stream-returning overloads
 * return {@code null} on failure, and the {@link OutputStream} overloads then
 * write nothing.
 *
 * @author mori
 * @date 2022/9/9
 */
public class AWSPollyClient {
    // Bound to this class (previously AWSS3Client.class, which mis-attributed log lines).
    private static final Logger logger = LoggerFactory.getLogger(AWSPollyClient.class);

    /**
     * Synthesizes {@code text} with the default prosody
     * ({@link ProsodyVolume#X_LOUD}, {@link ProsodyRate#X_FAST}) and copies the MP3 to
     * {@code outputStream}.
     *
     * @param polly        Polly client used to issue the request
     * @param text         text placed inside the SSML prosody element
     * @param outputStream destination for the MP3 bytes; not closed by this method
     */
    public static void synthesizeMp3ByZhiyu(PollyClient polly, String text, OutputStream outputStream) {
        synthesizeMp3ByZhiyu(polly, ProsodyVolume.X_LOUD.toString(), ProsodyRate.X_FAST.toString(), text, outputStream);
    }

    /**
     * Synthesizes {@code text} with the given prosody enums and copies the MP3 to
     * {@code outputStream}.
     *
     * @param polly        Polly client used to issue the request
     * @param volume       volume; its {@code toString()} becomes the SSML {@code volume} attribute
     * @param prosodyRate  rate; its {@code toString()} becomes the SSML {@code rate} attribute
     * @param text         text placed inside the SSML prosody element
     * @param outputStream destination for the MP3 bytes; not closed by this method
     */
    public static void synthesizeMp3ByZhiyu(PollyClient polly, ProsodyVolume volume, ProsodyRate prosodyRate, String text, OutputStream outputStream) {
        synthesizeMp3ByZhiyu(polly, volume.toString(), prosodyRate.toString(), text, outputStream);
    }

    /**
     * Synthesizes {@code text} and copies the resulting MP3 to {@code outputStream}.
     * Any exception raised while copying or closing the Polly stream is logged
     * (with its stack trace) and not rethrown.
     *
     * @param polly        Polly client used to issue the request
     * @param volume       raw value for the SSML {@code volume} attribute
     * @param rate         raw value for the SSML {@code rate} attribute
     * @param text         text placed inside the SSML prosody element
     * @param outputStream destination for the MP3 bytes; not closed by this method
     */
    public static void synthesizeMp3ByZhiyu(PollyClient polly, String volume, String rate, String text, OutputStream outputStream) {
        try (InputStream inputStream = synthesizeMp3ByZhiyu(polly, volume, rate, text)) {
            // A null stream means synthesis already failed and was logged; nothing to copy.
            if (inputStream != null) {
                IoUtils.copy(inputStream, outputStream);
            }
        } catch (Exception e) {
            // Trailing throwable argument makes SLF4J log the stack trace as well.
            logger.error("Synthesize mp3 failed. error: {}", e.getMessage(), e);
        }
    }

    /**
     * Synthesizes {@code text} with the default prosody
     * ({@link ProsodyVolume#X_LOUD}, {@link ProsodyRate#X_FAST}).
     *
     * @param polly Polly client used to issue the request
     * @param text  text placed inside the SSML prosody element
     * @return the MP3 stream, or {@code null} if the request failed; the caller must close it
     */
    public static InputStream synthesizeMp3ByZhiyu(PollyClient polly, String text) {
        return synthesizeMp3ByZhiyu(polly, ProsodyVolume.X_LOUD.toString(), ProsodyRate.X_FAST.toString(), text);
    }

    /**
     * Synthesizes {@code text} with the given prosody enums.
     *
     * @param polly  Polly client used to issue the request
     * @param volume volume; its {@code toString()} becomes the SSML {@code volume} attribute
     * @param rate   rate; its {@code toString()} becomes the SSML {@code rate} attribute
     * @param text   text placed inside the SSML prosody element
     * @return the MP3 stream, or {@code null} if the request failed; the caller must close it
     */
    public static InputStream synthesizeMp3ByZhiyu(PollyClient polly, ProsodyVolume volume, ProsodyRate rate, String text) {
        return synthesizeMp3ByZhiyu(polly, volume.toString(), rate.toString(), text);
    }

    /**
     * Builds the SSML document, requests MP3 output with the Zhiyu voice and
     * returns the response stream.
     *
     * @param polly  Polly client used to issue the request
     * @param volume raw value for the SSML {@code volume} attribute
     * @param rate   raw value for the SSML {@code rate} attribute
     * @param text   text placed inside the SSML prosody element
     * @return the MP3 stream, or {@code null} if the request failed; the caller must close it
     */
    public static InputStream synthesizeMp3ByZhiyu(PollyClient polly, String volume, String rate, String text) {
        // NOTE(review): volume, rate and text are concatenated without XML escaping, so
        // characters such as '<' or '&' in them are interpreted as SSML markup.
        // Confirm whether callers rely on embedding SSML before adding escaping.
        text = "<speak><prosody volume=\"" + volume + "\" rate=\"" + rate + "\">" + text + " <break/></prosody></speak>";
        SynthesizeSpeechRequest synthReq = SynthesizeSpeechRequest.builder()
                .text(text)
                .textType(TextType.SSML)
                .voiceId(VoiceId.ZHIYU)
                .outputFormat(OutputFormat.MP3)
                .build();
        try {
            return polly.synthesizeSpeech(synthReq);
        } catch (Exception e) {
            // Trailing throwable argument makes SLF4J log the stack trace as well.
            logger.error("Synthesize mp3 failed. error: {}", e.getMessage(), e);
            return null;
        }
    }
}