flink-conf.yaml
flink-conf.yaml文件存放目录在/opt/flink/conf
# jobManager 的IP地址
jobmanager.rpc.address: localhost
# jobManager 的端口号
jobmanager.rpc.port: 6123
# jobManager JVM heap 内存大小
jobmanager.heap.size: 1024m
# taskManager JVM heap 内存大小
taskmanager.heap.size: 1024m
# 每个 TaskManager 提供的任务 slots 数量
taskmanager.numberOfTaskSlots: 1
#程序默认的并行计算的个数
parallelism.default: 1
#==============================================================================
# High Availability
#==============================================================================
# 可以选择'NONE'或者'zookeeper'
# The high-availability mode. Possible options are 'NONE' or 'zookeeper'.
#
# high-availability: zookeeper
# The path where metadata for master recovery is persisted. While ZooKeeper stores
# the small ground truth for checkpoint and leader election, this location stores
# the larger objects, like persisted dataflow graphs.
#
# Must be a durable file system that is accessible from all nodes
# (like HDFS, S3, Ceph, nfs, ...)
# 文件系统路径,让Flink在高可用设置中持久保存元数据
# high-availability.storageDir: hdfs:///flink/ha/
# The list of ZooKeeper quorum peers that coordinate the high-availability
# setup. This must be a list of the form:
# "host1:clientPort,host2:clientPort,..." (default clientPort: 2181)
# zookeeper集群中仲裁者的机器ip和port端口号
# high-availability.zookeeper.quorum: localhost:2181
# ACL options are based on https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_BuiltinACLSchemes
# It can be either "creator" (ZOO_CREATE_ALL_ACL) or "open" (ZOO_OPEN_ACL_UNSAFE)
# The default value is "open" and it can be changed to "creator" if ZK security is enabled
# 默认是open,如果zookeeper security启用了该值会更改成 creator
# high-availability.zookeeper.client.acl: open
#==============================================================================
# Fault tolerance and checkpointing 容错和检查点
#==============================================================================
# The backend that will be used to store operator state checkpoints if
# checkpointing is enabled.
#
# Supported backends are 'jobmanager', 'filesystem', 'rocksdb', or the
# <class-name-of-factory>.
# 用于存储和检查点状态
# state.backend: filesystem
# Directory for checkpoints filesystem, when using any of the default bundled
# state backends.
# 存储检查点的数据文件和元数据的默认目录
# state.checkpoints.dir: hdfs://namenode-host:port/flink-checkpoints
# Default target directory for savepoints, optional.
# savepoints 的默认目标目录(可选)
# state.savepoints.dir: hdfs://namenode-host:port/flink-savepoints
# Flag to enable/disable incremental checkpoints for backends that
# support incremental checkpoints (like the RocksDB state backend).
# 用于启用/禁用增量 checkpoints 的标志
# state.backend.incremental: false
#==============================================================================
# Web Frontend
#==============================================================================
# The address under which the web-based runtime monitor listens.
# 基于web的运行时监视器侦听的地址
#jobmanager.web.address: 0.0.0.0
# web的运行时监视器端口
rest.port: 8081
# 是否从基于web的jobManager启用作业提交
#jobmanager.web.submit.enable: false
#==============================================================================
# Advanced 高级配置
#==============================================================================
# 是否应在TaskManager启动时预先分配TaskManager管理的内存
# taskmanager.memory.preallocate: false
# The classloading resolve order. Possible values are 'child-first' (Flink's default)
# and 'parent-first' (Java's default).
#
# Child first classloading allows users to use different dependency/library
# versions in their application than those in the classpath. Switching back
# to 'parent-first' may help with debugging dependency issues.
# 类加载解析顺序,是先检查用户代码jar('child-first')还是应用程序类路径('parent-first')。默认设置指示首先从用户代码jar加载类
# classloader.resolve-order: child-first
# The amount of memory going to the network stack. These numbers usually need
# no tuning. Adjusting them may be necessary in case of an "Insufficient number
# of network buffers" error. The default min is 64MB, teh default max is 1GB.
# 用于网络缓冲区的JVM内存的分数。这决定了 TaskManager 可以同时拥有多少流数据交换通道以及通道缓冲的程度。
# 如果作业被拒绝或者您收到系统没有足够缓冲区的警告,请增加此值或下面的最小/最大值。
# 另外请注意'taskmanager.network.memory.min'和'taskmanager.network.memory.max'可能会覆盖此分数
# taskmanager.network.memory.fraction: 0.1
# taskmanager.network.memory.min: 67108864
# taskmanager.network.memory.max: 1073741824
#==============================================================================
# Flink Cluster Security Configuration 集群的安全配置
#==============================================================================
#
# Kerberos authentication for various components - Hadoop, ZooKeeper, and connectors -
# may be enabled in four steps:
# 1. configure the local krb5.conf file
# 2. provide Kerberos credentials (either a keytab or a ticket cache w/ kinit)
# 3. make the credentials available to various JAAS login contexts
# 4. configure the connector to use JAAS/SASL
# The below configure how Kerberos credentials are provided. A keytab will be used instead of
# a ticket cache if the keytab path and principal are set.
# 提示是否从 Kerberos ticket 缓存中读取
# security.kerberos.login.use-ticket-cache: true
#
# 包含用户凭据的 Kerberos 秘钥表文件的绝对路径
# security.kerberos.login.keytab: /path/to/kerberos/keytab
# 与keytab 关联的 Kerberos 主体名称
# security.kerberos.login.principal: flink-user
# The configuration below defines which JAAS login contexts
# 以逗号分隔的登录上下文列表,用于提供 Kerberos 凭据 (例如:'Client , KafkaClient' 使用凭证进行 zookeeper 身份验证和 kafka 的身份验证)
# security.kerberos.login.contexts: Client,KafkaClient
#==============================================================================
# ZK Security Configuration zookeeper 安全配置
#==============================================================================
# Below configurations are applicable if ZK ensemble is configured for security
# Override below configuration to provide custom ZK service name if configured
# 覆盖以下配置以提供自定义 zookeeper 名字
# zookeeper.sasl.service-name: zookeeper
# The configuration below must match one of the values set in "security.kerberos.login.contexts"
# 该配置的值必须是 'security.kerberos.login.contexts' 列表中的其中一项
# zookeeper.sasl.login-context-name: Client
#==============================================================================
# HistoryServer
#==============================================================================
# 可以通过 bin/historyserver.sh (start|stop) 命令启动和关闭 HistoryServer
# The HistoryServer is started and stopped via bin/historyserver.sh (start|stop)
# Directory to upload completed jobs to. Add this directory to the list of
# monitored directories of the HistoryServer as well (see below).
# 将已经完成的作业上传到的目录
#jobmanager.archive.fs.dir: hdfs:///completed-jobs/
# The address under which the web-based HistoryServer listens.
# 基于 Web 的 HistoryServer 的地址
#historyserver.web.address: 0.0.0.0
# The port under which the web-based HistoryServer listens.
# 基于 Web 的 HistoryServer 的端口号
#historyserver.web.port: 8082
# Comma separated list of directories to monitor for completed jobs.
# 以逗号分隔的目录列表,用于监视已完成的作业
#historyserver.archive.fs.dir: hdfs:///completed-jobs/
# Interval in milliseconds for refreshing the monitored directories.
# 刷新受监控目录的时间间隔(以毫秒为单位)
#historyserver.archive.fs.refresh-interval: 10000
zoo.cfg
# 每个 tick 的毫秒数
tickTime=2000
# 初始同步阶段可以采用的 tick 数
initLimit=10
# 在发送请求和获取确认之间可以传递的 tick 数
syncLimit=5
# 存储快照的目录
# dataDir=/tmp/zookeeper
# 客户端将连接的端口
clientPort=2181
# ZooKeeper quorum peers
server.1=localhost:2888:3888
# server.2=host:peer-port:leader-port