GitHub

https://github.com/alibaba/DataX
https://github.com/alibaba/DataX/blob/master/introduction.md
https://github.com/alibaba/DataX/blob/master/userGuid.md
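
DataX ships as a prebuilt tarball, so it can be used without compiling from source. A minimal install sketch, assuming the download link from the official userGuid.md and the /usr/local/datax layout used throughout this article:

```shell
# Download the prebuilt DataX tarball (link taken from the official user guide)
wget http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz
tar -xzf datax.tar.gz -C /usr/local/    # unpacks to /usr/local/datax

# Smoke test with the bundled stream-to-stream self-check job
python3 /usr/local/datax/bin/datax.py /usr/local/datax/job/job.json
```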

JDK Installation

https://www.oracle.com/java/technologies/downloads/archive/
Version notes:

Linux x86 RPM Package           // for 32-bit CentOS / Red Hat systems
Linux x64 RPM Package           // for 64-bit CentOS / Red Hat systems
Linux x86 Compressed Archive    // for 32-bit Linux systems
Linux x64 Compressed Archive    // for 64-bit Linux systems
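
A minimal sketch of installing and verifying the JDK, assuming the x64 RPM was downloaded from the archive above (the file name below is a placeholder; substitute the version you actually downloaded):

```shell
# Placeholder file name -- use the RPM you downloaded from the Oracle archive
rpm -ivh jdk-8u341-linux-x64.rpm

# Verify that the JDK is installed and on the PATH
java -version
```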

Introduction

DataX is the open-source edition of Alibaba Cloud DataWorks Data Integration, an offline data synchronization tool/platform widely used inside Alibaba Group. DataX implements efficient data synchronization between heterogeneous data sources, including MySQL, Oracle, OceanBase, SQL Server, PostgreSQL, HDFS, Hive, ADS, HBase, TableStore (OTS), MaxCompute (ODPS), Hologres, DRDS, and more.
This walkthrough does a full, offline sync of a few tables from one SQL Server database to another.

Requirement

Sync the data of a table in one SQL Server database to a table in another SQL Server database, as an offline job.

Add the JDBC Driver

Microsoft JDBC Driver for SQL Server release notes:
https://github.com/microsoft/mssql-jdbc/tags
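
DataX loads JDBC drivers from each plugin's libs directory. A sketch of dropping the downloaded driver into both the reader and writer plugins, assuming the /usr/local/datax layout (the jar name is a placeholder; use the release you actually downloaded):

```shell
# Placeholder jar name -- substitute the mssql-jdbc release you downloaded
cp mssql-jdbc-10.2.1.jre8.jar /usr/local/datax/plugin/reader/sqlserverreader/libs/
cp mssql-jdbc-10.2.1.jre8.jar /usr/local/datax/plugin/writer/sqlserverwriter/libs/
```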

View the Configuration Template

Print a job template for a given reader/writer pair (the jobs below actually use sqlserverwriter; its template can be printed the same way):

```shell
python3 /usr/local/datax/bin/datax.py -r sqlserverreader -w mysqlwriter
```

The printed template:

```json
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "sqlserverreader",
                    "parameter": {
                        "connection": [
                            {
                                "jdbcUrl": [],
                                "table": []
                            }
                        ],
                        "password": "",
                        "username": ""
                    }
                },
                "writer": {
                    "name": "mysqlwriter",
                    "parameter": {
                        "column": [],
                        "connection": [
                            {
                                "jdbcUrl": "",
                                "table": []
                            }
                        ],
                        "password": "",
                        "preSql": [],
                        "session": [],
                        "username": "",
                        "writeMode": ""
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": ""
            }
        }
    }
}
```

:::warning
Note: the sqlserverreader parameter block in this template is missing the column entry; you must add it by hand.
:::

Configuration for Syncing the cap_published Table

Source table DDL:

```sql
USE [es_test]
GO
/****** Object: Table [dbo].[cap_published] Script Date: 2022-10-10 12:11:22 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE TABLE [dbo].[cap_published](
    [Id] [bigint] NOT NULL,
    [Version] [nvarchar](20) NOT NULL,
    [Name] [nvarchar](200) NOT NULL,
    [Content] [nvarchar](max) NULL,
    [Retries] [int] NOT NULL,
    [Added] [datetime2](7) NOT NULL,
    [ExpiresAt] [datetime2](7) NULL,
    [StatusName] [nvarchar](50) NOT NULL,
    CONSTRAINT [PK_cap.Published] PRIMARY KEY CLUSTERED
    (
        [Id] ASC
    ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
GO
```
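
sqlserverwriter does not create target tables, so the same table must exist in es_test2 before the job runs. A sketch using sqlcmd, assuming the DDL above is saved as cap_published.sql (a placeholder name) with the USE [es_test] line removed or changed to es_test2:

```shell
# Run the CREATE TABLE script against the target database es_test2
sqlcmd -S 192.168.3.40 -U sa -P 'longfuchu' -d es_test2 -i cap_published.sql
```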

Job file /usr/local/datax/job/mssqltomssql-published.json:

```json
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "sqlserverreader",
                    "parameter": {
                        "column": [
                            "Id",
                            "Version",
                            "Name",
                            "Content",
                            "Retries",
                            "Added",
                            "ExpiresAt",
                            "StatusName"
                        ],
                        "connection": [
                            {
                                "jdbcUrl": [
                                    "jdbc:sqlserver://192.168.3.40:1433;DatabaseName=es_test"
                                ],
                                "table": [
                                    "cap_published"
                                ]
                            }
                        ],
                        "password": "longfuchu",
                        "username": "sa"
                    }
                },
                "writer": {
                    "name": "sqlserverwriter",
                    "parameter": {
                        "column": [
                            "Id",
                            "Version",
                            "Name",
                            "Content",
                            "Retries",
                            "Added",
                            "ExpiresAt",
                            "StatusName"
                        ],
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:sqlserver://192.168.3.40:1433;DatabaseName=es_test2",
                                "table": [
                                    "cap_published"
                                ]
                            }
                        ],
                        "password": "longfuchu",
                        "postSql": [],
                        "preSql": [],
                        "username": "sa"
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": "5"
            }
        }
    }
}
```

Verification

```shell
python3 /usr/local/datax/bin/datax.py /usr/local/datax/job/mssqltomssql-published.json
```

![DataX job result](1665374534625.png)
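
Beyond the job summary above, the row counts on source and target can be compared directly. A sketch assuming sqlcmd is available on the DataX host:

```shell
# Row count in the source database
sqlcmd -S 192.168.3.40 -U sa -P 'longfuchu' -d es_test  -Q "SELECT COUNT(*) FROM dbo.cap_published"
# Row count in the target database -- should match after the job succeeds
sqlcmd -S 192.168.3.40 -U sa -P 'longfuchu' -d es_test2 -Q "SELECT COUNT(*) FROM dbo.cap_published"
```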

Configuration for Syncing the V_Blood_BOutItem Table

Table DDL:

```sql
USE [es_test2]
GO
/****** Object: Table [dbo].[V_Blood_BOutItem] Script Date: 2022-10-10 12:12:19 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE TABLE [dbo].[V_Blood_BOutItem](
    [id] [int] NOT NULL,
    [deptno] [int] NOT NULL,
    [deptname] [varchar](65) NULL,
    [bloodno] [varchar](20) NULL,
    [bloodname] [varchar](65) NULL,
    [boutcount] [float] NULL,
    [bloodunitname] [varchar](65) NULL,
    [bodate] [datetime] NULL,
    CONSTRAINT [PK_V_Blood_BOutItem] PRIMARY KEY CLUSTERED
    (
        [id] ASC
    ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]
GO
ALTER TABLE [dbo].[V_Blood_BOutItem] ADD CONSTRAINT [DF__V_Blood_BOut__id__239E4DCF] DEFAULT (NULL) FOR [id]
GO
ALTER TABLE [dbo].[V_Blood_BOutItem] ADD CONSTRAINT [DF__V_Blood_B__boutc__24927208] DEFAULT (NULL) FOR [boutcount]
GO
```

Job file /usr/local/datax/job/mssqltomssql-outitems.json:

```json
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "sqlserverreader",
                    "parameter": {
                        "column": [
                            "id",
                            "deptno",
                            "deptname",
                            "bloodno",
                            "bloodname",
                            "boutcount",
                            "bloodunitname",
                            "bodate"
                        ],
                        "connection": [
                            {
                                "jdbcUrl": [
                                    "jdbc:sqlserver://192.168.3.40:1433;DatabaseName=es_test"
                                ],
                                "table": [
                                    "V_Blood_BOutItem"
                                ]
                            }
                        ],
                        "password": "longfuchu",
                        "username": "sa"
                    }
                },
                "writer": {
                    "name": "sqlserverwriter",
                    "parameter": {
                        "column": [
                            "id",
                            "deptno",
                            "deptname",
                            "bloodno",
                            "bloodname",
                            "boutcount",
                            "bloodunitname",
                            "bodate"
                        ],
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:sqlserver://192.168.3.40:1433;DatabaseName=es_test2",
                                "table": [
                                    "V_Blood_BOutItem"
                                ]
                            }
                        ],
                        "password": "longfuchu",
                        "postSql": [],
                        "preSql": [],
                        "username": "sa"
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": "5"
            }
        }
    }
}
```

Verification

```shell
python3 /usr/local/datax/bin/datax.py /usr/local/datax/job/mssqltomssql-outitems.json
```

Execution result:

![DataX job result](1665375256428.png)