通过为Locust实现一个Prometheus exporter,将数据导入到Prometheus,然后由Grafana进行数据展示。
什么是Prometheus,请转prometheus
什么是Grafana,请转grafana

Exporter编写

  1. 需要拓展一个web端接口/export/prometheus ,让prometheus根据配置定时拉取Metric信息,这里需要用到Prometheus 官方提供的 client 库,prometheus_client,来生成符合 Prometheus 规范的 metrics 信息。

    1. pip install prometheus_client
  2. 编写locust版的Exporter,prometheus_exporter.py ```python

    # -*- coding: utf-8 -*-
    # file: prometheus_exporter
    # Author: ShunZhe
    # Date: 2021/7/31

    import six from itertools import chain

from flask import request, Response from locust import stats as locust_stats, runners as locust_runners from locust import User, task, events from prometheus_client import Metric, REGISTRY, exposition

This locustfile adds an external web endpoint to the locust master, and makes it serve as a prometheus exporter.

Runs it as a normal locustfile, then points prometheus to it.

locust -f prometheus_exporter.py --master

Lots of code taken from mbolek’s locust_exporter, thx mbolek!

class LocustCollector(object):
    """Custom prometheus_client collector that exposes Locust runner statistics.

    ``collect`` converts the runner's current statistics into ``Metric``
    families each time it is called.
    """

    registry = REGISTRY

    def __init__(self, environment, runner):
        """Store the Locust environment and runner for later scrapes.

        Args:
            environment: Locust environment object (stored, not read here).
            runner: Locust runner whose ``state``, ``stats`` and
                ``user_count`` are read by ``collect``.
        """
        self.environment = environment
        self.runner = runner

    def collect(self):
        """Yield one ``Metric`` family per exported Locust statistic.

        Nothing is yielded unless a runner exists and is in the spawning or
        running state.

        Yields:
            Metric: ``locust_user_count``, ``locust_errors``,
            ``locust_slave_count`` (master runner only),
            ``locust_fail_ratio``, ``locust_state`` and one
            ``locust_stats_<name>`` family per per-request statistic.
        """
        runner = self.runner
        active_states = (locust_runners.STATE_SPAWNING, locust_runners.STATE_RUNNING)
        # Collect metrics only while the runner is spawning or running.
        if not runner or runner.state not in active_states:
            return

        # One snapshot per request entry, followed by the aggregated total.
        entries = chain(locust_stats.sort_stats(runner.stats.entries), [runner.stats.total])
        stats = [
            {
                "method": s.method,
                "name": s.name,
                "num_requests": s.num_requests,
                "num_failures": s.num_failures,
                "avg_response_time": s.avg_response_time,
                # min_response_time may be falsy (e.g. None); export 0 then.
                "min_response_time": s.min_response_time or 0,
                "max_response_time": s.max_response_time,
                "current_rps": s.current_rps,
                "median_response_time": s.median_response_time,
                "ninetieth_response_time": s.get_response_time_percentile(0.9),
                # NOTE: a "current" 95th percentile is not exported here; per
                # the original author only the total entry supports
                # get_current_response_time_percentile().
                "avg_content_length": s.avg_content_length,
                "current_fail_per_sec": s.current_fail_per_sec,
            }
            for s in entries
        ]

        # NOTE(original author): StatsError.parse_error used by e.to_dict()
        # may only work with Python workers -- take care with other clients.
        errors = [e.to_dict() for e in runner.stats.errors.values()]

        metric = Metric('locust_user_count', 'Swarmed users', 'gauge')
        metric.add_sample('locust_user_count', value=runner.user_count, labels={})
        yield metric

        # Yielded even when there are no errors, so the family always exists.
        metric = Metric('locust_errors', 'Locust requests errors', 'gauge')
        for err in errors:
            metric.add_sample(
                'locust_errors',
                value=err['occurrences'],
                labels={'path': err['name'], 'method': err['method'], 'error': err['error']},
            )
        yield metric

        # The worker count is only exported when running as a master.
        if isinstance(runner, locust_runners.MasterRunner):
            metric = Metric('locust_slave_count', 'Locust number of slaves', 'gauge')
            metric.add_sample('locust_slave_count', value=len(runner.clients.values()), labels={})
            yield metric

        metric = Metric('locust_fail_ratio', 'Locust failure ratio', 'gauge')
        metric.add_sample('locust_fail_ratio', value=runner.stats.total.fail_ratio, labels={})
        yield metric

        # The state is carried in the label; the sample value is always 1.
        metric = Metric('locust_state', 'State of the locust swarm', 'gauge')
        metric.add_sample('locust_state', value=1, labels={'state': runner.state})
        yield metric

        stats_metrics = [
            'avg_content_length', 'avg_response_time', 'current_rps', 'current_fail_per_sec',
            'max_response_time', 'ninetieth_response_time', 'median_response_time',
            'min_response_time',
            'num_failures', 'num_requests',
        ]
        # NOTE(review): these two are typed as counters but their samples keep
        # the bare name (no ``_total`` suffix); left unchanged so existing
        # dashboards and queries keep working -- confirm before renaming.
        counter_metrics = ('num_requests', 'num_failures')

        for mtr in stats_metrics:
            mtype = 'counter' if mtr in counter_metrics else 'gauge'
            metric = Metric('locust_stats_' + mtr, 'Locust stats ' + mtr, mtype)
            for stat in stats:
                # The aggregated entry's method is None, so label it
                # "Aggregated" (Locust renamed Total to Aggregated in 0.12.1).
                is_aggregated = stat['name'] == 'Aggregated'
                metric.add_sample(
                    'locust_stats_' + mtr,
                    value=stat[mtr],
                    labels={
                        'path': stat['name'],
                        'method': 'Aggregated' if is_aggregated else stat['method'],
                    },
                )
            yield metric

@events.init.add_listener
def locust_init(environment, runner, **kwargs):
    """Add the ``/export/prometheus`` route and register the Locust collector.

    Registered as a listener on Locust's ``init`` event. Nothing is set up
    unless both ``environment.web_ui`` and ``runner`` are truthy.

    Args:
        environment: Locust environment; its ``web_ui.app`` receives the route.
        runner: Locust runner handed to ``LocustCollector``.
        **kwargs: Extra event arguments, ignored.
    """
    print("locust init event received")
    if environment.web_ui and runner:
        @environment.web_ui.app.route("/export/prometheus")
        def prometheus_exporter():
            """Serve the registry in the format negotiated from ``Accept``."""
            registry = REGISTRY
            encoder, content_type = exposition.choose_encoder(request.headers.get('Accept'))
            if 'name[]' in request.args:
                # getlist() returns every repeated name[] value as a list;
                # get() would return a single string, which
                # restricted_registry would then iterate character by character.
                registry = REGISTRY.restricted_registry(request.args.getlist('name[]'))
            body = encoder(registry)
            return Response(body, content_type=content_type)

        REGISTRY.register(LocustCollector(environment, runner))

class Dummy(User):
    """Minimal user class whose single task does nothing."""

    @task(20)
    def hello(self):
        """No-op task."""
        pass

  3. master启动prometheus_exporter.py文件

```yaml
version: '3'
networks:  # 自定义网络(默认桥接), 不使用links通信
  coll_network:
    driver: bridge
services:
  master:
    build: ./compose
    networks:
      - coll_network
    ports:
      - "8089:8089"
    volumes:
      - ./coll:/mnt/locust
    # command: locust -f test_op.py --master --web-host=0.0.0.0 --first_index 0 --last_index 1000
    command: locust -f prometheus_exporter.py --master --master-host=192.168.10.206
  worker_1:
    build: ./compose
    networks:
      - coll_network
    volumes:
      - ./coll:/mnt/locust
    command: locust -f test_op.py --worker --master-host master --first_index 0 --last_index 1000
    depends_on:
      - master
```
  1. # 开始压测并通过浏览器访问,可以看到数据采集的信息如下
  2. http://0.0.0.0:8089/export/prometheus
  3. # HELP python_gc_objects_collected_total Objects collected during gc
  4. # TYPE python_gc_objects_collected_total counter
  5. python_gc_objects_collected_total{generation="0"} 31281.0
  6. python_gc_objects_collected_total{generation="1"} 6190.0
  7. python_gc_objects_collected_total{generation="2"} 351.0
  8. # HELP python_gc_objects_uncollectable_total Uncollectable object found during GC
  9. # TYPE python_gc_objects_uncollectable_total counter
  10. python_gc_objects_uncollectable_total{generation="0"} 0.0
  11. python_gc_objects_uncollectable_total{generation="1"} 0.0
  12. python_gc_objects_uncollectable_total{generation="2"} 0.0
  13. # HELP python_gc_collections_total Number of times this generation was collected
  14. # TYPE python_gc_collections_total counter
  15. python_gc_collections_total{generation="0"} 165.0
  16. python_gc_collections_total{generation="1"} 14.0
  17. python_gc_collections_total{generation="2"} 1.0
  18. # HELP python_info Python platform information
  19. # TYPE python_info gauge
  20. python_info{implementation="CPython",major="3",minor="7",patchlevel="11",version="3.7.11"} 1.0
  21. # HELP process_virtual_memory_bytes Virtual memory size in bytes.
  22. # TYPE process_virtual_memory_bytes gauge
  23. process_virtual_memory_bytes 2.83045888e+08
  24. # HELP process_resident_memory_bytes Resident memory size in bytes.
  25. # TYPE process_resident_memory_bytes gauge
  26. process_resident_memory_bytes 4.9606656e+07
  27. # HELP process_start_time_seconds Start time of the process since unix epoch in seconds.
  28. # TYPE process_start_time_seconds gauge
  29. process_start_time_seconds 1.62781476304e+09
  30. # HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
  31. # TYPE process_cpu_seconds_total counter
  32. process_cpu_seconds_total 1.3900000000000001
  33. # HELP process_open_fds Number of open file descriptors.
  34. # TYPE process_open_fds gauge
  35. process_open_fds 26.0
  36. # HELP process_max_fds Maximum number of open file descriptors.
  37. # TYPE process_max_fds gauge
  38. process_max_fds 1.048576e+06
  39. # HELP locust_user_count Swarmed users
  40. # TYPE locust_user_count gauge
  41. locust_user_count 30.0
  42. # HELP locust_errors Locust requests errors
  43. # TYPE locust_errors gauge
  44. # HELP locust_slave_count Locust number of slaves
  45. # TYPE locust_slave_count gauge
  46. locust_slave_count 4.0
  47. # HELP locust_fail_ratio Locust failure ratio
  48. # TYPE locust_fail_ratio gauge
  49. locust_fail_ratio 0.0
  50. # HELP locust_state State of the locust swarm
  51. # TYPE locust_state gauge
  52. locust_state{state="spawning"} 1.0
  53. # HELP locust_stats_avg_content_length Locust stats avg_content_length
  54. # TYPE locust_stats_avg_content_length gauge
  55. locust_stats_avg_content_length{method="接收数据",path="0 open"} 114.0
  56. locust_stats_avg_content_length{method="接收数据",path="3 heartbeat"} 1.0
  57. locust_stats_avg_content_length{method="接收数据",path="40 message ok"} 2.0
  58. locust_stats_avg_content_length{method="接收数据",path="42 join_ack"} 8961.85
  59. locust_stats_avg_content_length{method="接收数据",path="42 operation_ack"} 69.0
  60. locust_stats_avg_content_length{method="接收数据",path="42 set_doc_ack"} 28.0
  61. locust_stats_avg_content_length{method="接收数据",path="42 svr_user_status_change"} 154.0
  62. locust_stats_avg_content_length{method="Aggregated",path="Aggregated"} 435.48864105741427
  63. # HELP locust_stats_avg_response_time Locust stats avg_response_time
  64. # TYPE locust_stats_avg_response_time gauge
  65. locust_stats_avg_response_time{method="接收数据",path="0 open"} 0.41
  66. locust_stats_avg_response_time{method="接收数据",path="3 heartbeat"} 38.403225806451616
  67. locust_stats_avg_response_time{method="接收数据",path="40 message ok"} 0.92
  68. locust_stats_avg_response_time{method="接收数据",path="42 join_ack"} 76.88
  69. locust_stats_avg_response_time{method="接收数据",path="42 operation_ack"} 44.30930607853685
  70. locust_stats_avg_response_time{method="接收数据",path="42 set_doc_ack"} 57.24
  71. locust_stats_avg_response_time{method="接收数据",path="42 svr_user_status_change"} 77.93
  72. locust_stats_avg_response_time{method="Aggregated",path="Aggregated"} 43.82073523337464
  73. # HELP locust_stats_current_rps Locust stats current_rps
  74. # TYPE locust_stats_current_rps gauge
  75. locust_stats_current_rps{method="接收数据",path="0 open"} 0.0
  76. locust_stats_current_rps{method="接收数据",path="3 heartbeat"} 0.7
  77. locust_stats_current_rps{method="接收数据",path="40 message ok"} 0.0
  78. locust_stats_current_rps{method="接收数据",path="42 join_ack"} 0.0
  79. locust_stats_current_rps{method="接收数据",path="42 operation_ack"} 11.7
  80. locust_stats_current_rps{method="接收数据",path="42 set_doc_ack"} 0.0
  81. locust_stats_current_rps{method="接收数据",path="42 svr_user_status_change"} 0.0
  82. locust_stats_current_rps{method="Aggregated",path="Aggregated"} 12.4
  83. # HELP locust_stats_current_fail_per_sec Locust stats current_fail_per_sec
  84. # TYPE locust_stats_current_fail_per_sec gauge
  85. locust_stats_current_fail_per_sec{method="接收数据",path="0 open"} 0.0
  86. locust_stats_current_fail_per_sec{method="接收数据",path="3 heartbeat"} 0.0
  87. locust_stats_current_fail_per_sec{method="接收数据",path="40 message ok"} 0.0
  88. locust_stats_current_fail_per_sec{method="接收数据",path="42 join_ack"} 0.0
  89. locust_stats_current_fail_per_sec{method="接收数据",path="42 operation_ack"} 0.0
  90. locust_stats_current_fail_per_sec{method="接收数据",path="42 set_doc_ack"} 0.0
  91. locust_stats_current_fail_per_sec{method="接收数据",path="42 svr_user_status_change"} 0.0
  92. locust_stats_current_fail_per_sec{method="Aggregated",path="Aggregated"} 0.0
  93. # HELP locust_stats_max_response_time Locust stats max_response_time
  94. # TYPE locust_stats_max_response_time gauge
  95. locust_stats_max_response_time{method="接收数据",path="0 open"} 2.0
  96. locust_stats_max_response_time{method="接收数据",path="3 heartbeat"} 159.0
  97. locust_stats_max_response_time{method="接收数据",path="40 message ok"} 5.0
  98. locust_stats_max_response_time{method="接收数据",path="42 join_ack"} 184.0
  99. locust_stats_max_response_time{method="接收数据",path="42 operation_ack"} 317.0
  100. locust_stats_max_response_time{method="接收数据",path="42 set_doc_ack"} 280.0
  101. locust_stats_max_response_time{method="接收数据",path="42 svr_user_status_change"} 185.0
  102. locust_stats_max_response_time{method="Aggregated",path="Aggregated"} 317.0
  103. # HELP locust_stats_ninetieth_response_time Locust stats ninetieth_response_time
  104. # TYPE locust_stats_ninetieth_response_time gauge
  105. locust_stats_ninetieth_response_time{method="接收数据",path="0 open"} 1.0
  106. locust_stats_ninetieth_response_time{method="接收数据",path="3 heartbeat"} 42.0
  107. locust_stats_ninetieth_response_time{method="接收数据",path="40 message ok"} 1.0
  108. locust_stats_ninetieth_response_time{method="接收数据",path="42 join_ack"} 150.0
  109. locust_stats_ninetieth_response_time{method="接收数据",path="42 operation_ack"} 51.0
  110. locust_stats_ninetieth_response_time{method="接收数据",path="42 set_doc_ack"} 67.0
  111. locust_stats_ninetieth_response_time{method="接收数据",path="42 svr_user_status_change"} 150.0
  112. locust_stats_ninetieth_response_time{method="Aggregated",path="Aggregated"} 60.0
  113. # HELP locust_stats_median_response_time Locust stats median_response_time
  114. # TYPE locust_stats_median_response_time gauge
  115. locust_stats_median_response_time{method="接收数据",path="0 open"} 0.0
  116. locust_stats_median_response_time{method="接收数据",path="3 heartbeat"} 34.0
  117. locust_stats_median_response_time{method="接收数据",path="40 message ok"} 1.0
  118. locust_stats_median_response_time{method="接收数据",path="42 join_ack"} 61.0
  119. locust_stats_median_response_time{method="接收数据",path="42 operation_ack"} 40.0
  120. locust_stats_median_response_time{method="接收数据",path="42 set_doc_ack"} 50.0
  121. locust_stats_median_response_time{method="接收数据",path="42 svr_user_status_change"} 62.0
  122. locust_stats_median_response_time{method="Aggregated",path="Aggregated"} 40.0
  123. # HELP locust_stats_min_response_time Locust stats min_response_time
  124. # TYPE locust_stats_min_response_time gauge
  125. locust_stats_min_response_time{method="接收数据",path="0 open"} 0.0
  126. locust_stats_min_response_time{method="接收数据",path="3 heartbeat"} 27.0
  127. locust_stats_min_response_time{method="接收数据",path="40 message ok"} 0.0
  128. locust_stats_min_response_time{method="接收数据",path="42 join_ack"} 50.0
  129. locust_stats_min_response_time{method="接收数据",path="42 operation_ack"} 31.0
  130. locust_stats_min_response_time{method="接收数据",path="42 set_doc_ack"} 42.0
  131. locust_stats_min_response_time{method="接收数据",path="42 svr_user_status_change"} 50.0
  132. locust_stats_min_response_time{method="Aggregated",path="Aggregated"} 0.0
  133. # HELP locust_stats_num_failures_total Locust stats num_failures
  134. # TYPE locust_stats_num_failures_total counter
  135. locust_stats_num_failures{method="接收数据",path="0 open"} 0.0
  136. locust_stats_num_failures{method="接收数据",path="3 heartbeat"} 0.0
  137. locust_stats_num_failures{method="接收数据",path="40 message ok"} 0.0
  138. locust_stats_num_failures{method="接收数据",path="42 join_ack"} 0.0
  139. locust_stats_num_failures{method="接收数据",path="42 operation_ack"} 0.0
  140. locust_stats_num_failures{method="接收数据",path="42 set_doc_ack"} 0.0
  141. locust_stats_num_failures{method="接收数据",path="42 svr_user_status_change"} 0.0
  142. locust_stats_num_failures{method="Aggregated",path="Aggregated"} 0.0
  143. # HELP locust_stats_num_requests_total Locust stats num_requests
  144. # TYPE locust_stats_num_requests_total counter
  145. locust_stats_num_requests{method="接收数据",path="0 open"} 100.0
  146. locust_stats_num_requests{method="接收数据",path="3 heartbeat"} 62.0
  147. locust_stats_num_requests{method="接收数据",path="40 message ok"} 100.0
  148. locust_stats_num_requests{method="接收数据",path="42 join_ack"} 100.0
  149. locust_stats_num_requests{method="接收数据",path="42 operation_ack"} 1859.0
  150. locust_stats_num_requests{method="接收数据",path="42 set_doc_ack"} 100.0
  151. locust_stats_num_requests{method="接收数据",path="42 svr_user_status_change"} 100.0
  152. locust_stats_num_requests{method="Aggregated",path="Aggregated"} 2421.0

Prometheus部署

  1. 部署Prometheus,拉取metric数据

    1. docker pull prom/prometheus
  2. 创建prometheus.yml配置文件(与docker-compose.yml同目录)

```yaml
global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  - job_name: prometheus
    static_configs:
      - targets: ['localhost:9090']
        labels:
          instance: prometheus
  - job_name: locust
    metrics_path: '/export/prometheus'
    static_configs:
      - targets: ['192.168.10.206:8089']  # 地址修改为实际地址
        labels:
          instance: locust
```
  3. 启动Prometheus,将9090端口映射出来

```bash
# 在prometheus.yml所在目录下执行命令
docker run -itd -p 9090:9090 -v $PWD/prometheus.yml:/etc/prometheus/prometheus.yml prom/prometheus
```

  4. 访问http://<locust_master_ip>:9090/targets查看数据收集是否正常

![image.png](https://cdn.nlark.com/yuque/0/2021/png/1170640/1627817023918-7622d855-4f8e-4fb6-b678-fe83b8d06349.png#clientId=u429fe8ed-2727-4&from=paste&height=551&id=u6a658126&margin=%5Bobject%20Object%5D&name=image.png&originHeight=1102&originWidth=3490&originalType=binary&ratio=1&size=377984&status=done&style=none&taskId=ucc435c47-7afe-4ffa-b7fe-793379e1c69&width=1745)

<a name="tFt5y"></a>
#### Grafana部署&配置

  1. 拉取grafana镜像

```bash
docker pull grafana/grafana
```

  2. 启动grafana服务

```bash
docker run -d --name grafana -p 3000:3000 grafana/grafana
```

  3. 访问http://<grafana_ip>:3000/进入grafana,默认账号和密码均为admin。

image.png

  1. 选择添加prometheus数据源

Configuration — Data sources — Add data source — 选择Prometheus
这里的URL设置为prometheus的地址:http://<prometheus_ip>:9090
image.png

  1. 导入DashBoard

通过Dashboard 找到合适的导入
➕ — import — load,这里导入的dashboard id是12081
效果展示:
image.png

附项目目录:
image.png
Dockerfile文件如下:

```dockerfile
FROM python:3.7
# 设置 python 环境变量
ENV PYTHONUNBUFFERED 1
ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn
COPY pip.conf /root/.pip/pip.conf
# 创建 locust 文件夹
RUN mkdir -p /mnt/locust
# 将 locust 文件夹设为工作目录
WORKDIR /mnt/locust
# 将当前目录加入到工作目录中(. 表示当前目录)
ADD . /mnt/locust
# 更新pip版本
RUN /usr/local/bin/python -m pip install --upgrade pip
# 利用 pip 安装依赖
RUN pip install -r requirements.txt
RUN pip install websocket
RUN pip install websocket-client
RUN pip3 install prometheus-client
```

docker-compose.yml文件如下:

```yaml
version: '3'
networks:  # 自定义网络(默认桥接), 不使用links通信
  coll_network:
    driver: bridge
services:
  master:
    build: ./compose
    networks:
      - coll_network
    ports:
      - "8089:8089"
    volumes:
      - ./coll:/mnt/locust
    # command: locust -f test_op.py --master --web-host=0.0.0.0 --first_index 0 --last_index 1000
    command: locust -f prometheus_exporter.py --master --master-host=192.168.10.206
  worker_1:
    build: ./compose
    networks:
      - coll_network
    volumes:
      - ./coll:/mnt/locust
    command: locust -f test_op.py --worker --master-host master --first_index 0 --last_index 1000
    depends_on:
      - master
  worker_2:
    build: ./compose
    networks:
      - coll_network
    volumes:
      - ./coll:/mnt/locust
    command: locust -f test_op.py --worker --master-host master --first_index 1000 --last_index 2000
    depends_on:
      - master
  worker_3:
    build: ./compose
    networks:
      - coll_network
    volumes:
      - ./coll:/mnt/locust
    command: locust -f test_op.py --worker --master-host master --first_index 2000 --last_index 3000
    depends_on:
      - master
  worker_4:
    build: ./compose
    networks:
      - coll_network
    volumes:
      - ./coll:/mnt/locust
    command: locust -f test_op.py --worker --master-host master --first_index 3000 --last_index 4000
    depends_on:
      - master
```