Software /
code /
prosody
Comparison
util/statsd.lua @ 11523:5f15ab7c6ae5
Statistics: Rewrite statistics backends to use OpenMetrics
The metric subsystem of Prosody has had some shortcomings from
the perspective of the current state-of-the-art in metric
observability.
The OpenMetrics standard [0] is a formalization of the data
model (and serialization format) of the well-known and
widely-used Prometheus [1] software stack.
The previous stats subsystem of Prosody did not map well to that
format (see e.g. [2] and [3]); the key reason is that it was
trying to do too much math on its own ([2]) while lacking
first-class support for "families" of metrics ([3]) and
structured metric metadata (despite the `extra` argument to
metrics, there was no standard way of representing common things
like "tags" or "labels").
Even though OpenMetrics has grown from the Prometheus world of
monitoring, it maps well to other popular monitoring stacks
such as:
- InfluxDB (labels can be mapped to tags and fields as necessary)
- Carbon/Graphite (labels can be attached to the metric name with
dot-separation)
- StatsD (see Graphite above, assuming that Graphite is used as the
backend, which is the default)
The util.statsd module has been ported to use the OpenMetrics
model as a proof of concept. An implementation which exposes
the util.statistics backend data as Prometheus metrics is
ready for publishing in prosody-modules (most likely as
mod_openmetrics_prometheus to avoid breaking existing 0.11
deployments).
At the same time, the previous measure()-based API had one major
advantage: It is really simple and easy to use without requiring
lots of knowledge about OpenMetrics or similar concepts. For that
reason as well as compatibility with existing code, it is preserved
and may even be extended in the future.
However, code relying on the `stats-updated` event as well as
`get_stats` from `statsmanager` will break because the data
model has changed completely; in case of `stats-updated`, the
code will simply not run (as the event was renamed in order
to avoid conflicts); the `get_stats` function has been removed
completely (so any attempt to call it will cause a
traceback).
Note that the measure_*_event methods have been removed from
the module API. I was unable to find any uses or documentation
and thus deemed they should not be ported. Re-implementation is
possible when necessary.
[0]: https://openmetrics.io/
[1]: https://prometheus.io/
[2]: #959
[3]: #960
author | Jonas Schäfer <jonas@wielicki.name> |
---|---|
date | Sun, 18 Apr 2021 11:47:41 +0200 |
parent | 10924:0c072dd69603 |
child | 12123:7ba686696250 |
comparison
equal
deleted
inserted
replaced
11522:5bd38d9197e1 | 11523:5f15ab7c6ae5 |
---|---|
1 local socket = require "socket"; | 1 local socket = require "socket"; |
2 | 2 local time = require "util.time".now; |
3 local time = require "util.time".now | 3 local array = require "util.array"; |
4 local t_concat = table.concat; | |
5 | |
6 local new_metric_registry = require "util.openmetrics".new_metric_registry; | |
7 local render_histogram_le = require "util.openmetrics".render_histogram_le; | |
8 | |
9 -- BEGIN of Metric implementations | |
10 | |
-- Gauges
--
-- A gauge keeps its most recent value in `value` and forwards every change
-- to the statsd client (`_impl`) as an absolute gauge reading.
local gauge_metric_mt = {}
gauge_metric_mt.__index = gauge_metric_mt

-- Sends the gauge's current value to the statsd client.
local function publish_gauge(gauge)
	gauge._impl:push_gauge(gauge._full_name, gauge.value)
end

-- Shared empty iterator: the statsd backend does not support iteration.
local function no_gauge_samples()
	return nil
end

-- Creates a gauge named `full_name` which reports through `impl`.
-- The gauge starts at 0; nothing is sent until the first change.
local function new_gauge_metric(full_name, impl)
	return setmetatable({
		_full_name = full_name;
		_impl = impl;
		value = 0;
	}, gauge_metric_mt)
end

-- Replaces the gauge value and reports it.
function gauge_metric_mt:set(value)
	self.value = value
	publish_gauge(self)
end

-- Adjusts the gauge value by `delta` and reports the resulting value.
function gauge_metric_mt:add(delta)
	self.value = self.value + delta
	publish_gauge(self)
end

-- Sets the gauge back to 0 and reports that.
function gauge_metric_mt:reset()
	self.value = 0
	publish_gauge(self)
end

-- Returns an iterator which yields no samples.
function gauge_metric_mt.iter_samples()
	return no_gauge_samples
end
46 | |
-- Counters
--
-- A counter tracks its running total in `value`, but only the change
-- (delta) is ever sent to the statsd client (`_impl`).
local counter_metric_mt = {}
counter_metric_mt.__index = counter_metric_mt

-- Sends `delta` for this counter to the statsd client.
local function publish_delta(counter, delta)
	counter._impl:push_counter_delta(counter._full_name, delta)
end

-- Shared empty iterator: the statsd backend does not support iteration.
local function no_counter_samples()
	return nil
end

-- Creates a counter named `full_name` which reports through `impl`.
local function new_counter_metric(full_name, impl)
	return setmetatable({
		_full_name = full_name,
		_impl = impl,
		value = 0,
	}, counter_metric_mt)
end

-- Sets the counter to an absolute total; the difference to the previously
-- stored total is what gets sent.
function counter_metric_mt:set(value)
	local change = value - self.value
	self.value = value
	publish_delta(self, change)
end

-- Increases the counter by `value` and sends that increment.
function counter_metric_mt:add(value)
	local previous = self.value or 0
	self.value = previous + value
	publish_delta(self, value)
end

-- Clears the locally stored total; nothing is sent.
function counter_metric_mt:reset()
	self.value = 0
end

-- Returns an iterator which yields no samples.
function counter_metric_mt.iter_samples()
	return no_counter_samples
end
82 | |
-- Histograms
--
-- A histogram is stored as a sequence of bucket records (array part of the
-- metric table, in the order given by `buckets`) plus a running sum and
-- count. Every observation is pushed to the statsd client (`_impl`) as
-- per-bucket counter deltas, a sum gauge and a count counter delta.
local histogram_metric_mt = {}
histogram_metric_mt.__index = histogram_metric_mt

-- Creates a histogram.
--   buckets:   sequence of numeric upper bounds, one per bucket
--   full_name: metric name; ".sum", ".count" and ".bucket.<le>" are appended
--   impl:      statsd client providing push_gauge/push_counter_delta
local function new_histogram_metric(buckets, full_name, impl)
	-- NOTE: even though the more or less proprietary dogstatsd has its own
	-- histogram implementation, we push the individual buckets in this statsd
	-- backend for both consistency and compatibility across statsd
	-- implementations.
	local metric = {
		_sum_name = full_name..".sum",
		_count_name = full_name..".count",
		_impl = impl,
		_created = time(),
		_sum = 0,
		_count = 0,
	}
	-- the order of buckets matters unfortunately, so we cannot directly use
	-- the thresholds as table keys
	for i, threshold in ipairs(buckets) do
		local threshold_s = render_histogram_le(threshold)
		metric[i] = {
			threshold = threshold,
			threshold_s = threshold_s,
			count = 0,
			-- dots separate hierarchy levels in statsd names, so they must
			-- not appear inside the rendered threshold
			_full_name = full_name..".bucket."..(threshold_s:gsub("%.", "_")),
		}
	end
	return setmetatable(metric, histogram_metric_mt)
end

-- Records one observation.
function histogram_metric_mt:sample(value)
	-- According to the I-D, values must be part of all buckets whose upper
	-- bound they do not exceed. The bound is inclusive ("le" = less than or
	-- equal), so a value exactly on a threshold belongs to that bucket.
	-- Buckets live in the array part, hence ipairs gives them in order.
	for _, bucket in ipairs(self) do
		if value <= bucket.threshold then
			bucket.count = bucket.count + 1
			self._impl:push_counter_delta(bucket._full_name, 1)
		end
	end
	self._sum = self._sum + value
	self._count = self._count + 1
	self._impl:push_gauge(self._sum_name, self._sum)
	self._impl:push_counter_delta(self._count_name, 1)
end

-- Returns an iterator which yields no samples.
function histogram_metric_mt.iter_samples()
	-- statsd backend does not support iteration.
	return function()
		return nil
	end
end

-- Clears all local state (creation time, sum, count, bucket counts) and
-- pushes the zeroed sum gauge.
function histogram_metric_mt:reset()
	self._created = time()
	self._count = 0
	self._sum = 0
	for _, bucket in ipairs(self) do
		bucket.count = 0
	end
	self._impl:push_gauge(self._sum_name, self._sum)
end
147 | |
-- Summaries
--
-- A summary keeps no local state: each observation is forwarded to the
-- statsd client (`_impl`) as two counter deltas, one for the sum and one
-- for the number of observations.
local summary_metric_mt = {}
summary_metric_mt.__index = summary_metric_mt

-- Shared empty iterator: the statsd backend does not support iteration.
local function no_summary_samples()
	return nil
end

-- Creates a summary; ".sum" and ".count" are appended to `full_name` to
-- form the two statsd metric names.
local function new_summary_metric(full_name, impl)
	local sum_name = full_name..".sum"
	local count_name = full_name..".count"
	return setmetatable({
		_sum_name = sum_name,
		_count_name = count_name,
		_impl = impl,
	}, summary_metric_mt)
end

-- Records one observation of `value`.
function summary_metric_mt:sample(value)
	local client = self._impl
	client:push_counter_delta(self._sum_name, value)
	client:push_counter_delta(self._count_name, 1)
end

-- Returns an iterator which yields no samples.
function summary_metric_mt.iter_samples()
	return no_summary_samples
end

-- Nothing to reset, as no state is kept locally.
function summary_metric_mt.reset()
end
176 | |
177 -- BEGIN of statsd client implementation | |
178 | |
-- Minimal statsd client. Instances are expected to carry:
--   sock:   object with a :send(string) method
--   prefix: string prepended to every metric name
-- While corked, gauge updates are collected (last value per name wins) and
-- only sent on uncork; counter deltas are always sent immediately.
local statsd_mt = {}
statsd_mt.__index = statsd_mt

-- Starts buffering gauge updates.
function statsd_mt:cork()
	self.corked = true
	self.cork_buffer = self.cork_buffer or {}
end

-- Stops buffering and sends all buffered gauge values.
function statsd_mt:uncork()
	self.corked = false
	self:_flush_cork_buffer()
end

-- Sends and clears every buffered gauge value.
function statsd_mt:_flush_cork_buffer()
	local buffer = self.cork_buffer
	if not buffer then
		-- uncork() without a preceding cork(): nothing was ever buffered
		return
	end
	for metric_name, value in pairs(buffer) do
		self:_send_gauge(metric_name, value)
		-- clearing an existing key during traversal is permitted by pairs/next
		buffer[metric_name] = nil
	end
end

-- Reports an absolute gauge value, buffering it while corked.
function statsd_mt:push_gauge(metric_name, value)
	if self.corked then
		self.cork_buffer[metric_name] = value
	else
		self:_send_gauge(metric_name, value)
	end
end

-- Sends "<prefix><name>:<value>|g".
function statsd_mt:_send_gauge(metric_name, value)
	self:_send(self.prefix..metric_name..":"..tostring(value).."|g")
end

-- Sends "<prefix><name>:<delta>|c"; not affected by corking.
function statsd_mt:push_counter_delta(metric_name, delta)
	self:_send(self.prefix..metric_name..":"..tostring(delta).."|c")
end

-- Writes one datagram payload to the socket and returns the socket's result.
function statsd_mt:_send(s)
	return self.sock:send(s)
end
219 | |
220 -- END of statsd client implementation | |
221 | |
-- Builds the dotted statsd metric name for a metric family and its labels.
--   family_name: string
--   labels:      optional sequence of strings appended after the family name
-- Dots inside any part are replaced by "_" so that "." only ever separates
-- parts. Returns exactly one value, the name.
local function build_metric_name(family_name, labels)
	local parts = array { family_name }
	if labels then
		parts:append(labels)
	end
	-- Join with "/" first so that dots belonging to a part can be told apart
	-- from separators, then turn the separators into dots.
	local joined = t_concat(parts, "/")
	-- gsub also returns the substitution count; assigning to a single local
	-- drops it so it is not leaked to callers as a second return value.
	local name = joined:gsub("%.", "_"):gsub("/", ".")
	return name
end
4 | 229 |
-- new(config): constructor of the statsd backend.
-- The rows below are a two-column comparison: the left column is revision
-- 11522 (removed code), the right column is revision 11523 (current code).
-- Both revisions:
--   * return nil plus an error message when `config` or
--     `config.statsd_server` is missing;
--   * create a UDP socket and set its peer to `config.statsd_server`,
--     using `config.statsd_port` or 8125;
--   * use `config.prefix` or "prosody", followed by ".", as metric prefix.
-- Revision 11522 returned a `methods` table of measure constructors
-- (amount, counter, rate, distribution, sizes, times).
-- Revision 11523 instead builds `impl` (sock, prefix, metatable statsd_mt),
-- a `backend` table with gauge/counter/histogram/summary constructors, stores
-- new_metric_registry(backend) in impl.metric_registry and returns `impl`.
-- NOTE(review): the results of socket.udp() and sock:setpeername() are not
-- checked here; presumably both can fail - confirm against LuaSocket docs.
-- NOTE(review): backend.summary passes `extra` to new_summary_metric, which
-- only declares (full_name, impl), so the argument is dropped.
5 local function new(config) | 230 local function new(config) |
6 if not config or not config.statsd_server then | 231 if not config or not config.statsd_server then |
7 return nil, "No statsd server specified in the config, please see https://prosody.im/doc/statistics"; | 232 return nil, "No statsd server specified in the config, please see https://prosody.im/doc/statistics"; |
8 end | 233 end |
10 local sock = socket.udp(); | 235 local sock = socket.udp(); |
11 sock:setpeername(config.statsd_server, config.statsd_port or 8125); | 236 sock:setpeername(config.statsd_server, config.statsd_port or 8125); |
12 | 237 |
13 local prefix = (config.prefix or "prosody").."."; | 238 local prefix = (config.prefix or "prosody").."."; |
14 | 239 |
15 local function send_metric(s) | 240 local impl = { |
16 return sock:send(prefix..s); | 241 metric_registry = nil; |
17 end | 242 sock = sock; |
18 | 243 prefix = prefix; |
19 local function send_gauge(name, amount, relative) | |
20 local s_amount = tostring(amount); | |
21 if relative and amount > 0 then | |
22 s_amount = "+"..s_amount; | |
23 end | |
24 return send_metric(name..":"..s_amount.."|g"); | |
25 end | |
26 | |
27 local function send_counter(name, amount) | |
28 return send_metric(name..":"..tostring(amount).."|c"); | |
29 end | |
30 | |
31 local function send_duration(name, duration) | |
32 return send_metric(name..":"..tostring(duration).."|ms"); | |
33 end | |
34 | |
35 local function send_histogram_sample(name, sample) | |
36 return send_metric(name..":"..tostring(sample).."|h"); | |
37 end | |
38 | |
39 local methods; | |
40 methods = { | |
41 amount = function (name, conf) | |
42 if conf and conf.initial then | |
43 send_gauge(name, conf.initial); | |
44 end | |
45 return function (new_v) send_gauge(name, new_v); end | |
46 end; | |
47 counter = function (name, conf) --luacheck: ignore 212/conf | |
48 return function (delta) | |
49 send_gauge(name, delta, true); | |
50 end; | |
51 end; | |
52 rate = function (name) | |
53 return function () | |
54 send_counter(name, 1); | |
55 end; | |
56 end; | |
57 distribution = function (name, conf) --luacheck: ignore 212/conf | |
58 return function (value) | |
59 send_histogram_sample(name, value); | |
60 end; | |
61 end; | |
62 sizes = function (name) | |
63 name = name.."_size"; | |
64 return function (value) | |
65 send_histogram_sample(name, value); | |
66 end; | |
67 end; | |
68 times = function (name) | |
69 return function () | |
70 local start_time = time(); | |
71 return function () | |
72 local end_time = time(); | |
73 local duration = end_time - start_time; | |
74 send_duration(name, duration*1000); | |
75 end | |
76 end; | |
77 end; | |
78 }; | 244 }; |
79 return methods; | 245 setmetatable(impl, statsd_mt) |
246 | |
247 local backend = { | |
248 gauge = function(family_name, labels) | |
249 return new_gauge_metric(build_metric_name(family_name, labels), impl) | |
250 end; | |
251 counter = function(family_name, labels) | |
252 return new_counter_metric(build_metric_name(family_name, labels), impl) | |
253 end; | |
254 histogram = function(buckets, family_name, labels) | |
255 return new_histogram_metric(buckets, build_metric_name(family_name, labels), impl) | |
256 end; | |
257 summary = function(family_name, labels, extra) | |
258 return new_summary_metric(build_metric_name(family_name, labels), impl, extra) | |
259 end; | |
260 }; | |
261 | |
262 impl.metric_registry = new_metric_registry(backend); | |
263 | |
264 return impl; | |
80 end | 265 end |
81 | 266 |
-- Module export: the only public entry point is `new`.
82 return { | 267 return { |
83 new = new; | 268 new = new; |
84 } | 269 } |