Software /
code /
prosody
File
util/debug.lua @ 11523:5f15ab7c6ae5
Statistics: Rewrite statistics backends to use OpenMetrics
The metric subsystem of Prosody has had some shortcomings from
the perspective of the current state-of-the-art in metric
observability.
The OpenMetrics standard [0] is a formalization of the data
model (and serialization format) of the well-known and
widely-used Prometheus [1] software stack.
The previous stats subsystem of Prosody did not map well to that
format (see e.g. [2] and [3]); the key reason is that it was
trying to do too much math on its own ([2]) while lacking
first-class support for "families" of metrics ([3]) and
structured metric metadata (despite the `extra` argument to
metrics, there was no standard way of representing common things
like "tags" or "labels").
Even though OpenMetrics has grown from the Prometheus world of
monitoring, it maps well to other popular monitoring stacks
such as:
- InfluxDB (labels can be mapped to tags and fields as necessary)
- Carbon/Graphite (labels can be attached to the metric name with
dot-separation)
- StatsD (see graphite when assuming that graphite is used as
backend, which is the default)
The util.statsd module has been ported to use the OpenMetrics
model as a proof of concept. An implementation which exposes
the util.statistics backend data as Prometheus metrics is
ready for publishing in prosody-modules (most likely as
mod_openmetrics_prometheus to avoid breaking existing 0.11
deployments).
At the same time, the previous measure()-based API had one major
advantage: It is really simple and easy to use without requiring
lots of knowledge about OpenMetrics or similar concepts. For that
reason as well as compatibility with existing code, it is preserved
and may even be extended in the future.
However, code relying on the `stats-updated` event as well as
`get_stats` from `statsmanager` will break because the data
model has changed completely; in case of `stats-updated`, the
code will simply not run (as the event was renamed in order
to avoid conflicts); the `get_stats` function has been removed
completely (so it will cause a traceback when it is attempted
to be used).
Note that the measure_*_event methods have been removed from
the module API. I was unable to find any uses or documentation
and thus deemed they should not be ported. Re-implementation is
possible when necessary.
[0]: https://openmetrics.io/
[1]: https://prometheus.io/
[2]: #959
[3]: #960
author | Jonas Schäfer <jonas@wielicki.name> |
---|---|
date | Sun, 18 Apr 2021 11:47:41 +0200 |
parent | 11177:37dc2a6144d1 |
child | 12250:e157e5c79daa |
line wrap: on
line source
-- Variables ending with these names will not -- have their values printed ('password' includes -- 'new_password', etc.) -- -- luacheck: ignore 122/debug local censored_names = { password = true; passwd = true; pass = true; pwd = true; }; local optimal_line_length = 65; local termcolours = require "util.termcolours"; local getstring = termcolours.getstring; local styles; do local _ = termcolours.getstyle; styles = { boundary_padding = _("bright"); filename = _("bright", "blue"); level_num = _("green"); funcname = _("yellow"); location = _("yellow"); }; end local function get_locals_table(thread, level) local locals = {}; for local_num = 1, math.huge do local name, value; if thread then name, value = debug.getlocal(thread, level, local_num); else name, value = debug.getlocal(level+1, local_num); end if not name then break; end table.insert(locals, { name = name, value = value }); end return locals; end local function get_upvalues_table(func) local upvalues = {}; if func then for upvalue_num = 1, math.huge do local name, value = debug.getupvalue(func, upvalue_num); if not name then break; end if name == "" then name = ("[%d]"):format(upvalue_num); end table.insert(upvalues, { name = name, value = value }); end end return upvalues; end local function string_from_var_table(var_table, max_line_len, indent_str) local var_string = {}; local col_pos = 0; max_line_len = max_line_len or math.huge; indent_str = "\n"..(indent_str or ""); for _, var in ipairs(var_table) do local name, value = var.name, var.value; if name:sub(1,1) ~= "(" then if type(value) == "string" then if censored_names[name:match("%a+$")] then value = "<hidden>"; else value = ("%q"):format(value); end else value = tostring(value); end if #value > max_line_len then value = value:sub(1, max_line_len-3).."…"; end local str = ("%s = %s"):format(name, tostring(value)); col_pos = col_pos + #str; if col_pos > max_line_len then table.insert(var_string, indent_str); col_pos = 0; end table.insert(var_string, str); end end if #var_string == 0 then return nil; else return "{ "..table.concat(var_string, ", "):gsub(indent_str..", ", indent_str).." }"; end end local function get_traceback_table(thread, start_level) local levels = {}; for level = start_level, math.huge do local info; if thread then info = debug.getinfo(thread, level); else info = debug.getinfo(level+1); end if not info then break; end levels[(level-start_level)+1] = { level = level; info = info; locals = get_locals_table(thread, level+1); upvalues = get_upvalues_table(info.func); }; end return levels; end local function build_source_boundary_marker(last_source_desc) local padding = string.rep("-", math.floor(((optimal_line_length - 6) - #last_source_desc)/2)); return getstring(styles.boundary_padding, "v"..padding).." ".. getstring(styles.filename, last_source_desc).." ".. getstring(styles.boundary_padding, padding..(#last_source_desc%2==0 and "-v" or "v ")); end local function _traceback(thread, message, level) -- Lua manual says: debug.traceback ([thread,] [message [, level]]) -- I fathom this to mean one of: -- () -- (thread) -- (message, level) -- (thread, message, level) if thread == nil then -- Defaults thread, message, level = coroutine.running(), message, level; elseif type(thread) == "string" then thread, message, level = coroutine.running(), thread, message; elseif type(thread) ~= "thread" then return nil; -- debug.traceback() does this end level = level or 0; message = message and (message.."\n") or ""; -- +3 counts for this function, and the pcall() and wrapper above us, the +1... I don't know. local levels = get_traceback_table(thread, level+(thread == nil and 4 or 0)); local last_source_desc; local lines = {}; for nlevel, current_level in ipairs(levels) do local info = current_level.info; local line; local func_type = info.namewhat.." "; local source_desc = (info.short_src == "[C]" and "C code") or info.short_src or "Unknown"; if func_type == " " then func_type = ""; end; if info.short_src == "[C]" then line = "[ C ] "..func_type.."C function "..getstring(styles.location, (info.name and ("%q"):format(info.name) or "(unknown name)")); elseif info.what == "main" then line = "[Lua] "..getstring(styles.location, info.short_src.." line "..info.currentline); else local name = info.name or " "; if name ~= " " then name = ("%q"):format(name); end if func_type == "global " or func_type == "local " then func_type = func_type.."function "; end line = "[Lua] "..getstring(styles.location, info.short_src.." line ".. info.currentline).." in "..func_type..getstring(styles.funcname, name).. " (defined on line "..info.linedefined..")"; end if source_desc ~= last_source_desc then -- Venturing into a new source, add marker for previous last_source_desc = source_desc; table.insert(lines, "\t "..build_source_boundary_marker(last_source_desc)); end nlevel = nlevel-1; table.insert(lines, "\t"..(nlevel==0 and ">" or " ")..getstring(styles.level_num, "("..nlevel..") ")..line); local npadding = (" "):rep(#tostring(nlevel)); if current_level.locals then local locals_str = string_from_var_table(current_level.locals, optimal_line_length, "\t "..npadding); if locals_str then table.insert(lines, "\t "..npadding.."Locals: "..locals_str); end end local upvalues_str = string_from_var_table(current_level.upvalues, optimal_line_length, "\t "..npadding); if upvalues_str then table.insert(lines, "\t "..npadding.."Upvals: "..upvalues_str); end end -- table.insert(lines, "\t "..build_source_boundary_marker(last_source_desc)); return message.."stack traceback:\n"..table.concat(lines, "\n"); end local function traceback(...) local ok, ret = pcall(_traceback, ...); if not ok then return "Error in error handling: "..ret; end return ret; end local function use() debug.traceback = traceback; end return { get_locals_table = get_locals_table; get_upvalues_table = get_upvalues_table; string_from_var_table = string_from_var_table; get_traceback_table = get_traceback_table; traceback = traceback; use = use; };