Software /
code /
prosody
File
util/datamanager.lua @ 13182:c48ae06e24d6
util.datamanager: Fix indexing first item if not at the very start
If the first item does not start at position 0 then the index function
produces a phantom first entry covering position zero until where the
real first item starts. When using the index, this would make it either
appear as the first item was missing or cause an off-by-one issue with
remaining items.
author | Kim Alvefur <zash@zash.se> |
---|---|
date | Mon, 10 Jul 2023 17:19:05 +0200 |
parent | 13181:87487056bccb |
child | 13183:33b114fbb5de |
line wrap: on
line source
-- Prosody IM -- Copyright (C) 2008-2010 Matthew Wild -- Copyright (C) 2008-2010 Waqas Hussain -- -- This project is MIT/X11 licensed. Please see the -- COPYING file in the source package for more information. -- local string = string; local format = string.format; local setmetatable = setmetatable; local ipairs = ipairs; local char = string.char; local pcall = pcall; local log = require "prosody.util.logger".init("datamanager"); local io_open = io.open; local os_remove = os.remove; local os_rename = os.rename; local tonumber = tonumber; local floor = math.floor; local next = next; local type = type; local t_insert = table.insert; local t_concat = table.concat; local envloadfile = require"prosody.util.envload".envloadfile; local envload = require"prosody.util.envload".envload; local serialize = require "prosody.util.serialization".serialize; local lfs = require "lfs"; -- Extract directory separator from package.config (an undocumented string that comes with lua) local path_separator = assert ( package.config:match ( "^([^\n]+)" ) , "package.config not in standard form" ) local prosody = prosody; local raw_mkdir = lfs.mkdir; local atomic_append; local ENOENT = 2; pcall(function() local pposix = require "prosody.util.pposix"; raw_mkdir = pposix.mkdir or raw_mkdir; -- Doesn't trample on umask atomic_append = pposix.atomic_append; ENOENT = pposix.ENOENT or ENOENT; end); local _ENV = nil; -- luacheck: std none ---- utils ----- local encode, decode, store_encode; do local urlcodes = setmetatable({}, { __index = function (t, k) t[k] = char(tonumber(k, 16)); return t[k]; end }); decode = function (s) return s and (s:gsub("%%(%x%x)", urlcodes)); end encode = function (s) return s and (s:gsub("%W", function (c) return format("%%%02x", c:byte()); end)); end -- Special encode function for store names, which historically were unencoded. -- All currently known stores use a-z and underscore, so this one preserves underscores. store_encode = function (s) return s and (s:gsub("[^_%w]", function (c) return format("%%%02x", c:byte()); end)); end end if not atomic_append then function atomic_append(f, data) local pos = f:seek(); if not f:write(data) or not f:flush() then f:seek("set", pos); f:write((" "):rep(#data)); f:flush(); return nil, "write-failed"; end return true; end end local _mkdir = {}; local function mkdir(path) path = path:gsub("/", path_separator); -- TODO as an optimization, do this during path creation rather than here if not _mkdir[path] then raw_mkdir(path); _mkdir[path] = true; end return path; end local data_path = (prosody and prosody.paths and prosody.paths.data) or "."; local callbacks = {}; ------- API ------------- local function set_data_path(path) log("debug", "Setting data path to: %s", path); data_path = path; end local function callback(username, host, datastore, data) for _, f in ipairs(callbacks) do username, host, datastore, data = f(username, host, datastore, data); if username == false then break; end end return username, host, datastore, data; end local function add_callback(func) if not callbacks[func] then -- Would you really want to set the same callback more than once? callbacks[func] = true; callbacks[#callbacks+1] = func; return true; end end local function remove_callback(func) if callbacks[func] then for i, f in ipairs(callbacks) do if f == func then callbacks[i] = nil; callbacks[f] = nil; return true; end end end end local function getpath(username, host, datastore, ext, create) ext = ext or "dat"; host = (host and encode(host)) or "_global"; username = username and encode(username); datastore = store_encode(datastore); if username then if create then mkdir(mkdir(mkdir(data_path).."/"..host).."/"..datastore); end return format("%s/%s/%s/%s.%s", data_path, host, datastore, username, ext); else if create then mkdir(mkdir(data_path).."/"..host); end return format("%s/%s/%s.%s", data_path, host, datastore, ext); end end local function load(username, host, datastore) local data, err, errno = envloadfile(getpath(username, host, datastore), {}); if not data then if errno == ENOENT then -- No such file, ok to ignore return nil; end log("error", "Failed to load %s storage ('%s') for user: %s@%s", datastore, err, username or "nil", host or "nil"); return nil, "Error reading storage"; end local success, ret = pcall(data); if not success then log("error", "Unable to load %s storage ('%s') for user: %s@%s", datastore, ret, username or "nil", host or "nil"); return nil, "Error reading storage"; end return ret; end local function atomic_store(filename, data) local scratch = filename.."~"; local f, ok, msg, errno; -- luacheck: ignore errno -- TODO return util.error with code=errno? f, msg, errno = io_open(scratch, "w"); if not f then return nil, msg; end ok, msg = f:write(data); if not ok then f:close(); os_remove(scratch); return nil, msg; end ok, msg = f:close(); if not ok then os_remove(scratch); return nil, msg; end return os_rename(scratch, filename); end if prosody and prosody.platform ~= "posix" then -- os.rename does not overwrite existing files on Windows -- TODO We could use Transactional NTFS on Vista and above function atomic_store(filename, data) local f, err = io_open(filename, "w"); if not f then return f, err; end local ok, msg = f:write(data); if not ok then f:close(); return ok, msg; end return f:close(); end end local function store(username, host, datastore, data) if not data then data = {}; end username, host, datastore, data = callback(username, host, datastore, data); if username == false then return true; -- Don't save this data at all end -- save the datastore local d = "return " .. serialize(data) .. ";\n"; local mkdir_cache_cleared; repeat local ok, msg = atomic_store(getpath(username, host, datastore, nil, true), d); if not ok then if not mkdir_cache_cleared then -- We may need to recreate a removed directory _mkdir = {}; mkdir_cache_cleared = true; else log("error", "Unable to write to %s storage ('%s') for user: %s@%s", datastore, msg, username or "nil", host or "nil"); return nil, "Error saving to storage"; end end if next(data) == nil then -- try to delete empty datastore log("debug", "Removing empty %s datastore for user %s@%s", datastore, username or "nil", host or "nil"); os_remove(getpath(username, host, datastore)); end -- we write data even when we are deleting because lua doesn't have a -- platform independent way of checking for nonexisting files until ok; return true; end -- Append a blob of data to a file local function append(username, host, datastore, ext, data) if type(data) ~= "string" then return; end local filename = getpath(username, host, datastore, ext, true); local f = io_open(filename, "r+"); if not f then return atomic_store(filename, data); -- File did probably not exist, let's create it end local pos = f:seek("end"); local ok, msg = atomic_append(f, data); if not ok then f:close(); return ok, msg, "write"; end ok, msg = f:close(); if not ok then return ok, msg, "close"; end return true, pos; end local index_fmt, index_item_size, index_magic; if string.packsize then index_fmt = "T"; -- offset to the end of the item, length can be derived from two index items index_item_size = string.packsize(index_fmt); index_magic = string.pack(index_fmt, 7767639 + 1); -- Magic string: T9 for "prosody", version number end local function list_append(username, host, datastore, data) if not data then return; end if callback(username, host, datastore) == false then return true; end -- save the datastore data = "item(" .. serialize(data) .. ");\n"; local ok, msg, where = append(username, host, datastore, "list", data); if not ok then log("error", "Unable to write to %s storage ('%s' in %s) for user: %s@%s", datastore, msg, where, username or "nil", host or "nil"); return ok, msg; end if string.packsize then local offset = type(msg) == "number" and msg or 0; local index_entry = string.pack(index_fmt, offset + #data); if offset == 0 then index_entry = index_magic .. index_entry; end local ok, off = append(username, host, datastore, "lidx", index_entry); off = off or 0; -- If this was the first item, then both the data and index offsets should -- be zero, otherwise there's some kind of mismatch and we should drop the -- index and recreate it from scratch -- TODO Actually rebuild the index in this case? if not ok or (off == 0 and offset ~= 0) or (off ~= 0 and offset == 0) then os_remove(getpath(username, host, datastore, "lidx")); end end return true; end local function list_store(username, host, datastore, data) if not data then data = {}; end if callback(username, host, datastore) == false then return true; end -- save the datastore local d = {}; for i, item in ipairs(data) do d[i] = "item(" .. serialize(item) .. ");\n"; end os_remove(getpath(username, host, datastore, "lidx")); local ok, msg = atomic_store(getpath(username, host, datastore, "list", true), t_concat(d)); if not ok then log("error", "Unable to write to %s storage ('%s') for user: %s@%s", datastore, msg, username or "nil", host or "nil"); return; end if next(data) == nil then -- try to delete empty datastore log("debug", "Removing empty %s datastore for user %s@%s", datastore, username or "nil", host or "nil"); os_remove(getpath(username, host, datastore, "list")); end -- we write data even when we are deleting because lua doesn't have a -- platform independent way of checking for nonexisting files return true; end local function build_list_index(username, host, datastore, items) log("debug", "Building index for (%s@%s/%s)", username, host, datastore); local filename = getpath(username, host, datastore, "list"); local fh, err, errno = io_open(filename); if not fh then return fh, err, errno; end local prev_pos = 0; -- position before reading local last_item_start = nil; if items and items[1] then local last_item = items[#items]; last_item_start = fh:seek("set", last_item.start + last_item.length); else items = {}; end for line in fh:lines() do if line:sub(1, 4) == "item" then if prev_pos ~= 0 and last_item_start then t_insert(items, { start = last_item_start; length = prev_pos - last_item_start }); end last_item_start = prev_pos end -- seek position is at the start of the next line within each loop iteration -- so we need to collect the "current" position at the end of the previous prev_pos = fh:seek() end if prev_pos ~= 0 then t_insert(items, { start = last_item_start; length = prev_pos - last_item_start }); end return items; end local function store_list_index(username, host, datastore, index) local data = { index_magic }; for i, v in ipairs(index) do data[i + 1] = string.pack(index_fmt, v.start + v.length); end local filename = getpath(username, host, datastore, "lidx"); return atomic_store(filename, t_concat(data)); end local index_mt = { __index = function(t, i) if type(i) ~= "number" or i % 1 ~= 0 or i < 0 then return end if i <= 0 then return 0 end local fh = t.file; local pos = (i - 1) * index_item_size; if fh:seek("set", pos) ~= pos then return nil end local data = fh:read(index_item_size * 2); if not data or #data ~= index_item_size * 2 then return nil end local start, next_pos = string.unpack(index_fmt .. index_fmt, data); if pos == 0 then start = 0 end local length = next_pos - start; local v = { start = start; length = length }; t[i] = v; return v; end; __len = function(t) -- Account for both the header and the fence post error return floor(t.file:seek("end") / index_item_size) - 1; end; } local function get_list_index(username, host, datastore) log("debug", "Loading index for (%s@%s/%s)", username, host, datastore); local index_filename = getpath(username, host, datastore, "lidx"); local ih = io_open(index_filename); if ih then local magic = ih:read(#index_magic); if magic ~= index_magic then log("debug", "Index %q has wrong version number (got %q, expected %q), rebuilding...", index_filename, magic, index_magic); -- wrong version or something ih:close(); ih = nil; end end if ih then return setmetatable({ file = ih }, index_mt); end local index, err = build_list_index(username, host, datastore); if not index then return index, err end -- TODO How to handle failure to store the index? local dontcare = store_list_index(username, host, datastore, index); -- luacheck: ignore 211/dontcare return index; end local function list_load_one(fh, start, length) if fh:seek("set", start) ~= start then return nil end local raw_data = fh:read(length) if not raw_data or #raw_data ~= length then return end local item; local data, err, errno = envload(raw_data, "@list", { item = function(i) item = i; end; }); if not data then return data, err, errno end local success, ret = pcall(data); if not success then return success, ret; end return item; end local indexed_list_mt = { __index = function(t, i) if type(i) ~= "number" or i % 1 ~= 0 or i < 1 then return end local ix = t.index[i]; if not ix then return end local item = list_load_one(t.file, ix.start, ix.length); return item; end; __len = function(t) return #t.index; end; } local function list_load(username, host, datastore) local items = {}; local data, err, errno = envloadfile(getpath(username, host, datastore, "list"), {item = function(i) t_insert(items, i); end}); if not data then if errno == ENOENT then -- No such file, ok to ignore return nil; end log("error", "Failed to load %s storage ('%s') for user: %s@%s", datastore, err, username or "nil", host or "nil"); return nil, "Error reading storage"; end local success, ret = pcall(data); if not success then log("error", "Unable to load %s storage ('%s') for user: %s@%s", datastore, ret, username or "nil", host or "nil"); return nil, "Error reading storage"; end return items; end local function list_open(username, host, datastore) if not index_magic then log("debug", "Falling back from lazy loading to to loading full list for %s storage for user: %s@%s", datastore, username or "nil", host or "nil"); return list_load(username, host, datastore); end local filename = getpath(username, host, datastore, "list"); local file, err, errno = io_open(filename); if not file then if errno == ENOENT then return nil; end return file, err, errno; end local index, err = get_list_index(username, host, datastore); if not index then file:close() return index, err; end return setmetatable({ file = file; index = index }, indexed_list_mt); end local type_map = { keyval = "dat"; list = "list"; } local function users(host, store, typ) -- luacheck: ignore 431/store typ = "."..(type_map[typ or "keyval"] or typ); local store_dir = format("%s/%s/%s", data_path, encode(host), store_encode(store)); local mode, err = lfs.attributes(store_dir, "mode"); if not mode then return function() log("debug", "%s", err or (store_dir .. " does not exist")) end end local next, state = lfs.dir(store_dir); -- luacheck: ignore 431/next 431/state return function(state) -- luacheck: ignore 431/state for node in next, state do if node:sub(-#typ, -1) == typ then return decode(node:sub(1, -#typ-1)); end end end, state; end local function stores(username, host, typ) typ = type_map[typ or "keyval"]; local store_dir = format("%s/%s/", data_path, encode(host)); local mode, err = lfs.attributes(store_dir, "mode"); if not mode then return function() log("debug", "Could not iterate over stores in %s: %s", store_dir, err); end end local next, state = lfs.dir(store_dir); -- luacheck: ignore 431/next 431/state return function(state) -- luacheck: ignore 431/state for node in next, state do if not node:match"^%." then if username == true then if lfs.attributes(store_dir..node, "mode") == "directory" then return decode(node); end elseif username then local store_name = decode(node); if lfs.attributes(getpath(username, host, store_name, typ), "mode") then return store_name; end elseif lfs.attributes(node, "mode") == "file" then local file, ext = node:match("^(.*)%.([dalist]+)$"); if ext == typ then return decode(file) end end end end end, state; end local function do_remove(path) local ok, err = os_remove(path); if not ok and lfs.attributes(path, "mode") then return ok, err; end return true end local function purge(username, host) local host_dir = format("%s/%s/", data_path, encode(host)); local ok, iter, state, var = pcall(lfs.dir, host_dir); if not ok then return ok, iter; end local errs = {}; for file in iter, state, var do if lfs.attributes(host_dir..file, "mode") == "directory" then local store_name = decode(file); local ok, err = do_remove(getpath(username, host, store_name)); if not ok then errs[#errs+1] = err; end local ok, err = do_remove(getpath(username, host, store_name, "list")); if not ok then errs[#errs+1] = err; end end end return #errs == 0, t_concat(errs, ", "); end return { set_data_path = set_data_path; add_callback = add_callback; remove_callback = remove_callback; getpath = getpath; load = load; store = store; append_raw = append; store_raw = atomic_store; list_append = list_append; list_store = list_store; list_load = list_load; users = users; stores = stores; purge = purge; path_decode = decode; path_encode = encode; build_list_index = build_list_index; list_open = list_open; };