File

util/dbuffer.lua @ 13134:638f627e707f

util.datamanager: Add O(1) list indexing with on-disk index Index file contains offsets and lengths of each item() which allows seeking directly to each item and reading it without parsing the entire file. Also allows tricks like binary search, assuming items have some defined order. We take advantage of the 1-based indexing in tables to store a magic header in the 0 position, so that table index 1 ends up at file index 1.
author Kim Alvefur <zash@zash.se>
date Tue, 11 May 2021 02:09:56 +0200
parent 12975:d10957394a3c
line wrap: on
line source

local queue = require "prosody.util.queue";

local s_byte, s_sub = string.byte, string.sub;
local dbuffer_methods = {};
local dynamic_buffer_mt = { __name = "dbuffer", __index = dbuffer_methods };

function dbuffer_methods:write(data)
	if self.max_size and #data + self._length > self.max_size then
		return nil;
	end
	local ok = self.items:push(data);
	if not ok then
		self:collapse();
		ok = self.items:push(data);
	end
	if not ok then
		return nil;
	end
	self._length = self._length + #data;
	return true;
end

function dbuffer_methods:read_chunk(requested_bytes)
	local chunk, consumed = self.items:peek(), self.front_consumed;
	if not chunk then return; end
	local chunk_length = #chunk;
	local remaining_chunk_length = chunk_length - consumed;
	if not requested_bytes then
		requested_bytes = remaining_chunk_length;
	end
	if remaining_chunk_length <= requested_bytes then
		self.front_consumed = 0;
		self._length = self._length - remaining_chunk_length;
		self.items:pop();
		assert(#chunk:sub(consumed + 1, -1) == remaining_chunk_length);
		return chunk:sub(consumed + 1, -1), remaining_chunk_length;
	end
	local end_pos = consumed + requested_bytes;
	self.front_consumed = end_pos;
	self._length = self._length - requested_bytes;
	assert(#chunk:sub(consumed + 1, end_pos) == requested_bytes);
	return chunk:sub(consumed + 1, end_pos), requested_bytes;
end

function dbuffer_methods:read(requested_bytes)
	local chunks;

	if requested_bytes and requested_bytes > self._length then
		return nil;
	end

	local chunk, read_bytes = self:read_chunk(requested_bytes);
	if not requested_bytes then
		return chunk;
	elseif chunk then
		requested_bytes = requested_bytes - read_bytes;
		if requested_bytes == 0 then -- Already read everything we need
			return chunk;
		end
		chunks = {};
	else
		return nil;
	end

	-- Need to keep reading more chunks
	while chunk do
		table.insert(chunks, chunk);
		if requested_bytes > 0 then
			chunk, read_bytes = self:read_chunk(requested_bytes);
			requested_bytes = requested_bytes - read_bytes;
		else
			break;
		end
	end

	return table.concat(chunks);
end

-- Read to, and including, the specified character sequence (return nil if not found)
function dbuffer_methods:read_until(char)
	local buffer_pos = 0;
	for i, chunk in self.items:items() do
		local start = 1 + ((i == 1) and self.front_consumed or 0);
		local char_pos = chunk:find(char, start, true);
		if char_pos then
			return self:read(1 + buffer_pos + char_pos - start);
		end
		buffer_pos = buffer_pos + #chunk - (start - 1);
	end
	return nil;
end

function dbuffer_methods:discard(requested_bytes)
	if self._length == 0 then return true; end
	if not requested_bytes or requested_bytes >= self._length then
		self.front_consumed = 0;
		self._length = 0;
		for _ in self.items:consume() do end
		return true;
	end

	local chunk, read_bytes = self:read_chunk(requested_bytes);
	requested_bytes = requested_bytes - read_bytes;
	if requested_bytes == 0 then -- Already read everything we need
		return true;
	end

	while chunk do
		if requested_bytes > 0 then
			chunk, read_bytes = self:read_chunk(requested_bytes);
			requested_bytes = requested_bytes - read_bytes;
		else
			break;
		end
	end
	return true;
end

-- Normalize i, j into absolute offsets within the
-- front chunk (accounting for front_consumed), and
-- ensure there is enough data in the first chunk
-- to cover any subsequent :sub() or :byte() operation
function dbuffer_methods:_prep_sub(i, j)
	if j == nil then
		j = -1;
	end
	if j < 0 then
		j = self._length + (j+1);
	end
	if i < 0 then
		i = self._length + (i+1);
	end
	if i < 1 then
		i = 1;
	end
	if j > self._length then
		j = self._length;
	end
	if i > j then
		return nil;
	end

	self:collapse(j);

	if self.front_consumed > 0 then
		i = i + self.front_consumed;
		j = j + self.front_consumed;
	end

	return i, j;
end

function dbuffer_methods:sub(i, j)
	i, j = self:_prep_sub(i, j);
	if not i then
		return "";
	end
	return s_sub(self.items:peek(), i, j);
end

function dbuffer_methods:byte(i, j)
	i = i or 1;
	j = j or i;
	i, j = self:_prep_sub(i, j);
	if not i then
		return;
	end
	return s_byte(self.items:peek(), i, j);
end

function dbuffer_methods:length()
	return self._length;
end
dbuffer_methods.len = dbuffer_methods.length; -- strings have :len()
dynamic_buffer_mt.__len = dbuffer_methods.length; -- support # operator

function dbuffer_methods:collapse(bytes)
	bytes = bytes or self._length;

	local front_chunk = self.items:peek();

	if not front_chunk or #front_chunk - self.front_consumed >= bytes then
		return;
	end

	local front_chunks = { front_chunk:sub(self.front_consumed+1) };
	local front_bytes = #front_chunks[1];

	while front_bytes < bytes do
		self.items:pop();
		local chunk = self.items:peek();
		front_bytes = front_bytes + #chunk;
		table.insert(front_chunks, chunk);
	end
	self.items:replace(table.concat(front_chunks));
	self.front_consumed = 0;
end

local function new(max_size, max_chunks)
	if max_size and max_size <= 0 then
		return nil;
	end
	return setmetatable({
		front_consumed = 0;
		_length = 0;
		max_size = max_size;
		items = queue.new(max_chunks or 32);
	}, dynamic_buffer_mt);
end

return {
	new = new;
};