Changeset

10973:39991e40d1dc

util.dbuffer: dynamic string buffer. Similar to util.ringbuffer (and shares an almost identical API). Differences: (1) the size limit is optional and dynamic; (2) it does not allocate a fixed buffer of max_size bytes; (3) it focuses on simply storing references to existing string objects where possible, avoiding unnecessary allocations; (4) references are still stored in a ring buffer to enable use as a fast FIFO. The optional second parameter to new() provides the number of ring buffer segments. On Lua 5.2 on my laptop, a segment is ~19 bytes. If the ring buffer fills up, the next write will compact all strings into a single item.
author Matthew Wild <mwild1@gmail.com>
date Fri, 26 Jun 2020 16:41:31 +0100
parents 10972:b3773b1b90a1
children 10974:3b9d533da8fe
files spec/util_dbuffer_spec.lua util/dbuffer.lua util/queue.lua
diffstat 3 files changed, 273 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spec/util_dbuffer_spec.lua	Fri Jun 26 16:41:31 2020 +0100
@@ -0,0 +1,95 @@
+local dbuffer = require "util.dbuffer";
+describe("util.dbuffer", function ()
+	describe("#new", function ()
+		it("has a constructor", function ()
+			assert.Function(dbuffer.new);
+		end);
+		it("can be created", function ()
+			assert.truthy(dbuffer.new());
+		end);
+		it("won't create an empty buffer", function ()
+			assert.falsy(dbuffer.new(0));
+		end);
+		it("won't create a negatively sized buffer", function ()
+			assert.falsy(dbuffer.new(-1));
+		end);
+	end);
+	describe(":write", function ()
+		local b = dbuffer.new();
+		it("works", function ()
+			assert.truthy(b:write("hi"));
+		end);
+	end);
+
+	describe(":discard", function ()
+		local b = dbuffer.new();
+		it("works", function ()
+			assert.truthy(b:write("hello world"));
+			assert.truthy(b:discard(6));
+			assert.equal(5, #b);
+			assert.equal("world", b:read(5));
+		end);
+	end);
+
+	describe(":sub", function ()
+		-- Helper function to compare buffer:sub() with string:sub()
+		local s = "hello world";
+		local function test_sub(b, x, y)
+			local string_result, buffer_result = s:sub(x, y), b:sub(x, y);
+			assert.equals(string_result, buffer_result, ("buffer:sub(%d, %s) does not match string:sub()"):format(x, y and ("%d"):format(y) or "nil"));
+		end
+
+		it("works", function ()
+			local b = dbuffer.new();
+			assert.truthy(b:write("hello world"));
+			assert.equals("hello", b:sub(1, 5));
+		end);
+
+		it("supports optional end parameter", function ()
+			local b = dbuffer.new();
+			assert.truthy(b:write("hello world"));
+			assert.equals("hello world", b:sub(1));
+			assert.equals("world", b:sub(-5));
+		end);
+
+		it("is equivalent to string:sub", function ()
+			local b = dbuffer.new(11);
+			assert.truthy(b:write(s));
+			for i = -13, 13 do
+				for j = -13, 13 do
+					test_sub(b, i, j);
+				end
+			end
+		end);
+	end);
+
+	describe(":byte", function ()
+		-- Helper function to compare buffer:byte() with string:byte()
+		local s = "hello world"
+		local function test_byte(b, x, y)
+			local string_result, buffer_result = {s:byte(x, y)}, {b:byte(x, y)};
+			assert.same(string_result, buffer_result, ("buffer:byte(%d, %s) does not match string:byte()"):format(x, y and ("%d"):format(y) or "nil"));
+		end
+
+		it("is equivalent to string:byte", function ()
+			local b = dbuffer.new(11);
+			assert.truthy(b:write(s));
+			test_byte(b, 1);
+			test_byte(b, 3);
+			test_byte(b, -1);
+			test_byte(b, -3);
+			for i = -13, 13 do
+				for j = -13, 13 do
+					test_byte(b, i, j);
+				end
+			end
+		end);
+
+		it("works with characters > 127", function ()
+			local b = dbuffer.new();
+			b:write(string.char(0, 140));
+			local r = { b:byte(1, 2) };
+			assert.same({ 0, 140 }, r);
+		end);
+	end);
+end);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/util/dbuffer.lua	Fri Jun 26 16:41:31 2020 +0100
@@ -0,0 +1,171 @@
+local queue = require "util.queue";
+
+local dbuffer_methods = {};
+local dynamic_buffer_mt = { __index = dbuffer_methods };
+
+-- Append the string `data` to the end of the buffer.
+-- Returns true on success. Returns nil when the write would take the
+-- buffer past max_size (if one is set), or when the chunk queue rejects
+-- the data even after the existing chunks have been collapsed.
+function dbuffer_methods:write(data)
+	local limit = self.max_size;
+	if limit and #data + self._length > limit then
+		return nil;
+	end
+
+	local chunk_queue = self.items;
+	if not chunk_queue:push(data) then
+		-- The queue refused the item: merge what is already buffered,
+		-- then give the push one more try.
+		self:collapse();
+		if not chunk_queue:push(data) then
+			return nil;
+		end
+	end
+
+	self._length = self._length + #data;
+	return true;
+end
+
+-- Consume up to `requested_bytes` from the front chunk only.
+-- Returns the bytes taken and how many there were; this may be fewer than
+-- requested when the front chunk runs out first. Returns nothing when the
+-- queue has no front chunk.
+function dbuffer_methods:read_chunk(requested_bytes)
+	local front, already_read = self.items:peek(), self.front_consumed;
+	if not front then
+		return;
+	end
+
+	local unread = #front - already_read;
+	local first_byte = already_read + 1;
+
+	if requested_bytes < unread then
+		-- Only part of the front chunk is needed: remember how far we got
+		local last_byte = already_read + requested_bytes;
+		self.front_consumed = last_byte;
+		self._length = self._length - requested_bytes;
+		local piece = front:sub(first_byte, last_byte);
+		assert(#piece == requested_bytes);
+		return piece, requested_bytes;
+	end
+
+	-- The rest of the front chunk is wanted: drop it from the queue
+	self.front_consumed = 0;
+	self._length = self._length - unread;
+	self.items:pop();
+	local piece = front:sub(first_byte, -1);
+	assert(#piece == unread);
+	return piece, unread;
+end
+
+-- Remove and return exactly `requested_bytes` bytes from the front of the
+-- buffer as a single string. Returns nil when fewer bytes are buffered
+-- than requested, or when there is no chunk to read from.
+function dbuffer_methods:read(requested_bytes)
+	if requested_bytes > self._length then
+		return nil;
+	end
+
+	local first, first_length = self:read_chunk(requested_bytes);
+	if not first then
+		return nil;
+	end
+
+	local outstanding = requested_bytes - first_length;
+	if outstanding == 0 then
+		-- The front chunk alone satisfied the request; no concat needed
+		return first;
+	end
+
+	-- The request spans several chunks: gather the pieces and join them
+	local pieces, count = { first }, 1;
+	while outstanding > 0 do
+		local piece, piece_length = self:read_chunk(outstanding);
+		outstanding = outstanding - piece_length;
+		count = count + 1;
+		pieces[count] = piece;
+	end
+
+	return table.concat(pieces);
+end
+
+-- Drop `requested_bytes` bytes from the front of the buffer without
+-- returning them. Returns true on success, or nil when fewer bytes are
+-- buffered than requested or there is no chunk to consume.
+function dbuffer_methods:discard(requested_bytes)
+	if requested_bytes > self._length then
+		return nil;
+	end
+
+	local dropped, dropped_length = self:read_chunk(requested_bytes);
+	if not dropped then
+		return nil;
+	end
+
+	-- Keep consuming chunk by chunk until the whole request is covered
+	local outstanding = requested_bytes - dropped_length;
+	while outstanding > 0 do
+		local _, consumed_length = self:read_chunk(outstanding);
+		outstanding = outstanding - consumed_length;
+	end
+
+	return true;
+end
+
+-- Return the bytes between positions i and j (inclusive) of the unread
+-- data, using the same index conventions as string.sub(): negative
+-- indices count back from the end, j defaults to -1, and out-of-range
+-- positions are clamped. No data is removed from the buffer, although
+-- chunks at the front may be merged as a side effect.
+function dbuffer_methods:sub(i, j)
+	if j == nil then
+		j = -1;
+	end
+	if j < 0 then
+		j = self._length + (j+1);
+	end
+	if i < 0 then
+		i = self._length + (i+1);
+	end
+	if i < 1 then
+		i = 1;
+	end
+	if j > self._length then
+		j = self._length;
+	end
+	if i > j then
+		return "";
+	end
+
+	-- Make sure the first j unread bytes all live in the front chunk
+	self:collapse(j);
+
+	-- i and j count from the first *unread* byte, but the front chunk may
+	-- still hold bytes that were already consumed by a partial read or
+	-- discard. Shift both positions past that consumed prefix. (After a
+	-- real collapse front_consumed is 0, so the shift is then a no-op.)
+	local offset = self.front_consumed;
+	return self.items:peek():sub(offset + i, offset + j);
+end
+
+-- Return the numeric byte values of the buffered data between positions
+-- i and j, as multiple return values. i defaults to 1 and j defaults to i.
+-- The range is resolved by :sub(), so its index rules apply.
+function dbuffer_methods:byte(i, j)
+	local first = i or 1;
+	local last = j or first;
+	local slice = self:sub(first, last);
+	return slice:byte(1, -1);
+end
+
+-- Return the number of unread bytes currently held by the buffer.
+function dbuffer_methods.length(self)
+	return self._length;
+end
+-- Expose the same function as the __len metamethod so #buffer works
+dynamic_buffer_mt.__len = dbuffer_methods.length;
+
+-- Merge chunks at the front of the queue so that at least `bytes` unread
+-- bytes are held in the single front chunk. `bytes` defaults to (and is
+-- capped at) the total number of buffered bytes. Does nothing when the
+-- buffer is empty or the front chunk already holds enough unread data.
+function dbuffer_methods:collapse(bytes)
+	bytes = bytes or self._length;
+	-- Never try to gather more than is buffered: the loop below would
+	-- otherwise pop every chunk and then fail on a missing one.
+	if bytes > self._length then
+		bytes = self._length;
+	end
+
+	local front_chunk = self.items:peek();
+
+	-- An empty queue has no front chunk to measure or merge
+	if not front_chunk or #front_chunk - self.front_consumed >= bytes then
+		return;
+	end
+
+	-- Start from the unread tail of the front chunk, dropping the part
+	-- that has already been consumed
+	local front_chunks = { front_chunk:sub(self.front_consumed+1) };
+	local front_bytes = #front_chunks[1];
+
+	while front_bytes < bytes do
+		-- Remove the chunk just accounted for and pick up the next one;
+		-- the last chunk picked up stays in the queue and is overwritten
+		-- by replace() below.
+		self.items:pop();
+		local chunk = self.items:peek();
+		front_bytes = front_bytes + #chunk;
+		front_chunks[#front_chunks + 1] = chunk;
+	end
+	self.items:replace(table.concat(front_chunks));
+	self.front_consumed = 0;
+end
+
+-- Create a new dynamic buffer.
+-- max_size (optional) is the upper bound on buffered bytes; a value of
+-- zero or less is rejected and nil is returned instead of a buffer.
+-- max_chunks (optional, default 32) is the capacity handed to queue.new()
+-- for the chunk queue.
+local function new(max_size, max_chunks)
+	if max_size then
+		if max_size <= 0 then
+			return nil;
+		end
+	end
+
+	local buffer = {
+		items = queue.new(max_chunks or 32);
+		max_size = max_size;
+		_length = 0;
+		front_consumed = 0;
+	};
+	return setmetatable(buffer, dynamic_buffer_mt);
+end
+
+return {
+	new = new;
+};
--- a/util/queue.lua	Thu Jun 25 20:45:06 2020 +0200
+++ b/util/queue.lua	Fri Jun 26 16:41:31 2020 +0100
@@ -51,6 +51,13 @@
 			end
 			return t[tail];
 		end;
+		replace = function (self, data)
+			if items == 0 then
+				return self:push(data);
+			end
+			t[tail] = data;
+			return true;
+		end;
 		items = function (self)
 			return function (_, pos)
 				if pos >= items then