From d8294b2eb95858818a0ce1d5fe3e980aadf7ce53 Mon Sep 17 00:00:00 2001 From: Linus Nordberg Date: Thu, 11 Sep 2014 11:54:10 +0200 Subject: Another hashtree implementation, first cut. This one stores the tree in arrays, one per layer in the tree. It's implemented as gen_server. --- src/ht.erl | 329 ++++++++++++++++++++++++++++++++++--------------------------- src/ts.erl | 81 ++++++++++----- 2 files changed, 239 insertions(+), 171 deletions(-) diff --git a/src/ht.erl b/src/ht.erl index 5398483..5fa285c 100644 --- a/src/ht.erl +++ b/src/ht.erl @@ -8,125 +8,181 @@ %%% %%% [0] https://www.usenix.org/event/sec09/tech/full_papers/crosby.pdf %%% -%%% Useful terminology: -%%% E -- Entry, stored in leaf nodes. -%%% I(i,r) -- Inner node with index i, on layer r. -%%% A(v)(i,r) -- Hash of I(i,r) in tree of version v. -%%% d -- Depth of tree. -%%% -%%% Nodes are identified by their index i and layer r. -%%% I(i,r) has left child I(i,r-1) and right child I(i+2^(r-1),r-1). -%%% -%%% A version-n tree stores n+1 entries. -%%% In a version-n tree, I(i,r) is frozen when n >= i + 2^r - 1. +%%% Hashes of inner nodes and leaves are stored in arrays, one per +%%% layer with layer 0 being where the leaves are. The total number of +%%% arrays is equal to the depth of the tree. The depth of the tree is +%%% ceil(lg2(number of leaves)). +%%% Let {r,i} denote the hash with index i on layer r. The first leaf +%%% is {0,0}, second is {0,1} and n:th is {0,n-1}. +%%% The parent of {r,i} is {r+1,floor(i/2)} (not true bc of +%%% "placeholders", FIXME: doc) +%%% The sibling of {r,i} is {r,i+1} when i is even and {r,i-1} when i +%%% is odd. -%%% -%%% Data types: -%%% history_tree -%%% version (integer) -%%% store (tree storage handled by ts) -%%% -%%% Interface: -%%% create tree (-> tree) -%%% add entry (data -> tree) -%%% get hash of leaf or inner node (i, r -> hash) -%%% get inclusion proof for leaf i in version-n tree (i, n -> hashes) -%%% get consistency proof between trees of version n and m (n, m -> hashes) -%%% -module(ht). +-behaviour(gen_server). + +-export([size/0, add/1, tree_hash/0, tree_hash/1]). +-export([get_incl/2, get_cons/2]). +-export([mktree/1]). +-export([start_link/0, start_link/1, stop/0]). +-export([init/1, handle_call/3, terminate/2, handle_cast/2, handle_info/2, + code_change/3]). + -include("$CTROOT/plop/include/plop.hrl"). -include_lib("eunit/include/eunit.hrl"). -import(lists, [foreach/2, foldl/3, reverse/1]). --export_type([history_tree/0, inner/0]). --export([new/0, new/1, destroy/1, size/1, add/2]). --export([get_hash/3, get_incl/3, get_cons/3]). --export([tree_hash/1, tree_hash/2]). +%% Data types. +-record(tree, {version :: integer(), + evaluated :: integer(), + store :: ts:tree_store()}). +-type tree() :: #tree{}. %%%%%%%%%%%%%%%%%%%% %% Public interface. +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). +start_link(NEntries) -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [NEntries], []). +stop() -> + gen_server:call(?MODULE, stop). +size() -> + gen_server:call(?MODULE, size). +add(Entry) -> + gen_server:call(?MODULE, {add, Entry}). +tree_hash() -> + gen_server:call(?MODULE, tree_hash). +tree_hash(Version) -> + gen_server:call(?MODULE, {tree_hash, Version}). +get_incl(I, V) -> + gen_server:call(?MODULE, {inclusion, I, V}). +get_cons(V1, V2) -> + gen_server:call(?MODULE, {consistency, V1, V2}). + +%% gen_server callbacks +init([]) -> + {ok, new()}; +init(Args) -> + {ok, new(Args)}. +handle_cast(_Request, State) -> + {noreply, State}. +handle_info(_Info, State) -> + {noreply, State}. +code_change(_OldVersion, State, _Extra) -> + {ok, State}. +terminate(_Reason, _State) -> + ok. +handle_call(stop, _From, State) -> + {stop, normal, stopped, State}; +handle_call(size, _From, State) -> + {reply, State#tree.version + 1, State}; +handle_call({add, Entry}, _From, State) -> + {reply, ok, add(State, Entry)}; +handle_call(tree_hash, _From, State) -> + {NewState, Hash} = tree_hash(State, State#tree.version), + {reply, Hash, NewState}; +handle_call({tree_hash, Version}, _From, State) -> + {NewState, Hash} = tree_hash(State, Version), + {reply, Hash, NewState}; +handle_call({inclusion, _Index, _Version}, _From, State) -> + {reply, nyi, State}; +handle_call({consistency, _Version1, _Version2}, _From, State) -> + {reply, nyi, State}. %%%%%%%%%%%%%%%%%%%% -%% Data types. --record(history_tree, {version :: integer(), - store :: ts:tree_store()}). --type history_tree() :: #history_tree{}. +%% Private. +-spec get_hash(tree(), non_neg_integer(), tuple()) -> {tree(), binary()}. +get_hash(Tree, Version, IR) -> + NewTree = update(Tree, Version), + Hash = ts:retrieve_hash(NewTree#tree.store, IR), % TODO: Honour version! + %%io:format("DEBUG: get_hash(Tree=~p,~nVersion=~p,~nIR=~p):~nNewTree=~p~n", [Tree, Version, IR, NewTree]), + {NewTree, Hash}. --record(inner, {index :: non_neg_integer(), % 27bit integer => max 134e6 entries - layer :: non_neg_integer(), % 5bit integer - hash :: binary()}). --type inner() :: #inner{}. +-spec tree_hash(tree(), integer()) -> {tree(), binary()}. +tree_hash(Tree, -1) -> + {Tree, hash(<<"">>)}; +tree_hash(Tree, Version) -> + get_hash(Tree, Version, {0, depth(Tree) - 1}). -%%%%%%%%%%%%%%%%%%%% -%% Public interface. -size(#history_tree{version = V}) -> - V + 1. +%% @doc Add an entry but don't update the tree. +-spec add(tree(), binary()) -> tree(). +add(Tree = #tree{version = V, store = Store}, Entry) -> + NewVersion = V + 1, + LeafIndex = NewVersion, + LeafHash = mkleafhash(Entry), + Tree#tree{version = NewVersion, + store = ts:store(Store, {LeafIndex, 0}, LeafHash)}. --spec new() -> history_tree(). +-spec new() -> tree(). new() -> - #history_tree{version = -1, store = ts:new()}. + #tree{version = -1, + evaluated = -1, + store = ts:new()}. --spec new(non_neg_integer()) -> history_tree(). -new(Version) -> +-spec new(non_neg_integer()) -> tree(). +new([Version]) when is_integer(Version) -> foldl(fun(#mtl{entry = E}, Tree) -> D = (E#timestamped_entry.entry)#plop_entry.data, add(Tree, D) % Return value -> Tree in next invocation. - end, new(), db:get_by_index_sorted(0, Version)). - -destroy(Tree) -> - ts:delete(Tree#history_tree.store). + end, new(), db:get_by_index_sorted(0, Version)); +new([List]) when is_list(List) -> + foldl(fun(D, Tree) -> + add(Tree, D) % Return value -> Tree in next invocation. + end, new(), List). -%% @doc Which layers need to change when creating a version-n tree? -%% Layer 0 is where the new leaf is added and always needs -%% updating. Apart from 0, the layers with a number corresponding to -%% the "positions of the bits set in n" are the ones that need to be -%% touched. This assumes a somewhat unorthodox bit numbering where the -%% least significant bit has number 1 (rather than 0). -%% -%% For example: -%% A version 2 (0010) tree has had changes to layer 2 -%% A version 5 (0101) tree has had changes to layers 1 and 3 -%% A version 8 (1000) tree has had changes to layer 4 --spec add(history_tree(), binary()) -> history_tree(). -add(#history_tree{version = V, store = Store}, Entry) -> - NewVersion = V + 1, - LeafIndex = NewVersion, - LeafHash = mkleafhash(Entry), - LayerMap = reverse([B || <> <= binary:encode_unsigned(NewVersion)]), - #history_tree{version = NewVersion, - store = update(ts:store(Store, {LeafIndex, 0}, LeafHash), - 1, LayerMap, - #inner{index = LeafIndex, layer = 0, - hash = LeafHash})}. +%% @doc Calculate hashes in Tree up to and including leaf with index +%% equal to Version. Update Tree.evaluated to reflect the new state. +-spec update(tree(), non_neg_integer()) -> tree(). +update(Tree, 0) -> + %% A version 0 tree needs no updating. + Tree; +update(Tree = #tree{evaluated = E}, V) when E >= V -> + %% Evaluated enough already. Nothing to do. + Tree; +update(Tree = #tree{version = MaxV}, V) when V > MaxV -> + %% Asking for more than we've got. Do as much as possible. + update(Tree, MaxV); +update(Tree = #tree{evaluated = Evaluated}, Version) -> + NewTree = update_layer(Tree, 0, Evaluated + 1, Version), + NewTree#tree{evaluated = Version}. --spec tree_hash(history_tree()) -> binary(). -tree_hash(Tree) -> - tree_hash(Tree, Tree#history_tree.version). +-spec update_layer(tree(), non_neg_integer(), non_neg_integer(), + non_neg_integer()) -> tree(). +update_layer(Tree, _Layer, _ICur, 0) -> + Tree; +update_layer(Tree, Layer, ICur, ILast) -> + NewTree = Tree#tree{store = update_parent(Tree#tree.store, Layer, + strip_bits_bottom(ICur, 1), ILast)}, + %%io:format("DEBUG: update_layer(~p, ~p, ~p, ~p + update_layer(NewTree, Layer + 1, parent(ICur), parent(ILast)). --spec tree_hash(history_tree(), integer()) -> binary(). -tree_hash(_, -1) -> - hash(""); -tree_hash(Tree, Version) -> - get_hash(Tree, Version, {0, depth(Tree) - 1}). +-spec update_parent(ts:tree_store(), non_neg_integer(), non_neg_integer(), + non_neg_integer()) -> ts:tree_store(). +update_parent(S, Layer, I, ILast) when I >= ILast -> + case right_leaf_p(ILast) of + true -> S; + _ -> ts:append(S, Layer + 1, ts:retrieve_hash(S, {ILast, Layer})) + end; +update_parent(S, Layer, I, ILast) -> + %%io:format("DEBUG: update_parent(R=~p, I=~p, ILast=~p): parent(I)=~p~n", [Layer, I, ILast, parent(I)]), + S2 = ts:store(S, {parent(I), Layer + 1}, + mkinnerhash(ts:retrieve_hash(S, {I, Layer}), + ts:retrieve_hash(S, {I + 1, Layer}))), + update_parent(S2, Layer, I + 2, ILast). --spec get_hash(history_tree(), non_neg_integer(), tuple()) -> binary(). -get_hash(#history_tree{store = S}, _Version, IR) -> - %% TODO: Use Version! - {{_I, _R}, Hash} = ts:retrieve(S, IR), - Hash. +parent(I) -> + I bsr 1. -%-spec get_incl/3 -get_incl(_, _, _) -> - nyi. -%-spec get_cons/3 -get_cons(_, _, _) -> - nyi. +-spec right_leaf_p(integer()) -> boolean(). +right_leaf_p(Index) -> + case Index band 1 of + 1 -> true; + _ -> false + end. -%% Private. -%% -spec head(history_tree()) -> inner(). -%% head(#history_tree{store = Store}) -> -%% {{I, R}, Hash} = ts:retrieve(Store, {0, 0}), -%% #inner{index = I, layer = R, hash = Hash}. +strip_bits_bottom(N, Nbits) -> + (N bsr Nbits) bsl Nbits. %% @doc Return position of highest bit set, counting from the least %% significant bit, starting at 1. @@ -141,65 +197,23 @@ ffs([H|T], Acc) -> _ -> Acc end. -depth(#history_tree{version = -1}) -> +depth(#tree{version = -1}) -> 0; -depth(#history_tree{version = V}) -> +depth(#tree{version = V}) -> bitpos_first_set(V) + 1. --spec update(ts:tree_store(), pos_integer(), list(), inner()) -> ts:tree_store(). -update(Store, _, [], _) -> - Store; -update(Store, - CurrentLayer, [UpdateThisLayerP|LayerMap], - Child = #inner{index = ChildIndex, - layer = ChildLayer, - hash = ChildHash}) -> - case UpdateThisLayerP == 1 of - true -> - %% Create/update the parent of ChildHash at ChildIndex. - %% - %% Parent index is equal to child index with its r lowest - %% bits stripped off, where r is current layer. - %% - %% Other child is found by comparing index of parent and - %% known child. If they're the same, the known child is - %% the left child and its sibling is found on the same - %% layer. If they differ, the known child is the right - %% child and the left child is to be found one layer below - %% the parent, same index. - - Index = strip_low_bits(ChildIndex, CurrentLayer), - Hash = - case Index == ChildIndex of - true -> - {_SibIR, SibHash} = - ts:retrieve(Store, {Index + (1 bsl ChildLayer) - 1, - ChildLayer}), - mkinnerhash(ChildHash, SibHash); - _ -> - {_SibIR, SibHash} = - ts:retrieve(Store, {Index, CurrentLayer - 1}), - mkinnerhash(SibHash, ChildHash) - end, - update(ts:store(Store, {Index, CurrentLayer}, Hash), - CurrentLayer + 1, LayerMap, - #inner{index = Index, layer = CurrentLayer, hash = Hash}); - _ -> - update(Store, CurrentLayer + 1, LayerMap, Child) - end. - -strip_low_bits(N, Nbits) -> - (N bsr Nbits) bsl Nbits. - -hash(Data) -> - crypto:hash(sha256, Data). - +-spec mkleafhash(binary()) -> binary(). mkleafhash(Data) -> hash([<<"\x00">>, Data]). +-spec mkinnerhash(binary(), binary()) -> binary(). mkinnerhash(Hash1, Hash2) -> hash([<<"\x01">>, Hash1, Hash2]). +-spec hash(binary()) -> binary() | iolist(). +hash(Data) -> + crypto:hash(sha256, Data). + %%%%%%%%%%%%%%%%%%%% %% Testing ht. -define(TEST_VECTOR_LEAVES, @@ -214,14 +228,33 @@ mkinnerhash(Hash1, Hash2) -> "ddb89be403809e325750d3d263cd78929c2942b7942a34b77e122c9594a74c8c", "5dc9da79a70659a9ad559cb701ded9a2ab9d823aad2f4960cfe370eff4604328"]). -%% @doc Build trees using add/2 and mth/2 and compare the resulting +%% @doc Build tree using add/2 and mth/2 and compare the resulting %% tree hashes. +%% FIXME: Don't start and stop the server here! add_test() -> lists:foreach( fun(X) -> L = lists:sublist(?TEST_VECTOR_LEAVES, X), - ?assertEqual(mth(L), tree_hash(mktree(L))) - end, - lists:seq(1, length(?TEST_VECTOR_LEAVES))). + {ok, _Pid} = start_link(L), + mktree(L), + ?assertEqual(mth(L), tree_hash()), + stop() end, + random_entries(length(?TEST_VECTOR_LEAVES))). + +random_entries(N) -> + [V || {_, V} <- lists:sort( + [{random:uniform(N), E} || E <- lists:seq(1, N)])]. + +update_test_OFF() -> + mktree(?TEST_VECTOR_LEAVES), + lists:foreach( + fun(Test) -> + L = lists:sublist(?TEST_VECTOR_LEAVES, Test), + ?assertEqual(mth(L), + tree_hash(lists:nth(Test, ?TEST_VECTOR_HASHES))) end, + random_entries(length(?TEST_VECTOR_LEAVES))). + +%% verify_old_versions_test() -> +%% {ok, _Pid} = start_link(db:size() - 1). %%%%%%%%%%%%%%%%%%%% %% Testing the test helpers. diff --git a/src/ts.erl b/src/ts.erl index 345ed66..a4d8107 100644 --- a/src/ts.erl +++ b/src/ts.erl @@ -2,36 +2,71 @@ -module(ts). -include_lib("eunit/include/eunit.hrl"). -export_type([tree_store/0]). --export([new/0, delete/1, size/1, store/3, retrieve/2, retrieve_hash/2]). +-export([new/0, store/3, append/3, truncate/2, retrieve/2, retrieve_hash/2, delete/2]). -%% -record(tree_store, {warm :: ets:tid(), -%% frozen :: list()}). % [ets:tid()] --record(tree_store, {table :: ets:tid()}). +%% FIXME: keep the arrays in an array? or maybe an array of binaries? +%% hashes do have fixed lenght. +-record(tree_store, {arrays :: map()}). % Map of arrays, indexed by layer. -type tree_store() :: #tree_store{}. new() -> - %% tree_store#{warm = ets:new(nil, [{read_concurrency, true}]), - %% frozen = ets:new(nil, [{read_concurrency, true}])}. - #tree_store{table = ets:new(nil, [{read_concurrency, true}])}. - -delete(Store) -> - ets:delete(Store#tree_store.table). - -size(Store) -> - ets:info(Store#tree_store.table, size). + #tree_store{arrays = #{}}. -spec store(tree_store(), tuple(), binary()) -> tree_store(). -store(Store, IR, Hash) -> - ets:insert(Store#tree_store.table, {IR, Hash}), - Store. +store(S = #tree_store{arrays = Arrays}, {I, R}, Hash) -> + {A, NewArrays} = get_array(R, Arrays), + Index = case I of + append -> array:size(A); + N -> N + end, + S#tree_store{arrays = maps:put(R, array:set(Index, Hash, A), NewArrays)}. -spec retrieve(tree_store(), tuple()) -> {tuple(), binary()}. -retrieve(#tree_store{table = Tab}, IR) -> - case ets:lookup(Tab, IR) of - [] -> exit(IR); - [R] -> R - end. +retrieve(#tree_store{arrays = Arrays}, {I, R}) -> + {{I, R}, + case maps:get(R, Arrays, undefined) of + undefined -> undefined; + A -> array:get(I, A) + end}. -spec retrieve_hash(tree_store(), tuple()) -> binary(). -retrieve_hash(#tree_store{table = Tab}, IR) -> - ets:lookup_element(Tab, IR, 2). +retrieve_hash(S, IR) -> + element(2, retrieve(S, IR)). + +-spec delete(tree_store(), tuple()) -> tree_store(). +delete(S, {I, R}) -> + {A, NewArrays} = get_array(R, S#tree_store.arrays), + Index = case I of + last -> array:size(A) - 1; + N -> N + end, + S#tree_store{arrays = maps:put(R, + array:reset(Index, A), + NewArrays)}. +append(S, R, Hash) -> + store(S, {append, R}, Hash). + +truncate(S, R) -> + delete(S, {last, R}). + +%% Private +get_array(Layer, Arrays) -> + case maps:is_key(Layer, Arrays) of + true -> {maps:get(Layer, Arrays), Arrays} ; + false -> + NewArray = array:new(), + {NewArray, maps:put(Layer, NewArray, Arrays)} + end. + +%%%%%%%%%%%%%%%%%%%% +%% Testing ts. +-define(TEST_VECTOR, {{{0,0}, <<00>>}, + {{0,1}, <<01>>}, + {{1,0}, <<10>>}}). +store_test_() -> + T = ?TEST_VECTOR, + S = new(), + S0 = store(S, {0,0}, <<00>>), + S1 = store(S0, {0,1}, <<01>>), + S2 = store(S1, {1,0}, <<10>>), + [?_assertEqual(retrieve(S2, {1,0}), element(3, T))]. -- cgit v1.1