From d8294b2eb95858818a0ce1d5fe3e980aadf7ce53 Mon Sep 17 00:00:00 2001 From: Linus Nordberg Date: Thu, 11 Sep 2014 11:54:10 +0200 Subject: Another hashtree implementation, first cut. This one stores the tree in arrays, one per layer in the tree. It's implemented as gen_server. --- src/ht.erl | 329 ++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 181 insertions(+), 148 deletions(-) (limited to 'src/ht.erl') diff --git a/src/ht.erl b/src/ht.erl index 5398483..5fa285c 100644 --- a/src/ht.erl +++ b/src/ht.erl @@ -8,125 +8,181 @@ %%% %%% [0] https://www.usenix.org/event/sec09/tech/full_papers/crosby.pdf %%% -%%% Useful terminology: -%%% E -- Entry, stored in leaf nodes. -%%% I(i,r) -- Inner node with index i, on layer r. -%%% A(v)(i,r) -- Hash of I(i,r) in tree of version v. -%%% d -- Depth of tree. -%%% -%%% Nodes are identified by their index i and layer r. -%%% I(i,r) has left child I(i,r-1) and right child I(i+2^(r-1),r-1). -%%% -%%% A version-n tree stores n+1 entries. -%%% In a version-n tree, I(i,r) is frozen when n >= i + 2^r - 1. +%%% Hashes of inner nodes and leaves are stored in arrays, one per +%%% layer with layer 0 being where the leaves are. The total number of +%%% arrays is equal to the depth of the tree. The depth of the tree is +%%% ceil(lg2(number of leaves)). +%%% Let {r,i} denote the hash with index i on layer r. The first leaf +%%% is {0,0}, second is {0,1} and n:th is {0,n-1}. +%%% The parent of {r,i} is {r+1,floor(i/2)} (not true bc of +%%% "placeholders", FIXME: doc) +%%% The sibling of {r,i} is {r,i+1} when i is even and {r,i-1} when i +%%% is odd. -%%% -%%% Data types: -%%% history_tree -%%% version (integer) -%%% store (tree storage handled by ts) -%%% -%%% Interface: -%%% create tree (-> tree) -%%% add entry (data -> tree) -%%% get hash of leaf or inner node (i, r -> hash) -%%% get inclusion proof for leaf i in version-n tree (i, n -> hashes) -%%% get consistency proof between trees of version n and m (n, m -> hashes) -%%% -module(ht). +-behaviour(gen_server). + +-export([size/0, add/1, tree_hash/0, tree_hash/1]). +-export([get_incl/2, get_cons/2]). +-export([mktree/1]). +-export([start_link/0, start_link/1, stop/0]). +-export([init/1, handle_call/3, terminate/2, handle_cast/2, handle_info/2, + code_change/3]). + -include("$CTROOT/plop/include/plop.hrl"). -include_lib("eunit/include/eunit.hrl"). -import(lists, [foreach/2, foldl/3, reverse/1]). --export_type([history_tree/0, inner/0]). --export([new/0, new/1, destroy/1, size/1, add/2]). --export([get_hash/3, get_incl/3, get_cons/3]). --export([tree_hash/1, tree_hash/2]). +%% Data types. +-record(tree, {version :: integer(), + evaluated :: integer(), + store :: ts:tree_store()}). +-type tree() :: #tree{}. %%%%%%%%%%%%%%%%%%%% %% Public interface. +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). +start_link(NEntries) -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [NEntries], []). +stop() -> + gen_server:call(?MODULE, stop). +size() -> + gen_server:call(?MODULE, size). +add(Entry) -> + gen_server:call(?MODULE, {add, Entry}). +tree_hash() -> + gen_server:call(?MODULE, tree_hash). +tree_hash(Version) -> + gen_server:call(?MODULE, {tree_hash, Version}). +get_incl(I, V) -> + gen_server:call(?MODULE, {inclusion, I, V}). +get_cons(V1, V2) -> + gen_server:call(?MODULE, {consistency, V1, V2}). + +%% gen_server callbacks +init([]) -> + {ok, new()}; +init(Args) -> + {ok, new(Args)}. +handle_cast(_Request, State) -> + {noreply, State}. +handle_info(_Info, State) -> + {noreply, State}. +code_change(_OldVersion, State, _Extra) -> + {ok, State}. +terminate(_Reason, _State) -> + ok. +handle_call(stop, _From, State) -> + {stop, normal, stopped, State}; +handle_call(size, _From, State) -> + {reply, State#tree.version + 1, State}; +handle_call({add, Entry}, _From, State) -> + {reply, ok, add(State, Entry)}; +handle_call(tree_hash, _From, State) -> + {NewState, Hash} = tree_hash(State, State#tree.version), + {reply, Hash, NewState}; +handle_call({tree_hash, Version}, _From, State) -> + {NewState, Hash} = tree_hash(State, Version), + {reply, Hash, NewState}; +handle_call({inclusion, _Index, _Version}, _From, State) -> + {reply, nyi, State}; +handle_call({consistency, _Version1, _Version2}, _From, State) -> + {reply, nyi, State}. %%%%%%%%%%%%%%%%%%%% -%% Data types. --record(history_tree, {version :: integer(), - store :: ts:tree_store()}). --type history_tree() :: #history_tree{}. +%% Private. +-spec get_hash(tree(), non_neg_integer(), tuple()) -> {tree(), binary()}. +get_hash(Tree, Version, IR) -> + NewTree = update(Tree, Version), + Hash = ts:retrieve_hash(NewTree#tree.store, IR), % TODO: Honour version! + %%io:format("DEBUG: get_hash(Tree=~p,~nVersion=~p,~nIR=~p):~nNewTree=~p~n", [Tree, Version, IR, NewTree]), + {NewTree, Hash}. --record(inner, {index :: non_neg_integer(), % 27bit integer => max 134e6 entries - layer :: non_neg_integer(), % 5bit integer - hash :: binary()}). --type inner() :: #inner{}. +-spec tree_hash(tree(), integer()) -> {tree(), binary()}. +tree_hash(Tree, -1) -> + {Tree, hash(<<"">>)}; +tree_hash(Tree, Version) -> + get_hash(Tree, Version, {0, depth(Tree) - 1}). -%%%%%%%%%%%%%%%%%%%% -%% Public interface. -size(#history_tree{version = V}) -> - V + 1. +%% @doc Add an entry but don't update the tree. +-spec add(tree(), binary()) -> tree(). +add(Tree = #tree{version = V, store = Store}, Entry) -> + NewVersion = V + 1, + LeafIndex = NewVersion, + LeafHash = mkleafhash(Entry), + Tree#tree{version = NewVersion, + store = ts:store(Store, {LeafIndex, 0}, LeafHash)}. --spec new() -> history_tree(). +-spec new() -> tree(). new() -> - #history_tree{version = -1, store = ts:new()}. + #tree{version = -1, + evaluated = -1, + store = ts:new()}. --spec new(non_neg_integer()) -> history_tree(). -new(Version) -> +-spec new(non_neg_integer()) -> tree(). +new([Version]) when is_integer(Version) -> foldl(fun(#mtl{entry = E}, Tree) -> D = (E#timestamped_entry.entry)#plop_entry.data, add(Tree, D) % Return value -> Tree in next invocation. - end, new(), db:get_by_index_sorted(0, Version)). - -destroy(Tree) -> - ts:delete(Tree#history_tree.store). + end, new(), db:get_by_index_sorted(0, Version)); +new([List]) when is_list(List) -> + foldl(fun(D, Tree) -> + add(Tree, D) % Return value -> Tree in next invocation. + end, new(), List). -%% @doc Which layers need to change when creating a version-n tree? -%% Layer 0 is where the new leaf is added and always needs -%% updating. Apart from 0, the layers with a number corresponding to -%% the "positions of the bits set in n" are the ones that need to be -%% touched. This assumes a somewhat unorthodox bit numbering where the -%% least significant bit has number 1 (rather than 0). -%% -%% For example: -%% A version 2 (0010) tree has had changes to layer 2 -%% A version 5 (0101) tree has had changes to layers 1 and 3 -%% A version 8 (1000) tree has had changes to layer 4 --spec add(history_tree(), binary()) -> history_tree(). -add(#history_tree{version = V, store = Store}, Entry) -> - NewVersion = V + 1, - LeafIndex = NewVersion, - LeafHash = mkleafhash(Entry), - LayerMap = reverse([B || <> <= binary:encode_unsigned(NewVersion)]), - #history_tree{version = NewVersion, - store = update(ts:store(Store, {LeafIndex, 0}, LeafHash), - 1, LayerMap, - #inner{index = LeafIndex, layer = 0, - hash = LeafHash})}. +%% @doc Calculate hashes in Tree up to and including leaf with index +%% equal to Version. Update Tree.evaluated to reflect the new state. +-spec update(tree(), non_neg_integer()) -> tree(). +update(Tree, 0) -> + %% A version 0 tree needs no updating. + Tree; +update(Tree = #tree{evaluated = E}, V) when E >= V -> + %% Evaluated enough already. Nothing to do. + Tree; +update(Tree = #tree{version = MaxV}, V) when V > MaxV -> + %% Asking for more than we've got. Do as much as possible. + update(Tree, MaxV); +update(Tree = #tree{evaluated = Evaluated}, Version) -> + NewTree = update_layer(Tree, 0, Evaluated + 1, Version), + NewTree#tree{evaluated = Version}. --spec tree_hash(history_tree()) -> binary(). -tree_hash(Tree) -> - tree_hash(Tree, Tree#history_tree.version). +-spec update_layer(tree(), non_neg_integer(), non_neg_integer(), + non_neg_integer()) -> tree(). +update_layer(Tree, _Layer, _ICur, 0) -> + Tree; +update_layer(Tree, Layer, ICur, ILast) -> + NewTree = Tree#tree{store = update_parent(Tree#tree.store, Layer, + strip_bits_bottom(ICur, 1), ILast)}, + %%io:format("DEBUG: update_layer(~p, ~p, ~p, ~p + update_layer(NewTree, Layer + 1, parent(ICur), parent(ILast)). --spec tree_hash(history_tree(), integer()) -> binary(). -tree_hash(_, -1) -> - hash(""); -tree_hash(Tree, Version) -> - get_hash(Tree, Version, {0, depth(Tree) - 1}). +-spec update_parent(ts:tree_store(), non_neg_integer(), non_neg_integer(), + non_neg_integer()) -> ts:tree_store(). +update_parent(S, Layer, I, ILast) when I >= ILast -> + case right_leaf_p(ILast) of + true -> S; + _ -> ts:append(S, Layer + 1, ts:retrieve_hash(S, {ILast, Layer})) + end; +update_parent(S, Layer, I, ILast) -> + %%io:format("DEBUG: update_parent(R=~p, I=~p, ILast=~p): parent(I)=~p~n", [Layer, I, ILast, parent(I)]), + S2 = ts:store(S, {parent(I), Layer + 1}, + mkinnerhash(ts:retrieve_hash(S, {I, Layer}), + ts:retrieve_hash(S, {I + 1, Layer}))), + update_parent(S2, Layer, I + 2, ILast). --spec get_hash(history_tree(), non_neg_integer(), tuple()) -> binary(). -get_hash(#history_tree{store = S}, _Version, IR) -> - %% TODO: Use Version! - {{_I, _R}, Hash} = ts:retrieve(S, IR), - Hash. +parent(I) -> + I bsr 1. -%-spec get_incl/3 -get_incl(_, _, _) -> - nyi. -%-spec get_cons/3 -get_cons(_, _, _) -> - nyi. +-spec right_leaf_p(integer()) -> boolean(). +right_leaf_p(Index) -> + case Index band 1 of + 1 -> true; + _ -> false + end. -%% Private. -%% -spec head(history_tree()) -> inner(). -%% head(#history_tree{store = Store}) -> -%% {{I, R}, Hash} = ts:retrieve(Store, {0, 0}), -%% #inner{index = I, layer = R, hash = Hash}. +strip_bits_bottom(N, Nbits) -> + (N bsr Nbits) bsl Nbits. %% @doc Return position of highest bit set, counting from the least %% significant bit, starting at 1. @@ -141,65 +197,23 @@ ffs([H|T], Acc) -> _ -> Acc end. -depth(#history_tree{version = -1}) -> +depth(#tree{version = -1}) -> 0; -depth(#history_tree{version = V}) -> +depth(#tree{version = V}) -> bitpos_first_set(V) + 1. --spec update(ts:tree_store(), pos_integer(), list(), inner()) -> ts:tree_store(). -update(Store, _, [], _) -> - Store; -update(Store, - CurrentLayer, [UpdateThisLayerP|LayerMap], - Child = #inner{index = ChildIndex, - layer = ChildLayer, - hash = ChildHash}) -> - case UpdateThisLayerP == 1 of - true -> - %% Create/update the parent of ChildHash at ChildIndex. - %% - %% Parent index is equal to child index with its r lowest - %% bits stripped off, where r is current layer. - %% - %% Other child is found by comparing index of parent and - %% known child. If they're the same, the known child is - %% the left child and its sibling is found on the same - %% layer. If they differ, the known child is the right - %% child and the left child is to be found one layer below - %% the parent, same index. - - Index = strip_low_bits(ChildIndex, CurrentLayer), - Hash = - case Index == ChildIndex of - true -> - {_SibIR, SibHash} = - ts:retrieve(Store, {Index + (1 bsl ChildLayer) - 1, - ChildLayer}), - mkinnerhash(ChildHash, SibHash); - _ -> - {_SibIR, SibHash} = - ts:retrieve(Store, {Index, CurrentLayer - 1}), - mkinnerhash(SibHash, ChildHash) - end, - update(ts:store(Store, {Index, CurrentLayer}, Hash), - CurrentLayer + 1, LayerMap, - #inner{index = Index, layer = CurrentLayer, hash = Hash}); - _ -> - update(Store, CurrentLayer + 1, LayerMap, Child) - end. - -strip_low_bits(N, Nbits) -> - (N bsr Nbits) bsl Nbits. - -hash(Data) -> - crypto:hash(sha256, Data). - +-spec mkleafhash(binary()) -> binary(). mkleafhash(Data) -> hash([<<"\x00">>, Data]). +-spec mkinnerhash(binary(), binary()) -> binary(). mkinnerhash(Hash1, Hash2) -> hash([<<"\x01">>, Hash1, Hash2]). +-spec hash(binary()) -> binary() | iolist(). +hash(Data) -> + crypto:hash(sha256, Data). + %%%%%%%%%%%%%%%%%%%% %% Testing ht. -define(TEST_VECTOR_LEAVES, @@ -214,14 +228,33 @@ mkinnerhash(Hash1, Hash2) -> "ddb89be403809e325750d3d263cd78929c2942b7942a34b77e122c9594a74c8c", "5dc9da79a70659a9ad559cb701ded9a2ab9d823aad2f4960cfe370eff4604328"]). -%% @doc Build trees using add/2 and mth/2 and compare the resulting +%% @doc Build tree using add/2 and mth/2 and compare the resulting %% tree hashes. +%% FIXME: Don't start and stop the server here! add_test() -> lists:foreach( fun(X) -> L = lists:sublist(?TEST_VECTOR_LEAVES, X), - ?assertEqual(mth(L), tree_hash(mktree(L))) - end, - lists:seq(1, length(?TEST_VECTOR_LEAVES))). + {ok, _Pid} = start_link(L), + mktree(L), + ?assertEqual(mth(L), tree_hash()), + stop() end, + random_entries(length(?TEST_VECTOR_LEAVES))). + +random_entries(N) -> + [V || {_, V} <- lists:sort( + [{random:uniform(N), E} || E <- lists:seq(1, N)])]. + +update_test_OFF() -> + mktree(?TEST_VECTOR_LEAVES), + lists:foreach( + fun(Test) -> + L = lists:sublist(?TEST_VECTOR_LEAVES, Test), + ?assertEqual(mth(L), + tree_hash(lists:nth(Test, ?TEST_VECTOR_HASHES))) end, + random_entries(length(?TEST_VECTOR_LEAVES))). + +%% verify_old_versions_test() -> +%% {ok, _Pid} = start_link(db:size() - 1). %%%%%%%%%%%%%%%%%%%% %% Testing the test helpers. -- cgit v1.1