From 068495af568f93b3db3ac5c29cf85f962b250632 Mon Sep 17 00:00:00 2001 From: Linus Nordberg Date: Tue, 7 Mar 2017 17:54:40 +0100 Subject: Don't corrupt internal tree structure when evaluating a version zero tree. add() and reset_tree(): Update the tree too. consistency(), path() and head(): Stop updating the tree. read_new_entries(): Update the tree after every MAX_READ_ENTRIES (10k) entries. Remove update_tree/2 since we don't do partial updates of the tree any more. In tests, add consistency checks for tree data structure and underlying storage. Fix by map@ and linus@. Closes CATLFISH-100. --- src/ht.erl | 142 +++++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 86 insertions(+), 56 deletions(-) diff --git a/src/ht.erl b/src/ht.erl index 87090f0..7f52db0 100644 --- a/src/ht.erl +++ b/src/ht.erl @@ -41,7 +41,6 @@ -include("timeouts.hrl"). -define(MAX_READ_ENTRIES, 10000). --define(MAX_CALC_ENTRIES, 10000). %% Data types. -record(tree, {version :: integer(), @@ -100,12 +99,10 @@ print_tree(HashOutputLen) -> %% gen_server callbacks init(Args) -> - lager:info("reading tree"), - Tree = new(Args), - lager:info("building tree"), - UpdatedTree = update(Tree), + lager:info("reading and building tree"), + Tree = new_and_update(Args), lager:info("finished"), - {ok, UpdatedTree}. + {ok, Tree}. handle_cast(_Request, State) -> {noreply, State}. handle_info(_Info, State) -> @@ -117,7 +114,7 @@ terminate(_Reason, _State) -> % public api handle_call({reset_tree, Arg}, _From, _State) -> - NewTree = new(Arg), + NewTree = new_and_update(Arg), {reply, NewTree, NewTree}; handle_call({load_tree, Version}, _From, State) -> {Reply, NewTree} = read_new_entries(State, Version), @@ -127,7 +124,7 @@ handle_call(stop, _From, State) -> handle_call(size, _From, State) -> {reply, State#tree.version + 1, State}; handle_call({add, Hash}, _From, State) -> - {reply, ok, add(State, Hash)}; + {reply, ok, add_with_update(State, Hash)}; handle_call(root, _From, State) -> {NewState, Hash} = head(State, State#tree.version), {reply, Hash, NewState}; @@ -159,14 +156,13 @@ consistency(Tree, _V1, V2) when V2 > Tree#tree.version -> consistency(Tree, V1, V2) -> %% Walk up the tree from V1 to first left child. {StartLayer, StartIndex} = first_left_node(0, V1), - UpdTree = update(Tree, V2), First = case StartIndex of 0 -> []; - _ -> [get_hash(UpdTree, {StartLayer, StartIndex})] + _ -> [get_hash(Tree, {StartLayer, StartIndex})] end, %% Get path from first left child to head of V2. - {_, Path} = path(UpdTree, StartLayer, StartIndex, V2), - {UpdTree, First ++ Path}. + {_, Path} = path(Tree, StartLayer, StartIndex, V2), + {Tree, First ++ Path}. %% @doc Return a list of hashes showing the path from leaf Index to %% the tree head in the tree of version Version. @@ -182,8 +178,7 @@ path(Tree = #tree{version = V}, _, _, Version) when Version > V -> {Tree, []}; % FIXME: Return an error? path(Tree, Layer, Index, Version) -> %% The magic here is to tell path/6 to stop at Version >> Layer. - UpdTree = update(Tree, Version), - {UpdTree, path(UpdTree, Layer, Index, Version bsr Layer, Version, [])}. + {Tree, path(Tree, Layer, Index, Version bsr Layer, Version, [])}. %% @doc Return path from {Layer,I} to head of tree Version. I is the %% leftmost and ILast the rightmost node to consider, at Layer. @@ -214,25 +209,11 @@ get_hash(Tree, {R, I}) -> head(Tree, -1) -> {Tree, hash(<<"">>)}; head(Tree = #tree{version = V}, Version) when Version == V -> - EndBound = min(Version, Tree#tree.evaluated + ?MAX_CALC_ENTRIES), - NewTree = update(Tree, EndBound), - case EndBound of - Version -> - {NewTree, get_hash(NewTree, {depth(Tree) - 1, 0})}; - _ -> - {NewTree, eagain} - end; + {Tree, get_hash(Tree, {depth(Tree) - 1, 0})}; head(Tree = #tree{version = V}, Version) when Version > V -> {Tree, enotimetravel}; head(Tree, Version) -> - EndBound = min(Version, Tree#tree.evaluated + ?MAX_CALC_ENTRIES), - NewTree = update(Tree, EndBound), - case EndBound of - Version -> - {NewTree, old_version_tree_head(NewTree, Version)}; - _ -> - {NewTree, eagain} - end. + {Tree, old_version_tree_head(Tree, Version)}. -spec old_version_tree_head(tree(), non_neg_integer()) -> binary(). old_version_tree_head(Tree, Version) -> @@ -290,26 +271,37 @@ first_left_node(Layer, Index, BAL) -> false -> {Layer, Index} end. +%% @doc Add a hash and update the tree. +-spec add_with_update(tree(), binary()) -> tree(). +add_with_update(Tree, Hash) -> + update(add(Tree, Hash)). + %% @doc Add a hash but don't update the tree. -spec add(tree(), binary()) -> tree(). add(Tree = #tree{version = V, store = Store}, Hash) -> Tree#tree{version = V + 1, store = ts:add(Store, 0, Hash)}. -read_new_entries(State, Version) when is_integer(Version) -> - EndBound = min(Version, State#tree.version + ?MAX_READ_ENTRIES), - NewEntries = db:get_by_indices(State#tree.version + 1, EndBound, {sorted, true}), - NewState = foldl(fun(Hash, Tree) -> - add(Tree, Hash) - end, State, [H || {_I, H, _E} <- - NewEntries]), +read_new_entries(Tree, Version) when is_integer(Version) -> + EndBound = min(Version, Tree#tree.version + ?MAX_READ_ENTRIES), + NewEntries = db:get_by_indices(Tree#tree.version + 1, EndBound, {sorted, true}), + NewHashes = [H || {_I, H, _E} <- NewEntries], + NewTree = foldl(fun(H, T) -> + add(T, H) + end, Tree, NewHashes), + UpdatedTree = update(NewTree), case EndBound of Version -> - {ok, NewState}; + {ok, UpdatedTree}; _ -> - {eagain, NewState} + {eagain, UpdatedTree} end. -%% @doc Return a new tree. +%% @doc Return a new updated tree. +-spec new_and_update(list()) -> tree(). +new_and_update(List) -> + update(new(List)). + +%% @doc Return a new tree, not updated. -spec new(list()) -> tree(). new([]) -> #tree{version = -1, @@ -329,22 +321,14 @@ new([List]) when is_list(List) -> foldl(fun(Hash, Tree) -> add(Tree, Hash) end, new([]), List). -update(Tree) -> - update(Tree, Tree#tree.version). - -%% @doc Calculate hashes in Tree up to and including node with index -%% equal to Version. Update Tree.evaluated to reflect the new state. --spec update(tree(), non_neg_integer()) -> tree(). -update(Tree, 0) -> - %% A version 0 tree needs no updating. - Tree#tree{evaluated = 0}; -update(Tree = #tree{evaluated = E}, V) when E >= V -> +%% @doc Calculate hashes in Tree. Update Tree.evaluated to reflect the +%% new state. +-spec update(tree()) -> tree(). +update(Tree = #tree{version = Version, evaluated = E}) when E >= Version -> %% Evaluated enough already. Nothing to do. + true = E == Version, % ASSERTION. Tree; -update(Tree = #tree{version = MaxV}, V) when V > MaxV -> - %% Asking for more than we've got. Do as much as possible. - update(Tree, MaxV); -update(Tree = #tree{evaluated = Evaluated}, Version) -> +update(Tree = #tree{version = Version, evaluated = Evaluated}) -> NewTree = update_layer(Tree, 0, Evaluated + 1, Version), NewTree#tree{evaluated = Version}. @@ -462,8 +446,7 @@ print_layer(Tree, HashOutputLen, Layer, ILast) -> %%%%%%%%%%%%%%%%%%%% %% Testing ht. -%% TODO: Move all these tests to a separate file in ../test. They're -%% only using external functions. + -define(TEST_VECTOR_LEAVES, ["", "\x00", "\x10", " !", "01", "@ABC", "PQRSTUVW", "`abcdefghijklmno"]). @@ -473,6 +456,53 @@ test_init(L) -> stop(), {ok, _Pid} = start_link(L). +%% Using internal interfaces. +storage_consistency_test() -> + LEAVES = [<> || X <- lists:seq(0, 64)], + test_init(lists:map(fun mkleafhash/1, LEAVES)), + Tree = testing_get_state(), + ?assert(check_consistency(Tree)), + Tree2 = update(Tree), + ?assert(check_consistency(Tree2)), + Tree3 = add_with_update(Tree2, <<>>), + ?assert(check_consistency(Tree3)). + +%%%% Test helpers, internal interfaces. +levelsize(0, 1) -> + 0; +levelsize(LastIndex, Level) -> + trunc((LastIndex)/math:pow(2, Level))+1. + +deptheval(#tree{evaluated = -1}) -> + 0; +deptheval(#tree{evaluated = V}) -> + bitpos_first_set(V) + 1. + +%% check_consistency(#tree{evaluated = Evaluated}) when Evaluated == -1 -> +%% true; +check_consistency(Tree) -> + check_consistency_tree(Tree) and check_consistency_storage(Tree). + +check_consistency_tree(Tree) -> + Tree#tree.evaluated == Tree#tree.version. + +check_consistency_storage(Tree) -> + lists:all(fun (R) -> + ActualSize = ts:count(Tree#tree.store, R), + ShouldBeSize = levelsize(Tree#tree.evaluated, R), + case ActualSize of + ShouldBeSize -> + true; + _ -> + lager:error("lastindex: ~p level ~p actual size: ~p should be: ~p", [Tree#tree.evaluated, R, ActualSize, ShouldBeSize]), + false + end + end, lists:seq(1, deptheval(Tree) - 1)). + +%% Using only exported functions. +%% TODO: Move all these tests to a separate file in ../test. They're +%% only using external functions. + %% @doc Verify trees using add/2. add_test() -> lists:foreach( -- cgit v1.1