-module('ht'). -include_lib("eunit/include/eunit.hrl"). -record(leaf, {hash :: binary()}). % hash of data -record(inner, {child0 :: #leaf{} | #inner{}, % left child child1 :: #leaf{} | #inner{}, % right child hash :: binary()}). % hash of children's hashes -record(head, {version :: non_neg_integer(), % number of leafs tree :: tree()}). % the tree -type head() :: #head{}. -type tree() :: inner() | leaf() | undefined. -type leaf() :: #leaf{}. -type inner() :: #inner{}. -export_type([head/0, tree/0, inner/0, leaf/0]). -export([create/0, append/2, tree_hash/1, size/1, audit_path/2, consistency_proof/2]). %%%%%%%%%%%%%%%%%%%% %% Public interface. %% @doc Return an empty tree. -spec create() -> head(). create() -> mkhead(0, undefined). %% @doc Return the merkle tree hash of a tree. -spec tree_hash(head()) -> binary(); (tree()) -> binary(). tree_hash(#head{tree=T}) -> case T of undefined -> hashfun(<<>>); #inner{hash=H} -> H; #leaf{hash=H} -> H end. %% @doc Return the size of a tree. -spec size(head()) -> non_neg_integer(). size(Head) -> tree_version(Head). %% @doc Append a leaf to a tree. %% %% Walk down the tree in Head, stop at The Right Place and make Leaf %% the right sibling of that place. To find the right place, let d be %% the depth of the tree, then go down the tree on the right side to %% level l, where l is the position of the first set bit in d, looking %% at d "from the right". l=0 is where the leafs are and l=d-1 is the %% root. %% %% The depth of the tree is found by walking down the right path. It %% would be better if we inserted the leaf and calculated the nodes on %% the way up instead of walking down the tree again. Worst case this %% is lg2(N) iterations, i.e. 24 steps for N=16e10. %% %% Example: N=3 (011) => l=0, the rightmost leaf. %% Example: N=4 (100) => l=2, the root (soon not to be root). %% Example: N=5 (101) => l=0, the rightmost leaf. %% Example: N=6 (110) => l=1, the last and rightmost inner node. -spec append(head(), leaf() | iolist() | binary()) -> head(). append(#head{version = 0}, Leaf) when is_record(Leaf, leaf) -> mkhead(1, Leaf); append(Head, Leaf) when is_record(Leaf, leaf) -> N = Head#head.version, %Depth = depth(Head), Level = fls(N), RBD = rightbranchdepth(Head#head.tree), %io:format("N=~p, Depth=~p, Level=~p, RBD=~p~n", [N, Depth, Level, RBD]), #head{version = N + 1, tree = append(Head#head.tree, Leaf, RBD-Level-1)}; append(Head, Data) -> append(Head, mkleaf(Data)). -spec append(tree(), tree(), pos_integer()) -> tree(). append(Dest, Newtree, _) when is_record(Dest, leaf) -> mkinner(Dest, Newtree); append(Dest, Newtree, 0) when is_record(Dest, inner) -> mkinner(Dest, Newtree); append(Dest, Newtree, Depth) when is_record(Dest, inner) -> mkinner(Dest#inner.child0, append(Dest#inner.child1, Newtree, Depth - 1)). %% @doc Return an audit path. %% %% An audit path is a list of those hashes needed to calculate the %% tree hash for Head given the knowledge of the nth hash in the %% tree. Audit paths prove existance of a given leaf in a given tree. -spec audit_path(head(), non_neg_integer()) -> list(). audit_path(Head, N) -> [fixme, Head, N]. %% @doc Return a consistency proof. %% %% A consistency proof is the shortest list of nodes required to %% verify that the tree built from the first n leaves of a given tree %% is a subset of that tree. Consistency proofs prove the append-only %% property of a tree. -spec consistency_proof(head(), non_neg_integer()) -> list(). consistency_proof(Head, N) -> [fixme, Head, N]. %%%%%%%%%%%%%%%%%%%% %% Private functions. %% @doc Return version number of a tree. %% %% The version number is the number of leafs in tree. Note that this %% is set off by one (higher) compared to the history tree version as %% explained by Crosby and Wallach. -spec tree_version(head()) -> non_neg_integer(). tree_version(#head{version=Ver}) -> Ver. -spec mkhead(non_neg_integer(), tree()) -> head(); (head(), list()) -> head(). mkhead(Version, Tree) when is_integer(Version) -> #head{version=Version, tree=Tree}; mkhead(Head, []) -> Head; mkhead(Head, [H|T]) -> append(mkhead(Head, T), mkleaf(H)). -spec hashfun(iolist() | binary()) -> binary(). hashfun(Data) -> crypto:hash(sha256, Data). -spec mkleaf(iolist() | binary()) -> leaf(). mkleaf(Data) -> #leaf{hash = hashfun([<<"\x00">>, Data])}. -spec mkinner(tree(), tree()) -> inner(). mkinner(Leaf, Tree) -> #inner{child0 = Leaf, child1 = Tree, hash = mkhash(Leaf, Tree)}. %% TODO: merge mkhash/2 and gethash? if so, use it in mkleaf/1. -spec mkhash(tree(), tree()) -> binary(). mkhash(Tree0, Tree1) -> hashfun([<<"\x01">>, gethash(Tree0), gethash(Tree1)]). -spec gethash(tree()) -> binary(). gethash(#leaf{hash=Hash}) -> Hash; gethash(#inner{child0=Child0, child1=Child1}) -> mkhash(Child0, Child1). %% @doc Unsigned integer -> binary. %% In R16, we can use integer_to_binary/1. -spec ui2b(pos_integer()) -> binary(). ui2b(Unsigned) -> binary:encode_unsigned(Unsigned). %% @doc Find first set bit in V, starting counting at zero from the %% least significant bit. ffs(V) when is_integer(V) -> L = [Bit || <> <= ui2b(V)], length(L) - ffs(L, 0) - 1. ffs([], Acc) -> Acc; ffs([H|T], Acc) -> case H of 0 -> ffs(T, Acc+1); _ -> Acc end. fls(V) when is_integer(V) -> L = lists:reverse([Bit || <> <= ui2b(V)]), ffs(L, 0). rightbranchdepth(Tree) -> 1 + rightbranchdepth(Tree, 0). -spec rightbranchdepth(tree(), non_neg_integer()) -> non_neg_integer(). rightbranchdepth(Tree, Acc) when is_record(Tree, leaf) -> Acc; rightbranchdepth(Tree, Acc) -> rightbranchdepth(Tree#inner.child1, Acc + 1). %%%%%%%%%%%%%%%%%%%% %% Internal tests. -define(TEST_VECTOR_TREES, [[<<"a foo is a bar">>, <<148,242,40,0,3,172,180,106,111,230,146,161,32,40,128,38,103,8,194, 102,72,68, 126,70,108,47,8,216,208,146,178,107>>]]). %% Test vectors from certificate-transparency/src/python/ct/crypto/merkle_test.py. -define(EMPTY_HASH, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"). -define(TEST_VECTOR_LEAVES, ["", "\x00", "\x10", " !", "01", "@ABC", "PQRSTUVW", "`abcdefghijklmno"]). -define(TEST_VECTOR_HASHES, ["6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d", "fac54203e7cc696cf0dfcb42c92a1d9dbaf70ad9e621f4bd8d98662f00e3c125", "aeb6bcfe274b70a14fb067a5e5578264db0fa9b51af5e0ba159158f329e06e77", "d37ee418976dd95753c1c73862b9398fa2a2cf9b4ff0fdfe8b30cd95209614b7", "4e3bbb1f7b478dcfe71fb631631519a3bca12c9aefca1612bfce4c13a86264d4", "76e67dadbcdf1e10e1b74ddc608abd2f98dfb16fbce75277b5232a127f2087ef", "ddb89be403809e325750d3d263cd78929c2942b7942a34b77e122c9594a74c8c", "5dc9da79a70659a9ad559cb701ded9a2ab9d823aad2f4960cfe370eff4604328"]). basic_helpers_test_() -> [test_bitcount()]. basic_tree_test_() -> TVT1 = lists:nth(1, ?TEST_VECTOR_TREES), [?_assertEqual(#head{version = 0, tree = undefined}, create()), ?_assertEqual(#head{version = 1, tree = #leaf{hash = lists:last(TVT1)}}, create(hd(TVT1)))]. empty_hash_test_() -> [?_assertEqual(hex:hexstr_to_bin(?EMPTY_HASH), mth([]))]. mth_test() -> lists:foreach( fun(X) -> ?assertEqual( mth(lists:sublist(?TEST_VECTOR_LEAVES, X)), hex:hexstr_to_bin(lists:nth(X, ?TEST_VECTOR_HASHES))) end, lists:seq(1, length(?TEST_VECTOR_LEAVES))). new_tree_test_() -> TVT1 = lists:nth(1, ?TEST_VECTOR_TREES), V0 = create(), V1 = append(V0, hd(TVT1)), [?_assertEqual(0, V0#head.version), ?_assertEqual(undefined, V0#head.tree), ?_assertEqual(lists:last(TVT1), (V1#head.tree)#leaf.hash), ?_assertEqual(1, V1#head.version)]. %% @doc Build trees using append/2 and mth/2 and compare the resulting %% tree hashes. append_test() -> lists:foreach( fun(X) -> L = lists:sublist(?TEST_VECTOR_LEAVES, X), ?assertEqual( mth(L), gethash((mkhead(L))#head.tree)) end, lists:seq(1, length(?TEST_VECTOR_LEAVES))). mkhead_from_list_test() -> L = ["a", "b"], ?assertEqual(append(mkhead(1, mkleaf(lists:nth(1, L))), mkleaf(lists:nth(2, L))), mkhead(L)). append_eq_mth_test() -> Count = 300, L = [<> || X <- lists:seq(0, Count)], ?assertEqual(gethash((mkhead(L))#head.tree), mth(L)). audit_path_test_() -> Leaves = ["d0", "d1", "d2", "d3", "d4", "d5", "d6"], %%Tree = lists:foldl(fun(E, T) -> ht:append(T, E) end, ht:create(), Leaves), H = #{ a => "C67F9FFE68E0761021341DD516428F42FBDEA633731CBDADA03BEA6B84C652F7", b => "49B717E4D6ECDD82F6F6648CF8F86FDF4A912600A4557398E1733186FA952C1D", c => "F366DF4718EF75064317794FF5300E0963E96DD93FE24203118055FA5A00BE13", d => "5E0C4E1130DFA84D27437BA073EB817E1896643D42EA100A0940F8752D496783", e => "39298BE94337336FC5515E7A34DE6EF23C9A1BFF66378B71918AE2D105D684C8", f => "6D1BB6BBB111AF4A1E9EC0B9FB2613CC2BCB394141CEE8C2CD462B5AD3803D78", g => "46C78708413A23175F51FAF1C22604BCCB44482D553B45943B189130EA8221C8", h => "C59E9A6D9575777BA3BDBD3E3086516196CF87EC9760861362ABA5CD0F78DF1D", i => "A4F2A847CCE0DCE0519B1D6B83E4CA15166193DBB0C8F864E736665EDBDE1994", j => "D750CA922FABC5422EEC469D4370779B61D5488186CB871EEEA299D8113D20BC", k => "8DF3870B33FAE650E81938994F98EB4551B143B86C95D3DAE4E6444E00715016", l => "3CF05FF16D26C024828E93B3A14C5656E5ABCBC5E6F0BCE2CF8A169720599674"}, [?_assertEqual([maps:get(b, H), maps:get(h, H), maps:get(l, H)], lists:map(fun hex:bin_to_hexstr/1, path(0, Leaves))), ?_assertEqual([maps:get(c, H), maps:get(g, H), maps:get(l, H)], lists:map(fun hex:bin_to_hexstr/1, path(3, Leaves))), ?_assertEqual([maps:get(f, H), maps:get(j, H), maps:get(k, H)], lists:map(fun hex:bin_to_hexstr/1, path(4, Leaves))), ?_assertEqual([maps:get(i, H), maps:get(k, H)], lists:map(fun hex:bin_to_hexstr/1, path(6, Leaves)))]. consistency_proof_test_() -> H = mkhead([]), M = 0, [?_assertEqual([niy, H, M], proof(H, M))]. %%%%%%%%%%%%%%%%%%%% %% Test helpers. %% @doc Calculate a Merkle Tree Hash from an ordered list as specified %% in RFC 6962. %% %% K, the split point, is the number of leafs comprising the largest %% possible full tree. %% %% The way we calculate K is to let N be the number of entries, find %% the most significant bit in N-1 and raise two to that number. This %% is K. test_bitcount() -> L = [1, 2, 3, 255, 256, 511, 512, 32767, 32768, 65535, 65536, 2147483648, 4294967296, 8589934591, 8589934592, 18446744073709551616], [[?_assertEqual(lists:nth(1, tv_bitcount(X)), ffs(X)) || X <- L], [?_assertEqual(lists:nth(2, tv_bitcount(X)), fls(X)) || X <- L]]. tv_bitcount(1) -> [0, 0]; tv_bitcount(2) -> [1, 1]; tv_bitcount(3) -> [1, 0]; tv_bitcount(255) -> [7, 0]; tv_bitcount(256) -> [8, 8]; tv_bitcount(511) -> [8, 0]; tv_bitcount(512) -> [9, 9]; tv_bitcount(32767) -> [14, 0]; tv_bitcount(32768) -> [15, 15]; tv_bitcount(65535) -> [15, 0]; tv_bitcount(65536) -> [16, 16]; tv_bitcount(2147483648) -> [31, 31]; tv_bitcount(4294967296) -> [32, 32]; tv_bitcount(8589934591) -> [32, 0]; tv_bitcount(8589934592) -> [33, 33]; tv_bitcount(18446744073709551616) -> [64, 64]. %% @doc Return the Merkle Tree Head for the leafs in L. Implements %% the algorithm in section 2.1 of RFC 6962. Used for testing. -spec mth(list()) -> binary(). mth([]) -> hashfun(<<"">>); mth(L) -> case length(L) of 1 -> hashfun([<<"\x00">>, L]); _ -> Split = 1 bsl ffs(length(L) - 1), {L1, L2} = lists:split(Split, L), % TODO: PERF hashfun([<<"\x01">>, mth(L1), mth(L2)]) end. %% @doc Return the Merkle Audit Path for leaf m in L. Implements the %% algorithm in section 2.1.1 of RFC 6962. Used for testing. -spec path(integer(), list()) -> list(). path(0, L) when length(L) == 1 -> []; path(M, L) -> K = 1 bsl ffs(length(L) - 1), {L1, L2} = lists:split(K, L), R = case M < K of true -> [path(M, L1), mth(L2)]; _ -> [path(M - K, L2), mth(L1)] end, lists:flatten(R). %% @doc Return the Merkle Consistency Proof between the first m leaves %% of L and L. Implements the algorithm in section 2.1.2 of RFC %% 6962. Used for testing. -spec proof(integer(), list()) -> list(). proof(M, L) -> [niy, M, L]. -spec create(iolist() | binary()) -> head(). create(D) -> mkhead(1, mkleaf(D)). -spec mkhead(list()) -> head(). mkhead([]) -> mkhead(1, mkleaf([])); mkhead(L) when is_list(L) -> mkhead(create(hd(L)), lists:reverse(tl(L))). %% For looking at a tree in a REPL. %% hashes_hex(Head) -> %% lists:map(fun hex:bin_to_hexstr/1, hashes(Head)). %% hashes(Head) when is_record(Head, head) -> %% lists:flatten(hashes(Head#head.tree)); %% hashes(#inner{child0 = L, child1 = R, hash = H}) -> %% [H | [hashes(L), hashes(R)]]; %% hashes(#leaf{hash = H}) -> %% H.