From d86483fc4b3fda70b6e342d0aa1c0fa49d7b6943 Mon Sep 17 00:00:00 2001 From: Linus Nordberg Date: Tue, 28 Oct 2014 13:45:50 +0100 Subject: Database documentation, first cut. --- doc/Makefile | 6 ++++++ doc/db.md | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 doc/Makefile create mode 100644 doc/db.md diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..05b987d --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,6 @@ +ALL = db.html + +all: $(ALL) + +%.html: %.md + markdown $< > $@ diff --git a/doc/db.md b/doc/db.md new file mode 100644 index 0000000..79ff9ac --- /dev/null +++ b/doc/db.md @@ -0,0 +1,59 @@ +# plop database + +The plop database is a distributed, single-master, append-only +database suitable for transparency systems like Certificate +Transparency. + +Data entries are stored together with three attributes: + +- index + + the first entry in a database has index 0, the next 1 and so on + +- entry hash + + the hash over the entry, used for duplicate detection + +- leaf hash + + hash over specific parts of the entry, usually together with a + timestamp, for use in a merkle tree + +## Erlang code in src/ + +- db.erl + + public interface for adding entries and getting entries by index, + leaf hash and entry hash + +- index.erl + + file-based storage for ordered append-only lists of fixed-sized + entries, retrievable by index + +- atomic.erl + + atomic file operations + +- util.erl + + helper functions for lower-level file handling + +- fsyncport.erl + + interface to the C implementation of the fsync(2) syscall + +## C code in c_src/ + +- net_read_write.c + + read and write to/from a file descriptor, using fsync(2) to increase + the probability that data lands on disk + +- fsynchelper.c + + Erlang port for net_read_write + +- erlport.c + + glue -- cgit v1.1 From ce07eb3da38afce436615faf56c919e50720d8c3 Mon Sep 17 00:00:00 2001 From: Linus Nordberg Date: Wed, 29 Oct 2014 13:02:19 +0100 
Subject: Add more db documentation. --- doc/db.md | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/doc/db.md b/doc/db.md index 79ff9ac..7a411d1 100644 --- a/doc/db.md +++ b/doc/db.md @@ -17,7 +17,39 @@ Data entries are stored together with three attributes: - leaf hash hash over specific parts of the entry, usually together with a - timestamp, for use in a merkle tree + timestamp for use in a Merkle tree + +## Storage in a file system + +Two files (catlfish names in parentheses): + +- treesize (treesize) + + filename is static, contains one line -- the number of entries in + the database + +- index (index) + + filename is static, contains one line per entry -- the leafhash + +Three directories, "bucketed" in three levels, one file per database +entry: + +- entry (certentries) + + filename=leafhash, content=the actual data of the entry + +- entryhash (entryhash) + + filename=entryhash, content=leafhash + +- indexforhash (certindex) + + filename=leafhash, content=index + +## Distributed + +TODO: describe distribution ## Erlang code in src/ @@ -31,6 +63,10 @@ Data entries are stored together with three attributes: file-based storage for ordered append-only lists of fixed-sized entries, retrievable by index +- perm.erl + + reading and writing of files + - atomic.erl atomic file operations -- cgit v1.1 From 99eb1e889b3168779a64b1a6bb52766f6d26fbc6 Mon Sep 17 00:00:00 2001 From: Linus Nordberg Date: Tue, 18 Nov 2014 01:40:58 +0100 Subject: Clarify index and add a comment about directory names. 
--- doc/db.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/db.md b/doc/db.md index 7a411d1..9b7f9d3 100644 --- a/doc/db.md +++ b/doc/db.md @@ -8,7 +8,7 @@ Data entries are stored together with three attributes: - index - the first entry in a database has index 0, the next 1 and so on + integer; the first entry has index 0, the next one 1 and so on - entry hash @@ -33,7 +33,7 @@ Two files (catlfish names in parentheses): filename is static, contains one line per entry -- the leafhash Three directories, "bucketed" in three levels, one file per database -entry: +entry (catlfish names in parentheses): - entry (certentries) -- cgit v1.1