/* * Copyright (c) 2015, NORDUnet A/S. * See LICENSE for licensing information. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include "erlport.h" #include "permdb.h" #include "hash.h" #include "filebuffer.h" #include "util.h" #include "uthash.h" #define INDEX_COMMIT_TRAILER_SIZE (sizeof(uint64_t) + SHA256_DIGEST_SIZE + sizeof(index_commit_cookie)) static const int bitsperlevel = 2; static const int keylen = 32; struct permdb_object { struct nodecache *nodecache; struct nodecache *dirtynodes; buffered_file *datafile; buffered_file *indexfile; char *error; }; static const node_object nullnode = {{0, 0, 0, 0}}; static const node_object errornode = {{NODE_ENTRY_ERROR_NODE, NODE_ENTRY_ERROR_NODE, NODE_ENTRY_ERROR_NODE, NODE_ENTRY_ERROR_NODE}}; struct nodecache { node_object value; char *key; UT_hash_handle hh; }; static node_object get_node_from_cache(permdb_object *state, const char *key) { #if DEBUG_CACHE fprintf(stderr, "getting cache key %s, ", key); #endif struct nodecache *node; HASH_FIND(hh, state->nodecache, key, strlen(key), node); if (node == NULL) { #if DEBUG_CACHE fprintf(stderr, "found nothing in cache\n"); #endif return errornode; } #if DEBUG_CACHE fprintf(stderr, "got cache key %s: ", node->key); print_hex(&node->value, sizeof(struct node_object)); #endif return node->value; } static node_object get_node_from_dirtynodes(permdb_object *state, const char *key) { #if DEBUG_CACHE fprintf(stderr, "getting key %s, ", key); #endif struct nodecache *node; HASH_FIND(hh, state->dirtynodes, key, strlen(key), node); if (node == NULL) { #if DEBUG_CACHE fprintf(stderr, "found nothing\n"); #endif return errornode; } #if DEBUG_CACHE fprintf(stderr, "got key %s: ", node->key); print_hex(&node->value, sizeof(struct node_object)); #endif return node->value; } static void put_node_in_cache(permdb_object *state, const char *key, node_object value) { #if DEBUG_CACHE fprintf(stderr, "putting cache key %s: ", key); print_hex(&value, sizeof(node_object)); #endif struct nodecache *node; HASH_FIND(hh, state->nodecache, key, strlen(key), node); if (node) { node->value = value; return; } node = malloc(sizeof(struct nodecache)); node->value = value; node->key = strdup(key); HASH_ADD(hh, state->nodecache, key[0], strlen(node->key), node); } static void put_node_in_dirtynodes(permdb_object *state, const char *key, node_object value) { #if DEBUG_CACHE fprintf(stderr, "putting key %s: ", key); print_hex(&value, sizeof(node_object)); #endif struct nodecache *node; HASH_FIND(hh, state->dirtynodes, key, strlen(key), node); if (node) { node->value = value; return; } node = malloc(sizeof(struct nodecache)); node->value = value; node->key = strdup(key); HASH_ADD(hh, state->dirtynodes, key[0], strlen(node->key), node); } void delete_all_nodes_in_cache(permdb_object *state) { struct nodecache *node, *tmp; HASH_ITER(hh, state->nodecache, node, tmp) { HASH_DEL(state->nodecache, node); free(node->key); free(node); } } static void delete_all_dirty_nodes(permdb_object *state) { struct nodecache *node, *tmp; HASH_ITER(hh, state->dirtynodes, node, tmp) { HASH_DEL(state->dirtynodes, node); free(node->key); free(node); } } static const uint64_t index_file_cookie = 0xb7e16b02ba8a6d1b; static const uint64_t index_commit_cookie = 0x2fb1778c74a402e4; static const uint64_t index_node_cookie = 0x2e0f555ad73210d1; static const uint8_t data_file_cookie[] = {0xd5, 0x35, 0x51, 0xba, 0x53, 0x9a, 0x42, 0x52}; static const uint8_t data_entry_cookie[] = {0xe7, 0xc1, 0xcd, 0xc2, 0xba, 0x3d, 0xc7, 0x7c}; static const uint8_t data_commit_start_cookie[] = {0x75, 0xc2, 0xe4, 0xb3, 0xd5, 0xf6, 0x43, 0xa1}; static const uint8_t data_commit_end_cookie[] = {0x2b, 0x05, 0xee, 0xd6, 0x1b, 0x5a, 0xf5, 0x50}; int committree(permdb_object *state); void indexfile_add_header(buffered_file *file) { bf_add(file, &index_file_cookie, sizeof(index_file_cookie)); bf_flush(file); } void datafile_add_header(buffered_file *file) { //fprintf(stderr, "adding header to %s\n", bf_name(file)); bf_add(file, &data_file_cookie, sizeof(data_file_cookie)); bf_add_be32(file, 4096); bf_add_be32(file, 2); bf_add_be32(file, 32); bf_flush(file); } void initial_node(permdb_object *state) { put_node_in_dirtynodes(state, "", nullnode); } int initial_commit(permdb_object *state) { initial_node(state); return committree(state); } struct commit_info { node_offset start; node_offset length; uint8_t checksum[SHA256_DIGEST_SIZE]; }; int validate_checksum(struct commit_info *commit, buffered_file *file) { //fprintf(stderr, "validate_checksum: read from file: length %llu start %llu\n", commit->length, commit->start); unsigned char *checksumdata = bf_read(file, commit->start, commit->length, NULL); if (checksumdata == NULL) { return -1; } uint8_t checksum[SHA256_DIGEST_SIZE]; struct sha256_ctx commit_checksum_context; sha256_init(&commit_checksum_context); sha256_update(&commit_checksum_context, commit->length, checksumdata); sha256_digest(&commit_checksum_context, SHA256_DIGEST_SIZE, checksum); if (memcmp(checksum, commit->checksum, SHA256_DIGEST_SIZE) == 0) { free(checksumdata); return 0; } free(checksumdata); return -1; } int verify_index_commit(buffered_file *file, node_offset offset) { //fprintf(stderr, "verifying index file: commit verification\n"); offset -= INDEX_COMMIT_TRAILER_SIZE; unsigned char *data = bf_read(file, offset, INDEX_COMMIT_TRAILER_SIZE, NULL); if (memcmp(data + sizeof(uint64_t) + SHA256_DIGEST_SIZE, &index_commit_cookie, sizeof(index_commit_cookie)) != 0) { fprintf(stderr, "verifying index file: incorrect commit cookie\n"); free(data); return -1; } struct commit_info commit; commit.length = read_host64(data); commit.start = offset + sizeof(uint64_t) - commit.length; memcpy(commit.checksum, data + sizeof(uint64_t), SHA256_DIGEST_SIZE); free(data); return validate_checksum(&commit, file); } int indexfile_verify_file(buffered_file *file) { //fprintf(stderr, "verifying index file\n"); unsigned char *header = bf_read(file, 0, sizeof(index_file_cookie), NULL); if (memcmp(header, &index_file_cookie, sizeof(index_file_cookie)) != 0) { free(header); fprintf(stderr, "verifying index file: incorrect file cookie\n"); return -1; } free(header); if (verify_index_commit(file, bf_total_length(file)) < 0) { fprintf(stderr, "verifying index file: commit verification failed\n"); return -1; } return 0; } struct commit_info * read_data_commit_backward(buffered_file *file, node_offset *offset); int datafile_verify_file(buffered_file *file) { unsigned char *header = bf_read(file, 0, sizeof(data_file_cookie), NULL); if (header == NULL || memcmp(header, &data_file_cookie, sizeof(data_file_cookie)) != 0) { free(header); return -1; } free(header); node_offset offset = bf_lastcommit(file); //fprintf(stderr, "verifying commit: %llu\n", offset); struct commit_info *data_commit = read_data_commit_backward(file, &offset); if (data_commit == NULL || validate_checksum(data_commit, file) < 0) { //fprintf(stderr, "commit broken: %llu\n", offset); free(data_commit); return -1; } free(data_commit); return 0; } static unsigned char * readdatakeyandlen(permdb_object *state, node_offset offset, size_t *datalen); struct commit_info * read_data_commit(buffered_file *file, node_offset *offset) { unsigned char *data = bf_read(file, *offset, sizeof(uint32_t) + SHA256_DIGEST_SIZE + sizeof(data_commit_end_cookie), NULL); if (data == NULL || memcmp(data + sizeof(uint32_t) + SHA256_DIGEST_SIZE, data_commit_end_cookie, sizeof(data_commit_end_cookie)) != 0) { free(data); return NULL; } *offset += sizeof(uint32_t); struct commit_info *commit = malloc(sizeof(struct commit_info)); //fprintf(stderr, "read commit: %llu\n", *offset); //print_hex(data, sizeof(uint32_t) + SHA256_DIGEST_SIZE); commit->length = read_be32(data); commit->start = *offset - commit->length; memcpy(&commit->checksum, data + sizeof(uint32_t), SHA256_DIGEST_SIZE); *offset += SHA256_DIGEST_SIZE + sizeof(data_commit_end_cookie); free(data); return commit; } struct commit_info * read_data_commit_forward(buffered_file *file, node_offset *offset) { int padding = calc_padding(*offset, 4); *offset += sizeof(data_commit_start_cookie) + padding; return read_data_commit(file, offset); } struct commit_info * read_data_commit_backward(buffered_file *file, node_offset *offset) { *offset -= sizeof(uint32_t) + SHA256_DIGEST_SIZE + sizeof(data_commit_end_cookie); return read_data_commit(file, offset); } int addindex(permdb_object *state, const unsigned char *key, unsigned int keylength, node_offset dataoffset); int rebuild_index_file(permdb_object *state) { bf_truncate(state->indexfile); indexfile_add_header(state->indexfile); initial_node(state); node_offset offset = sizeof(data_file_cookie) + sizeof(uint32_t) * 3; while (1) { unsigned char *cookie = bf_read(state->datafile, offset, sizeof(data_entry_cookie), NULL); if (cookie == NULL) { break; } if (memcmp(&data_entry_cookie, cookie, sizeof(data_entry_cookie)) == 0) { size_t datalen; unsigned char *datakey = readdatakeyandlen(state, offset, &datalen); //fprintf(stderr, "entry %llu: %zu\n", offset, datalen); int result = addindex(state, datakey, keylen, offset); offset += sizeof(data_entry_cookie) + keylen + sizeof(uint32_t) + datalen; free(datakey); if (result != 1) { free(cookie); return -1; } } else if (memcmp(&data_commit_start_cookie, cookie, sizeof(data_commit_start_cookie)) == 0) { struct commit_info *data_commit = read_data_commit_forward(state->datafile, &offset); //fprintf(stderr, "verifying commit: %llu %p\n", offset, data_commit); if (data_commit == NULL || validate_checksum(data_commit, state->datafile) < 0) { free(data_commit); fprintf(stderr, "commit broken: %llu\n", (unsigned long long) offset); free(cookie); return -1; } free(data_commit); //fprintf(stderr, "commit %llu\n", offset); } else { //fprintf(stderr, "error %llu\n", offset); //print_hex(cookie, sizeof(data_entry_cookie)); free(cookie); return -1; } free(cookie); } fprintf(stderr, "index file rebuilt\n"); return committree(state); } permdb_object * permdb_alloc(const char *dbpath) { char *idxpath = NULL; if (asprintf(&idxpath, "%s.idx", dbpath) == -1) { return NULL; } permdb_object *state = malloc(sizeof(permdb_object)); state->datafile = bf_open(dbpath, O_RDWR|O_CREAT, "datafile"); state->indexfile = bf_open(idxpath, O_RDWR|O_CREAT, "indexfile"); free(idxpath); state->nodecache = NULL; state->dirtynodes = NULL; state->error = NULL; if (bf_total_length(state->datafile) == 0 && bf_total_length(state->indexfile) == 0) { #if DEBUG_WRITE fprintf(stderr, "writing header\n"); #endif indexfile_add_header(state->indexfile); datafile_add_header(state->datafile); initial_commit(state); } else if (bf_total_length(state->datafile) > 0 && bf_total_length(state->indexfile) == 0) { if (rebuild_index_file(state) < 0) { warnx("index file rebuilding failed"); permdb_free(state); return NULL; } } if (datafile_verify_file(state->datafile) < 0) { warnx("data file verification failed"); permdb_free(state); return NULL; } if (indexfile_verify_file(state->indexfile) < 0) { warnx("index file verification failed, rebuilding"); if (rebuild_index_file(state) < 0) { warnx("index file rebuilding failed"); permdb_free(state); return NULL; } } return state; } void permdb_free(permdb_object *state) { bf_close(state->datafile); bf_close(state->indexfile); delete_all_nodes_in_cache(state); delete_all_dirty_nodes(state); free(state); } static unsigned char keybits(const unsigned char *key, unsigned int level) { unsigned char b = key[level/4]; return (b >> (6-(level*2) % 8)) & 0x3; } static char * keypart(const unsigned char *key, unsigned int level) { unsigned char *result = malloc(level+1); unsigned int i; for (i = 0; i < level; i++) { unsigned char b = keybits(key, i); result[i] = b + (unsigned char) '0'; } result[level] = 0; return (char *)result; } static void addentry(node_object *node, unsigned int n, node_entry entry) { assert(n < entriespernode); assert(node->data[n] == 0); node->data[n] = entry; } static void overwriteentry(node_object *node, unsigned int n, node_entry entry) { assert(n < entriespernode); assert(node->data[n] != 0); node->data[n] = entry; } node_entry get_entry_in_node(node_object node, unsigned char n) { return node.data[n]; } static node_object unpacknode(permdb_object *state, const unsigned char *data, size_t datalen) { if (memcmp(&index_node_cookie, data, sizeof(index_node_cookie)) != 0) { print_hex(data, sizeof(index_node_cookie)); print_hex(&index_node_cookie, sizeof(index_node_cookie)); set_error(&state->error, "incorrect magic (node) %02x%02x\n", (unsigned char)data[0], (unsigned char)data[1]); return errornode; } if (datalen != sizeof(node_object) + sizeof(index_node_cookie)) { return errornode; } node_object node; memcpy(&node, data + sizeof(index_node_cookie), sizeof(node)); return node; } unsigned char * read_internal_data(permdb_object *state, node_offset offset, size_t length) { buffered_file *file = state->datafile; #if DEBUG_READ fprintf(stderr, "reading data: offset %llu\n", offset); #endif return bf_read(file, offset, length, &state->error); } static int iserrornode(node_object node) { return node.data[0] == NODE_ENTRY_ERROR_NODE && node.data[1] == NODE_ENTRY_ERROR_NODE && node.data[2] == NODE_ENTRY_ERROR_NODE && node.data[3] == NODE_ENTRY_ERROR_NODE; } node_object readnode(permdb_object *state, node_offset offset, const char *cachekey) { #if DEBUG_READ fprintf(stderr, "reading node: offset %llu cachekey '%s'\n", offset, cachekey ? cachekey : "none"); #endif if (cachekey) { node_object dirtynode = get_node_from_dirtynodes(state, cachekey); if (!iserrornode(dirtynode)) { #if DEBUG_READ fprintf(stderr, "reading node: found node in dirty nodes\n"); #endif return dirtynode; } if (offset == NODE_ENTRY_DIRTY_NODE) { set_error(&state->error, "referring to dirty node at key %s, but node not in dirtynodes\n", cachekey); #if DEBUG_READ fprintf(stderr, "reading node: referring to dirty node at key %s, but node not in dirtynodes\n", cachekey); #endif return errornode; } node_object cachednode = get_node_from_cache(state, cachekey); if (!iserrornode(cachednode)) { #if DEBUG_READ fprintf(stderr, "reading node: found node in cache\n"); #endif return cachednode; } } size_t length = sizeof(index_node_cookie) + sizeof(node_object); unsigned char *buffer = bf_read(state->indexfile, offset, length, &state->error); if (buffer == NULL) { return errornode; } node_object result = unpacknode(state, buffer, length); free(buffer); if (cachekey) { put_node_in_cache(state, cachekey, result); } #if DEBUG_READ fprintf(stderr, "reading node: success\n"); #endif return result; } static int isdata(node_entry entry) { return (entry & NODE_ENTRY_ISDATA) == NODE_ENTRY_ISDATA; } static node_offset entryoffset(node_entry entry) { return entry & NODE_ENTRY_OFFSET_MASK; } struct nodelist { node_object *nodes; unsigned int len; unsigned int pos; }; static void add_entry_to_nodelist(struct nodelist *list, node_object node) { list->nodes[list->pos] = node; list->pos++; } static void init_nodelist(struct nodelist *list) { list->len = keylen * 8 / bitsperlevel; list->nodes = malloc(sizeof(node_object) * list->len); list->pos = 0; } static void free_nodelist(struct nodelist *list) { free(list->nodes); } static char getpath(permdb_object *state, const unsigned char *key, struct nodelist *nodes) { unsigned int level = 0; #if 0 if (bf_lastcommit(state->indexfile) < (sizeof(index_node_cookie) + sizeof(node_object) + INDEX_COMMIT_TRAILER_SIZE)) { fprintf(stderr, "No commit exists (lastcommit %llu)\n", (long long unsigned int) bf_lastcommit(state->indexfile)); return -1; } #endif node_offset rootoffset = bf_lastcommit(state->indexfile) - (sizeof(index_node_cookie) + sizeof(node_object) + INDEX_COMMIT_TRAILER_SIZE); node_object node = readnode(state, rootoffset, ""); if (iserrornode(node)) { fprintf(stderr, "cannot find root node at offset %llu (lastcommit %llu)\n", (long long unsigned int) rootoffset, (long long unsigned int) bf_lastcommit(state->indexfile)); return -1; } #if DEBUG_READ fprintf(stderr, "getpath: got node\n"); #endif while (1) { unsigned char kb = keybits(key, level); node_entry entry = get_entry_in_node(node, kb); if (nodes->pos >= nodes->len) { fprintf(stderr, "tried to write after end of allocated list: level %d\n", level); return -1; } add_entry_to_nodelist(nodes, node); if (entry == 0 || isdata(entry)) { #if DEBUG_READ fprintf(stderr, "getpath: return node\n"); #endif return (char) kb; } level++; char *kp = keypart(key, level); node = readnode(state, entryoffset(entry), kp); if (iserrornode(node)) { free(kp); #if DEBUG_READ fprintf(stderr, "getpath: not found\n"); #endif return -1; } free(kp); } } static node_entry getpathlastnode(permdb_object *state, const unsigned char *key) { unsigned int level = 0; node_offset rootoffset = bf_lastcommit(state->indexfile) - (sizeof(index_node_cookie) + sizeof(node_object) + INDEX_COMMIT_TRAILER_SIZE); node_object node = readnode(state, rootoffset, ""); if (iserrornode(node)) { #if DEBUG_READ fprintf(stderr, "getpathlastnode: no node\n"); #endif return NODE_ENTRY_ERROR_NODE; } #if DEBUG_READ fprintf(stderr, "getpathlastnode: got node\n"); #endif unsigned char kb; while (1) { kb = keybits(key, level); node_entry entry = get_entry_in_node(node, kb); if (entry == 0 || isdata(entry)) { break; } level++; char *kp = keypart(key, level); node = readnode(state, entryoffset(entry), kp); free(kp); } node_entry entry = get_entry_in_node(node, kb); return entry; } static node_offset writenode(permdb_object *state, node_object node, const char *cachekey) { node_offset offset = bf_total_length(state->indexfile); put_node_in_cache(state, cachekey, node); #if DEBUG_WRITE fprintf(stderr, "writing node: offset %llu\n", offset); #endif bf_add(state->indexfile, &index_node_cookie, sizeof(index_node_cookie)); bf_add(state->indexfile, &node, sizeof(node_object)); return offset; } node_offset datasize(permdb_object *state) { return bf_total_length(state->indexfile); } static node_entry buildentry(int isdata, node_offset offset) { if (isdata) { return NODE_ENTRY_ISDATA | offset; } else { return offset; } } static void * memsub(void *src, size_t offset, size_t length) { void *result = malloc(length); memcpy(result, ((char *)src) + offset, length); return result; } static unsigned char * readdatakey(permdb_object *state, node_offset offset) { unsigned char *data = read_internal_data(state, offset, sizeof(data_entry_cookie) + keylen); if (data == NULL) { return NULL; } if (memcmp(&data_entry_cookie, data, sizeof(data_entry_cookie)) != 0) { free(data); set_error(&state->error, "incorrect magic (entry) %02x%02x\n", (unsigned char)data[0], (unsigned char)data[1]); return NULL; } unsigned char *result = memsub(data, sizeof(data_entry_cookie), keylen); free(data); return result; } static unsigned char * readdatakeyandlen(permdb_object *state, node_offset offset, size_t *datalen) { unsigned char *data = read_internal_data(state, offset, sizeof(data_entry_cookie) + keylen + 4); if (data == NULL) { return NULL; } if (memcmp(&data_entry_cookie, data, sizeof(data_entry_cookie)) != 0) { free(data); set_error(&state->error, "incorrect magic (entry) %02x%02x\n", (unsigned char)data[0], (unsigned char)data[1]); return NULL; } unsigned char *result = memsub(data, sizeof(data_entry_cookie), keylen); uint16_t nchunks = read_be16(data+sizeof(data_entry_cookie)+keylen); *datalen = read_be16(data+sizeof(data_entry_cookie)+keylen+sizeof(uint16_t)); if (nchunks != 1) { errx(1, "number of chunks is %d, but only one chunk is supported right now", nchunks); } free(data); return result; } static unsigned char * readdata(permdb_object *state, node_offset offset, size_t datalen) { return read_internal_data(state, offset + sizeof(data_entry_cookie) + keylen + 4, datalen); } static node_offset writedata(permdb_object *state, const unsigned char *key, const unsigned char *data, size_t datalength) { if (datalength > 65535) { errx(1, "data length is %zu, but only < 64K lengths are supported right now", datalength); } node_offset offset = bf_total_length(state->datafile); #if DEBUG_WRITE fprintf(stderr, "writing data: offset %llu\n", offset); #endif bf_add(state->datafile, &data_entry_cookie, sizeof(data_entry_cookie)); bf_add(state->datafile, key, keylen); bf_add_be16(state->datafile, 1); bf_add_be16(state->datafile, datalength); bf_add(state->datafile, data, datalength); return offset; } int addindex(permdb_object *state, const unsigned char *key, unsigned int keylength, node_offset dataoffset) { struct nodelist nodes; init_nodelist(&nodes); char kb = getpath(state, key, &nodes); if (kb == -1) { free_nodelist(&nodes); return -1; } unsigned int foundlevel = nodes.pos - 1; if (foundlevel >= nodes.len) { fprintf(stderr, "tried to read after end of allocated list\n"); free_nodelist(&nodes); return 0; } node_object lastnode = nodes.nodes[foundlevel]; if (get_entry_in_node(lastnode, (unsigned char) kb) == 0) { addentry(&lastnode, keybits(key, foundlevel), buildentry(1, dataoffset)); } else { node_offset olddataoffset = entryoffset(get_entry_in_node(lastnode, (unsigned char) kb)); unsigned char *olddatakey = readdatakey(state, olddataoffset); if (olddatakey == NULL) { free_nodelist(&nodes); return -1; } if (memcmp(olddatakey, key, keylen) == 0) { free_nodelist(&nodes); free(olddatakey); return 0; } unsigned int level = foundlevel + 1; while (keybits(key, level) == keybits(olddatakey, level)) { level++; } node_object leafnode = nullnode; addentry(&leafnode, keybits(key, level), buildentry(1, dataoffset)); addentry(&leafnode, keybits(olddatakey, level), buildentry(1, olddataoffset)); free(olddatakey); { char *cachekey = keypart(key, level); put_node_in_dirtynodes(state, cachekey, leafnode); free(cachekey); } level--; while (level > foundlevel) { node_object node = nullnode; addentry(&node, keybits(key, level), NODE_ENTRY_DIRTY_NODE); char *cachekey = keypart(key, level); put_node_in_dirtynodes(state, cachekey, node); free(cachekey); level--; } overwriteentry(&lastnode, keybits(key, foundlevel), NODE_ENTRY_DIRTY_NODE); } int level = (int) foundlevel; { char *cachekey = keypart(key, (unsigned int) level); put_node_in_dirtynodes(state, cachekey, lastnode); free(cachekey); } level--; while (level >= 0) { if (level >= (int) nodes.len) { fprintf(stderr, "tried to read after end of allocated list\n"); free_nodelist(&nodes); return 0; } node_object node = nodes.nodes[level]; overwriteentry(&node, keybits(key, (unsigned int) level), NODE_ENTRY_DIRTY_NODE); char *cachekey = keypart(key, (unsigned int) level); put_node_in_dirtynodes(state, cachekey, node); free(cachekey); level--; } free_nodelist(&nodes); return 1; } int addvalue(permdb_object *state, const unsigned char *key, unsigned int keylength, const unsigned char *data, size_t datalength) { struct nodelist nodes; init_nodelist(&nodes); char kb = getpath(state, key, &nodes); if (kb == -1) { free_nodelist(&nodes); return -1; } unsigned int foundlevel = nodes.pos - 1; if (foundlevel >= nodes.len) { fprintf(stderr, "tried to read after end of allocated list\n"); free_nodelist(&nodes); return 0; } node_object lastnode = nodes.nodes[foundlevel]; if (get_entry_in_node(lastnode, (unsigned char) kb) == 0) { node_offset dataoffset = writedata(state, key, data, datalength); addentry(&lastnode, keybits(key, foundlevel), buildentry(1, dataoffset)); } else { node_offset olddataoffset = entryoffset(get_entry_in_node(lastnode, (unsigned char) kb)); unsigned char *olddatakey = readdatakey(state, olddataoffset); if (olddatakey == NULL) { free_nodelist(&nodes); return -1; } if (memcmp(olddatakey, key, keylen) == 0) { free_nodelist(&nodes); free(olddatakey); return 0; } node_offset dataoffset = writedata(state, key, data, datalength); unsigned int level = foundlevel + 1; while (keybits(key, level) == keybits(olddatakey, level)) { level++; } node_object leafnode = nullnode; addentry(&leafnode, keybits(key, level), buildentry(1, dataoffset)); addentry(&leafnode, keybits(olddatakey, level), buildentry(1, olddataoffset)); free(olddatakey); { char *cachekey = keypart(key, level); put_node_in_dirtynodes(state, cachekey, leafnode); free(cachekey); } level--; while (level > foundlevel) { node_object node = nullnode; addentry(&node, keybits(key, level), NODE_ENTRY_DIRTY_NODE); char *cachekey = keypart(key, level); put_node_in_dirtynodes(state, cachekey, node); free(cachekey); level--; } overwriteentry(&lastnode, keybits(key, foundlevel), NODE_ENTRY_DIRTY_NODE); } int level = (int) foundlevel; { char *cachekey = keypart(key, (unsigned int) level); put_node_in_dirtynodes(state, cachekey, lastnode); free(cachekey); } level--; while (level >= 0) { if (level >= (int) nodes.len) { fprintf(stderr, "tried to read after end of allocated list\n"); free_nodelist(&nodes); return 0; } node_object node = nodes.nodes[level]; overwriteentry(&node, keybits(key, (unsigned int) level), NODE_ENTRY_DIRTY_NODE); char *cachekey = keypart(key, (unsigned int) level); put_node_in_dirtynodes(state, cachekey, node); free(cachekey); level--; } free_nodelist(&nodes); return 1; } unsigned char * getvalue(permdb_object *state, const unsigned char *key, size_t keylength, size_t *datalen) { node_entry entry = getpathlastnode(state, key); if (entry == 0) { #if DEBUG_READ fprintf(stderr, "getvalue: no node\n"); #endif return NULL; } #if DEBUG_READ fprintf(stderr, "getvalue: got node\n"); #endif node_offset olddataoffset = entryoffset(entry); unsigned char *datakey = readdatakeyandlen(state, olddataoffset, datalen); if (datakey == NULL) { return NULL; } if (memcmp(datakey, key, keylength) != 0) { free(datakey); return NULL; } free(datakey); return readdata(state, olddataoffset, *datalen); } struct keylist { char **keys; int pos; }; #if 0 static int add_entry_to_keylist(void *ptr, void *arg) { struct keylist *list = (struct keylist *) arg; struct nodecache *node = (struct nodecache *)ptr; list->keys[list->pos] = strdup(node->key); list->pos++; return 0; } #endif static int string_length_comparison(struct nodecache *a, struct nodecache *b) { size_t a_len = strlen(a->key); size_t b_len = strlen(b->key); if (b_len > a_len) { return 1; } else if (b_len == a_len) { return 0; } else { return -1; } } int committree(permdb_object *state) { get_node_from_dirtynodes(state, ""); if (state->dirtynodes == NULL) { return 0; } HASH_SORT(state->dirtynodes, string_length_comparison); struct nodecache *lastobject = ELMT_FROM_HH(state->dirtynodes->hh.tbl, state->dirtynodes->hh.tbl->tail); if (lastobject->key[0] != '\0') { fprintf(stderr, "sorted list doesn't end with root node\n"); return -1; } #if DEBUG_WRITE fprintf(stderr, "committing %d dirty nodes at offset %llu\n", ncommits, bf_total_length(state->indexfile)); #endif struct nodecache *node, *tmp; HASH_ITER(hh, state->dirtynodes, node, tmp) { assert(get_entry_in_node(node->value, 0) != NODE_ENTRY_DIRTY_NODE); assert(get_entry_in_node(node->value, 1) != NODE_ENTRY_DIRTY_NODE); assert(get_entry_in_node(node->value, 2) != NODE_ENTRY_DIRTY_NODE); assert(get_entry_in_node(node->value, 3) != NODE_ENTRY_DIRTY_NODE); node_offset offset = writenode(state, node->value, node->key); size_t keylength = strlen(node->key); if (keylength != 0) { char *parent = strdup(node->key); parent[keylength - 1] = '\0'; unsigned int entrynumber = (unsigned int) (node->key[keylength - 1] - '0'); node_object parentnode = get_node_from_dirtynodes(state, parent); overwriteentry(&parentnode, entrynumber, offset); put_node_in_dirtynodes(state, parent, parentnode); free(parent); } } #if DEBUG_WRITE fprintf(stderr, "writing data commit trailer at offset %llu\n", bf_total_length(state->datafile)); #endif int data_commit_padding_size = calc_padding(bf_total_length(state->datafile), 4); uint8_t padding[4] = {0, 0, 0, 0}; unsigned char data_commit_checksum[SHA256_DIGEST_SIZE]; bf_add(state->datafile, data_commit_start_cookie, 8); bf_add(state->datafile, padding, data_commit_padding_size); bf_add_be32(state->datafile, bf_total_length(state->datafile) - bf_lastcommit(state->datafile) + sizeof(uint32_t)); bf_sha256(state->datafile, data_commit_checksum); bf_add(state->datafile, data_commit_checksum, SHA256_DIGEST_SIZE); bf_add(state->datafile, &data_commit_end_cookie, sizeof(data_commit_end_cookie)); #if DEBUG_WRITE fprintf(stderr, "finished writing data commit trailer at offset %llu\n", bf_total_length(state->datafile)); #endif if (bf_flush(state->datafile) == -1) { set_error(&state->error, "data file flushing failed\n"); return -1; } #if DEBUG_WRITE fprintf(stderr, "writing index commit trailer at offset %llu\n", bf_total_length(state->indexfile)); #endif uint64_t index_commit_length = bf_total_length(state->indexfile) - bf_lastcommit(state->indexfile) + sizeof(uint64_t); unsigned char index_commit_checksum[SHA256_DIGEST_SIZE]; bf_add_host64(state->indexfile, index_commit_length); bf_sha256(state->indexfile, index_commit_checksum); bf_add(state->indexfile, index_commit_checksum, SHA256_DIGEST_SIZE); bf_add(state->indexfile, &index_commit_cookie, sizeof(index_commit_cookie)); #if DEBUG_WRITE fprintf(stderr, "finished writing index commit trailer at offset %llu\n", bf_total_length(state->indexfile)); #endif if (bf_flush(state->indexfile) == -1) { set_error(&state->error, "index file flushing failed\n"); return -1; } delete_all_dirty_nodes(state); return 0; } void portloop(permdb_object *state) { unsigned char buf[65536]; ssize_t len; while ((len = read_command(buf, sizeof(buf)-1, 4)) > 0) { if (buf[0] == 0) { #if DEBUG_PORT fprintf(stderr, "get\n"); #endif if (len != keylen+1) { write_reply(NULL, 0, 4); continue; } assert(len == keylen+1); size_t datalen; unsigned char *result = getvalue(state, (buf+1), keylen, &datalen); if (result == NULL) { write_reply(NULL, 0, 4); } else { write_reply(result, datalen, 4); } #if DEBUG_PORT fprintf(stderr, "get reply\n"); #endif free(result); } else if (buf[0] == 1) { #if DEBUG_PORT fprintf(stderr, "add\n"); #endif if (len < (keylen + 1)) { write_reply(NULL, 0, 4); fprintf(stderr, "invalid addvalue command, length was %zd\n", len); continue; } size_t datalen = (size_t) (len - keylen - 1); unsigned char *key = buf + 1; unsigned char *data = key + keylen; int result = addvalue(state, key, keylen, data, datalen); if (result < 0) { write_reply(NULL, 0, 4); } else { unsigned char result_byte = (unsigned char) result; write_reply(&result_byte, 1, 4); } #if DEBUG_PORT fprintf(stderr, "add reply\n"); #endif } else if (buf[0] == 2) { #if DEBUG_PORT fprintf(stderr, "commit\n"); #endif if (len != 1) { write_reply(NULL, 0, 4); fprintf(stderr, "invalid commit command, length was %zd\n", len); continue; } int result = committree(state); if (result < 0) { write_reply(NULL, 0, 4); continue; } else { unsigned char result_byte = (unsigned char) result; write_reply(&result_byte, 1, 4); } #if DEBUG_PORT fprintf(stderr, "commit reply\n"); #endif } else { #if DEBUG_PORT fprintf(stderr, "unknown command\n"); #endif write_reply(NULL, 0, 4); } } if (len == -1) { fprintf(stderr, "read command failed\n"); } permdb_free(state); }