1812 lines
44 KiB
C
1812 lines
44 KiB
C
/*
|
|
Copyright 2020 Google LLC
|
|
|
|
Use of this source code is governed by a BSD-style
|
|
license that can be found in the LICENSE file or at
|
|
https://developers.google.com/open-source/licenses/bsd
|
|
*/
|
|
|
|
#include "stack.h"
|
|
|
|
#include "system.h"
|
|
#include "constants.h"
|
|
#include "merged.h"
|
|
#include "reader.h"
|
|
#include "reftable-error.h"
|
|
#include "reftable-record.h"
|
|
#include "reftable-merged.h"
|
|
#include "writer.h"
|
|
|
|
static int stack_try_add(struct reftable_stack *st,
|
|
int (*write_table)(struct reftable_writer *wr,
|
|
void *arg),
|
|
void *arg);
|
|
static int stack_write_compact(struct reftable_stack *st,
|
|
struct reftable_writer *wr,
|
|
size_t first, size_t last,
|
|
struct reftable_log_expiry_config *config);
|
|
static void reftable_addition_close(struct reftable_addition *add);
|
|
static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
|
|
int reuse_open);
|
|
|
|
static int stack_filename(struct reftable_buf *dest, struct reftable_stack *st,
|
|
const char *name)
|
|
{
|
|
int err;
|
|
reftable_buf_reset(dest);
|
|
if ((err = reftable_buf_addstr(dest, st->reftable_dir)) < 0 ||
|
|
(err = reftable_buf_addstr(dest, "/")) < 0 ||
|
|
(err = reftable_buf_addstr(dest, name)) < 0)
|
|
return err;
|
|
return 0;
|
|
}
|
|
|
|
static int stack_fsync(const struct reftable_write_options *opts, int fd)
|
|
{
|
|
if (opts->fsync)
|
|
return opts->fsync(fd);
|
|
return fsync(fd);
|
|
}
|
|
|
|
struct fd_writer {
|
|
const struct reftable_write_options *opts;
|
|
int fd;
|
|
};
|
|
|
|
static ssize_t fd_writer_write(void *arg, const void *data, size_t sz)
|
|
{
|
|
struct fd_writer *writer = arg;
|
|
return write_in_full(writer->fd, data, sz);
|
|
}
|
|
|
|
static int fd_writer_flush(void *arg)
|
|
{
|
|
struct fd_writer *writer = arg;
|
|
return stack_fsync(writer->opts, writer->fd);
|
|
}
|
|
|
|
int reftable_new_stack(struct reftable_stack **dest, const char *dir,
|
|
const struct reftable_write_options *_opts)
|
|
{
|
|
struct reftable_buf list_file_name = REFTABLE_BUF_INIT;
|
|
struct reftable_write_options opts = { 0 };
|
|
struct reftable_stack *p;
|
|
int err;
|
|
|
|
p = reftable_calloc(1, sizeof(*p));
|
|
if (!p) {
|
|
err = REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
goto out;
|
|
}
|
|
|
|
if (_opts)
|
|
opts = *_opts;
|
|
if (opts.hash_id == 0)
|
|
opts.hash_id = REFTABLE_HASH_SHA1;
|
|
|
|
*dest = NULL;
|
|
|
|
reftable_buf_reset(&list_file_name);
|
|
if ((err = reftable_buf_addstr(&list_file_name, dir)) < 0 ||
|
|
(err = reftable_buf_addstr(&list_file_name, "/tables.list")) < 0)
|
|
goto out;
|
|
|
|
p->list_file = reftable_buf_detach(&list_file_name);
|
|
p->list_fd = -1;
|
|
p->opts = opts;
|
|
p->reftable_dir = reftable_strdup(dir);
|
|
if (!p->reftable_dir) {
|
|
err = REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
goto out;
|
|
}
|
|
|
|
err = reftable_stack_reload_maybe_reuse(p, 1);
|
|
if (err < 0)
|
|
goto out;
|
|
|
|
*dest = p;
|
|
err = 0;
|
|
|
|
out:
|
|
if (err < 0)
|
|
reftable_stack_destroy(p);
|
|
return err;
|
|
}
|
|
|
|
static int fd_read_lines(int fd, char ***namesp)
|
|
{
|
|
off_t size = lseek(fd, 0, SEEK_END);
|
|
char *buf = NULL;
|
|
int err = 0;
|
|
if (size < 0) {
|
|
err = REFTABLE_IO_ERROR;
|
|
goto done;
|
|
}
|
|
err = lseek(fd, 0, SEEK_SET);
|
|
if (err < 0) {
|
|
err = REFTABLE_IO_ERROR;
|
|
goto done;
|
|
}
|
|
|
|
REFTABLE_ALLOC_ARRAY(buf, size + 1);
|
|
if (!buf) {
|
|
err = REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
goto done;
|
|
}
|
|
|
|
if (read_in_full(fd, buf, size) != size) {
|
|
err = REFTABLE_IO_ERROR;
|
|
goto done;
|
|
}
|
|
buf[size] = 0;
|
|
|
|
*namesp = parse_names(buf, size);
|
|
if (!*namesp) {
|
|
err = REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
goto done;
|
|
}
|
|
|
|
done:
|
|
reftable_free(buf);
|
|
return err;
|
|
}
|
|
|
|
int read_lines(const char *filename, char ***namesp)
|
|
{
|
|
int fd = open(filename, O_RDONLY);
|
|
int err = 0;
|
|
if (fd < 0) {
|
|
if (errno == ENOENT) {
|
|
REFTABLE_CALLOC_ARRAY(*namesp, 1);
|
|
if (!*namesp)
|
|
return REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
return 0;
|
|
}
|
|
|
|
return REFTABLE_IO_ERROR;
|
|
}
|
|
err = fd_read_lines(fd, namesp);
|
|
close(fd);
|
|
return err;
|
|
}
|
|
|
|
int reftable_stack_init_ref_iterator(struct reftable_stack *st,
|
|
struct reftable_iterator *it)
|
|
{
|
|
return merged_table_init_iter(reftable_stack_merged_table(st),
|
|
it, BLOCK_TYPE_REF);
|
|
}
|
|
|
|
int reftable_stack_init_log_iterator(struct reftable_stack *st,
|
|
struct reftable_iterator *it)
|
|
{
|
|
return merged_table_init_iter(reftable_stack_merged_table(st),
|
|
it, BLOCK_TYPE_LOG);
|
|
}
|
|
|
|
struct reftable_merged_table *
|
|
reftable_stack_merged_table(struct reftable_stack *st)
|
|
{
|
|
return st->merged;
|
|
}
|
|
|
|
static int has_name(char **names, const char *name)
|
|
{
|
|
while (*names) {
|
|
if (!strcmp(*names, name))
|
|
return 1;
|
|
names++;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* Close and free the stack */
|
|
void reftable_stack_destroy(struct reftable_stack *st)
|
|
{
|
|
char **names = NULL;
|
|
int err = 0;
|
|
|
|
if (!st)
|
|
return;
|
|
|
|
if (st->merged) {
|
|
reftable_merged_table_free(st->merged);
|
|
st->merged = NULL;
|
|
}
|
|
|
|
err = read_lines(st->list_file, &names);
|
|
if (err < 0) {
|
|
REFTABLE_FREE_AND_NULL(names);
|
|
}
|
|
|
|
if (st->readers) {
|
|
int i = 0;
|
|
struct reftable_buf filename = REFTABLE_BUF_INIT;
|
|
for (i = 0; i < st->readers_len; i++) {
|
|
const char *name = reader_name(st->readers[i]);
|
|
int try_unlinking = 1;
|
|
|
|
reftable_buf_reset(&filename);
|
|
if (names && !has_name(names, name)) {
|
|
if (stack_filename(&filename, st, name) < 0)
|
|
try_unlinking = 0;
|
|
}
|
|
reftable_reader_decref(st->readers[i]);
|
|
|
|
if (try_unlinking && filename.len) {
|
|
/* On Windows, can only unlink after closing. */
|
|
unlink(filename.buf);
|
|
}
|
|
}
|
|
reftable_buf_release(&filename);
|
|
st->readers_len = 0;
|
|
REFTABLE_FREE_AND_NULL(st->readers);
|
|
}
|
|
|
|
if (st->list_fd >= 0) {
|
|
close(st->list_fd);
|
|
st->list_fd = -1;
|
|
}
|
|
|
|
REFTABLE_FREE_AND_NULL(st->list_file);
|
|
REFTABLE_FREE_AND_NULL(st->reftable_dir);
|
|
reftable_free(st);
|
|
free_names(names);
|
|
}
|
|
|
|
static struct reftable_reader **stack_copy_readers(struct reftable_stack *st,
|
|
size_t cur_len)
|
|
{
|
|
struct reftable_reader **cur = reftable_calloc(cur_len, sizeof(*cur));
|
|
if (!cur)
|
|
return NULL;
|
|
for (size_t i = 0; i < cur_len; i++)
|
|
cur[i] = st->readers[i];
|
|
return cur;
|
|
}
|
|
|
|
static int reftable_stack_reload_once(struct reftable_stack *st,
|
|
const char **names,
|
|
int reuse_open)
|
|
{
|
|
size_t cur_len = !st->merged ? 0 : st->merged->readers_len;
|
|
struct reftable_reader **cur = NULL;
|
|
struct reftable_reader **reused = NULL;
|
|
struct reftable_reader **new_readers = NULL;
|
|
size_t reused_len = 0, reused_alloc = 0, names_len;
|
|
size_t new_readers_len = 0;
|
|
struct reftable_merged_table *new_merged = NULL;
|
|
struct reftable_buf table_path = REFTABLE_BUF_INIT;
|
|
int err = 0;
|
|
size_t i;
|
|
|
|
if (cur_len) {
|
|
cur = stack_copy_readers(st, cur_len);
|
|
if (!cur) {
|
|
err = REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
names_len = names_length(names);
|
|
|
|
if (names_len) {
|
|
new_readers = reftable_calloc(names_len, sizeof(*new_readers));
|
|
if (!new_readers) {
|
|
err = REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
while (*names) {
|
|
struct reftable_reader *rd = NULL;
|
|
const char *name = *names++;
|
|
|
|
/* this is linear; we assume compaction keeps the number of
|
|
tables under control so this is not quadratic. */
|
|
for (i = 0; reuse_open && i < cur_len; i++) {
|
|
if (cur[i] && 0 == strcmp(cur[i]->name, name)) {
|
|
rd = cur[i];
|
|
cur[i] = NULL;
|
|
|
|
/*
|
|
* When reloading the stack fails, we end up
|
|
* releasing all new readers. This also
|
|
* includes the reused readers, even though
|
|
* they are still in used by the old stack. We
|
|
* thus need to keep them alive here, which we
|
|
* do by bumping their refcount.
|
|
*/
|
|
REFTABLE_ALLOC_GROW_OR_NULL(reused,
|
|
reused_len + 1,
|
|
reused_alloc);
|
|
if (!reused) {
|
|
err = REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
goto done;
|
|
}
|
|
reused[reused_len++] = rd;
|
|
reftable_reader_incref(rd);
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!rd) {
|
|
struct reftable_block_source src = { NULL };
|
|
|
|
err = stack_filename(&table_path, st, name);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = reftable_block_source_from_file(&src,
|
|
table_path.buf);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = reftable_reader_new(&rd, &src, name);
|
|
if (err < 0)
|
|
goto done;
|
|
}
|
|
|
|
new_readers[new_readers_len] = rd;
|
|
new_readers_len++;
|
|
}
|
|
|
|
/* success! */
|
|
err = reftable_merged_table_new(&new_merged, new_readers,
|
|
new_readers_len, st->opts.hash_id);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
/*
|
|
* Close the old, non-reused readers and proactively try to unlink
|
|
* them. This is done for systems like Windows, where the underlying
|
|
* file of such an open reader wouldn't have been possible to be
|
|
* unlinked by the compacting process.
|
|
*/
|
|
for (i = 0; i < cur_len; i++) {
|
|
if (cur[i]) {
|
|
const char *name = reader_name(cur[i]);
|
|
|
|
err = stack_filename(&table_path, st, name);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
reftable_reader_decref(cur[i]);
|
|
unlink(table_path.buf);
|
|
}
|
|
}
|
|
|
|
/* Update the stack to point to the new tables. */
|
|
if (st->merged)
|
|
reftable_merged_table_free(st->merged);
|
|
new_merged->suppress_deletions = 1;
|
|
st->merged = new_merged;
|
|
|
|
if (st->readers)
|
|
reftable_free(st->readers);
|
|
st->readers = new_readers;
|
|
st->readers_len = new_readers_len;
|
|
new_readers = NULL;
|
|
new_readers_len = 0;
|
|
|
|
/*
|
|
* Decrement the refcount of reused readers again. This only needs to
|
|
* happen on the successful case, because on the unsuccessful one we
|
|
* decrement their refcount via `new_readers`.
|
|
*/
|
|
for (i = 0; i < reused_len; i++)
|
|
reftable_reader_decref(reused[i]);
|
|
|
|
done:
|
|
for (i = 0; i < new_readers_len; i++)
|
|
reftable_reader_decref(new_readers[i]);
|
|
reftable_free(new_readers);
|
|
reftable_free(reused);
|
|
reftable_free(cur);
|
|
reftable_buf_release(&table_path);
|
|
return err;
|
|
}
|
|
|
|
/* return negative if a before b. */
|
|
static int tv_cmp(struct timeval *a, struct timeval *b)
|
|
{
|
|
time_t diff = a->tv_sec - b->tv_sec;
|
|
int udiff = a->tv_usec - b->tv_usec;
|
|
|
|
if (diff != 0)
|
|
return diff;
|
|
|
|
return udiff;
|
|
}
|
|
|
|
static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
|
|
int reuse_open)
|
|
{
|
|
char **names = NULL, **names_after = NULL;
|
|
struct timeval deadline;
|
|
int64_t delay = 0;
|
|
int tries = 0, err;
|
|
int fd = -1;
|
|
|
|
err = gettimeofday(&deadline, NULL);
|
|
if (err < 0)
|
|
goto out;
|
|
deadline.tv_sec += 3;
|
|
|
|
while (1) {
|
|
struct timeval now;
|
|
|
|
err = gettimeofday(&now, NULL);
|
|
if (err < 0)
|
|
goto out;
|
|
|
|
/*
|
|
* Only look at deadlines after the first few times. This
|
|
* simplifies debugging in GDB.
|
|
*/
|
|
tries++;
|
|
if (tries > 3 && tv_cmp(&now, &deadline) >= 0)
|
|
goto out;
|
|
|
|
fd = open(st->list_file, O_RDONLY);
|
|
if (fd < 0) {
|
|
if (errno != ENOENT) {
|
|
err = REFTABLE_IO_ERROR;
|
|
goto out;
|
|
}
|
|
|
|
REFTABLE_CALLOC_ARRAY(names, 1);
|
|
if (!names) {
|
|
err = REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
goto out;
|
|
}
|
|
} else {
|
|
err = fd_read_lines(fd, &names);
|
|
if (err < 0)
|
|
goto out;
|
|
}
|
|
|
|
err = reftable_stack_reload_once(st, (const char **) names, reuse_open);
|
|
if (!err)
|
|
break;
|
|
if (err != REFTABLE_NOT_EXIST_ERROR)
|
|
goto out;
|
|
|
|
/*
|
|
* REFTABLE_NOT_EXIST_ERROR can be caused by a concurrent
|
|
* writer. Check if there was one by checking if the name list
|
|
* changed.
|
|
*/
|
|
err = read_lines(st->list_file, &names_after);
|
|
if (err < 0)
|
|
goto out;
|
|
if (names_equal((const char **) names_after,
|
|
(const char **) names)) {
|
|
err = REFTABLE_NOT_EXIST_ERROR;
|
|
goto out;
|
|
}
|
|
|
|
free_names(names);
|
|
names = NULL;
|
|
free_names(names_after);
|
|
names_after = NULL;
|
|
close(fd);
|
|
fd = -1;
|
|
|
|
delay = delay + (delay * git_rand(CSPRNG_BYTES_INSECURE)) / UINT32_MAX + 1;
|
|
sleep_millisec(delay);
|
|
}
|
|
|
|
out:
|
|
/*
|
|
* Invalidate the stat cache. It is sufficient to only close the file
|
|
* descriptor and keep the cached stat info because we never use the
|
|
* latter when the former is negative.
|
|
*/
|
|
if (st->list_fd >= 0) {
|
|
close(st->list_fd);
|
|
st->list_fd = -1;
|
|
}
|
|
|
|
/*
|
|
* Cache stat information in case it provides a useful signal to us.
|
|
* According to POSIX, "The st_ino and st_dev fields taken together
|
|
* uniquely identify the file within the system." That being said,
|
|
* Windows is not POSIX compliant and we do not have these fields
|
|
* available. So the information we have there is insufficient to
|
|
* determine whether two file descriptors point to the same file.
|
|
*
|
|
* While we could fall back to using other signals like the file's
|
|
* mtime, those are not sufficient to avoid races. We thus refrain from
|
|
* using the stat cache on such systems and fall back to the secondary
|
|
* caching mechanism, which is to check whether contents of the file
|
|
* have changed.
|
|
*
|
|
* On other systems which are POSIX compliant we must keep the file
|
|
* descriptor open. This is to avoid a race condition where two
|
|
* processes access the reftable stack at the same point in time:
|
|
*
|
|
* 1. A reads the reftable stack and caches its stat info.
|
|
*
|
|
* 2. B updates the stack, appending a new table to "tables.list".
|
|
* This will both use a new inode and result in a different file
|
|
* size, thus invalidating A's cache in theory.
|
|
*
|
|
* 3. B decides to auto-compact the stack and merges two tables. The
|
|
* file size now matches what A has cached again. Furthermore, the
|
|
* filesystem may decide to recycle the inode number of the file
|
|
* we have replaced in (2) because it is not in use anymore.
|
|
*
|
|
* 4. A reloads the reftable stack. Neither the inode number nor the
|
|
* file size changed. If the timestamps did not change either then
|
|
* we think the cached copy of our stack is up-to-date.
|
|
*
|
|
* By keeping the file descriptor open the inode number cannot be
|
|
* recycled, mitigating the race.
|
|
*/
|
|
if (!err && fd >= 0 && !fstat(fd, &st->list_st) &&
|
|
st->list_st.st_dev && st->list_st.st_ino) {
|
|
st->list_fd = fd;
|
|
fd = -1;
|
|
}
|
|
|
|
if (fd >= 0)
|
|
close(fd);
|
|
free_names(names);
|
|
free_names(names_after);
|
|
|
|
if (st->opts.on_reload)
|
|
st->opts.on_reload(st->opts.on_reload_payload);
|
|
|
|
return err;
|
|
}
|
|
|
|
/* -1 = error
|
|
0 = up to date
|
|
1 = changed. */
|
|
static int stack_uptodate(struct reftable_stack *st)
|
|
{
|
|
char **names = NULL;
|
|
int err;
|
|
int i = 0;
|
|
|
|
/*
|
|
* When we have cached stat information available then we use it to
|
|
* verify whether the file has been rewritten.
|
|
*
|
|
* Note that we explicitly do not want to use `stat_validity_check()`
|
|
* and friends here because they may end up not comparing the `st_dev`
|
|
* and `st_ino` fields. These functions thus cannot guarantee that we
|
|
* indeed still have the same file.
|
|
*/
|
|
if (st->list_fd >= 0) {
|
|
struct stat list_st;
|
|
|
|
if (stat(st->list_file, &list_st) < 0) {
|
|
/*
|
|
* It's fine for "tables.list" to not exist. In that
|
|
* case, we have to refresh when the loaded stack has
|
|
* any readers.
|
|
*/
|
|
if (errno == ENOENT)
|
|
return !!st->readers_len;
|
|
return REFTABLE_IO_ERROR;
|
|
}
|
|
|
|
/*
|
|
* When "tables.list" refers to the same file we can assume
|
|
* that it didn't change. This is because we always use
|
|
* rename(3P) to update the file and never write to it
|
|
* directly.
|
|
*/
|
|
if (st->list_st.st_dev == list_st.st_dev &&
|
|
st->list_st.st_ino == list_st.st_ino)
|
|
return 0;
|
|
}
|
|
|
|
err = read_lines(st->list_file, &names);
|
|
if (err < 0)
|
|
return err;
|
|
|
|
for (i = 0; i < st->readers_len; i++) {
|
|
if (!names[i]) {
|
|
err = 1;
|
|
goto done;
|
|
}
|
|
|
|
if (strcmp(st->readers[i]->name, names[i])) {
|
|
err = 1;
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
if (names[st->merged->readers_len]) {
|
|
err = 1;
|
|
goto done;
|
|
}
|
|
|
|
done:
|
|
free_names(names);
|
|
return err;
|
|
}
|
|
|
|
int reftable_stack_reload(struct reftable_stack *st)
|
|
{
|
|
int err = stack_uptodate(st);
|
|
if (err > 0)
|
|
return reftable_stack_reload_maybe_reuse(st, 1);
|
|
return err;
|
|
}
|
|
|
|
int reftable_stack_add(struct reftable_stack *st,
|
|
int (*write)(struct reftable_writer *wr, void *arg),
|
|
void *arg)
|
|
{
|
|
int err = stack_try_add(st, write, arg);
|
|
if (err < 0) {
|
|
if (err == REFTABLE_OUTDATED_ERROR) {
|
|
/* Ignore error return, we want to propagate
|
|
REFTABLE_OUTDATED_ERROR.
|
|
*/
|
|
reftable_stack_reload(st);
|
|
}
|
|
return err;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int format_name(struct reftable_buf *dest, uint64_t min, uint64_t max)
|
|
{
|
|
char buf[100];
|
|
uint32_t rnd = git_rand(CSPRNG_BYTES_INSECURE);
|
|
snprintf(buf, sizeof(buf), "0x%012" PRIx64 "-0x%012" PRIx64 "-%08x",
|
|
min, max, rnd);
|
|
reftable_buf_reset(dest);
|
|
return reftable_buf_addstr(dest, buf);
|
|
}
|
|
|
|
struct reftable_addition {
|
|
struct reftable_flock tables_list_lock;
|
|
struct reftable_stack *stack;
|
|
|
|
char **new_tables;
|
|
size_t new_tables_len, new_tables_cap;
|
|
uint64_t next_update_index;
|
|
};
|
|
|
|
#define REFTABLE_ADDITION_INIT {0}
|
|
|
|
static int reftable_stack_init_addition(struct reftable_addition *add,
|
|
struct reftable_stack *st,
|
|
unsigned int flags)
|
|
{
|
|
struct reftable_buf lock_file_name = REFTABLE_BUF_INIT;
|
|
int err;
|
|
|
|
add->stack = st;
|
|
|
|
err = flock_acquire(&add->tables_list_lock, st->list_file,
|
|
st->opts.lock_timeout_ms);
|
|
if (err < 0) {
|
|
if (errno == EEXIST) {
|
|
err = REFTABLE_LOCK_ERROR;
|
|
} else {
|
|
err = REFTABLE_IO_ERROR;
|
|
}
|
|
goto done;
|
|
}
|
|
if (st->opts.default_permissions) {
|
|
if (chmod(add->tables_list_lock.path,
|
|
st->opts.default_permissions) < 0) {
|
|
err = REFTABLE_IO_ERROR;
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
err = stack_uptodate(st);
|
|
if (err < 0)
|
|
goto done;
|
|
if (err > 0 && flags & REFTABLE_STACK_NEW_ADDITION_RELOAD) {
|
|
err = reftable_stack_reload_maybe_reuse(add->stack, 1);
|
|
if (err)
|
|
goto done;
|
|
}
|
|
if (err > 0) {
|
|
err = REFTABLE_OUTDATED_ERROR;
|
|
goto done;
|
|
}
|
|
|
|
add->next_update_index = reftable_stack_next_update_index(st);
|
|
done:
|
|
if (err)
|
|
reftable_addition_close(add);
|
|
reftable_buf_release(&lock_file_name);
|
|
return err;
|
|
}
|
|
|
|
static void reftable_addition_close(struct reftable_addition *add)
|
|
{
|
|
struct reftable_buf nm = REFTABLE_BUF_INIT;
|
|
size_t i;
|
|
|
|
for (i = 0; i < add->new_tables_len; i++) {
|
|
if (!stack_filename(&nm, add->stack, add->new_tables[i]))
|
|
unlink(nm.buf);
|
|
reftable_free(add->new_tables[i]);
|
|
add->new_tables[i] = NULL;
|
|
}
|
|
reftable_free(add->new_tables);
|
|
add->new_tables = NULL;
|
|
add->new_tables_len = 0;
|
|
add->new_tables_cap = 0;
|
|
|
|
flock_release(&add->tables_list_lock);
|
|
reftable_buf_release(&nm);
|
|
}
|
|
|
|
void reftable_addition_destroy(struct reftable_addition *add)
|
|
{
|
|
if (!add) {
|
|
return;
|
|
}
|
|
reftable_addition_close(add);
|
|
reftable_free(add);
|
|
}
|
|
|
|
int reftable_addition_commit(struct reftable_addition *add)
|
|
{
|
|
struct reftable_buf table_list = REFTABLE_BUF_INIT;
|
|
int err = 0;
|
|
size_t i;
|
|
|
|
if (add->new_tables_len == 0)
|
|
goto done;
|
|
|
|
for (i = 0; i < add->stack->merged->readers_len; i++) {
|
|
if ((err = reftable_buf_addstr(&table_list, add->stack->readers[i]->name)) < 0 ||
|
|
(err = reftable_buf_addstr(&table_list, "\n")) < 0)
|
|
goto done;
|
|
}
|
|
for (i = 0; i < add->new_tables_len; i++) {
|
|
if ((err = reftable_buf_addstr(&table_list, add->new_tables[i])) < 0 ||
|
|
(err = reftable_buf_addstr(&table_list, "\n")) < 0)
|
|
goto done;
|
|
}
|
|
|
|
err = write_in_full(add->tables_list_lock.fd, table_list.buf, table_list.len);
|
|
reftable_buf_release(&table_list);
|
|
if (err < 0) {
|
|
err = REFTABLE_IO_ERROR;
|
|
goto done;
|
|
}
|
|
|
|
err = stack_fsync(&add->stack->opts, add->tables_list_lock.fd);
|
|
if (err < 0) {
|
|
err = REFTABLE_IO_ERROR;
|
|
goto done;
|
|
}
|
|
|
|
err = flock_commit(&add->tables_list_lock);
|
|
if (err < 0) {
|
|
err = REFTABLE_IO_ERROR;
|
|
goto done;
|
|
}
|
|
|
|
/* success, no more state to clean up. */
|
|
for (i = 0; i < add->new_tables_len; i++)
|
|
reftable_free(add->new_tables[i]);
|
|
reftable_free(add->new_tables);
|
|
add->new_tables = NULL;
|
|
add->new_tables_len = 0;
|
|
add->new_tables_cap = 0;
|
|
|
|
err = reftable_stack_reload_maybe_reuse(add->stack, 1);
|
|
if (err)
|
|
goto done;
|
|
|
|
if (!add->stack->opts.disable_auto_compact) {
|
|
/*
|
|
* Auto-compact the stack to keep the number of tables in
|
|
* control. It is possible that a concurrent writer is already
|
|
* trying to compact parts of the stack, which would lead to a
|
|
* `REFTABLE_LOCK_ERROR` because parts of the stack are locked
|
|
* already. This is a benign error though, so we ignore it.
|
|
*/
|
|
err = reftable_stack_auto_compact(add->stack);
|
|
if (err < 0 && err != REFTABLE_LOCK_ERROR)
|
|
goto done;
|
|
err = 0;
|
|
}
|
|
|
|
done:
|
|
reftable_addition_close(add);
|
|
return err;
|
|
}
|
|
|
|
int reftable_stack_new_addition(struct reftable_addition **dest,
|
|
struct reftable_stack *st,
|
|
unsigned int flags)
|
|
{
|
|
int err = 0;
|
|
struct reftable_addition empty = REFTABLE_ADDITION_INIT;
|
|
|
|
REFTABLE_CALLOC_ARRAY(*dest, 1);
|
|
if (!*dest)
|
|
return REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
|
|
**dest = empty;
|
|
err = reftable_stack_init_addition(*dest, st, flags);
|
|
if (err) {
|
|
reftable_free(*dest);
|
|
*dest = NULL;
|
|
}
|
|
return err;
|
|
}
|
|
|
|
static int stack_try_add(struct reftable_stack *st,
|
|
int (*write_table)(struct reftable_writer *wr,
|
|
void *arg),
|
|
void *arg)
|
|
{
|
|
struct reftable_addition add = REFTABLE_ADDITION_INIT;
|
|
int err = reftable_stack_init_addition(&add, st, 0);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = reftable_addition_add(&add, write_table, arg);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = reftable_addition_commit(&add);
|
|
done:
|
|
reftable_addition_close(&add);
|
|
return err;
|
|
}
|
|
|
|
int reftable_addition_add(struct reftable_addition *add,
|
|
int (*write_table)(struct reftable_writer *wr,
|
|
void *arg),
|
|
void *arg)
|
|
{
|
|
struct reftable_buf temp_tab_file_name = REFTABLE_BUF_INIT;
|
|
struct reftable_buf tab_file_name = REFTABLE_BUF_INIT;
|
|
struct reftable_buf next_name = REFTABLE_BUF_INIT;
|
|
struct reftable_writer *wr = NULL;
|
|
struct reftable_tmpfile tab_file = REFTABLE_TMPFILE_INIT;
|
|
struct fd_writer writer = {
|
|
.opts = &add->stack->opts,
|
|
};
|
|
int err = 0;
|
|
|
|
reftable_buf_reset(&next_name);
|
|
|
|
err = format_name(&next_name, add->next_update_index, add->next_update_index);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = stack_filename(&temp_tab_file_name, add->stack, next_name.buf);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = reftable_buf_addstr(&temp_tab_file_name, ".temp.XXXXXX");
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = tmpfile_from_pattern(&tab_file, temp_tab_file_name.buf);
|
|
if (err < 0)
|
|
goto done;
|
|
if (add->stack->opts.default_permissions) {
|
|
if (chmod(tab_file.path,
|
|
add->stack->opts.default_permissions)) {
|
|
err = REFTABLE_IO_ERROR;
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
writer.fd = tab_file.fd;
|
|
err = reftable_writer_new(&wr, fd_writer_write, fd_writer_flush,
|
|
&writer, &add->stack->opts);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = write_table(wr, arg);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = reftable_writer_close(wr);
|
|
if (err == REFTABLE_EMPTY_TABLE_ERROR) {
|
|
err = 0;
|
|
goto done;
|
|
}
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = tmpfile_close(&tab_file);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
if (wr->min_update_index < add->next_update_index) {
|
|
err = REFTABLE_API_ERROR;
|
|
goto done;
|
|
}
|
|
|
|
err = format_name(&next_name, wr->min_update_index, wr->max_update_index);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = reftable_buf_addstr(&next_name, ".ref");
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = stack_filename(&tab_file_name, add->stack, next_name.buf);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
/*
|
|
On windows, this relies on rand() picking a unique destination name.
|
|
Maybe we should do retry loop as well?
|
|
*/
|
|
err = tmpfile_rename(&tab_file, tab_file_name.buf);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
REFTABLE_ALLOC_GROW_OR_NULL(add->new_tables, add->new_tables_len + 1,
|
|
add->new_tables_cap);
|
|
if (!add->new_tables) {
|
|
err = REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
goto done;
|
|
}
|
|
add->new_tables[add->new_tables_len++] = reftable_buf_detach(&next_name);
|
|
|
|
done:
|
|
tmpfile_delete(&tab_file);
|
|
reftable_buf_release(&temp_tab_file_name);
|
|
reftable_buf_release(&tab_file_name);
|
|
reftable_buf_release(&next_name);
|
|
reftable_writer_free(wr);
|
|
return err;
|
|
}
|
|
|
|
uint64_t reftable_stack_next_update_index(struct reftable_stack *st)
|
|
{
|
|
int sz = st->merged->readers_len;
|
|
if (sz > 0)
|
|
return reftable_reader_max_update_index(st->readers[sz - 1]) +
|
|
1;
|
|
return 1;
|
|
}
|
|
|
|
static int stack_compact_locked(struct reftable_stack *st,
|
|
size_t first, size_t last,
|
|
struct reftable_log_expiry_config *config,
|
|
struct reftable_tmpfile *tab_file_out)
|
|
{
|
|
struct reftable_buf next_name = REFTABLE_BUF_INIT;
|
|
struct reftable_buf tab_file_path = REFTABLE_BUF_INIT;
|
|
struct reftable_writer *wr = NULL;
|
|
struct fd_writer writer= {
|
|
.opts = &st->opts,
|
|
};
|
|
struct reftable_tmpfile tab_file = REFTABLE_TMPFILE_INIT;
|
|
int err = 0;
|
|
|
|
err = format_name(&next_name, reftable_reader_min_update_index(st->readers[first]),
|
|
reftable_reader_max_update_index(st->readers[last]));
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = stack_filename(&tab_file_path, st, next_name.buf);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = reftable_buf_addstr(&tab_file_path, ".temp.XXXXXX");
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = tmpfile_from_pattern(&tab_file, tab_file_path.buf);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
if (st->opts.default_permissions &&
|
|
chmod(tab_file.path, st->opts.default_permissions) < 0) {
|
|
err = REFTABLE_IO_ERROR;
|
|
goto done;
|
|
}
|
|
|
|
writer.fd = tab_file.fd;
|
|
err = reftable_writer_new(&wr, fd_writer_write, fd_writer_flush,
|
|
&writer, &st->opts);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = stack_write_compact(st, wr, first, last, config);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = reftable_writer_close(wr);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = tmpfile_close(&tab_file);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
*tab_file_out = tab_file;
|
|
tab_file = REFTABLE_TMPFILE_INIT;
|
|
|
|
done:
|
|
tmpfile_delete(&tab_file);
|
|
reftable_writer_free(wr);
|
|
reftable_buf_release(&next_name);
|
|
reftable_buf_release(&tab_file_path);
|
|
return err;
|
|
}
|
|
|
|
static int stack_write_compact(struct reftable_stack *st,
|
|
struct reftable_writer *wr,
|
|
size_t first, size_t last,
|
|
struct reftable_log_expiry_config *config)
|
|
{
|
|
struct reftable_merged_table *mt = NULL;
|
|
struct reftable_iterator it = { NULL };
|
|
struct reftable_ref_record ref = { NULL };
|
|
struct reftable_log_record log = { NULL };
|
|
size_t subtabs_len = last - first + 1;
|
|
uint64_t entries = 0;
|
|
int err = 0;
|
|
|
|
for (size_t i = first; i <= last; i++)
|
|
st->stats.bytes += st->readers[i]->size;
|
|
reftable_writer_set_limits(wr, st->readers[first]->min_update_index,
|
|
st->readers[last]->max_update_index);
|
|
|
|
err = reftable_merged_table_new(&mt, st->readers + first, subtabs_len,
|
|
st->opts.hash_id);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = merged_table_init_iter(mt, &it, BLOCK_TYPE_REF);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = reftable_iterator_seek_ref(&it, "");
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
while (1) {
|
|
err = reftable_iterator_next_ref(&it, &ref);
|
|
if (err > 0) {
|
|
err = 0;
|
|
break;
|
|
}
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
if (first == 0 && reftable_ref_record_is_deletion(&ref)) {
|
|
continue;
|
|
}
|
|
|
|
err = reftable_writer_add_ref(wr, &ref);
|
|
if (err < 0)
|
|
goto done;
|
|
entries++;
|
|
}
|
|
reftable_iterator_destroy(&it);
|
|
|
|
err = merged_table_init_iter(mt, &it, BLOCK_TYPE_LOG);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = reftable_iterator_seek_log(&it, "");
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
while (1) {
|
|
err = reftable_iterator_next_log(&it, &log);
|
|
if (err > 0) {
|
|
err = 0;
|
|
break;
|
|
}
|
|
if (err < 0)
|
|
goto done;
|
|
if (first == 0 && reftable_log_record_is_deletion(&log)) {
|
|
continue;
|
|
}
|
|
|
|
if (config && config->min_update_index > 0 &&
|
|
log.update_index < config->min_update_index) {
|
|
continue;
|
|
}
|
|
|
|
if (config && config->time > 0 &&
|
|
log.value.update.time < config->time) {
|
|
continue;
|
|
}
|
|
|
|
err = reftable_writer_add_log(wr, &log);
|
|
if (err < 0)
|
|
goto done;
|
|
entries++;
|
|
}
|
|
|
|
done:
|
|
reftable_iterator_destroy(&it);
|
|
if (mt)
|
|
reftable_merged_table_free(mt);
|
|
reftable_ref_record_release(&ref);
|
|
reftable_log_record_release(&log);
|
|
st->stats.entries_written += entries;
|
|
return err;
|
|
}
|
|
|
|
enum stack_compact_range_flags {
|
|
/*
|
|
* Perform a best-effort compaction. That is, even if we cannot lock
|
|
* all tables in the specified range, we will try to compact the
|
|
* remaining slice.
|
|
*/
|
|
STACK_COMPACT_RANGE_BEST_EFFORT = (1 << 0),
|
|
};
|
|
|
|
/*
|
|
* Compact all tables in the range `[first, last)` into a single new table.
|
|
*
|
|
* This function returns `0` on success or a code `< 0` on failure. When the
|
|
* stack or any of the tables in the specified range are already locked then
|
|
* this function returns `REFTABLE_LOCK_ERROR`. This is a benign error that
|
|
* callers can either ignore, or they may choose to retry compaction after some
|
|
* amount of time.
|
|
*/
|
|
static int stack_compact_range(struct reftable_stack *st,
|
|
size_t first, size_t last,
|
|
struct reftable_log_expiry_config *expiry,
|
|
unsigned int flags)
|
|
{
|
|
struct reftable_buf tables_list_buf = REFTABLE_BUF_INIT;
|
|
struct reftable_buf new_table_name = REFTABLE_BUF_INIT;
|
|
struct reftable_buf new_table_path = REFTABLE_BUF_INIT;
|
|
struct reftable_buf table_name = REFTABLE_BUF_INIT;
|
|
struct reftable_flock tables_list_lock = REFTABLE_FLOCK_INIT;
|
|
struct reftable_flock *table_locks = NULL;
|
|
struct reftable_tmpfile new_table = REFTABLE_TMPFILE_INIT;
|
|
int is_empty_table = 0, err = 0;
|
|
size_t first_to_replace, last_to_replace;
|
|
size_t i, nlocks = 0;
|
|
char **names = NULL;
|
|
|
|
if (first > last || (!expiry && first == last)) {
|
|
err = 0;
|
|
goto done;
|
|
}
|
|
|
|
st->stats.attempts++;
|
|
|
|
/*
|
|
* Hold the lock so that we can read "tables.list" and lock all tables
|
|
* which are part of the user-specified range.
|
|
*/
|
|
err = flock_acquire(&tables_list_lock, st->list_file, st->opts.lock_timeout_ms);
|
|
if (err < 0) {
|
|
if (errno == EEXIST)
|
|
err = REFTABLE_LOCK_ERROR;
|
|
else
|
|
err = REFTABLE_IO_ERROR;
|
|
goto done;
|
|
}
|
|
|
|
err = stack_uptodate(st);
|
|
if (err)
|
|
goto done;
|
|
|
|
/*
|
|
* Lock all tables in the user-provided range. This is the slice of our
|
|
* stack which we'll compact.
|
|
*
|
|
* Note that we lock tables in reverse order from last to first. The
|
|
* intent behind this is to allow a newer process to perform best
|
|
* effort compaction of tables that it has added in the case where an
|
|
* older process is still busy compacting tables which are preexisting
|
|
* from the point of view of the newer process.
|
|
*/
|
|
REFTABLE_ALLOC_ARRAY(table_locks, last - first + 1);
|
|
if (!table_locks) {
|
|
err = REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
goto done;
|
|
}
|
|
for (i = 0; i < last - first + 1; i++)
|
|
table_locks[i] = REFTABLE_FLOCK_INIT;
|
|
|
|
for (i = last + 1; i > first; i--) {
|
|
err = stack_filename(&table_name, st, reader_name(st->readers[i - 1]));
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = flock_acquire(&table_locks[nlocks], table_name.buf, 0);
|
|
if (err < 0) {
|
|
/*
|
|
* When the table is locked already we may do a
|
|
* best-effort compaction and compact only the tables
|
|
* that we have managed to lock so far. This of course
|
|
* requires that we have been able to lock at least two
|
|
* tables, otherwise there would be nothing to compact.
|
|
* In that case, we return a lock error to our caller.
|
|
*/
|
|
if (errno == EEXIST && last - (i - 1) >= 2 &&
|
|
flags & STACK_COMPACT_RANGE_BEST_EFFORT) {
|
|
err = 0;
|
|
/*
|
|
* The subtraction is to offset the index, the
|
|
* addition is to only compact up to the table
|
|
* of the preceding iteration. They obviously
|
|
* cancel each other out, but that may be
|
|
* non-obvious when it was omitted.
|
|
*/
|
|
first = (i - 1) + 1;
|
|
break;
|
|
} else if (errno == EEXIST) {
|
|
err = REFTABLE_LOCK_ERROR;
|
|
goto done;
|
|
} else {
|
|
err = REFTABLE_IO_ERROR;
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* We need to close the lockfiles as we might otherwise easily
|
|
* run into file descriptor exhaustion when we compress a lot
|
|
* of tables.
|
|
*/
|
|
err = flock_close(&table_locks[nlocks++]);
|
|
if (err < 0) {
|
|
err = REFTABLE_IO_ERROR;
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* We have locked all tables in our range and can thus release the
|
|
* "tables.list" lock while compacting the locked tables. This allows
|
|
* concurrent updates to the stack to proceed.
|
|
*/
|
|
err = flock_release(&tables_list_lock);
|
|
if (err < 0) {
|
|
err = REFTABLE_IO_ERROR;
|
|
goto done;
|
|
}
|
|
|
|
/*
|
|
* Compact the now-locked tables into a new table. Note that compacting
|
|
* these tables may end up with an empty new table in case tombstones
|
|
* end up cancelling out all refs in that range.
|
|
*/
|
|
err = stack_compact_locked(st, first, last, expiry, &new_table);
|
|
if (err < 0) {
|
|
if (err != REFTABLE_EMPTY_TABLE_ERROR)
|
|
goto done;
|
|
is_empty_table = 1;
|
|
}
|
|
|
|
/*
|
|
* Now that we have written the new, compacted table we need to re-lock
|
|
* "tables.list". We'll then replace the compacted range of tables with
|
|
* the new table.
|
|
*/
|
|
err = flock_acquire(&tables_list_lock, st->list_file, st->opts.lock_timeout_ms);
|
|
if (err < 0) {
|
|
if (errno == EEXIST)
|
|
err = REFTABLE_LOCK_ERROR;
|
|
else
|
|
err = REFTABLE_IO_ERROR;
|
|
goto done;
|
|
}
|
|
|
|
if (st->opts.default_permissions) {
|
|
if (chmod(tables_list_lock.path,
|
|
st->opts.default_permissions) < 0) {
|
|
err = REFTABLE_IO_ERROR;
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* As we have unlocked the stack while compacting our slice of tables
|
|
* it may have happened that a concurrently running process has updated
|
|
* the stack while we were compacting. In that case, we need to check
|
|
* whether the tables that we have just compacted still exist in the
|
|
* stack in the exact same order as we have compacted them.
|
|
*
|
|
* If they do exist, then it is fine to continue and replace those
|
|
* tables with our compacted version. If they don't, then we need to
|
|
* abort.
|
|
*/
|
|
err = stack_uptodate(st);
|
|
if (err < 0)
|
|
goto done;
|
|
if (err > 0) {
|
|
ssize_t new_offset = -1;
|
|
int fd;
|
|
|
|
fd = open(st->list_file, O_RDONLY);
|
|
if (fd < 0) {
|
|
err = REFTABLE_IO_ERROR;
|
|
goto done;
|
|
}
|
|
|
|
err = fd_read_lines(fd, &names);
|
|
close(fd);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
/*
|
|
* Search for the offset of the first table that we have
|
|
* compacted in the updated "tables.list" file.
|
|
*/
|
|
for (size_t i = 0; names[i]; i++) {
|
|
if (strcmp(names[i], st->readers[first]->name))
|
|
continue;
|
|
|
|
/*
|
|
* We have found the first entry. Verify that all the
|
|
* subsequent tables we have compacted still exist in
|
|
* the modified stack in the exact same order as we
|
|
* have compacted them.
|
|
*/
|
|
for (size_t j = 1; j < last - first + 1; j++) {
|
|
const char *old = first + j < st->merged->readers_len ?
|
|
st->readers[first + j]->name : NULL;
|
|
const char *new = names[i + j];
|
|
|
|
/*
|
|
* If some entries are missing or in case the tables
|
|
* have changed then we need to bail out. Again, this
|
|
* shouldn't ever happen because we have locked the
|
|
* tables we are compacting.
|
|
*/
|
|
if (!old || !new || strcmp(old, new)) {
|
|
err = REFTABLE_OUTDATED_ERROR;
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
new_offset = i;
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* In case we didn't find our compacted tables in the stack we
|
|
* need to bail out. In theory, this should have never happened
|
|
* because we locked the tables we are compacting.
|
|
*/
|
|
if (new_offset < 0) {
|
|
err = REFTABLE_OUTDATED_ERROR;
|
|
goto done;
|
|
}
|
|
|
|
/*
|
|
* We have found the new range that we want to replace, so
|
|
* let's update the range of tables that we want to replace.
|
|
*/
|
|
first_to_replace = new_offset;
|
|
last_to_replace = last + (new_offset - first);
|
|
} else {
|
|
/*
|
|
* `fd_read_lines()` uses a `NULL` sentinel to indicate that
|
|
* the array is at its end. As we use `free_names()` to free
|
|
* the array, we need to include this sentinel value here and
|
|
* thus have to allocate `readers_len + 1` many entries.
|
|
*/
|
|
REFTABLE_CALLOC_ARRAY(names, st->merged->readers_len + 1);
|
|
if (!names) {
|
|
err = REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
goto done;
|
|
}
|
|
|
|
for (size_t i = 0; i < st->merged->readers_len; i++) {
|
|
names[i] = reftable_strdup(st->readers[i]->name);
|
|
if (!names[i]) {
|
|
err = REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
goto done;
|
|
}
|
|
}
|
|
first_to_replace = first;
|
|
last_to_replace = last;
|
|
}
|
|
|
|
/*
|
|
* If the resulting compacted table is not empty, then we need to move
|
|
* it into place now.
|
|
*/
|
|
if (!is_empty_table) {
|
|
err = format_name(&new_table_name, st->readers[first]->min_update_index,
|
|
st->readers[last]->max_update_index);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = reftable_buf_addstr(&new_table_name, ".ref");
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = stack_filename(&new_table_path, st, new_table_name.buf);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = tmpfile_rename(&new_table, new_table_path.buf);
|
|
if (err < 0)
|
|
goto done;
|
|
}
|
|
|
|
/*
|
|
* Write the new "tables.list" contents with the compacted table we
|
|
* have just written. In case the compacted table became empty we
|
|
* simply skip writing it.
|
|
*/
|
|
for (i = 0; i < first_to_replace; i++) {
|
|
if ((err = reftable_buf_addstr(&tables_list_buf, names[i])) < 0 ||
|
|
(err = reftable_buf_addstr(&tables_list_buf, "\n")) < 0)
|
|
goto done;
|
|
}
|
|
if (!is_empty_table) {
|
|
if ((err = reftable_buf_addstr(&tables_list_buf, new_table_name.buf)) < 0 ||
|
|
(err = reftable_buf_addstr(&tables_list_buf, "\n")) < 0)
|
|
goto done;
|
|
}
|
|
for (i = last_to_replace + 1; names[i]; i++) {
|
|
if ((err = reftable_buf_addstr(&tables_list_buf, names[i])) < 0 ||
|
|
(err = reftable_buf_addstr(&tables_list_buf, "\n")) < 0)
|
|
goto done;
|
|
}
|
|
|
|
err = write_in_full(tables_list_lock.fd,
|
|
tables_list_buf.buf, tables_list_buf.len);
|
|
if (err < 0) {
|
|
err = REFTABLE_IO_ERROR;
|
|
unlink(new_table_path.buf);
|
|
goto done;
|
|
}
|
|
|
|
err = stack_fsync(&st->opts, tables_list_lock.fd);
|
|
if (err < 0) {
|
|
err = REFTABLE_IO_ERROR;
|
|
unlink(new_table_path.buf);
|
|
goto done;
|
|
}
|
|
|
|
err = flock_commit(&tables_list_lock);
|
|
if (err < 0) {
|
|
err = REFTABLE_IO_ERROR;
|
|
unlink(new_table_path.buf);
|
|
goto done;
|
|
}
|
|
|
|
/*
|
|
* Reload the stack before deleting the compacted tables. We can only
|
|
* delete the files after we closed them on Windows, so this needs to
|
|
* happen first.
|
|
*/
|
|
err = reftable_stack_reload_maybe_reuse(st, first < last);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
/*
|
|
* Delete the old tables. They may still be in use by concurrent
|
|
* readers, so it is expected that unlinking tables may fail.
|
|
*/
|
|
for (i = 0; i < nlocks; i++) {
|
|
struct reftable_flock *table_lock = &table_locks[i];
|
|
|
|
reftable_buf_reset(&table_name);
|
|
err = reftable_buf_add(&table_name, table_lock->path,
|
|
strlen(table_lock->path) - strlen(".lock"));
|
|
if (err)
|
|
continue;
|
|
|
|
unlink(table_name.buf);
|
|
}
|
|
|
|
done:
|
|
flock_release(&tables_list_lock);
|
|
for (i = 0; table_locks && i < nlocks; i++)
|
|
flock_release(&table_locks[i]);
|
|
reftable_free(table_locks);
|
|
|
|
tmpfile_delete(&new_table);
|
|
reftable_buf_release(&new_table_name);
|
|
reftable_buf_release(&new_table_path);
|
|
reftable_buf_release(&tables_list_buf);
|
|
reftable_buf_release(&table_name);
|
|
free_names(names);
|
|
|
|
if (err == REFTABLE_LOCK_ERROR)
|
|
st->stats.failures++;
|
|
|
|
return err;
|
|
}
|
|
|
|
int reftable_stack_compact_all(struct reftable_stack *st,
|
|
struct reftable_log_expiry_config *config)
|
|
{
|
|
size_t last = st->merged->readers_len ? st->merged->readers_len - 1 : 0;
|
|
return stack_compact_range(st, 0, last, config, 0);
|
|
}
|
|
|
|
static int segment_size(struct segment *s)
|
|
{
|
|
return s->end - s->start;
|
|
}
|
|
|
|
struct segment suggest_compaction_segment(uint64_t *sizes, size_t n,
|
|
uint8_t factor)
|
|
{
|
|
struct segment seg = { 0 };
|
|
uint64_t bytes;
|
|
size_t i;
|
|
|
|
if (!factor)
|
|
factor = DEFAULT_GEOMETRIC_FACTOR;
|
|
|
|
/*
|
|
* If there are no tables or only a single one then we don't have to
|
|
* compact anything. The sequence is geometric by definition already.
|
|
*/
|
|
if (n <= 1)
|
|
return seg;
|
|
|
|
/*
|
|
* Find the ending table of the compaction segment needed to restore the
|
|
* geometric sequence. Note that the segment end is exclusive.
|
|
*
|
|
* To do so, we iterate backwards starting from the most recent table
|
|
* until a valid segment end is found. If the preceding table is smaller
|
|
* than the current table multiplied by the geometric factor (2), the
|
|
* compaction segment end has been identified.
|
|
*
|
|
* Tables after the ending point are not added to the byte count because
|
|
* they are already valid members of the geometric sequence. Due to the
|
|
* properties of a geometric sequence, it is not possible for the sum of
|
|
* these tables to exceed the value of the ending point table.
|
|
*
|
|
* Example table size sequence requiring no compaction:
|
|
* 64, 32, 16, 8, 4, 2, 1
|
|
*
|
|
* Example table size sequence where compaction segment end is set to
|
|
* the last table. Since the segment end is exclusive, the last table is
|
|
* excluded during subsequent compaction and the table with size 3 is
|
|
* the final table included:
|
|
* 64, 32, 16, 8, 4, 3, 1
|
|
*/
|
|
for (i = n - 1; i > 0; i--) {
|
|
if (sizes[i - 1] < sizes[i] * factor) {
|
|
seg.end = i + 1;
|
|
bytes = sizes[i];
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Find the starting table of the compaction segment by iterating
|
|
* through the remaining tables and keeping track of the accumulated
|
|
* size of all tables seen from the segment end table. The previous
|
|
* table is compared to the accumulated size because the tables from the
|
|
* segment end are merged backwards recursively.
|
|
*
|
|
* Note that we keep iterating even after we have found the first
|
|
* starting point. This is because there may be tables in the stack
|
|
* preceding that first starting point which violate the geometric
|
|
* sequence.
|
|
*
|
|
* Example compaction segment start set to table with size 32:
|
|
* 128, 32, 16, 8, 4, 3, 1
|
|
*/
|
|
for (; i > 0; i--) {
|
|
uint64_t curr = bytes;
|
|
bytes += sizes[i - 1];
|
|
|
|
if (sizes[i - 1] < curr * factor) {
|
|
seg.start = i - 1;
|
|
seg.bytes = bytes;
|
|
}
|
|
}
|
|
|
|
return seg;
|
|
}
|
|
|
|
static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st)
|
|
{
|
|
int version = (st->opts.hash_id == REFTABLE_HASH_SHA1) ? 1 : 2;
|
|
int overhead = header_size(version) - 1;
|
|
uint64_t *sizes;
|
|
|
|
REFTABLE_CALLOC_ARRAY(sizes, st->merged->readers_len);
|
|
if (!sizes)
|
|
return NULL;
|
|
|
|
for (size_t i = 0; i < st->merged->readers_len; i++)
|
|
sizes[i] = st->readers[i]->size - overhead;
|
|
|
|
return sizes;
|
|
}
|
|
|
|
int reftable_stack_auto_compact(struct reftable_stack *st)
|
|
{
|
|
struct segment seg;
|
|
uint64_t *sizes;
|
|
|
|
if (st->merged->readers_len < 2)
|
|
return 0;
|
|
|
|
sizes = stack_table_sizes_for_compaction(st);
|
|
if (!sizes)
|
|
return REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
|
|
seg = suggest_compaction_segment(sizes, st->merged->readers_len,
|
|
st->opts.auto_compaction_factor);
|
|
reftable_free(sizes);
|
|
|
|
if (segment_size(&seg) > 0)
|
|
return stack_compact_range(st, seg.start, seg.end - 1,
|
|
NULL, STACK_COMPACT_RANGE_BEST_EFFORT);
|
|
|
|
return 0;
|
|
}
|
|
|
|
struct reftable_compaction_stats *
|
|
reftable_stack_compaction_stats(struct reftable_stack *st)
|
|
{
|
|
return &st->stats;
|
|
}
|
|
|
|
int reftable_stack_read_ref(struct reftable_stack *st, const char *refname,
|
|
struct reftable_ref_record *ref)
|
|
{
|
|
struct reftable_iterator it = { 0 };
|
|
int ret;
|
|
|
|
ret = reftable_merged_table_init_ref_iterator(st->merged, &it);
|
|
if (ret)
|
|
goto out;
|
|
|
|
ret = reftable_iterator_seek_ref(&it, refname);
|
|
if (ret)
|
|
goto out;
|
|
|
|
ret = reftable_iterator_next_ref(&it, ref);
|
|
if (ret)
|
|
goto out;
|
|
|
|
if (strcmp(ref->refname, refname) ||
|
|
reftable_ref_record_is_deletion(ref)) {
|
|
reftable_ref_record_release(ref);
|
|
ret = 1;
|
|
goto out;
|
|
}
|
|
|
|
out:
|
|
reftable_iterator_destroy(&it);
|
|
return ret;
|
|
}
|
|
|
|
int reftable_stack_read_log(struct reftable_stack *st, const char *refname,
|
|
struct reftable_log_record *log)
|
|
{
|
|
struct reftable_iterator it = {0};
|
|
int err;
|
|
|
|
err = reftable_stack_init_log_iterator(st, &it);
|
|
if (err)
|
|
goto done;
|
|
|
|
err = reftable_iterator_seek_log(&it, refname);
|
|
if (err)
|
|
goto done;
|
|
|
|
err = reftable_iterator_next_log(&it, log);
|
|
if (err)
|
|
goto done;
|
|
|
|
if (strcmp(log->refname, refname) ||
|
|
reftable_log_record_is_deletion(log)) {
|
|
err = 1;
|
|
goto done;
|
|
}
|
|
|
|
done:
|
|
if (err) {
|
|
reftable_log_record_release(log);
|
|
}
|
|
reftable_iterator_destroy(&it);
|
|
return err;
|
|
}
|
|
|
|
static int is_table_name(const char *s)
|
|
{
|
|
const char *dot = strrchr(s, '.');
|
|
return dot && !strcmp(dot, ".ref");
|
|
}
|
|
|
|
static void remove_maybe_stale_table(struct reftable_stack *st, uint64_t max,
|
|
const char *name)
|
|
{
|
|
int err = 0;
|
|
uint64_t update_idx = 0;
|
|
struct reftable_block_source src = { NULL };
|
|
struct reftable_reader *rd = NULL;
|
|
struct reftable_buf table_path = REFTABLE_BUF_INIT;
|
|
|
|
err = stack_filename(&table_path, st, name);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = reftable_block_source_from_file(&src, table_path.buf);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
err = reftable_reader_new(&rd, &src, name);
|
|
if (err < 0)
|
|
goto done;
|
|
|
|
update_idx = reftable_reader_max_update_index(rd);
|
|
reftable_reader_decref(rd);
|
|
|
|
if (update_idx <= max) {
|
|
unlink(table_path.buf);
|
|
}
|
|
done:
|
|
reftable_buf_release(&table_path);
|
|
}
|
|
|
|
static int reftable_stack_clean_locked(struct reftable_stack *st)
|
|
{
|
|
uint64_t max = reftable_merged_table_max_update_index(
|
|
reftable_stack_merged_table(st));
|
|
DIR *dir = opendir(st->reftable_dir);
|
|
struct dirent *d = NULL;
|
|
if (!dir) {
|
|
return REFTABLE_IO_ERROR;
|
|
}
|
|
|
|
while ((d = readdir(dir))) {
|
|
int i = 0;
|
|
int found = 0;
|
|
if (!is_table_name(d->d_name))
|
|
continue;
|
|
|
|
for (i = 0; !found && i < st->readers_len; i++) {
|
|
found = !strcmp(reader_name(st->readers[i]), d->d_name);
|
|
}
|
|
if (found)
|
|
continue;
|
|
|
|
remove_maybe_stale_table(st, max, d->d_name);
|
|
}
|
|
|
|
closedir(dir);
|
|
return 0;
|
|
}
|
|
|
|
int reftable_stack_clean(struct reftable_stack *st)
|
|
{
|
|
struct reftable_addition *add = NULL;
|
|
int err = reftable_stack_new_addition(&add, st, 0);
|
|
if (err < 0) {
|
|
goto done;
|
|
}
|
|
|
|
err = reftable_stack_reload(st);
|
|
if (err < 0) {
|
|
goto done;
|
|
}
|
|
|
|
err = reftable_stack_clean_locked(st);
|
|
|
|
done:
|
|
reftable_addition_destroy(add);
|
|
return err;
|
|
}
|
|
|
|
enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st)
|
|
{
|
|
return reftable_merged_table_hash_id(st->merged);
|
|
}
|