gossip_store: don't make bogus assumption that writes are atomic wrt readers.

They're not defined to be, though we've not seen this on Linux (testing
showed that it is page-level atomic, which means it can still happen across
page boundaries though!).  This was pointed out by whitslack in
https://github.com/ElementsProject/lightning/issues/4288

In practice, this just means not complaining when it happens, and also
not trying to get tricky to use it on MacOS (we can safely seek & write,
since we're single-threaded).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Changelog-Removed: Removed bogus UNUSUAL log about gossip_store 'short test'.
This commit is contained in:
Rusty Russell 2021-03-17 12:24:51 +10:30
parent cbde1f8158
commit 7b853d0fa5
2 changed files with 10 additions and 27 deletions

View File

@ -58,7 +58,8 @@ static bool timestamp_filter(const struct per_peer_state *pps, u32 timestamp)
&& timestamp <= pps->gs->timestamp_max;
}
static void undo_read(int fd, int len, size_t wanted)
/* Not all the data we expected was there: rewind file */
static void failed_read(int fd, int len)
{
if (len < 0) {
/* Grab errno before lseek overrides it */
@ -68,10 +69,6 @@ static void undo_read(int fd, int len, size_t wanted)
(u64)lseek(fd, 0, SEEK_CUR), err);
}
/* Shouldn't happen, but some filesystems are not as atomic as
* they should be! */
status_unusual("gossip_store: short read %i of %zu @%"PRIu64,
len, wanted, (u64)lseek(fd, 0, SEEK_CUR) - len);
lseek(fd, -len, SEEK_CUR);
}
@ -93,7 +90,7 @@ u8 *gossip_store_next(const tal_t *ctx, struct per_peer_state *pps)
if (r != sizeof(hdr)) {
/* We expect a 0 read here at EOF */
if (r != 0)
undo_read(pps->gossip_store_fd, r, sizeof(hdr));
failed_read(pps->gossip_store_fd, r);
per_peer_state_reset_gossip_timer(pps);
return NULL;
}
@ -116,7 +113,7 @@ u8 *gossip_store_next(const tal_t *ctx, struct per_peer_state *pps)
msg = tal_arr(ctx, u8, msglen);
r = read(pps->gossip_store_fd, msg, msglen);
if (r != msglen) {
undo_read(pps->gossip_store_fd, r, msglen);
failed_read(pps->gossip_store_fd, r);
per_peer_state_reset_gossip_timer(pps);
return NULL;
}

View File

@ -62,6 +62,7 @@ static void gossip_store_destroy(struct gossip_store *gs)
}
#if HAVE_PWRITEV
/* One fewer syscall for the win! */
static ssize_t gossip_pwritev(int fd, const struct iovec *iov, int iovcnt,
off_t offset)
{
@ -71,25 +72,9 @@ static ssize_t gossip_pwritev(int fd, const struct iovec *iov, int iovcnt,
static ssize_t gossip_pwritev(int fd, const struct iovec *iov, int iovcnt,
off_t offset)
{
u8 *buf;
size_t len;
ssize_t ret;
/* Make a temporary linear buffer to fall back to pwrite() */
len = 0;
for (size_t i = 0; i < iovcnt; i++)
len += iov[i].iov_len;
buf = tal_arr(NULL, u8, len);
len = 0;
for (size_t i = 0; i < iovcnt; i++) {
memcpy(buf + len, iov[i].iov_base, iov[i].iov_len);
len += iov[i].iov_len;
}
ret = pwrite(fd, buf, len, offset);
tal_free(buf);
return ret;
if (lseek(fd, offset, SEEK_SET) != offset)
return -1;
return writev(fd, iov, iovcnt);
}
#endif /* !HAVE_PWRITEV */
@ -107,7 +92,8 @@ static bool append_msg(int fd, const u8 *msg, u32 timestamp,
hdr.crc = cpu_to_be32(crc32c(timestamp, msg, msglen));
hdr.timestamp = cpu_to_be32(timestamp);
/* Use pwritev so it will appear in store atomically */
/* pwritev makes it more likely to appear at once, plus it's
* exactly what we want. */
iov[0].iov_base = &hdr;
iov[0].iov_len = sizeof(hdr);
iov[1].iov_base = (void *)msg;