gossip_store: don't make bogus assumption that writes are atomic wrt readers.
They're not defined to be, though we've not seen this on Linux (testing showed that it is page-level atomic, which means it can still happen across page boundaries though!). This was pointed out by whitslack in https://github.com/ElementsProject/lightning/issues/4288 In practice, this just means not complaining when it happens, and also not trying to get tricky to use it on MacOS (we can safely seek & write, since we're single-threaded). Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> Changelog-Removed: Removed bogus UNUSUAL log about gossip_store 'short test'.
This commit is contained in:
parent
cbde1f8158
commit
7b853d0fa5
|
@ -58,7 +58,8 @@ static bool timestamp_filter(const struct per_peer_state *pps, u32 timestamp)
|
|||
&& timestamp <= pps->gs->timestamp_max;
|
||||
}
|
||||
|
||||
static void undo_read(int fd, int len, size_t wanted)
|
||||
/* Not all the data we expected was there: rewind file */
|
||||
static void failed_read(int fd, int len)
|
||||
{
|
||||
if (len < 0) {
|
||||
/* Grab errno before lseek overrides it */
|
||||
|
@ -68,10 +69,6 @@ static void undo_read(int fd, int len, size_t wanted)
|
|||
(u64)lseek(fd, 0, SEEK_CUR), err);
|
||||
}
|
||||
|
||||
/* Shouldn't happen, but some filesystems are not as atomic as
|
||||
* they should be! */
|
||||
status_unusual("gossip_store: short read %i of %zu @%"PRIu64,
|
||||
len, wanted, (u64)lseek(fd, 0, SEEK_CUR) - len);
|
||||
lseek(fd, -len, SEEK_CUR);
|
||||
}
|
||||
|
||||
|
@ -93,7 +90,7 @@ u8 *gossip_store_next(const tal_t *ctx, struct per_peer_state *pps)
|
|||
if (r != sizeof(hdr)) {
|
||||
/* We expect a 0 read here at EOF */
|
||||
if (r != 0)
|
||||
undo_read(pps->gossip_store_fd, r, sizeof(hdr));
|
||||
failed_read(pps->gossip_store_fd, r);
|
||||
per_peer_state_reset_gossip_timer(pps);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -116,7 +113,7 @@ u8 *gossip_store_next(const tal_t *ctx, struct per_peer_state *pps)
|
|||
msg = tal_arr(ctx, u8, msglen);
|
||||
r = read(pps->gossip_store_fd, msg, msglen);
|
||||
if (r != msglen) {
|
||||
undo_read(pps->gossip_store_fd, r, msglen);
|
||||
failed_read(pps->gossip_store_fd, r);
|
||||
per_peer_state_reset_gossip_timer(pps);
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
@ -62,6 +62,7 @@ static void gossip_store_destroy(struct gossip_store *gs)
|
|||
}
|
||||
|
||||
#if HAVE_PWRITEV
|
||||
/* One fewer syscall for the win! */
|
||||
static ssize_t gossip_pwritev(int fd, const struct iovec *iov, int iovcnt,
|
||||
off_t offset)
|
||||
{
|
||||
|
@ -71,25 +72,9 @@ static ssize_t gossip_pwritev(int fd, const struct iovec *iov, int iovcnt,
|
|||
static ssize_t gossip_pwritev(int fd, const struct iovec *iov, int iovcnt,
|
||||
off_t offset)
|
||||
{
|
||||
u8 *buf;
|
||||
size_t len;
|
||||
ssize_t ret;
|
||||
|
||||
/* Make a temporary linear buffer to fall back to pwrite() */
|
||||
len = 0;
|
||||
for (size_t i = 0; i < iovcnt; i++)
|
||||
len += iov[i].iov_len;
|
||||
|
||||
buf = tal_arr(NULL, u8, len);
|
||||
len = 0;
|
||||
for (size_t i = 0; i < iovcnt; i++) {
|
||||
memcpy(buf + len, iov[i].iov_base, iov[i].iov_len);
|
||||
len += iov[i].iov_len;
|
||||
}
|
||||
|
||||
ret = pwrite(fd, buf, len, offset);
|
||||
tal_free(buf);
|
||||
return ret;
|
||||
if (lseek(fd, offset, SEEK_SET) != offset)
|
||||
return -1;
|
||||
return writev(fd, iov, iovcnt);
|
||||
}
|
||||
#endif /* !HAVE_PWRITEV */
|
||||
|
||||
|
@ -107,7 +92,8 @@ static bool append_msg(int fd, const u8 *msg, u32 timestamp,
|
|||
hdr.crc = cpu_to_be32(crc32c(timestamp, msg, msglen));
|
||||
hdr.timestamp = cpu_to_be32(timestamp);
|
||||
|
||||
/* Use pwritev so it will appear in store atomically */
|
||||
/* pwritev makes it more likely to appear at once, plus it's
|
||||
* exactly what we want. */
|
||||
iov[0].iov_base = &hdr;
|
||||
iov[0].iov_len = sizeof(hdr);
|
||||
iov[1].iov_base = (void *)msg;
|
||||
|
|
Loading…
Reference in New Issue