Merge branch 'hashx_perf' into 'main'

hashx: Performance improvements for program generation

See merge request tpo/core/arti!1524
This commit is contained in:
Ian Jackson 2023-08-23 09:24:13 +00:00
commit 696696857d
10 changed files with 393 additions and 394 deletions

View File

@ -7,7 +7,7 @@
//! `Executable` wraps a mmap buffer from the `dynasmrt` crate and the
//! `Architecture` is implemented in a CPU-specific way.
use crate::{program::InstructionArray, register::RegisterFile, CompilerError};
use crate::{program::Instruction, register::RegisterFile, CompilerError};
#[cfg(all(feature = "compiler", target_arch = "x86_64"))]
mod x86_64;
@ -41,7 +41,7 @@ pub(crate) struct Executable {
not(any(target_arch = "x86_64", target_arch = "aarch64"))
))]
impl Architecture for Executable {
fn compile(_program: &InstructionArray) -> Result<Self, CompilerError> {
fn compile(_program: &[Instruction]) -> Result<Self, CompilerError> {
Err(CompilerError::NotAvailable)
}
@ -71,7 +71,7 @@ where
Self: Sized,
{
/// Compile an array of instructions into an Executable
fn compile(program: &InstructionArray) -> Result<Self, CompilerError>;
fn compile(program: &[Instruction]) -> Result<Self, CompilerError>;
/// Run the compiled code, with a RegisterFile for input and output
fn invoke(&self, regs: &mut RegisterFile);

View File

@ -1,22 +1,31 @@
//! Dynamically emitted HashX assembly code for aarch64 targets
use crate::compiler::{util, Architecture, Executable};
use crate::program::{self, Instruction, InstructionArray};
use crate::register::{RegisterFile, RegisterId, RegisterSet};
use crate::program::{self, Instruction};
use crate::register::{RegisterFile, RegisterId};
use crate::CompilerError;
use dynasmrt::{aarch64, DynasmApi, DynasmLabelApi};
use std::mem;
impl Architecture for Executable {
fn compile(program: &InstructionArray) -> Result<Self, CompilerError> {
fn compile(program: &[Instruction]) -> Result<Self, CompilerError> {
let mut asm = Assembler::new();
emit_load_input(&mut asm);
emit_init_locals(&mut asm);
for inst in program {
emit_instruction(&mut asm, inst);
{
emit_load_input(&mut asm);
emit_init_locals(&mut asm);
debug_assert_eq!(asm.len(), PROLOGUE_SIZE);
}
for inst in program {
let prev_len = asm.len();
emit_instruction(&mut asm, inst);
debug_assert!(asm.len() - prev_len <= INSTRUCTION_SIZE_LIMIT);
}
{
let prev_len = asm.len();
emit_store_output(&mut asm);
emit_return(&mut asm);
debug_assert_eq!(asm.len() - prev_len, EPILOGUE_SIZE);
}
emit_store_output(&mut asm);
emit_return(&mut asm);
asm.finalize()
}
@ -36,8 +45,19 @@ impl Architecture for Executable {
}
}
/// Architecture-specific capacity for the temporary output buffer
const BUFFER_CAPACITY: usize = 0x200 + program::NUM_INSTRUCTIONS * 16;
/// Architecture-specific fixed prologue size, in bytes
///
/// `compile` checks in debug builds that the code emitted before the first
/// program instruction is exactly this long.
const PROLOGUE_SIZE: usize = 0x28;
/// Architecture-specific fixed epilogue size, in bytes
///
/// `compile` checks in debug builds that the code emitted after the last
/// program instruction is exactly this long.
const EPILOGUE_SIZE: usize = 0x24;
/// Architecture-specific maximum size for one instruction, in bytes
///
/// This is an upper bound rather than an exact size: `compile` checks in
/// debug builds that no single emitted instruction exceeds it.
const INSTRUCTION_SIZE_LIMIT: usize = 0x18;
/// Capacity for the temporary output buffer, before code is copied into
/// a long-lived allocation that can be made executable.
///
/// Sized for one prologue, one epilogue, and `program::NUM_INSTRUCTIONS`
/// instructions that each take up to `INSTRUCTION_SIZE_LIMIT` bytes.
const BUFFER_CAPACITY: usize =
PROLOGUE_SIZE + EPILOGUE_SIZE + program::NUM_INSTRUCTIONS * INSTRUCTION_SIZE_LIMIT;
/// Architecture-specific specialization of the Assembler
type Assembler = util::Assembler<aarch64::Aarch64Relocation, BUFFER_CAPACITY>;
@ -51,9 +71,12 @@ trait RegisterMapper {
}
impl RegisterMapper for RegisterId {
#[inline(always)]
fn x(&self) -> u32 {
1 + (self.as_usize() as u32)
}
#[inline(always)]
fn offset(&self) -> u32 {
(self.as_usize() * mem::size_of::<u64>()) as u32
}
@ -75,7 +98,8 @@ macro_rules! dynasm {
}
/// Emit code to initialize our local variables to default values.
fn emit_init_locals(asm: &mut Assembler) {
#[inline(always)]
fn emit_init_locals<A: DynasmApi>(asm: &mut A) {
dynasm!(asm
; mov mulh_result32, wzr
; mov branch_prohibit_flag, wzr
@ -84,30 +108,32 @@ fn emit_init_locals(asm: &mut Assembler) {
/// Emit code to move all input values from the RegisterFile into their
/// actual hardware registers.
fn emit_load_input(asm: &mut Assembler) {
RegisterSet::all().filter(|reg| {
#[inline(always)]
fn emit_load_input<A: DynasmApi>(asm: &mut A) {
for reg in RegisterId::all() {
dynasm!(asm; ldr X(reg.x()), [register_file_ptr, #(reg.offset())]);
true
});
}
}
/// Emit code to move all output values from machine registers back into
/// their RegisterFile slots.
fn emit_store_output(asm: &mut Assembler) {
RegisterSet::all().filter(|reg| {
#[inline(always)]
fn emit_store_output<A: DynasmApi>(asm: &mut A) {
for reg in RegisterId::all() {
dynasm!(asm; str X(reg.x()), [register_file_ptr, #(reg.offset())]);
true
});
}
}
/// Emit a return instruction.
fn emit_return(asm: &mut Assembler) {
#[inline(always)]
fn emit_return<A: DynasmApi>(asm: &mut A) {
dynasm!(asm; ret);
}
/// Load a sign extended 32-bit constant into the const_temp_64
/// register, using a movz/movn and movk pair.
fn emit_i32_const_temp_64(asm: &mut Assembler, value: i32) {
#[inline(always)]
fn emit_i32_const_temp_64<A: DynasmApi>(asm: &mut A, value: i32) {
let high = (value >> 16) as u32;
let low = (value & 0xFFFF) as u32;
if value < 0 {
@ -119,7 +145,8 @@ fn emit_i32_const_temp_64(asm: &mut Assembler, value: i32) {
}
/// Load a 32-bit constant into const_temp_32, without extending.
fn emit_u32_const_temp_32(asm: &mut Assembler, value: u32) {
#[inline(always)]
fn emit_u32_const_temp_32<A: DynasmApi>(asm: &mut A, value: u32) {
let high = value >> 16;
let low = value & 0xFFFF;
dynasm!(asm
@ -129,6 +156,7 @@ fn emit_u32_const_temp_32(asm: &mut Assembler, value: u32) {
}
/// Emit code for a single [`Instruction`] in the hash program.
#[inline(always)]
fn emit_instruction(asm: &mut Assembler, inst: &Instruction) {
/// Common implementation for binary operations on registers
macro_rules! reg_op {

View File

@ -33,14 +33,22 @@ pub(crate) struct Assembler<R: Relocation, const S: usize> {
impl<R: Relocation, const S: usize> Assembler<R, S> {
/// Return the entry point as an [`AssemblyOffset`].
///
/// This is always offset zero, i.e. the first byte of the assembled code.
#[inline(always)]
pub(crate) fn entry() -> AssemblyOffset {
AssemblyOffset(0)
}
/// Size of the code stored so far, in bytes
///
/// This is the current length of the temporary buffer, the same number
/// that `offset()` wraps in an [`AssemblyOffset`].
#[inline(always)]
pub(crate) fn len(&self) -> usize {
self.buffer.len()
}
/// Make a new assembler with a temporary buffer but no executable buffer.
#[inline(always)]
pub(crate) fn new() -> Self {
Self {
buffer: Default::default(),
buffer: ArrayVec::new(),
target: None,
phantom: PhantomData,
}
@ -51,6 +59,7 @@ impl<R: Relocation, const S: usize> Assembler<R, S> {
/// This may fail if we can't allocate some memory, fill it, and mark
/// it as executable. For example, a Linux platform with policy to restrict
/// `mprotect` will show runtime errors at this point.
#[inline(always)]
pub(crate) fn finalize(self) -> Result<Executable, CompilerError> {
// We never execute code from the buffer until it's complete, and we use
// a freshly mmap'ed buffer for each program. Because of this, we don't
@ -83,11 +92,13 @@ impl std::fmt::Debug for Executable {
impl<R: Relocation, const S: usize> DynasmLabelApi for Assembler<R, S> {
type Relocation = R;
/// Define a local label at the current position in the buffer.
///
/// Only a label named `target` is expected here, which is checked in
/// debug builds. Its offset is remembered in `self.target`, replacing
/// any previously recorded one.
#[inline(always)]
fn local_label(&mut self, name: &'static str) {
    debug_assert_eq!(name, "target");
    let here = self.offset();
    self.target = Some(here);
}
#[inline(always)]
fn backward_relocation(
&mut self,
name: &'static str,
@ -154,12 +165,14 @@ impl<R: Relocation, const S: usize> DynasmLabelApi for Assembler<R, S> {
}
// By-value byte sink for the assembler.
// NOTE(review): presumably needed to satisfy the supertrait bounds of
// `DynasmApi` -- confirm against the dynasmrt documentation.
impl<R: Relocation, const S: usize> Extend<u8> for Assembler<R, S> {
/// Append every byte produced by `iter` to the temporary code buffer,
/// delegating to the buffer's own `extend` implementation.
#[inline(always)]
fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
self.buffer.extend(iter);
}
}
impl<'a, R: Relocation, const S: usize> Extend<&'a u8> for Assembler<R, S> {
#[inline(always)]
fn extend<T: IntoIterator<Item = &'a u8>>(&mut self, iter: T) {
for byte in iter {
self.buffer.push(*byte);
@ -168,20 +181,17 @@ impl<'a, R: Relocation, const S: usize> Extend<&'a u8> for Assembler<R, S> {
}
impl<R: Relocation, const S: usize> DynasmApi for Assembler<R, S> {
#[inline(always)]
fn offset(&self) -> AssemblyOffset {
AssemblyOffset(self.buffer.len())
}
#[inline(always)]
fn push(&mut self, byte: u8) {
self.buffer.push(byte);
}
fn align(&mut self, alignment: usize, with: u8) {
let offset = self.buffer.len() % alignment;
if offset != 0 {
for _ in offset..alignment {
self.buffer.push(with);
}
}
fn align(&mut self, _alignment: usize, _with: u8) {
unreachable!();
}
}

View File

@ -1,24 +1,33 @@
//! Dynamically emitted HashX assembly code for x86_64 targets
use crate::compiler::{util, Architecture, Executable};
use crate::program::{self, Instruction, InstructionArray};
use crate::register::{RegisterFile, RegisterId, RegisterSet};
use crate::program::{self, Instruction};
use crate::register::{RegisterFile, RegisterId};
use crate::CompilerError;
use dynasmrt::{x64, x64::Rq, DynasmApi, DynasmLabelApi, Register};
use dynasmrt::{x64, x64::Rq, DynasmApi, DynasmLabelApi};
use std::mem;
impl Architecture for Executable {
fn compile(program: &InstructionArray) -> Result<Self, CompilerError> {
fn compile(program: &[Instruction]) -> Result<Self, CompilerError> {
let mut asm = Assembler::new();
emit_save_regs(&mut asm);
emit_load_input(&mut asm);
emit_init_locals(&mut asm);
for inst in program {
emit_instruction(&mut asm, inst);
{
emit_save_regs(&mut asm);
emit_load_input(&mut asm);
emit_init_locals(&mut asm);
debug_assert_eq!(asm.len(), PROLOGUE_SIZE);
}
for inst in program {
let prev_len = asm.len();
emit_instruction(&mut asm, inst);
debug_assert!(asm.len() - prev_len <= INSTRUCTION_SIZE_LIMIT);
}
{
let prev_len = asm.len();
emit_store_output(&mut asm);
emit_restore_regs(&mut asm);
emit_return(&mut asm);
debug_assert_eq!(asm.len() - prev_len, EPILOGUE_SIZE);
}
emit_store_output(&mut asm);
emit_restore_regs(&mut asm);
emit_return(&mut asm);
asm.finalize()
}
@ -37,8 +46,19 @@ impl Architecture for Executable {
}
}
/// Architecture-specific capacity for the temporary output buffer
const BUFFER_CAPACITY: usize = 0x200 + program::NUM_INSTRUCTIONS * 16;
/// Architecture-specific fixed prologue size, in bytes
///
/// `compile` checks in debug builds that the code emitted before the first
/// program instruction is exactly this long.
const PROLOGUE_SIZE: usize = 0x68;
/// Architecture-specific fixed epilogue size, in bytes
///
/// `compile` checks in debug builds that the code emitted after the last
/// program instruction is exactly this long.
const EPILOGUE_SIZE: usize = 0x60;
/// Architecture-specific maximum size for one instruction, in bytes
///
/// This is an upper bound rather than an exact size: `compile` checks in
/// debug builds that no single emitted instruction exceeds it.
const INSTRUCTION_SIZE_LIMIT: usize = 0x11;
/// Capacity for the temporary output buffer, before code is copied into
/// a long-lived allocation that can be made executable.
///
/// Sized for one prologue, one epilogue, and `program::NUM_INSTRUCTIONS`
/// instructions that each take up to `INSTRUCTION_SIZE_LIMIT` bytes.
const BUFFER_CAPACITY: usize =
PROLOGUE_SIZE + EPILOGUE_SIZE + program::NUM_INSTRUCTIONS * INSTRUCTION_SIZE_LIMIT;
/// Architecture-specific specialization of the Assembler
type Assembler = util::Assembler<x64::X64Relocation, BUFFER_CAPACITY>;
@ -52,9 +72,12 @@ trait RegisterMapper {
}
impl RegisterMapper for RegisterId {
#[inline(always)]
fn rq(&self) -> u8 {
8 + (self.as_usize() as u8)
}
#[inline(always)]
fn offset(&self) -> i32 {
(self.as_usize() * mem::size_of::<u64>()) as i32
}
@ -77,7 +100,8 @@ macro_rules! dynasm {
}
/// Emit code to initialize our local variables to default values.
fn emit_init_locals(asm: &mut Assembler) {
#[inline(always)]
fn emit_init_locals<A: DynasmApi>(asm: &mut A) {
dynasm!(asm
; xor mulh_result64, mulh_result64
; xor branch_prohibit_flag, branch_prohibit_flag
@ -105,47 +129,51 @@ const fn stack_size() -> i32 {
}
/// Emit code to allocate stack space and store REGS_TO_SAVE.
fn emit_save_regs(asm: &mut Assembler) {
#[inline(always)]
fn emit_save_regs<A: DynasmApi>(asm: &mut A) {
dynasm!(asm; sub rsp, stack_size());
for (i, reg) in REGS_TO_SAVE.as_ref().iter().enumerate() {
let offset = (i * mem::size_of::<u64>()) as i32;
dynasm!(asm; mov [rsp + offset], Rq(reg.code()));
dynasm!(asm; mov [rsp + offset], Rq(*reg as u8));
}
}
/// Emit code to restore REGS_TO_SAVE and deallocate stack space.
fn emit_restore_regs(asm: &mut Assembler) {
#[inline(always)]
fn emit_restore_regs<A: DynasmApi>(asm: &mut A) {
for (i, reg) in REGS_TO_SAVE.as_ref().iter().enumerate() {
let offset = (i * mem::size_of::<u64>()) as i32;
dynasm!(asm; mov Rq(reg.code()), [rsp + offset]);
dynasm!(asm; mov Rq(*reg as u8), [rsp + offset]);
}
dynasm!(asm; add rsp, stack_size());
}
/// Emit code to move all input values from the RegisterFile into their
/// actual hardware registers.
fn emit_load_input(asm: &mut Assembler) {
RegisterSet::all().filter(|reg| {
#[inline(always)]
fn emit_load_input<A: DynasmApi>(asm: &mut A) {
for reg in RegisterId::all() {
dynasm!(asm; mov Rq(reg.rq()), [register_file_ptr + reg.offset()]);
true
});
}
}
/// Emit code to move all output values from machine registers back into
/// their RegisterFile slots.
fn emit_store_output(asm: &mut Assembler) {
RegisterSet::all().filter(|reg| {
#[inline(always)]
fn emit_store_output<A: DynasmApi>(asm: &mut A) {
for reg in RegisterId::all() {
dynasm!(asm; mov [register_file_ptr + reg.offset()], Rq(reg.rq()));
true
});
}
}
/// Emit a return instruction.
fn emit_return(asm: &mut Assembler) {
#[inline(always)]
fn emit_return<A: DynasmApi>(asm: &mut A) {
dynasm!(asm; ret);
}
/// Emit code for a single [`Instruction`] in the hash program.
#[inline(always)]
fn emit_instruction(asm: &mut Assembler, inst: &Instruction) {
/// Common implementation for binary operations on registers
macro_rules! reg_op {

View File

@ -9,7 +9,7 @@
//! Generating correct HashX output depends on applying exactly the right
//! constraints.
use crate::program::{Instruction, InstructionArray, Opcode};
use crate::program::{Instruction, Opcode};
use crate::register::{RegisterId, RegisterSet, NUM_REGISTERS};
use crate::scheduler::Scheduler;
@ -39,7 +39,7 @@ mod model {
matches!(op, Opcode::Mul | Opcode::SMulH | Opcode::UMulH)
}
/// Does an instruction prohibit using the same register for source and dest?
/// Does an instruction prohibit using the same register for src and dst?
///
/// Meaningful only for ops that have both a source and destination register.
#[inline(always)]
@ -85,31 +85,19 @@ mod model {
#[inline(always)]
pub(super) fn writer_pair_allowed(
pass: Pass,
last_writer: Option<&RegisterWriter>,
this_writer: &RegisterWriter,
last_writer: RegisterWriter,
this_writer: RegisterWriter,
) -> bool {
match (last_writer, this_writer) {
// HashX disallows back-to-back 64-bit multiplies on the
// same destination register in Pass::Original but permits
// them on the retry if the source register isn't identical.
(
Some(RegisterWriter::RegSource(Opcode::Mul, _)),
RegisterWriter::RegSource(Opcode::Mul, _),
) if matches!(pass, Pass::Original) => false,
// Add/Sub from the same source register can't be paired
// with each other. (They might cancel out)
(
Some(RegisterWriter::RegSource(Opcode::AddShift, last_src)),
RegisterWriter::RegSource(Opcode::Sub, this_src),
) if this_src == last_src => false,
(
Some(RegisterWriter::RegSource(Opcode::Sub, last_src)),
RegisterWriter::RegSource(Opcode::AddShift, this_src),
) if this_src == last_src => false,
(RegisterWriter::Mul(_), RegisterWriter::Mul(_)) if matches!(pass, Pass::Original) => {
false
}
// Other pairings are allowed if the writer info differs at all.
(last_writer, this_writer) => last_writer != Some(this_writer),
(last_writer, this_writer) => last_writer != this_writer,
}
}
@ -132,29 +120,57 @@ mod model {
/// This is conceptually similar to storing the last [`super::Instruction`]
/// that wrote to a register, but HashX sometimes needs information for
/// constraints which won't end up in the final `Instruction`.
#[derive(Debug, Clone, Eq, PartialEq)]
///
/// We've chosen the encoding to minimize the code size in
/// writer_pair_allowed. Most pairwise comparisons can just be a register
/// equality test.
///
/// The instructions here fall into three categories which use their own
/// format for encoding arguments:
///
/// - Wide Multiply, extra u32
///
/// UMulH and SMulH use an additional otherwise unused 32-bit value
/// from the Rng when considering writer collisions.
///
/// As far as I can tell this is a bug in the original implementation
/// but we can't change the behavior without breaking compatibility.
///
/// The collisions are rare enough not to be a worthwhile addition
/// to ASIC-resistance. It seems like this was a vestigial feature
/// left over from immediate value matching features which were removed
/// during the development of HashX, but I can't be sure.
///
/// - Constant source
///
/// Only considers the opcode itself, not the specific immediate value.
///
/// - Register source
///
/// Considers the source register, collapses add/subtract into one op.
///
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)]
pub(crate) enum RegisterWriter {
/// Special format for wide multiply
///
/// HashX includes an otherwise unused phantom immediate value which
/// can (very rarely) affect constraint selection if it collides.
///
/// As far as I can tell this is a bug in the original implementation
/// but we can't change the behavior without breaking compatibility.
///
/// The collisions are rare enough not to be a worthwhile addition
/// to ASIC-resistance. It seems like this was a vestigial feature
/// left over from immediate value matching features which were removed
/// during the development of HashX, but I can't be sure.
WideMul(Opcode, u32),
/// Writer for instructions with an immediate source
///
/// The specific immediate value is not used.
ConstSource(Opcode),
/// Writer for instructions with register source, unique by source register
RegSource(Opcode, RegisterId),
/// Register not written yet
#[default]
None,
/// Register source writer for [`super::Instruction::Mul`]
Mul(RegisterId),
/// Wide multiply writer for [`super::Instruction::UMulH`]
UMulH(u32),
/// Wide multiply writer for [`super::Instruction::SMulH`]
SMulH(u32),
/// Register source writer for [`super::Instruction::AddShift`]
/// and [`super::Instruction::Sub`]
AddSub(RegisterId),
/// Constant source writer for [`super::Instruction::AddConst`]
AddConst,
/// Register source writer for [`super::Instruction::Xor`]
Xor(RegisterId),
/// Constant source writer for [`super::Instruction::XorConst`]
XorConst,
/// Constant source writer for [`super::Instruction::Rotate`]
Rotate,
}
}
@ -187,16 +203,13 @@ impl Validator {
/// Commit a new instruction to the validator state.
#[inline(always)]
pub(crate) fn commit_instruction(&mut self, inst: &Instruction, regw: Option<RegisterWriter>) {
pub(crate) fn commit_instruction(&mut self, inst: &Instruction, regw: RegisterWriter) {
if model::is_multiply(inst.opcode()) {
self.multiply_count += 1;
}
match inst.destination() {
None => assert!(regw.is_none()),
Some(dst) => self.writer_map.insert(
dst,
regw.expect("instructions with destination always have a RegisterWriter"),
),
None => debug_assert_eq!(regw, RegisterWriter::None),
Some(dst) => self.writer_map.insert(dst, regw),
}
}
@ -208,7 +221,7 @@ impl Validator {
pub(crate) fn check_whole_program(
&self,
scheduler: &Scheduler,
instructions: &InstructionArray,
instructions: &[Instruction],
) -> Result<(), ()> {
if instructions.len() == model::REQUIRED_INSTRUCTIONS
&& scheduler.overall_latency().as_usize() == model::REQUIRED_OVERALL_RESULT_AT_CYCLE
@ -220,36 +233,67 @@ impl Validator {
}
}
/// Figure out the allowed set of destination registers for an op after its
/// source is known, using the current state of the validator.
/// Begin checking which destination registers are allowed for an op after
/// its source is known, using the current state of the validator.
///
/// Returns a DstRegisterChecker which can be used to test each specific
/// destination RegisterId quickly.
#[inline(always)]
pub(crate) fn dst_registers_allowed(
&self,
available: RegisterSet,
op: Opcode,
pass: Pass,
writer_info: &RegisterWriter,
writer_info: RegisterWriter,
src: Option<RegisterId>,
) -> RegisterSet {
available.filter(
#[inline(always)]
|dst| {
// One register specified by DISALLOW_REGISTER_FOR_ADDSHIFT can't
// be used as destination for AddShift.
if op == Opcode::AddShift && dst == model::DISALLOW_REGISTER_FOR_ADDSHIFT {
return false;
}
// A few instructions disallow choosing src and dst as the same
if model::disallow_src_is_dst(op) && src == Some(dst) {
return false;
}
// Additional constraints are written on the pair of previous and
// current instructions with the same destination.
model::writer_pair_allowed(pass, self.writer_map.get(dst), writer_info)
) -> DstRegisterChecker<'_> {
DstRegisterChecker {
pass,
writer_info,
writer_map: &self.writer_map,
op_is_add_shift: op == Opcode::AddShift,
disallow_equal: if model::disallow_src_is_dst(op) {
src
} else {
None
},
)
}
}
}
/// State information returned by [`Validator::dst_registers_allowed`]
///
/// Captures everything needed to decide whether a candidate destination
/// register is acceptable for the instruction being generated, so that
/// [`DstRegisterChecker::check`] can be called once per candidate.
#[derive(Debug, Clone)]
pub(crate) struct DstRegisterChecker<'v> {
/// Is this the original or retry pass?
pass: Pass,
/// Reference to a table of [`RegisterWriter`] information for each register
///
/// Borrowed from the [`Validator`] that created this checker.
writer_map: &'v RegisterWriterMap,
/// The new [`RegisterWriter`] under consideration
///
/// Compared against the last recorded writer of each candidate register.
writer_info: RegisterWriter,
/// Was this [`Opcode::AddShift`]?
///
/// When set, `model::DISALLOW_REGISTER_FOR_ADDSHIFT` is rejected as a
/// destination.
op_is_add_shift: bool,
/// Optionally disallow one matching register, used to implement [`model::disallow_src_is_dst`]
///
/// Holds the source register when the opcode forbids using the same
/// register for src and dst, and `None` otherwise.
disallow_equal: Option<RegisterId>,
}
impl<'v> DstRegisterChecker<'v> {
    /// Decide whether `dst` may be used as the destination register, given
    /// the context captured by [`Validator::dst_registers_allowed`].
    ///
    /// A register is rejected when any of these hold:
    ///
    /// - the op is AddShift and `dst` is the one register reserved by
    ///   `DISALLOW_REGISTER_FOR_ADDSHIFT`,
    /// - the op forbids src and dst being the same register and `dst` is
    ///   that source register,
    /// - the previous writer of `dst` may not be followed by the writer
    ///   under consideration.
    #[inline(always)]
    pub(crate) fn check(&self, dst: RegisterId) -> bool {
        let banned_for_add_shift =
            self.op_is_add_shift && dst == model::DISALLOW_REGISTER_FOR_ADDSHIFT;
        let same_as_src = self.disallow_equal == Some(dst);

        // The pairwise writer constraint is only consulted once both of the
        // cheap single-register exclusions have passed.
        !banned_for_add_shift
            && !same_as_src
            && model::writer_pair_allowed(self.pass, self.writer_map.get(dst), self.writer_info)
    }
}
@ -259,7 +303,7 @@ impl Validator {
pub(crate) fn src_registers_allowed(available: RegisterSet, op: Opcode) -> RegisterSet {
// HashX defines a special case DISALLOW_REGISTER_FOR_ADDSHIFT for
// destination registers, and it also includes a look-ahead
// condition here in source register allocation to prevent the dest
// condition here in source register allocation to prevent the dst
// allocation from getting stuck as often. If we have only two
// remaining registers for AddShift and one is the disallowed reg,
// HashX defines that the random choice is short-circuited early
@ -269,7 +313,7 @@ pub(crate) fn src_registers_allowed(available: RegisterSet, op: Opcode) -> Regis
&& available.contains(model::DISALLOW_REGISTER_FOR_ADDSHIFT)
&& available.len() == 2
{
available.filter(
RegisterSet::from_filter(
#[inline(always)]
|reg| reg == model::DISALLOW_REGISTER_FOR_ADDSHIFT,
)
@ -297,9 +341,9 @@ pub(crate) fn opcode_pair_allowed(previous: Option<Opcode>, proposed: Opcode) ->
}
}
/// Map each [`RegisterId`] to an [`Option<RegisterWriter>`]
/// Map each [`RegisterId`] to a [`RegisterWriter`]
#[derive(Default, Debug, Clone)]
struct RegisterWriterMap([Option<RegisterWriter>; NUM_REGISTERS]);
struct RegisterWriterMap([RegisterWriter; NUM_REGISTERS]);
impl RegisterWriterMap {
/// Make a new empty register writer map.
@ -313,12 +357,12 @@ impl RegisterWriterMap {
/// Write or overwrite the last [`RegisterWriter`] associated with `reg`.
#[inline(always)]
fn insert(&mut self, reg: RegisterId, writer: RegisterWriter) {
self.0[reg.as_usize()] = Some(writer);
self.0[reg.as_usize()] = writer;
}
/// Return the most recent mapping for `reg`, or the default [`RegisterWriter::None`] if it has not been written yet.
#[inline(always)]
fn get(&self, reg: RegisterId) -> Option<&RegisterWriter> {
self.0[reg.as_usize()].as_ref()
fn get(&self, reg: RegisterId) -> RegisterWriter {
self.0[reg.as_usize()]
}
}

View File

@ -1,7 +1,7 @@
//! Pseudorandom generator for hash programs and parts thereof
use crate::constraints::{self, Pass, RegisterWriter, Validator};
use crate::program::{Instruction, InstructionArray, Opcode, Program};
use crate::program::{Instruction, Opcode};
use crate::rand::RngBuffer;
use crate::register::{RegisterId, RegisterSet};
use crate::scheduler::{InstructionPlan, Scheduler};
@ -84,17 +84,8 @@ mod model {
pub(super) const BRANCH_MASK_BIT_WEIGHT: usize = 4;
}
/// Generate a hash program from an arbitrary [`RngCore`] implementer.
///
/// This can return [`Error::ProgramConstraints`] if the HashX post-generation
/// program verification fails. During normal use this will happen once per
/// several thousand random seeds, and the caller should skip to another seed.
pub(crate) fn generate_program<T: RngCore>(rng: &mut T) -> Result<Program, Error> {
Generator::new(rng).generate_program()
}
/// Internal state for the program generator
struct Generator<'r, R: RngCore> {
/// Program generator
pub(crate) struct Generator<'r, R: RngCore> {
/// The program generator wraps a random number generator, via [`RngBuffer`].
rng: RngBuffer<'r, R>,
@ -118,7 +109,7 @@ struct Generator<'r, R: RngCore> {
impl<'r, R: RngCore> Generator<'r, R> {
/// Create a fresh program generator from a random number generator state.
#[inline(always)]
fn new(rng: &'r mut R) -> Self {
pub(crate) fn new(rng: &'r mut R) -> Self {
Generator {
rng: RngBuffer::new(rng),
scheduler: Scheduler::new(),
@ -135,7 +126,7 @@ impl<'r, R: RngCore> Generator<'r, R> {
/// The choice is perfectly uniform only if the size of the register set is
/// a power of two. Uniformity is not critical here.
#[inline(always)]
fn select_register(&mut self, reg_options: RegisterSet) -> Result<RegisterId, ()> {
fn select_register(&mut self, reg_options: &RegisterSet) -> Result<RegisterId, ()> {
match reg_options.len() {
0 => Err(()),
1 => Ok(reg_options.index(0)),
@ -198,30 +189,28 @@ impl<'r, R: RngCore> Generator<'r, R> {
/// Generate an entire program.
///
/// This generates instructions until the state can't be advanced any
/// further. Returns with [`Error::ProgramConstraints`] if the program
/// fails the `HashX` whole-program checks. These constraint failures occur
/// in normal use, on a small fraction of seed values.
/// Generates instructions into a provided [`Vec`] until the generator
/// state can't be advanced any further. Runs the whole-program validator.
/// Returns with [`Error::ProgramConstraints`] if the program fails these
/// checks. This happens in normal use on a small fraction of seed values.
#[inline(always)]
fn generate_program(&mut self) -> Result<Program, Error> {
let mut array: InstructionArray = Default::default();
while array.len() < array.capacity() {
pub(crate) fn generate_program(&mut self, output: &mut Vec<Instruction>) -> Result<(), Error> {
assert!(output.is_empty());
while output.len() < output.capacity() {
match self.generate_instruction() {
Err(()) => break,
Ok((inst, regw)) => {
let state_advance = self.commit_instruction_state(&inst, regw);
array.push(inst);
output.push(inst);
if let Err(()) = state_advance {
break;
}
}
}
}
let result = self.validator.check_whole_program(&self.scheduler, &array);
match result {
Err(()) => Err(Error::ProgramConstraints),
Ok(()) => Ok(Program::new(array)),
}
self.validator
.check_whole_program(&self.scheduler, output)
.map_err(|()| Error::ProgramConstraints)
}
/// Generate the next instruction.
@ -235,7 +224,7 @@ impl<'r, R: RngCore> Generator<'r, R> {
/// This only returns `Err(())` if we've hit a stopping condition for the
/// program.
#[inline(always)]
fn generate_instruction(&mut self) -> Result<(Instruction, Option<RegisterWriter>), ()> {
fn generate_instruction(&mut self) -> Result<(Instruction, RegisterWriter), ()> {
loop {
if let Ok(result) = self.instruction_gen_attempt(Pass::Original) {
return Ok(result);
@ -268,18 +257,30 @@ impl<'r, R: RngCore> Generator<'r, R> {
/// choosing the opcode-specific parts of the instruction. Each of these
/// choices affects the Rng state, and may fail if conditions are not met.
#[inline(always)]
fn instruction_gen_attempt(
&mut self,
pass: Pass,
) -> Result<(Instruction, Option<RegisterWriter>), ()> {
fn instruction_gen_attempt(&mut self, pass: Pass) -> Result<(Instruction, RegisterWriter), ()> {
let op = self.choose_opcode(pass);
let plan = self.scheduler.instruction_plan(op)?;
let (inst, regw) = self.choose_instruction_with_opcode_plan(op, pass, &plan)?;
assert_eq!(inst.opcode(), op);
debug_assert_eq!(inst.opcode(), op);
self.scheduler.commit_instruction_plan(&plan, &inst);
Ok((inst, regw))
}
/// Pick a source register for `op`.
///
/// Candidates are the registers the scheduler reports as available at the
/// cycle where `timing_plan` issues the instruction, narrowed further by
/// the opcode-specific source constraints. One candidate is then drawn
/// with `select_register`, whose result is passed through unchanged.
#[inline(never)]
fn choose_src_reg(
    &mut self,
    op: Opcode,
    timing_plan: &InstructionPlan,
) -> Result<RegisterId, ()> {
    let ready = RegisterSet::from_filter(|candidate| {
        self.scheduler
            .register_available(candidate, timing_plan.cycle_issued())
    });
    let permitted = constraints::src_registers_allowed(ready, op);
    self.select_register(&permitted)
}
/// Choose both a source and destination register using a normal
/// [`RegisterWriter`] for two-operand instructions.
#[inline(always)]
@ -287,18 +288,12 @@ impl<'r, R: RngCore> Generator<'r, R> {
&mut self,
op: Opcode,
pass: Pass,
writer_info_fn: fn(RegisterId) -> RegisterWriter,
timing_plan: &InstructionPlan,
) -> Result<(RegisterId, RegisterId, RegisterWriter), ()> {
let avail_set = self
.scheduler
.registers_available(timing_plan.cycle_issued());
let src_set = constraints::src_registers_allowed(avail_set, op);
let src = self.select_register(src_set)?;
let writer_info = RegisterWriter::RegSource(op, src);
let dst_set =
self.validator
.dst_registers_allowed(avail_set, op, pass, &writer_info, Some(src));
let dst = self.select_register(dst_set)?;
let src = self.choose_src_reg(op, timing_plan)?;
let writer_info = writer_info_fn(src);
let dst = self.choose_dst_reg(op, pass, writer_info, Some(src), timing_plan)?;
Ok((src, dst, writer_info))
}
@ -310,38 +305,34 @@ impl<'r, R: RngCore> Generator<'r, R> {
&mut self,
op: Opcode,
pass: Pass,
writer_info: &RegisterWriter,
writer_info: RegisterWriter,
timing_plan: &InstructionPlan,
) -> Result<(RegisterId, RegisterId), ()> {
let avail_set = self
.scheduler
.registers_available(timing_plan.cycle_issued());
let src_set = constraints::src_registers_allowed(avail_set, op);
let src = self.select_register(src_set)?;
let dst_set =
self.validator
.dst_registers_allowed(avail_set, op, pass, writer_info, Some(src));
let dst = self.select_register(dst_set)?;
let src = self.choose_src_reg(op, timing_plan)?;
let dst = self.choose_dst_reg(op, pass, writer_info, Some(src), timing_plan)?;
Ok((src, dst))
}
/// Choose a destination register only.
#[inline(always)]
/// Choose a destination register only, using source and writer info
/// as well as the current state of the validator.
#[inline(never)]
fn choose_dst_reg(
&mut self,
op: Opcode,
pass: Pass,
writer_info: &RegisterWriter,
writer_info: RegisterWriter,
src: Option<RegisterId>,
timing_plan: &InstructionPlan,
) -> Result<RegisterId, ()> {
let avail_set = self
.scheduler
.registers_available(timing_plan.cycle_issued());
let dst_set = self
let validator = self
.validator
.dst_registers_allowed(avail_set, op, pass, writer_info, None);
let dst = self.select_register(dst_set)?;
Ok(dst)
.dst_registers_allowed(op, pass, writer_info, src);
let dst_set = RegisterSet::from_filter(|dst| {
self.scheduler
.register_available(dst, timing_plan.cycle_issued())
&& validator.check(dst)
});
self.select_register(&dst_set)
}
/// With an [`Opcode`] and an execution unit timing plan already in mind,
@ -355,78 +346,80 @@ impl<'r, R: RngCore> Generator<'r, R> {
op: Opcode,
pass: Pass,
plan: &InstructionPlan,
) -> Result<(Instruction, Option<RegisterWriter>), ()> {
) -> Result<(Instruction, RegisterWriter), ()> {
Ok(match op {
Opcode::Target => (Instruction::Target, None),
Opcode::Target => (Instruction::Target, RegisterWriter::None),
Opcode::Branch => (
Instruction::Branch {
mask: self.select_constant_weight_bit_mask(model::BRANCH_MASK_BIT_WEIGHT),
},
None,
RegisterWriter::None,
),
Opcode::UMulH => {
let regw = RegisterWriter::WideMul(op, self.rng.next_u32());
let (src, dst) =
self.choose_src_dst_regs_with_writer_info(op, pass, &regw, plan)?;
(Instruction::UMulH { src, dst }, Some(regw))
let regw = RegisterWriter::UMulH(self.rng.next_u32());
let (src, dst) = self.choose_src_dst_regs_with_writer_info(op, pass, regw, plan)?;
(Instruction::UMulH { src, dst }, regw)
}
Opcode::SMulH => {
let regw = RegisterWriter::WideMul(op, self.rng.next_u32());
let (src, dst) =
self.choose_src_dst_regs_with_writer_info(op, pass, &regw, plan)?;
(Instruction::SMulH { src, dst }, Some(regw))
let regw = RegisterWriter::SMulH(self.rng.next_u32());
let (src, dst) = self.choose_src_dst_regs_with_writer_info(op, pass, regw, plan)?;
(Instruction::SMulH { src, dst }, regw)
}
Opcode::Mul => {
let (src, dst, regw) = self.choose_src_dst_regs(op, pass, plan)?;
(Instruction::Mul { src, dst }, Some(regw))
let regw = RegisterWriter::Mul;
let (src, dst, regw) = self.choose_src_dst_regs(op, pass, regw, plan)?;
(Instruction::Mul { src, dst }, regw)
}
Opcode::Sub => {
let (src, dst, regw) = self.choose_src_dst_regs(op, pass, plan)?;
(Instruction::Sub { src, dst }, Some(regw))
let regw = RegisterWriter::AddSub;
let (src, dst, regw) = self.choose_src_dst_regs(op, pass, regw, plan)?;
(Instruction::Sub { src, dst }, regw)
}
Opcode::Xor => {
let (src, dst, regw) = self.choose_src_dst_regs(op, pass, plan)?;
(Instruction::Xor { src, dst }, Some(regw))
let regw = RegisterWriter::Xor;
let (src, dst, regw) = self.choose_src_dst_regs(op, pass, regw, plan)?;
(Instruction::Xor { src, dst }, regw)
}
Opcode::AddShift => {
let regw = RegisterWriter::AddSub;
let left_shift = (self.rng.next_u32() & 3) as u8;
let (src, dst, regw) = self.choose_src_dst_regs(op, pass, plan)?;
let (src, dst, regw) = self.choose_src_dst_regs(op, pass, regw, plan)?;
(
Instruction::AddShift {
src,
dst,
left_shift,
},
Some(regw),
regw,
)
}
Opcode::AddConst => {
let regw = RegisterWriter::ConstSource(op);
let regw = RegisterWriter::AddConst;
let src = self.select_nonzero_u32(u32::MAX) as i32;
let dst = self.choose_dst_reg(op, pass, &regw, plan)?;
(Instruction::AddConst { src, dst }, Some(regw))
let dst = self.choose_dst_reg(op, pass, regw, None, plan)?;
(Instruction::AddConst { src, dst }, regw)
}
Opcode::XorConst => {
let regw = RegisterWriter::ConstSource(op);
let regw = RegisterWriter::XorConst;
let src = self.select_nonzero_u32(u32::MAX) as i32;
let dst = self.choose_dst_reg(op, pass, &regw, plan)?;
(Instruction::XorConst { src, dst }, Some(regw))
let dst = self.choose_dst_reg(op, pass, regw, None, plan)?;
(Instruction::XorConst { src, dst }, regw)
}
Opcode::Rotate => {
let regw = RegisterWriter::ConstSource(op);
let regw = RegisterWriter::Rotate;
let right_rotate: u8 = self.select_nonzero_u32(63) as u8;
let dst = self.choose_dst_reg(op, pass, &regw, plan)?;
(Instruction::Rotate { dst, right_rotate }, Some(regw))
let dst = self.choose_dst_reg(op, pass, regw, None, plan)?;
(Instruction::Rotate { dst, right_rotate }, regw)
}
})
}
@ -440,7 +433,7 @@ impl<'r, R: RngCore> Generator<'r, R> {
fn commit_instruction_state(
&mut self,
inst: &Instruction,
regw: Option<RegisterWriter>,
regw: RegisterWriter,
) -> Result<(), ()> {
self.validator.commit_instruction(inst, regw);
self.scheduler.advance_instruction_stream(inst.opcode())

View File

@ -51,7 +51,6 @@ mod scheduler;
mod siphash;
use crate::compiler::{Architecture, Executable};
use crate::generator::generate_program;
use crate::program::Program;
use rand_core::RngCore;
@ -111,8 +110,8 @@ pub struct HashX {
/// to store the program data.
#[derive(Debug)]
enum RuntimeProgram {
/// Select the interpreted runtime, and hold a boxed Program for it to run.
Interpret(Box<Program>),
/// Select the interpreted runtime, and hold a Program for it to run.
Interpret(Program),
/// Select the compiled runtime, and hold an executable code page.
Compiled(Executable),
}
@ -203,7 +202,7 @@ impl HashXBuilder {
rng: &mut R,
register_key: SipState,
) -> Result<HashX, Error> {
let program = generate_program(rng)?;
let program = Program::generate(rng)?;
self.build_from_program(program, register_key)
}
@ -217,13 +216,13 @@ impl HashXBuilder {
Ok(HashX {
register_key,
program: match self.runtime {
RuntimeOption::InterpretOnly => RuntimeProgram::Interpret(Box::new(program)),
RuntimeOption::InterpretOnly => RuntimeProgram::Interpret(program),
RuntimeOption::CompileOnly => {
RuntimeProgram::Compiled(Architecture::compile((&program).into())?)
}
RuntimeOption::TryCompile => match Architecture::compile((&program).into()) {
Ok(exec) => RuntimeProgram::Compiled(exec),
Err(_) => RuntimeProgram::Interpret(Box::new(program)),
Err(_) => RuntimeProgram::Interpret(program),
},
},
})

View File

@ -1,7 +1,9 @@
//! Define the internal hash program representation used by HashX.
use crate::generator::Generator;
use crate::register::{RegisterFile, RegisterId};
use arrayvec::ArrayVec;
use crate::Error;
use rand_core::RngCore;
use std::fmt;
use std::ops::BitXor;
@ -179,24 +181,14 @@ impl Instruction {
}
}
/// Fixed-size array of instructions, either a complete program or a
/// program under construction
pub(crate) type InstructionArray = ArrayVec<Instruction, NUM_INSTRUCTIONS>;
/// Generated `HashX` program, as a list of instructions.
/// Generated `HashX` program, as a boxed slice of instructions
#[derive(Clone, Default)]
pub struct Program {
/// The InstructionArray that this Program wraps
///
/// InstructionArray provides storage, and this type indicates that the
/// program should be a well-formed HashX function.
instructions: InstructionArray,
}
pub struct Program(Box<[Instruction]>);
impl fmt::Debug for Program {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!(f, "Program {{")?;
for (addr, inst) in self.instructions.iter().enumerate() {
for (addr, inst) in self.0.iter().enumerate() {
writeln!(f, " [{:3}]: {:?}", addr, inst)?;
}
write!(f, "}}")
@ -204,10 +196,16 @@ impl fmt::Debug for Program {
}
impl Program {
/// Construct a finished `Program` from a list of instructions.
#[inline(always)]
pub(crate) fn new(instructions: InstructionArray) -> Self {
Self { instructions }
/// Generate a new `Program` from an arbitrary [`RngCore`] implementer
///
/// This can return [`Error::ProgramConstraints`] if the HashX
/// post-generation program verification fails. During normal use this
/// will happen once per several thousand random seeds, and the caller
/// should skip to another seed.
pub(crate) fn generate<T: RngCore>(rng: &mut T) -> Result<Self, Error> {
let mut instructions = Vec::with_capacity(NUM_INSTRUCTIONS);
Generator::new(rng).generate_program(&mut instructions)?;
Ok(Program(instructions.into_boxed_slice()))
}
/// Reference implementation for `Program` behavior
@ -254,9 +252,9 @@ impl Program {
}};
}
while program_counter < self.instructions.len() {
while program_counter < self.0.len() {
let next_pc = program_counter + 1;
program_counter = match &self.instructions[program_counter] {
program_counter = match &self.0[program_counter] {
Instruction::Target => {
branch_target = Some(program_counter);
next_pc
@ -305,9 +303,9 @@ impl Program {
}
}
impl<'a> From<&'a Program> for &'a InstructionArray {
impl<'a> From<&'a Program> for &'a [Instruction] {
#[inline(always)]
fn from(prog: &'a Program) -> Self {
&prog.instructions
&prog.0
}
}

View File

@ -1,6 +1,7 @@
//! Define HashX's register file, and how it's created and digested.
use crate::siphash::{siphash24_ctr, SipState};
use arrayvec::ArrayVec;
use std::fmt;
/// Number of virtual registers in the HashX machine
@ -29,6 +30,12 @@ impl RegisterId {
pub(crate) fn as_usize(&self) -> usize {
self.0 as usize
}
/// Iterate over every `RegisterId`, counting up from register 0 through
/// register `NUM_REGISTERS - 1`.
#[inline(always)]
pub(crate) fn all() -> impl Iterator<Item = RegisterId> {
    let count = NUM_REGISTERS as u8;
    (0..count).map(RegisterId)
}
}
/// Identify a set of RegisterIds
@ -36,15 +43,10 @@ impl RegisterId {
/// This could be done compactly as a u8 bitfield for storage purposes, but
/// in our program generator this is never stored long-term. Instead, we want
/// something the optimizer can reason about as effectively as possible, and
/// let's inline as much as possible in order to resolve special cases in
/// the program generator at compile time.
#[derive(Clone, Copy, Eq, PartialEq)]
pub(crate) struct RegisterSet {
/// Number of registers in the set
len: usize,
/// Array indexed by register Id, indicating registers we've excluded
reg_not_in_set: [bool; 8],
}
/// we want to optimize for an index() implementation that doesn't branch.
/// This uses a fixed-capacity array of registers in-set, always sorted.
#[derive(Default, Clone, Eq, PartialEq)]
pub(crate) struct RegisterSet(ArrayVec<RegisterId, NUM_REGISTERS>);
impl fmt::Debug for RegisterSet {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
@ -60,66 +62,29 @@ impl fmt::Debug for RegisterSet {
}
impl RegisterSet {
/// Construct the set of all registers.
///
/// This is the main way to construct a new RegisterId, starting with
/// all available registers and filtering them repeatedly.
#[inline(always)]
pub(crate) fn all() -> Self {
Self {
len: NUM_REGISTERS,
reg_not_in_set: Default::default(),
}
}
/// Number of registers still contained in this set
#[inline(always)]
pub(crate) fn len(&self) -> usize {
self.len
self.0.len()
}
/// Test if a register is contained in the set.
#[inline(always)]
pub(crate) fn contains(&self, id: RegisterId) -> bool {
!self.reg_not_in_set[id.0 as usize]
self.0.contains(&id)
}
/// Filter this register set through a predicate.
///
/// Invokes the predicate only for registers in this set, and returns the
/// set of registers for which it returned true.
/// Build a new RegisterSet from each register for which a predicate
/// function returns `true`.
#[inline(always)]
pub(crate) fn filter<P: FnMut(RegisterId) -> bool>(&self, mut predicate: P) -> Self {
let mut result = Self {
len: 0,
reg_not_in_set: Default::default(),
};
self.filter_impl(0, &mut predicate, &mut result);
self.filter_impl(1, &mut predicate, &mut result);
self.filter_impl(2, &mut predicate, &mut result);
self.filter_impl(3, &mut predicate, &mut result);
self.filter_impl(4, &mut predicate, &mut result);
self.filter_impl(5, &mut predicate, &mut result);
self.filter_impl(6, &mut predicate, &mut result);
self.filter_impl(7, &mut predicate, &mut result);
result
}
/// Internal implementation to be unrolled by `filter`
#[inline(always)]
fn filter_impl<P: FnMut(RegisterId) -> bool>(
&self,
id: usize,
predicate: &mut P,
result: &mut Self,
) {
if self.reg_not_in_set[id] {
result.reg_not_in_set[id] = true;
} else if predicate(RegisterId(id as u8)) {
result.len += 1;
} else {
result.reg_not_in_set[id] = true;
pub(crate) fn from_filter<P: FnMut(RegisterId) -> bool>(mut predicate: P) -> Self {
let mut result: Self = Default::default();
for r in RegisterId::all() {
if predicate(r) {
result.0.push(r);
}
}
result
}
/// Return a particular register within this set, counting from R0 to R7.
@ -127,45 +92,8 @@ impl RegisterSet {
/// The supplied index must be less than the [`Self::len()`] of this set.
/// Panics if the index is out of range.
#[inline(always)]
pub(crate) fn index(&self, mut index: usize) -> RegisterId {
if let Some(result) = self.index_impl(0, &mut index) {
return result;
}
if let Some(result) = self.index_impl(1, &mut index) {
return result;
}
if let Some(result) = self.index_impl(2, &mut index) {
return result;
}
if let Some(result) = self.index_impl(3, &mut index) {
return result;
}
if let Some(result) = self.index_impl(4, &mut index) {
return result;
}
if let Some(result) = self.index_impl(5, &mut index) {
return result;
}
if let Some(result) = self.index_impl(6, &mut index) {
return result;
}
if let Some(result) = self.index_impl(7, &mut index) {
return result;
}
unreachable!();
}
/// Internal implementation to be unrolled by `index`
#[inline(always)]
fn index_impl(&self, id: usize, index: &mut usize) -> Option<RegisterId> {
if self.reg_not_in_set[id] {
None
} else if *index == 0 {
Some(RegisterId(id as u8))
} else {
*index -= 1;
None
}
pub(crate) fn index(&self, index: usize) -> RegisterId {
// Plain positional lookup into the stored list of set members; an
// out-of-range `index` is rejected by the indexing operation itself.
self.0[index]
}
}
@ -224,29 +152,3 @@ impl RegisterFile {
[x.v0 ^ y.v0, x.v1 ^ y.v1, x.v2 ^ y.v2, x.v3 ^ y.v3]
}
}
#[cfg(test)]
mod test {
use super::RegisterSet;
#[test]
fn register_set() {
let r = RegisterSet::all().filter(|_reg| true);
assert_eq!(r.len(), 8);
assert_eq!(r.index(7).as_usize(), 7);
assert_eq!(r.index(0).as_usize(), 0);
let r = r.filter(|reg| (reg.as_usize() & 1) != 0);
assert_eq!(r.len(), 4);
assert_eq!(r.index(0).as_usize(), 1);
assert_eq!(r.index(1).as_usize(), 3);
assert_eq!(r.index(2).as_usize(), 5);
assert_eq!(r.index(3).as_usize(), 7);
let r = r.filter(|reg| (reg.as_usize() & 2) != 0);
assert_eq!(r.index(0).as_usize(), 3);
assert_eq!(r.index(1).as_usize(), 7);
let r = r.filter(|_reg| true);
assert_eq!(r.len(), 2);
let r = r.filter(|_reg| false);
assert_eq!(r.len(), 0);
}
}

View File

@ -5,7 +5,7 @@
//! avoid stalls.
use crate::program::{Instruction, Opcode};
use crate::register::{RegisterId, RegisterSet, NUM_REGISTERS};
use crate::register::{RegisterId, NUM_REGISTERS};
/// Scheduling information for each opcode
mod model {
@ -197,10 +197,10 @@ impl Scheduler {
}
}
/// Figure out which registers will be available at or before the indicated cycle.
/// Look up if a register will be available at or before the indicated cycle.
#[inline(always)]
pub(crate) fn registers_available(&self, cycle: Cycle) -> RegisterSet {
self.data.registers_available(cycle)
pub(crate) fn register_available(&self, reg: RegisterId, cycle: Cycle) -> bool {
// Pure delegation: the per-register answer comes from `self.data`.
self.data.register_available(reg, cycle)
}
/// Return the overall data latency.
@ -323,13 +323,10 @@ impl DataSchedule {
self.latencies[dst.as_usize()] = cycle;
}
/// Figure out which registers will be available at or before the indicated cycle
/// Look up if a register will be available at or before the indicated cycle.
#[inline(always)]
fn registers_available(&self, cycle: Cycle) -> RegisterSet {
RegisterSet::all().filter(
#[inline(always)]
|reg| self.latencies[reg.as_usize()] <= cycle,
)
fn register_available(&self, reg: RegisterId, cycle: Cycle) -> bool {
    // A register counts as available once the cycle recorded for it in
    // `latencies` is no later than the cycle being asked about.
    let ready_at = &self.latencies[reg.as_usize()];
    *ready_at <= cycle
}
/// Return the overall latency, the [`Cycle`] at which we expect
@ -447,7 +444,7 @@ struct MicroOpPlan {
///
/// This is defined as either one or two micro-operations
/// scheduled on the same cycle.
#[derive(Debug, Clone, Eq, PartialEq)]
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) struct InstructionPlan {
/// The Cycle this whole instruction begins on
cycle: Cycle,