diff --git a/crates/hashx/src/compiler.rs b/crates/hashx/src/compiler.rs index d0f610a8d..fd8f7dc1f 100644 --- a/crates/hashx/src/compiler.rs +++ b/crates/hashx/src/compiler.rs @@ -7,7 +7,7 @@ //! `Executable` wraps a mmap buffer from the `dynasmrt` crate and the //! `Architecture` is implemented in a CPU-specific way. -use crate::{program::InstructionArray, register::RegisterFile, CompilerError}; +use crate::{program::Instruction, register::RegisterFile, CompilerError}; #[cfg(all(feature = "compiler", target_arch = "x86_64"))] mod x86_64; @@ -41,7 +41,7 @@ pub(crate) struct Executable { not(any(target_arch = "x86_64", target_arch = "aarch64")) ))] impl Architecture for Executable { - fn compile(_program: &InstructionArray) -> Result { + fn compile(_program: &[Instruction]) -> Result { Err(CompilerError::NotAvailable) } @@ -71,7 +71,7 @@ where Self: Sized, { /// Compile an array of instructions into an Executable - fn compile(program: &InstructionArray) -> Result; + fn compile(program: &[Instruction]) -> Result; /// Run the compiled code, with a RegisterFile for input and output fn invoke(&self, regs: &mut RegisterFile); diff --git a/crates/hashx/src/compiler/aarch64.rs b/crates/hashx/src/compiler/aarch64.rs index 95cba0686..9fbd4e14c 100644 --- a/crates/hashx/src/compiler/aarch64.rs +++ b/crates/hashx/src/compiler/aarch64.rs @@ -1,22 +1,31 @@ //! Dynamically emitted HashX assembly code for aarch64 targets use crate::compiler::{util, Architecture, Executable}; -use crate::program::{self, Instruction, InstructionArray}; -use crate::register::{RegisterFile, RegisterId, RegisterSet}; +use crate::program::{self, Instruction}; +use crate::register::{RegisterFile, RegisterId}; use crate::CompilerError; use dynasmrt::{aarch64, DynasmApi, DynasmLabelApi}; use std::mem; impl Architecture for Executable { - fn compile(program: &InstructionArray) -> Result { + fn compile(program: &[Instruction]) -> Result { let mut asm = Assembler::new(); - emit_load_input(&mut asm); - emit_init_locals(&mut asm); - for inst in program { - emit_instruction(&mut asm, inst); + { + emit_load_input(&mut asm); + emit_init_locals(&mut asm); + debug_assert_eq!(asm.len(), PROLOGUE_SIZE); + } + for inst in program { + let prev_len = asm.len(); + emit_instruction(&mut asm, inst); + debug_assert!(asm.len() - prev_len <= INSTRUCTION_SIZE_LIMIT); + } + { + let prev_len = asm.len(); + emit_store_output(&mut asm); + emit_return(&mut asm); + debug_assert_eq!(asm.len() - prev_len, EPILOGUE_SIZE); } - emit_store_output(&mut asm); - emit_return(&mut asm); asm.finalize() } @@ -36,8 +45,19 @@ impl Architecture for Executable { } } -/// Architecture-specific capacity for the temporary output buffer -const BUFFER_CAPACITY: usize = 0x200 + program::NUM_INSTRUCTIONS * 16; +/// Architecture-specific fixed prologue size +const PROLOGUE_SIZE: usize = 0x28; + +/// Architecture-specific fixed epilogue size +const EPILOGUE_SIZE: usize = 0x24; + +/// Architecture-specific maximum size for one instruction +const INSTRUCTION_SIZE_LIMIT: usize = 0x18; + +/// Capacity for the temporary output buffer, before code is copied into +/// a long-lived allocation that can be made executable. +const BUFFER_CAPACITY: usize = + PROLOGUE_SIZE + EPILOGUE_SIZE + program::NUM_INSTRUCTIONS * INSTRUCTION_SIZE_LIMIT; /// Architecture-specific specialization of the Assembler type Assembler = util::Assembler; @@ -51,9 +71,12 @@ trait RegisterMapper { } impl RegisterMapper for RegisterId { + #[inline(always)] fn x(&self) -> u32 { 1 + (self.as_usize() as u32) } + + #[inline(always)] fn offset(&self) -> u32 { (self.as_usize() * mem::size_of::()) as u32 } @@ -75,7 +98,8 @@ macro_rules! dynasm { } /// Emit code to initialize our local variables to default values. -fn emit_init_locals(asm: &mut Assembler) { +#[inline(always)] +fn emit_init_locals(asm: &mut A) { dynasm!(asm ; mov mulh_result32, wzr ; mov branch_prohibit_flag, wzr @@ -84,30 +108,32 @@ fn emit_init_locals(asm: &mut Assembler) { /// Emit code to move all input values from the RegisterFile into their /// actual hardware registers. -fn emit_load_input(asm: &mut Assembler) { - RegisterSet::all().filter(|reg| { +#[inline(always)] +fn emit_load_input(asm: &mut A) { + for reg in RegisterId::all() { dynasm!(asm; ldr X(reg.x()), [register_file_ptr, #(reg.offset())]); - true - }); + } } /// Emit code to move all output values from machine registers back into /// their RegisterFile slots. -fn emit_store_output(asm: &mut Assembler) { - RegisterSet::all().filter(|reg| { +#[inline(always)] +fn emit_store_output(asm: &mut A) { + for reg in RegisterId::all() { dynasm!(asm; str X(reg.x()), [register_file_ptr, #(reg.offset())]); - true - }); + } } /// Emit a return instruction. -fn emit_return(asm: &mut Assembler) { +#[inline(always)] +fn emit_return(asm: &mut A) { dynasm!(asm; ret); } /// Load a sign extended 32-bit constant into the const_temp_64 /// register, using a movz/movn and movk pair. -fn emit_i32_const_temp_64(asm: &mut Assembler, value: i32) { +#[inline(always)] +fn emit_i32_const_temp_64(asm: &mut A, value: i32) { let high = (value >> 16) as u32; let low = (value & 0xFFFF) as u32; if value < 0 { @@ -119,7 +145,8 @@ fn emit_i32_const_temp_64(asm: &mut Assembler, value: i32) { } /// Load a 32-bit constant into const_temp_32, without extending. -fn emit_u32_const_temp_32(asm: &mut Assembler, value: u32) { +#[inline(always)] +fn emit_u32_const_temp_32(asm: &mut A, value: u32) { let high = value >> 16; let low = value & 0xFFFF; dynasm!(asm @@ -129,6 +156,7 @@ fn emit_u32_const_temp_32(asm: &mut Assembler, value: u32) { } /// Emit code for a single [`Instruction`] in the hash program. +#[inline(always)] fn emit_instruction(asm: &mut Assembler, inst: &Instruction) { /// Common implementation for binary operations on registers macro_rules! reg_op { diff --git a/crates/hashx/src/compiler/util.rs b/crates/hashx/src/compiler/util.rs index 0188211a1..03ec54e43 100644 --- a/crates/hashx/src/compiler/util.rs +++ b/crates/hashx/src/compiler/util.rs @@ -33,14 +33,22 @@ pub(crate) struct Assembler { impl Assembler { /// Return the entry point as an [`AssemblyOffset`]. + #[inline(always)] pub(crate) fn entry() -> AssemblyOffset { AssemblyOffset(0) } + /// Size of the code stored so far, in bytes + #[inline(always)] + pub(crate) fn len(&self) -> usize { + self.buffer.len() + } + /// Make a new assembler with a temporary buffer but no executable buffer. + #[inline(always)] pub(crate) fn new() -> Self { Self { - buffer: Default::default(), + buffer: ArrayVec::new(), target: None, phantom: PhantomData, } @@ -51,6 +59,7 @@ impl Assembler { /// This may fail if we can't allocate some memory, fill it, and mark /// it as executable. For example, a Linux platform with policy to restrict /// `mprotect` will show runtime errors at this point. + #[inline(always)] pub(crate) fn finalize(self) -> Result { // We never execute code from the buffer until it's complete, and we use // a freshly mmap'ed buffer for each program. Because of this, we don't @@ -83,11 +92,13 @@ impl std::fmt::Debug for Executable { impl DynasmLabelApi for Assembler { type Relocation = R; + #[inline(always)] fn local_label(&mut self, name: &'static str) { debug_assert_eq!(name, "target"); self.target = Some(self.offset()); } + #[inline(always)] fn backward_relocation( &mut self, name: &'static str, @@ -154,12 +165,14 @@ impl DynasmLabelApi for Assembler { } impl Extend for Assembler { + #[inline(always)] fn extend>(&mut self, iter: T) { self.buffer.extend(iter); } } impl<'a, R: Relocation, const S: usize> Extend<&'a u8> for Assembler { + #[inline(always)] fn extend>(&mut self, iter: T) { for byte in iter { self.buffer.push(*byte); @@ -168,20 +181,17 @@ impl<'a, R: Relocation, const S: usize> Extend<&'a u8> for Assembler { } impl DynasmApi for Assembler { + #[inline(always)] fn offset(&self) -> AssemblyOffset { AssemblyOffset(self.buffer.len()) } + #[inline(always)] fn push(&mut self, byte: u8) { self.buffer.push(byte); } - fn align(&mut self, alignment: usize, with: u8) { - let offset = self.buffer.len() % alignment; - if offset != 0 { - for _ in offset..alignment { - self.buffer.push(with); - } - } + fn align(&mut self, _alignment: usize, _with: u8) { + unreachable!(); } } diff --git a/crates/hashx/src/compiler/x86_64.rs b/crates/hashx/src/compiler/x86_64.rs index ec81f3282..8e5e20305 100644 --- a/crates/hashx/src/compiler/x86_64.rs +++ b/crates/hashx/src/compiler/x86_64.rs @@ -1,24 +1,33 @@ //! Dynamically emitted HashX assembly code for x86_64 targets use crate::compiler::{util, Architecture, Executable}; -use crate::program::{self, Instruction, InstructionArray}; -use crate::register::{RegisterFile, RegisterId, RegisterSet}; +use crate::program::{self, Instruction}; +use crate::register::{RegisterFile, RegisterId}; use crate::CompilerError; -use dynasmrt::{x64, x64::Rq, DynasmApi, DynasmLabelApi, Register}; +use dynasmrt::{x64, x64::Rq, DynasmApi, DynasmLabelApi}; use std::mem; impl Architecture for Executable { - fn compile(program: &InstructionArray) -> Result { + fn compile(program: &[Instruction]) -> Result { let mut asm = Assembler::new(); - emit_save_regs(&mut asm); - emit_load_input(&mut asm); - emit_init_locals(&mut asm); - for inst in program { - emit_instruction(&mut asm, inst); + { + emit_save_regs(&mut asm); + emit_load_input(&mut asm); + emit_init_locals(&mut asm); + debug_assert_eq!(asm.len(), PROLOGUE_SIZE); + } + for inst in program { + let prev_len = asm.len(); + emit_instruction(&mut asm, inst); + debug_assert!(asm.len() - prev_len <= INSTRUCTION_SIZE_LIMIT); + } + { + let prev_len = asm.len(); + emit_store_output(&mut asm); + emit_restore_regs(&mut asm); + emit_return(&mut asm); + debug_assert_eq!(asm.len() - prev_len, EPILOGUE_SIZE); } - emit_store_output(&mut asm); - emit_restore_regs(&mut asm); - emit_return(&mut asm); asm.finalize() } @@ -37,8 +46,19 @@ impl Architecture for Executable { } } -/// Architecture-specific capacity for the temporary output buffer -const BUFFER_CAPACITY: usize = 0x200 + program::NUM_INSTRUCTIONS * 16; +/// Architecture-specific fixed prologue size +const PROLOGUE_SIZE: usize = 0x68; + +/// Architecture-specific fixed epilogue size +const EPILOGUE_SIZE: usize = 0x60; + +/// Architecture-specific maximum size for one instruction +const INSTRUCTION_SIZE_LIMIT: usize = 0x11; + +/// Capacity for the temporary output buffer, before code is copied into +/// a long-lived allocation that can be made executable. +const BUFFER_CAPACITY: usize = + PROLOGUE_SIZE + EPILOGUE_SIZE + program::NUM_INSTRUCTIONS * INSTRUCTION_SIZE_LIMIT; /// Architecture-specific specialization of the Assembler type Assembler = util::Assembler; @@ -52,9 +72,12 @@ trait RegisterMapper { } impl RegisterMapper for RegisterId { + #[inline(always)] fn rq(&self) -> u8 { 8 + (self.as_usize() as u8) } + + #[inline(always)] fn offset(&self) -> i32 { (self.as_usize() * mem::size_of::()) as i32 } @@ -77,7 +100,8 @@ macro_rules! dynasm { } /// Emit code to initialize our local variables to default values. -fn emit_init_locals(asm: &mut Assembler) { +#[inline(always)] +fn emit_init_locals(asm: &mut A) { dynasm!(asm ; xor mulh_result64, mulh_result64 ; xor branch_prohibit_flag, branch_prohibit_flag @@ -105,47 +129,51 @@ const fn stack_size() -> i32 { } /// Emit code to allocate stack space and store REGS_TO_SAVE. -fn emit_save_regs(asm: &mut Assembler) { +#[inline(always)] +fn emit_save_regs(asm: &mut A) { dynasm!(asm; sub rsp, stack_size()); for (i, reg) in REGS_TO_SAVE.as_ref().iter().enumerate() { let offset = (i * mem::size_of::()) as i32; - dynasm!(asm; mov [rsp + offset], Rq(reg.code())); + dynasm!(asm; mov [rsp + offset], Rq(*reg as u8)); } } /// Emit code to restore REGS_TO_SAVE and deallocate stack space. -fn emit_restore_regs(asm: &mut Assembler) { +#[inline(always)] +fn emit_restore_regs(asm: &mut A) { for (i, reg) in REGS_TO_SAVE.as_ref().iter().enumerate() { let offset = (i * mem::size_of::()) as i32; - dynasm!(asm; mov Rq(reg.code()), [rsp + offset]); + dynasm!(asm; mov Rq(*reg as u8), [rsp + offset]); } dynasm!(asm; add rsp, stack_size()); } /// Emit code to move all input values from the RegisterFile into their /// actual hardware registers. -fn emit_load_input(asm: &mut Assembler) { - RegisterSet::all().filter(|reg| { +#[inline(always)] +fn emit_load_input(asm: &mut A) { + for reg in RegisterId::all() { dynasm!(asm; mov Rq(reg.rq()), [register_file_ptr + reg.offset()]); - true - }); + } } /// Emit code to move all output values from machine registers back into /// their RegisterFile slots. -fn emit_store_output(asm: &mut Assembler) { - RegisterSet::all().filter(|reg| { +#[inline(always)] +fn emit_store_output(asm: &mut A) { + for reg in RegisterId::all() { dynasm!(asm; mov [register_file_ptr + reg.offset()], Rq(reg.rq())); - true - }); + } } /// Emit a return instruction. -fn emit_return(asm: &mut Assembler) { +#[inline(always)] +fn emit_return(asm: &mut A) { dynasm!(asm; ret); } /// Emit code for a single [`Instruction`] in the hash program. +#[inline(always)] fn emit_instruction(asm: &mut Assembler, inst: &Instruction) { /// Common implementation for binary operations on registers macro_rules! reg_op { diff --git a/crates/hashx/src/constraints.rs b/crates/hashx/src/constraints.rs index b28d19d66..4177cfebd 100644 --- a/crates/hashx/src/constraints.rs +++ b/crates/hashx/src/constraints.rs @@ -9,7 +9,7 @@ //! Generating correct HashX output depends on applying exactly the right //! constraints. -use crate::program::{Instruction, InstructionArray, Opcode}; +use crate::program::{Instruction, Opcode}; use crate::register::{RegisterId, RegisterSet, NUM_REGISTERS}; use crate::scheduler::Scheduler; @@ -39,7 +39,7 @@ mod model { matches!(op, Opcode::Mul | Opcode::SMulH | Opcode::UMulH) } - /// Does an instruction prohibit using the same register for source and dest? + /// Does an instruction prohibit using the same register for src and dst? /// /// Meaningful only for ops that have both a source and destination register. #[inline(always)] @@ -85,31 +85,19 @@ mod model { #[inline(always)] pub(super) fn writer_pair_allowed( pass: Pass, - last_writer: Option<&RegisterWriter>, - this_writer: &RegisterWriter, + last_writer: RegisterWriter, + this_writer: RegisterWriter, ) -> bool { match (last_writer, this_writer) { // HashX disallows back-to-back 64-bit multiplies on the // same destination register in Pass::Original but permits // them on the retry if the source register isn't identical. - ( - Some(RegisterWriter::RegSource(Opcode::Mul, _)), - RegisterWriter::RegSource(Opcode::Mul, _), - ) if matches!(pass, Pass::Original) => false, - - // Add/Sub from the same source register can't be paired - // with each other. (They might cancel out) - ( - Some(RegisterWriter::RegSource(Opcode::AddShift, last_src)), - RegisterWriter::RegSource(Opcode::Sub, this_src), - ) if this_src == last_src => false, - ( - Some(RegisterWriter::RegSource(Opcode::Sub, last_src)), - RegisterWriter::RegSource(Opcode::AddShift, this_src), - ) if this_src == last_src => false, + (RegisterWriter::Mul(_), RegisterWriter::Mul(_)) if matches!(pass, Pass::Original) => { + false + } // Other pairings are allowed if the writer info differs at all. - (last_writer, this_writer) => last_writer != Some(this_writer), + (last_writer, this_writer) => last_writer != this_writer, } } @@ -132,29 +120,57 @@ mod model { /// This is conceptually similar to storing the last [`super::Instruction`] /// that wrote to a register, but HashX sometimes needs information for /// constraints which won't end up in the final `Instruction`. - #[derive(Debug, Clone, Eq, PartialEq)] + /// + /// We've chosen the encoding to minimize the code size in + /// writer_pair_allowed. Most pairwise comparisons can just be a register + /// equality test. + /// + /// The instructions here fall into three categories which use their own + /// format for encoding arguments: + /// + /// - Wide Multiply, extra u32 + /// + /// UMulH and SMulH use an additional otherwise unused 32-bit value + /// from the Rng when considering writer collisions. + /// + /// As far as I can tell this is a bug in the original implementation + /// but we can't change the behavior without breaking compatibility. + /// + /// The collisions are rare enough not to be a worthwhile addition + /// to ASIC-resistance. It seems like this was a vestigial feature + /// left over from immediate value matching features which were removed + /// during the development of HashX, but I can't be sure. + /// + /// - Constant source + /// + /// Only considers the opcode itself, not the specific immediate value. + /// + /// - Register source + /// + /// Considers the source register, collapses add/subtract into one op. + /// + #[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] pub(crate) enum RegisterWriter { - /// Special format for wide multiply - /// - /// HashX includes an otherwise unused phantom immediate value which - /// can (very rarely) affect constraint selection if it collides. - /// - /// As far as I can tell this is a bug in the original implementation - /// but we can't change the behavior without breaking compatibility. - /// - /// The collisions are rare enough not to be a worthwhile addition - /// to ASIC-resistance. It seems like this was a vestigial feature - /// left over from immediate value matching features which were removed - /// during the development of HashX, but I can't be sure. - WideMul(Opcode, u32), - - /// Writer for instructions with an immediate source - /// - /// The specific immediate value is not used. - ConstSource(Opcode), - - /// Writer for instructions with register source, unique by source register - RegSource(Opcode, RegisterId), + /// Register not written yet + #[default] + None, + /// Register source writer for [`super::Instruction::Mul`] + Mul(RegisterId), + /// Wide multiply writer for [`super::Instruction::UMulH`] + UMulH(u32), + /// Wide multiply writer for [`super::Instruction::SMulH`] + SMulH(u32), + /// Register source writer for [`super::Instruction::AddShift`] + /// and [`super::Instruction::Sub`] + AddSub(RegisterId), + /// Constant source writer for [`super::Instruction::AddConst`] + AddConst, + /// Register source writer for [`super::Instruction::Xor`] + Xor(RegisterId), + /// Constant source writer for [`super::Instruction::XorConst`] + XorConst, + /// Constant source writer for [`super::Instruction::Rotate`] + Rotate, } } @@ -187,16 +203,13 @@ impl Validator { /// Commit a new instruction to the validator state. #[inline(always)] - pub(crate) fn commit_instruction(&mut self, inst: &Instruction, regw: Option) { + pub(crate) fn commit_instruction(&mut self, inst: &Instruction, regw: RegisterWriter) { if model::is_multiply(inst.opcode()) { self.multiply_count += 1; } match inst.destination() { - None => assert!(regw.is_none()), - Some(dst) => self.writer_map.insert( - dst, - regw.expect("instructions with destination always have a RegisterWriter"), - ), + None => debug_assert_eq!(regw, RegisterWriter::None), + Some(dst) => self.writer_map.insert(dst, regw), } } @@ -208,7 +221,7 @@ impl Validator { pub(crate) fn check_whole_program( &self, scheduler: &Scheduler, - instructions: &InstructionArray, + instructions: &[Instruction], ) -> Result<(), ()> { if instructions.len() == model::REQUIRED_INSTRUCTIONS && scheduler.overall_latency().as_usize() == model::REQUIRED_OVERALL_RESULT_AT_CYCLE @@ -220,36 +233,67 @@ impl Validator { } } - /// Figure out the allowed set of destination registers for an op after its - /// source is known, using the current state of the validator. + /// Begin checking which destination registers are allowed for an op after + /// its source is known, using the current state of the validator. + /// + /// Returns a DstRegisterChecker which can be used to test each specific + /// destination RegisterId quickly. #[inline(always)] pub(crate) fn dst_registers_allowed( &self, - available: RegisterSet, op: Opcode, pass: Pass, - writer_info: &RegisterWriter, + writer_info: RegisterWriter, src: Option, - ) -> RegisterSet { - available.filter( - #[inline(always)] - |dst| { - // One register specified by DISALLOW_REGISTER_FOR_ADDSHIFT can't - // be used as destination for AddShift. - if op == Opcode::AddShift && dst == model::DISALLOW_REGISTER_FOR_ADDSHIFT { - return false; - } - - // A few instructions disallow choosing src and dst as the same - if model::disallow_src_is_dst(op) && src == Some(dst) { - return false; - } - - // Additional constraints are written on the pair of previous and - // current instructions with the same destination. - model::writer_pair_allowed(pass, self.writer_map.get(dst), writer_info) + ) -> DstRegisterChecker<'_> { + DstRegisterChecker { + pass, + writer_info, + writer_map: &self.writer_map, + op_is_add_shift: op == Opcode::AddShift, + disallow_equal: if model::disallow_src_is_dst(op) { + src + } else { + None }, - ) + } + } +} + +/// State information returned by [`Validator::dst_registers_allowed`] +#[derive(Debug, Clone)] +pub(crate) struct DstRegisterChecker<'v> { + /// Is this the original or retry pass? + pass: Pass, + /// Reference to a table of [`RegisterWriter`] information for each register + writer_map: &'v RegisterWriterMap, + /// The new [`RegisterWriter`] under consideration + writer_info: RegisterWriter, + /// Was this [`Opcode::AddShift`]? + op_is_add_shift: bool, + /// Optionally disallow one matching register, used to implement [`model::disallow_src_is_dst`] + disallow_equal: Option, +} + +impl<'v> DstRegisterChecker<'v> { + /// Check a single destination register for usability, using context from + /// [`Validator::dst_registers_allowed`] + #[inline(always)] + pub(crate) fn check(&self, dst: RegisterId) -> bool { + // One register specified by DISALLOW_REGISTER_FOR_ADDSHIFT can't + // be used as destination for AddShift. + if self.op_is_add_shift && dst == model::DISALLOW_REGISTER_FOR_ADDSHIFT { + return false; + } + + // A few instructions disallow choosing src and dst as the same + if Some(dst) == self.disallow_equal { + return false; + } + + // Additional constraints are written on the pair of previous and + // current instructions with the same destination. + model::writer_pair_allowed(self.pass, self.writer_map.get(dst), self.writer_info) } } @@ -259,7 +303,7 @@ impl Validator { pub(crate) fn src_registers_allowed(available: RegisterSet, op: Opcode) -> RegisterSet { // HashX defines a special case DISALLOW_REGISTER_FOR_ADDSHIFT for // destination registers, and it also includes a look-ahead - // condition here in source register allocation to prevent the dest + // condition here in source register allocation to prevent the dst // allocation from getting stuck as often. If we have only two // remaining registers for AddShift and one is the disallowed reg, // HashX defines that the random choice is short-circuited early @@ -269,7 +313,7 @@ pub(crate) fn src_registers_allowed(available: RegisterSet, op: Opcode) -> Regis && available.contains(model::DISALLOW_REGISTER_FOR_ADDSHIFT) && available.len() == 2 { - available.filter( + RegisterSet::from_filter( #[inline(always)] |reg| reg == model::DISALLOW_REGISTER_FOR_ADDSHIFT, ) @@ -297,9 +341,9 @@ pub(crate) fn opcode_pair_allowed(previous: Option, proposed: Opcode) -> } } -/// Map each [`RegisterId`] to an [`Option`] +/// Map each [`RegisterId`] to an [`RegisterWriter`] #[derive(Default, Debug, Clone)] -struct RegisterWriterMap([Option; NUM_REGISTERS]); +struct RegisterWriterMap([RegisterWriter; NUM_REGISTERS]); impl RegisterWriterMap { /// Make a new empty register writer map. @@ -313,12 +357,12 @@ impl RegisterWriterMap { /// Write or overwrite the last [`RegisterWriter`] associated with `reg`. #[inline(always)] fn insert(&mut self, reg: RegisterId, writer: RegisterWriter) { - self.0[reg.as_usize()] = Some(writer); + self.0[reg.as_usize()] = writer; } /// Return the most recent mapping for 'reg', if any. #[inline(always)] - fn get(&self, reg: RegisterId) -> Option<&RegisterWriter> { - self.0[reg.as_usize()].as_ref() + fn get(&self, reg: RegisterId) -> RegisterWriter { + self.0[reg.as_usize()] } } diff --git a/crates/hashx/src/generator.rs b/crates/hashx/src/generator.rs index 568346668..c5a50459c 100644 --- a/crates/hashx/src/generator.rs +++ b/crates/hashx/src/generator.rs @@ -1,7 +1,7 @@ //! Pseudorandom generator for hash programs and parts thereof use crate::constraints::{self, Pass, RegisterWriter, Validator}; -use crate::program::{Instruction, InstructionArray, Opcode, Program}; +use crate::program::{Instruction, Opcode}; use crate::rand::RngBuffer; use crate::register::{RegisterId, RegisterSet}; use crate::scheduler::{InstructionPlan, Scheduler}; @@ -84,17 +84,8 @@ mod model { pub(super) const BRANCH_MASK_BIT_WEIGHT: usize = 4; } -/// Generate a hash program from an arbitrary [`RngCore`] implementer. -/// -/// This can return [`Error::ProgramConstraints`] if the HashX post-generation -/// program verification fails. During normal use this will happen once per -/// several thousand random seeds, and the caller should skip to another seed. -pub(crate) fn generate_program(rng: &mut T) -> Result { - Generator::new(rng).generate_program() -} - -/// Internal state for the program generator -struct Generator<'r, R: RngCore> { +/// Program generator +pub(crate) struct Generator<'r, R: RngCore> { /// The program generator wraps a random number generator, via [`RngBuffer`]. rng: RngBuffer<'r, R>, @@ -118,7 +109,7 @@ struct Generator<'r, R: RngCore> { impl<'r, R: RngCore> Generator<'r, R> { /// Create a fresh program generator from a random number generator state. #[inline(always)] - fn new(rng: &'r mut R) -> Self { + pub(crate) fn new(rng: &'r mut R) -> Self { Generator { rng: RngBuffer::new(rng), scheduler: Scheduler::new(), @@ -135,7 +126,7 @@ impl<'r, R: RngCore> Generator<'r, R> { /// The choice is perfectly uniform only if the register set is a power of /// two length. Uniformity is not critical here. #[inline(always)] - fn select_register(&mut self, reg_options: RegisterSet) -> Result { + fn select_register(&mut self, reg_options: &RegisterSet) -> Result { match reg_options.len() { 0 => Err(()), 1 => Ok(reg_options.index(0)), @@ -198,30 +189,28 @@ impl<'r, R: RngCore> Generator<'r, R> { /// Generate an entire program. /// - /// This generates instructions until the state can't be advanced any - /// further. Returns with [`Error::ProgramConstraints`] if the program - /// fails the `HashX` whole-program checks. These constraint failures occur - /// in normal use, on a small fraction of seed values. + /// Generates instructions into a provided [`Vec`] until the generator + /// state can't be advanced any further. Runs the whole-program validator. + /// Returns with [`Error::ProgramConstraints`] if the program fails these + /// checks. This happens in normal use on a small fraction of seed values. #[inline(always)] - fn generate_program(&mut self) -> Result { - let mut array: InstructionArray = Default::default(); - while array.len() < array.capacity() { + pub(crate) fn generate_program(&mut self, output: &mut Vec) -> Result<(), Error> { + assert!(output.is_empty()); + while output.len() < output.capacity() { match self.generate_instruction() { Err(()) => break, Ok((inst, regw)) => { let state_advance = self.commit_instruction_state(&inst, regw); - array.push(inst); + output.push(inst); if let Err(()) = state_advance { break; } } } } - let result = self.validator.check_whole_program(&self.scheduler, &array); - match result { - Err(()) => Err(Error::ProgramConstraints), - Ok(()) => Ok(Program::new(array)), - } + self.validator + .check_whole_program(&self.scheduler, output) + .map_err(|()| Error::ProgramConstraints) } /// Generate the next instruction. @@ -235,7 +224,7 @@ impl<'r, R: RngCore> Generator<'r, R> { /// This only returns `Err(())` if we've hit a stopping condition for the /// program. #[inline(always)] - fn generate_instruction(&mut self) -> Result<(Instruction, Option), ()> { + fn generate_instruction(&mut self) -> Result<(Instruction, RegisterWriter), ()> { loop { if let Ok(result) = self.instruction_gen_attempt(Pass::Original) { return Ok(result); @@ -268,18 +257,30 @@ impl<'r, R: RngCore> Generator<'r, R> { /// choosing the opcode-specific parts of the instruction. Each of these /// choices affects the Rng state, and may fail if conditions are not met. #[inline(always)] - fn instruction_gen_attempt( - &mut self, - pass: Pass, - ) -> Result<(Instruction, Option), ()> { + fn instruction_gen_attempt(&mut self, pass: Pass) -> Result<(Instruction, RegisterWriter), ()> { let op = self.choose_opcode(pass); let plan = self.scheduler.instruction_plan(op)?; let (inst, regw) = self.choose_instruction_with_opcode_plan(op, pass, &plan)?; - assert_eq!(inst.opcode(), op); + debug_assert_eq!(inst.opcode(), op); self.scheduler.commit_instruction_plan(&plan, &inst); Ok((inst, regw)) } + /// Choose only a source register, depending on the opcode and timing plan + #[inline(never)] + fn choose_src_reg( + &mut self, + op: Opcode, + timing_plan: &InstructionPlan, + ) -> Result { + let src_set = RegisterSet::from_filter(|src| { + self.scheduler + .register_available(src, timing_plan.cycle_issued()) + }); + let src_set = constraints::src_registers_allowed(src_set, op); + self.select_register(&src_set) + } + /// Choose both a source and destination register using a normal /// [`RegisterWriter`] for two-operand instructions. #[inline(always)] @@ -287,18 +288,12 @@ impl<'r, R: RngCore> Generator<'r, R> { &mut self, op: Opcode, pass: Pass, + writer_info_fn: fn(RegisterId) -> RegisterWriter, timing_plan: &InstructionPlan, ) -> Result<(RegisterId, RegisterId, RegisterWriter), ()> { - let avail_set = self - .scheduler - .registers_available(timing_plan.cycle_issued()); - let src_set = constraints::src_registers_allowed(avail_set, op); - let src = self.select_register(src_set)?; - let writer_info = RegisterWriter::RegSource(op, src); - let dst_set = - self.validator - .dst_registers_allowed(avail_set, op, pass, &writer_info, Some(src)); - let dst = self.select_register(dst_set)?; + let src = self.choose_src_reg(op, timing_plan)?; + let writer_info = writer_info_fn(src); + let dst = self.choose_dst_reg(op, pass, writer_info, Some(src), timing_plan)?; Ok((src, dst, writer_info)) } @@ -310,38 +305,34 @@ impl<'r, R: RngCore> Generator<'r, R> { &mut self, op: Opcode, pass: Pass, - writer_info: &RegisterWriter, + writer_info: RegisterWriter, timing_plan: &InstructionPlan, ) -> Result<(RegisterId, RegisterId), ()> { - let avail_set = self - .scheduler - .registers_available(timing_plan.cycle_issued()); - let src_set = constraints::src_registers_allowed(avail_set, op); - let src = self.select_register(src_set)?; - let dst_set = - self.validator - .dst_registers_allowed(avail_set, op, pass, writer_info, Some(src)); - let dst = self.select_register(dst_set)?; + let src = self.choose_src_reg(op, timing_plan)?; + let dst = self.choose_dst_reg(op, pass, writer_info, Some(src), timing_plan)?; Ok((src, dst)) } - /// Choose a destination register only. - #[inline(always)] + /// Choose a destination register only, using source and writer info + /// as well as the current state of the validator. + #[inline(never)] fn choose_dst_reg( &mut self, op: Opcode, pass: Pass, - writer_info: &RegisterWriter, + writer_info: RegisterWriter, + src: Option, timing_plan: &InstructionPlan, ) -> Result { - let avail_set = self - .scheduler - .registers_available(timing_plan.cycle_issued()); - let dst_set = self + let validator = self .validator - .dst_registers_allowed(avail_set, op, pass, writer_info, None); - let dst = self.select_register(dst_set)?; - Ok(dst) + .dst_registers_allowed(op, pass, writer_info, src); + let dst_set = RegisterSet::from_filter(|dst| { + self.scheduler + .register_available(dst, timing_plan.cycle_issued()) + && validator.check(dst) + }); + self.select_register(&dst_set) } /// With an [`Opcode`] and an execution unit timing plan already in mind, @@ -355,78 +346,80 @@ impl<'r, R: RngCore> Generator<'r, R> { op: Opcode, pass: Pass, plan: &InstructionPlan, - ) -> Result<(Instruction, Option), ()> { + ) -> Result<(Instruction, RegisterWriter), ()> { Ok(match op { - Opcode::Target => (Instruction::Target, None), + Opcode::Target => (Instruction::Target, RegisterWriter::None), Opcode::Branch => ( Instruction::Branch { mask: self.select_constant_weight_bit_mask(model::BRANCH_MASK_BIT_WEIGHT), }, - None, + RegisterWriter::None, ), Opcode::UMulH => { - let regw = RegisterWriter::WideMul(op, self.rng.next_u32()); - let (src, dst) = - self.choose_src_dst_regs_with_writer_info(op, pass, ®w, plan)?; - (Instruction::UMulH { src, dst }, Some(regw)) + let regw = RegisterWriter::UMulH(self.rng.next_u32()); + let (src, dst) = self.choose_src_dst_regs_with_writer_info(op, pass, regw, plan)?; + (Instruction::UMulH { src, dst }, regw) } Opcode::SMulH => { - let regw = RegisterWriter::WideMul(op, self.rng.next_u32()); - let (src, dst) = - self.choose_src_dst_regs_with_writer_info(op, pass, ®w, plan)?; - (Instruction::SMulH { src, dst }, Some(regw)) + let regw = RegisterWriter::SMulH(self.rng.next_u32()); + let (src, dst) = self.choose_src_dst_regs_with_writer_info(op, pass, regw, plan)?; + (Instruction::SMulH { src, dst }, regw) } Opcode::Mul => { - let (src, dst, regw) = self.choose_src_dst_regs(op, pass, plan)?; - (Instruction::Mul { src, dst }, Some(regw)) + let regw = RegisterWriter::Mul; + let (src, dst, regw) = self.choose_src_dst_regs(op, pass, regw, plan)?; + (Instruction::Mul { src, dst }, regw) } Opcode::Sub => { - let (src, dst, regw) = self.choose_src_dst_regs(op, pass, plan)?; - (Instruction::Sub { src, dst }, Some(regw)) + let regw = RegisterWriter::AddSub; + let (src, dst, regw) = self.choose_src_dst_regs(op, pass, regw, plan)?; + (Instruction::Sub { src, dst }, regw) } Opcode::Xor => { - let (src, dst, regw) = self.choose_src_dst_regs(op, pass, plan)?; - (Instruction::Xor { src, dst }, Some(regw)) + let regw = RegisterWriter::Xor; + let (src, dst, regw) = self.choose_src_dst_regs(op, pass, regw, plan)?; + (Instruction::Xor { src, dst }, regw) } Opcode::AddShift => { + let regw = RegisterWriter::AddSub; let left_shift = (self.rng.next_u32() & 3) as u8; - let (src, dst, regw) = self.choose_src_dst_regs(op, pass, plan)?; + let (src, dst, regw) = self.choose_src_dst_regs(op, pass, regw, plan)?; ( Instruction::AddShift { src, dst, left_shift, }, - Some(regw), + regw, ) } Opcode::AddConst => { - let regw = RegisterWriter::ConstSource(op); + let regw = RegisterWriter::AddConst; let src = self.select_nonzero_u32(u32::MAX) as i32; - let dst = self.choose_dst_reg(op, pass, ®w, plan)?; - (Instruction::AddConst { src, dst }, Some(regw)) + let dst = self.choose_dst_reg(op, pass, regw, None, plan)?; + (Instruction::AddConst { src, dst }, regw) } Opcode::XorConst => { - let regw = RegisterWriter::ConstSource(op); + let regw = RegisterWriter::XorConst; let src = self.select_nonzero_u32(u32::MAX) as i32; - let dst = self.choose_dst_reg(op, pass, ®w, plan)?; - (Instruction::XorConst { src, dst }, Some(regw)) + let dst = self.choose_dst_reg(op, pass, regw, None, plan)?; + (Instruction::XorConst { src, dst }, regw) } Opcode::Rotate => { - let regw = RegisterWriter::ConstSource(op); + let regw = RegisterWriter::Rotate; let right_rotate: u8 = self.select_nonzero_u32(63) as u8; - let dst = self.choose_dst_reg(op, pass, ®w, plan)?; - (Instruction::Rotate { dst, right_rotate }, Some(regw)) + let dst = self.choose_dst_reg(op, pass, regw, None, plan)?; + (Instruction::Rotate { dst, right_rotate }, regw) } }) } @@ -440,7 +433,7 @@ impl<'r, R: RngCore> Generator<'r, R> { fn commit_instruction_state( &mut self, inst: &Instruction, - regw: Option, + regw: RegisterWriter, ) -> Result<(), ()> { self.validator.commit_instruction(inst, regw); self.scheduler.advance_instruction_stream(inst.opcode()) diff --git a/crates/hashx/src/lib.rs b/crates/hashx/src/lib.rs index 28d3209fb..f63ab972f 100644 --- a/crates/hashx/src/lib.rs +++ b/crates/hashx/src/lib.rs @@ -51,7 +51,6 @@ mod scheduler; mod siphash; use crate::compiler::{Architecture, Executable}; -use crate::generator::generate_program; use crate::program::Program; use rand_core::RngCore; @@ -111,8 +110,8 @@ pub struct HashX { /// to store the program data. #[derive(Debug)] enum RuntimeProgram { - /// Select the interpreted runtime, and hold a boxed Program for it to run. - Interpret(Box), + /// Select the interpreted runtime, and hold a Program for it to run. + Interpret(Program), /// Select the compiled runtime, and hold an executable code page. Compiled(Executable), } @@ -203,7 +202,7 @@ impl HashXBuilder { rng: &mut R, register_key: SipState, ) -> Result { - let program = generate_program(rng)?; + let program = Program::generate(rng)?; self.build_from_program(program, register_key) } @@ -217,13 +216,13 @@ impl HashXBuilder { Ok(HashX { register_key, program: match self.runtime { - RuntimeOption::InterpretOnly => RuntimeProgram::Interpret(Box::new(program)), + RuntimeOption::InterpretOnly => RuntimeProgram::Interpret(program), RuntimeOption::CompileOnly => { RuntimeProgram::Compiled(Architecture::compile((&program).into())?) } RuntimeOption::TryCompile => match Architecture::compile((&program).into()) { Ok(exec) => RuntimeProgram::Compiled(exec), - Err(_) => RuntimeProgram::Interpret(Box::new(program)), + Err(_) => RuntimeProgram::Interpret(program), }, }, }) diff --git a/crates/hashx/src/program.rs b/crates/hashx/src/program.rs index 0ffa6426b..ea96a0ee0 100644 --- a/crates/hashx/src/program.rs +++ b/crates/hashx/src/program.rs @@ -1,7 +1,9 @@ //! Define the internal hash program representation used by HashX. +use crate::generator::Generator; use crate::register::{RegisterFile, RegisterId}; -use arrayvec::ArrayVec; +use crate::Error; +use rand_core::RngCore; use std::fmt; use std::ops::BitXor; @@ -179,24 +181,14 @@ impl Instruction { } } -/// Fixed-size array of instructions, either a complete program or a -/// program under construction -pub(crate) type InstructionArray = ArrayVec; - -/// Generated `HashX` program, as a list of instructions. +/// Generated `HashX` program, as a boxed slice of instructions #[derive(Clone, Default)] -pub struct Program { - /// The InstructionArray that this Program wraps - /// - /// InstructionArray provides storage, and this type indicates that the - /// program should be a well-formed HashX function. - instructions: InstructionArray, -} +pub struct Program(Box<[Instruction]>); impl fmt::Debug for Program { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!(f, "Program {{")?; - for (addr, inst) in self.instructions.iter().enumerate() { + for (addr, inst) in self.0.iter().enumerate() { writeln!(f, " [{:3}]: {:?}", addr, inst)?; } write!(f, "}}") @@ -204,10 +196,16 @@ impl fmt::Debug for Program { } impl Program { - /// Construct a finished `Program` from a list of instructions. - #[inline(always)] - pub(crate) fn new(instructions: InstructionArray) -> Self { - Self { instructions } + /// Generate a new `Program` from an arbitrary [`RngCore`] implementer + /// + /// This can return [`Error::ProgramConstraints`] if the HashX + /// post-generation program verification fails. During normal use this + /// will happen once per several thousand random seeds, and the caller + /// should skip to another seed. + pub(crate) fn generate(rng: &mut T) -> Result { + let mut instructions = Vec::with_capacity(NUM_INSTRUCTIONS); + Generator::new(rng).generate_program(&mut instructions)?; + Ok(Program(instructions.into_boxed_slice())) } /// Reference implementation for `Program` behavior @@ -254,9 +252,9 @@ impl Program { }}; } - while program_counter < self.instructions.len() { + while program_counter < self.0.len() { let next_pc = program_counter + 1; - program_counter = match &self.instructions[program_counter] { + program_counter = match &self.0[program_counter] { Instruction::Target => { branch_target = Some(program_counter); next_pc @@ -305,9 +303,9 @@ impl Program { } } -impl<'a> From<&'a Program> for &'a InstructionArray { +impl<'a> From<&'a Program> for &'a [Instruction] { #[inline(always)] fn from(prog: &'a Program) -> Self { - &prog.instructions + &prog.0 } } diff --git a/crates/hashx/src/register.rs b/crates/hashx/src/register.rs index b5073aa81..a70d39dd9 100644 --- a/crates/hashx/src/register.rs +++ b/crates/hashx/src/register.rs @@ -1,6 +1,7 @@ //! Define HashX's register file, and how it's created and digested. use crate::siphash::{siphash24_ctr, SipState}; +use arrayvec::ArrayVec; use std::fmt; /// Number of virtual registers in the HashX machine @@ -29,6 +30,12 @@ impl RegisterId { pub(crate) fn as_usize(&self) -> usize { self.0 as usize } + + /// Create an iterator over all RegisterId + #[inline(always)] + pub(crate) fn all() -> impl Iterator { + (0_u8..(NUM_REGISTERS as u8)).map(RegisterId) + } } /// Identify a set of RegisterIds @@ -36,15 +43,10 @@ impl RegisterId { /// This could be done compactly as a u8 bitfield for storage purposes, but /// in our program generator this is never stored long-term. Instead, we want /// something the optimizer can reason about as effectively as possible, and -/// let's inline as much as possible in order to resolve special cases in -/// the program generator at compile time. -#[derive(Clone, Copy, Eq, PartialEq)] -pub(crate) struct RegisterSet { - /// Number of registers in the set - len: usize, - /// Array indexed by register Id, indicating registers we've excluded - reg_not_in_set: [bool; 8], -} +/// we want to optimize for an index() implementation that doesn't branch. +/// This uses a fixed-capacity array of registers in-set, always sorted. +#[derive(Default, Clone, Eq, PartialEq)] +pub(crate) struct RegisterSet(ArrayVec); impl fmt::Debug for RegisterSet { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -60,66 +62,29 @@ impl fmt::Debug for RegisterSet { } impl RegisterSet { - /// Construct the set of all registers. - /// - /// This is the main way to construct a new RegisterId, starting with - /// all available registers and filtering them repeatedly. - #[inline(always)] - pub(crate) fn all() -> Self { - Self { - len: NUM_REGISTERS, - reg_not_in_set: Default::default(), - } - } - /// Number of registers still contained in this set #[inline(always)] pub(crate) fn len(&self) -> usize { - self.len + self.0.len() } /// Test if a register is contained in the set. #[inline(always)] pub(crate) fn contains(&self, id: RegisterId) -> bool { - !self.reg_not_in_set[id.0 as usize] + self.0.contains(&id) } - /// Filter this register set through a predicate. - /// - /// Invokes the predicate only for registers in this set, and returns the - /// set of registers for which it returned true. + /// Build a new RegisterSet from each register for which a predicate + /// function returns `true`. #[inline(always)] - pub(crate) fn filter bool>(&self, mut predicate: P) -> Self { - let mut result = Self { - len: 0, - reg_not_in_set: Default::default(), - }; - self.filter_impl(0, &mut predicate, &mut result); - self.filter_impl(1, &mut predicate, &mut result); - self.filter_impl(2, &mut predicate, &mut result); - self.filter_impl(3, &mut predicate, &mut result); - self.filter_impl(4, &mut predicate, &mut result); - self.filter_impl(5, &mut predicate, &mut result); - self.filter_impl(6, &mut predicate, &mut result); - self.filter_impl(7, &mut predicate, &mut result); - result - } - - /// Internal implementation to be unrolled by `filter` - #[inline(always)] - fn filter_impl bool>( - &self, - id: usize, - predicate: &mut P, - result: &mut Self, - ) { - if self.reg_not_in_set[id] { - result.reg_not_in_set[id] = true; - } else if predicate(RegisterId(id as u8)) { - result.len += 1; - } else { - result.reg_not_in_set[id] = true; + pub(crate) fn from_filter bool>(mut predicate: P) -> Self { + let mut result: Self = Default::default(); + for r in RegisterId::all() { + if predicate(r) { + result.0.push(r); + } } + result } /// Return a particular register within this set, counting from R0 to R7. @@ -127,45 +92,8 @@ impl RegisterSet { /// The supplied index must be less than the [`Self::len()`] of this set. /// Panics if the index is out of range. #[inline(always)] - pub(crate) fn index(&self, mut index: usize) -> RegisterId { - if let Some(result) = self.index_impl(0, &mut index) { - return result; - } - if let Some(result) = self.index_impl(1, &mut index) { - return result; - } - if let Some(result) = self.index_impl(2, &mut index) { - return result; - } - if let Some(result) = self.index_impl(3, &mut index) { - return result; - } - if let Some(result) = self.index_impl(4, &mut index) { - return result; - } - if let Some(result) = self.index_impl(5, &mut index) { - return result; - } - if let Some(result) = self.index_impl(6, &mut index) { - return result; - } - if let Some(result) = self.index_impl(7, &mut index) { - return result; - } - unreachable!(); - } - - /// Internal implementation to be unrolled by `index` - #[inline(always)] - fn index_impl(&self, id: usize, index: &mut usize) -> Option { - if self.reg_not_in_set[id] { - None - } else if *index == 0 { - Some(RegisterId(id as u8)) - } else { - *index -= 1; - None - } + pub(crate) fn index(&self, index: usize) -> RegisterId { + self.0[index] } } @@ -224,29 +152,3 @@ impl RegisterFile { [x.v0 ^ y.v0, x.v1 ^ y.v1, x.v2 ^ y.v2, x.v3 ^ y.v3] } } - -#[cfg(test)] -mod test { - use super::RegisterSet; - - #[test] - fn register_set() { - let r = RegisterSet::all().filter(|_reg| true); - assert_eq!(r.len(), 8); - assert_eq!(r.index(7).as_usize(), 7); - assert_eq!(r.index(0).as_usize(), 0); - let r = r.filter(|reg| (reg.as_usize() & 1) != 0); - assert_eq!(r.len(), 4); - assert_eq!(r.index(0).as_usize(), 1); - assert_eq!(r.index(1).as_usize(), 3); - assert_eq!(r.index(2).as_usize(), 5); - assert_eq!(r.index(3).as_usize(), 7); - let r = r.filter(|reg| (reg.as_usize() & 2) != 0); - assert_eq!(r.index(0).as_usize(), 3); - assert_eq!(r.index(1).as_usize(), 7); - let r = r.filter(|_reg| true); - assert_eq!(r.len(), 2); - let r = r.filter(|_reg| false); - assert_eq!(r.len(), 0); - } -} diff --git a/crates/hashx/src/scheduler.rs b/crates/hashx/src/scheduler.rs index 6d48f17f5..f57609c25 100644 --- a/crates/hashx/src/scheduler.rs +++ b/crates/hashx/src/scheduler.rs @@ -5,7 +5,7 @@ //! avoid stalls. use crate::program::{Instruction, Opcode}; -use crate::register::{RegisterId, RegisterSet, NUM_REGISTERS}; +use crate::register::{RegisterId, NUM_REGISTERS}; /// Scheduling information for each opcode mod model { @@ -197,10 +197,10 @@ impl Scheduler { } } - /// Figure out which registers will be available at or before the indicated cycle. + /// Look up if a register will be available at or before the indicated cycle. #[inline(always)] - pub(crate) fn registers_available(&self, cycle: Cycle) -> RegisterSet { - self.data.registers_available(cycle) + pub(crate) fn register_available(&self, reg: RegisterId, cycle: Cycle) -> bool { + self.data.register_available(reg, cycle) } /// Return the overall data latency. @@ -323,13 +323,10 @@ impl DataSchedule { self.latencies[dst.as_usize()] = cycle; } - /// Figure out which registers will be available at or before the indicated cycle + /// Look up if a register will be available at or before the indicated cycle. #[inline(always)] - fn registers_available(&self, cycle: Cycle) -> RegisterSet { - RegisterSet::all().filter( - #[inline(always)] - |reg| self.latencies[reg.as_usize()] <= cycle, - ) + fn register_available(&self, reg: RegisterId, cycle: Cycle) -> bool { + self.latencies[reg.as_usize()] <= cycle } /// Return the overall latency, the [`Cycle`] at which we expect @@ -447,7 +444,7 @@ struct MicroOpPlan { /// /// This is defined as either one or two micro-operations /// scheduled on the same cycle. -#[derive(Debug, Clone, Eq, PartialEq)] +#[derive(Debug, Clone, Copy, Eq, PartialEq)] pub(crate) struct InstructionPlan { /// The Cycle this whole instruction begins on cycle: Cycle,