use crate::noun::NounExt;
use bitvec::prelude::{BitSlice, BitVec, Lsb0};
use bitvec::view::BitView;
use bitvec::{bits, bitvec};
use bytes::Bytes;
use either::Either;
use intmap::IntMap;
use nockvm::mem::NockStack;
use nockvm::mug::{calc_atom_mug_u32, calc_cell_mug_u32, get_mug, set_mug};
use nockvm::noun::{Atom, Cell, CellMemory, DirectAtom, IndirectAtom, Noun, NounAllocator, D};
use nockvm::serialization::{met0_u64_to_usize, met0_usize};
use std::alloc::Layout;
use std::mem::size_of;
use std::ptr::copy_nonoverlapping;
use thiserror::Error;
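
/// Size of `CellMemory` in 64-bit words, rounded up ((bytes + 7) / 8); slab
/// allocations below are measured in these words.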
const CELL_MEM_WORD_SIZE: usize = (size_of::<CellMemory>() + 7) >> 3;

/// A (mostly*) self-contained arena for allocating nouns.
///
/// *Nouns may contain references to the PMA, but not other allocation arenas.
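///
/// A minimal usage sketch (mirroring `test_jam_cue_roundtrip` below, shown for
/// orientation rather than as additional API guarantees):
///
/// ```ignore
/// let mut slab = NounSlab::new();
/// let noun = nockvm::noun::T(&mut slab, &[D(5), D(23)]);
/// slab.set_root(noun);
/// let jammed = slab.jam();
/// let mut cued_slab = NounSlab::new();
/// let cued = cued_slab.cue_into(jammed).expect("cue should succeed");
/// assert!(slab_noun_equality(unsafe { slab.root() }, &cued));
/// ```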
#[derive(Debug)]
pub struct NounSlab {
    root: Noun,
    slabs: Vec<(*mut u8, Layout)>,
    allocation_start: *mut u64,
    allocation_stop: *mut u64,
}

impl NounSlab {
    unsafe fn raw_alloc(new_layout: Layout) -> *mut u8 {
        if new_layout.size() == 0 {
            std::alloc::handle_alloc_error(new_layout);
        }
        assert!(new_layout.align().is_power_of_two(), "Invalid alignment");
        let slab = std::alloc::alloc(new_layout);
        if slab.is_null() {
            std::alloc::handle_alloc_error(new_layout);
        } else {
            slab
        }
    }
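
    /// Split the root, iterated with `list_iter` (i.e. treated as a noun list),
    /// into one fresh slab per element, copying each element out of this slab.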
    pub fn to_vec(&self) -> Vec<Self> {
        self.root
            .list_iter()
            .map(|n| {
                let mut slab = Self::new();
                slab.copy_into(n);
                slab
            })
            .collect()
    }

    pub fn modify<F: FnOnce(Noun) -> Vec<Noun>>(&mut self, f: F) {
        let new_root_base = f(self.root);
        let new_root = nockvm::noun::T(self, &new_root_base);
        self.set_root(new_root);
    }

    pub fn modify_noun<F: FnOnce(Noun) -> Noun>(&mut self, f: F) {
        let new_root = f(self.root);
        self.set_root(new_root);
    }

    pub fn modify_with_imports3<F: FnOnce((Noun, Noun, Noun), Noun) -> Vec<Noun>>(
        &mut self,
        f: F,
        imports: (Noun, Noun, Noun),
    ) {
        self.copy_into(imports.0);
        self.copy_into(imports.1);
        self.copy_into(imports.2);
        let new_root_base = f(imports, self.root);
        let new_root = nockvm::noun::T(self, &new_root_base);
        self.set_root(new_root);
    }
}

impl Clone for NounSlab {
    fn clone(&self) -> Self {
        let mut slab = Self::new();
        slab.copy_into(self.root);
        slab
    }
}

impl NounAllocator for NounSlab {
    unsafe fn alloc_indirect(&mut self, words: usize) -> *mut u64 {
        let raw_size = words + 2;
        // Make sure we have enough space
        if self.allocation_start.is_null()
            || self.allocation_start.add(raw_size) > self.allocation_stop
        {
            let next_idx = std::cmp::max(self.slabs.len(), min_idx_for_size(raw_size));
            self.slabs
                .resize(next_idx + 1, (std::ptr::null_mut(), Layout::new::<u8>()));
            let new_size = idx_to_size(next_idx);
            let new_layout = Layout::array::<u64>(new_size).unwrap_or_else(|err| {
                panic!(
                    "Panicked with {err:?} at {}:{} (git sha: {:?})",
                    file!(),
                    line!(),
                    option_env!("GIT_SHA")
                )
            });
            let new_slab = Self::raw_alloc(new_layout);
            let new_slab_u64 = new_slab as *mut u64;
            self.slabs[next_idx] = (new_slab, new_layout);
            self.allocation_start = new_slab_u64;
            self.allocation_stop = new_slab_u64.add(new_size);
        }
        let new_indirect_ptr = self.allocation_start;
        self.allocation_start = self.allocation_start.add(raw_size);
        new_indirect_ptr
    }

    unsafe fn alloc_cell(&mut self) -> *mut CellMemory {
        if self.allocation_start.is_null()
            || self.allocation_start.add(CELL_MEM_WORD_SIZE) > self.allocation_stop
        // || (self.allocation_start as usize) + CELL_MEM_WORD_SIZE > (self.allocation_stop as usize)
        // || (self.allocation_start.expose_provenance()) + CELL_MEM_WORD_SIZE > (self.allocation_stop.expose_provenance())
        // || (self.allocation_start as usize) + (CELL_MEM_WORD_SIZE * std::mem::size_of::<u64>()) > (self.allocation_stop as usize)
        {
            let next_idx = std::cmp::max(self.slabs.len(), min_idx_for_size(CELL_MEM_WORD_SIZE));
            self.slabs
                .resize(next_idx + 1, (std::ptr::null_mut(), Layout::new::<u8>()));
            let new_size = idx_to_size(next_idx);
            let new_layout = Layout::array::<u64>(new_size).unwrap_or_else(|err| {
                panic!(
                    "Panicked with {err:?} at {}:{} (git sha: {:?})",
                    file!(),
                    line!(),
                    option_env!("GIT_SHA")
                )
            });
            let new_slab = Self::raw_alloc(new_layout);
            let new_slab_u64 = new_slab as *mut u64;
            self.slabs[next_idx] = (new_slab, new_layout);
            self.allocation_start = new_slab_u64;
            self.allocation_stop = new_slab_u64.add(new_size);
        }
        let new_cell_ptr = self.allocation_start as *mut CellMemory;
        // Advance the bump pointer by CELL_MEM_WORD_SIZE 64-bit words; the offset is
        // computed in bytes on the exposed address and converted back through the
        // exposed-provenance API rather than a plain `as` cast.
        // self.allocation_start = ((self.allocation_start.expose_provenance()) + CELL_MEM_WORD_SIZE) as *mut u64;
        self.allocation_start = std::ptr::with_exposed_provenance_mut(
            self.allocation_start.expose_provenance()
                + (CELL_MEM_WORD_SIZE * std::mem::size_of::<u64>()),
        );
        new_cell_ptr
    }

    unsafe fn alloc_struct<T>(&mut self, count: usize) -> *mut T {
        let layout = Layout::array::<T>(count).expect("Bad layout in alloc_struct");
        let word_size = (layout.size() + 7) >> 3;
        assert!(layout.align() <= std::mem::size_of::<u64>());
        if self.allocation_start.is_null()
            || self.allocation_start.add(word_size) > self.allocation_stop
        {
            let next_idx = std::cmp::max(self.slabs.len(), min_idx_for_size(word_size));
            self.slabs
                .resize(next_idx + 1, (std::ptr::null_mut(), Layout::new::<u8>()));
            let new_size = idx_to_size(next_idx);
            let new_layout = Layout::array::<u64>(new_size).unwrap_or_else(|err| {
                panic!(
                    "Panicked with {err:?} at {}:{} (git sha: {:?})",
                    file!(),
                    line!(),
                    option_env!("GIT_SHA")
                )
            });
            let new_slab = Self::raw_alloc(new_layout);
            let new_slab_u64 = new_slab as *mut u64;
            self.slabs[next_idx] = (new_slab, new_layout);
            self.allocation_start = new_slab_u64;
            self.allocation_stop = new_slab_u64.add(new_size);
        }
        let new_struct_ptr = self.allocation_start as *mut T;
        self.allocation_start = self.allocation_start.add(word_size);
        new_struct_ptr
    }
}

/// # Safety: no noun in this slab references a noun outside the slab, except in the PMA
unsafe impl Send for NounSlab {}

impl Default for NounSlab {
    fn default() -> Self {
        Self::new()
    }
}

impl From<Noun> for NounSlab {
    fn from(noun: Noun) -> Self {
        let mut slab = Self::new();
        slab.copy_into(noun);
        slab
    }
}

impl<const N: usize> From<[Noun; N]> for NounSlab {
    fn from(nouns: [Noun; N]) -> Self {
        let mut slab = Self::new();
        let new_root = nockvm::noun::T(&mut slab, &nouns);
        slab.set_root(new_root);
        slab
    }
}

impl NounSlab {
    /// Make a new noun slab with D(0) as the root
    #[tracing::instrument]
    pub fn new() -> Self {
        let slabs = Vec::new();
        let allocation_start: *mut u64 = std::ptr::null_mut();
        let allocation_stop: *mut u64 = std::ptr::null_mut();
        let root: Noun = D(0);
        NounSlab {
            root,
            slabs,
            allocation_start,
            allocation_stop,
        }
    }

    /// Copy a noun into this slab, only leaving references into the PMA. Set that noun as the root
    /// noun.
    pub fn copy_into(&mut self, copy_root: Noun) {
        let mut copied: IntMap<u64, Noun> = IntMap::new();
        // let mut copy_stack = vec![(copy_root, &mut self.root as *mut Noun)];
        let mut copy_stack = vec![(copy_root, std::ptr::addr_of_mut!(self.root))];
        while let Some((noun, dest)) = copy_stack.pop() {
            match noun.as_either_direct_allocated() {
                Either::Left(_direct) => {
                    unsafe { *dest = noun };
                }
                Either::Right(allocated) => match allocated.as_either() {
                    Either::Left(indirect) => {
                        let indirect_ptr = unsafe { indirect.to_raw_pointer() };
                        let indirect_mem_size = indirect.raw_size();
                        if let Some(copied_noun) = copied.get(indirect_ptr as u64) {
                            unsafe { *dest = *copied_noun };
                            continue;
                        }
                        let indirect_new_mem = unsafe { self.alloc_indirect(indirect.size()) };
                        unsafe {
                            copy_nonoverlapping(indirect_ptr, indirect_new_mem, indirect_mem_size)
                        };
                        let copied_noun = unsafe {
                            IndirectAtom::from_raw_pointer(indirect_new_mem)
                                .as_atom()
                                .as_noun()
                        };
                        copied.insert(indirect_ptr as u64, copied_noun);
                        unsafe { *dest = copied_noun };
                    }
                    Either::Right(cell) => {
                        let cell_ptr = unsafe { cell.to_raw_pointer() };
                        if let Some(copied_noun) = copied.get(cell_ptr as u64) {
                            unsafe { *dest = *copied_noun };
                            continue;
                        }
                        let cell_new_mem = unsafe { self.alloc_cell() };
                        unsafe { copy_nonoverlapping(cell_ptr, cell_new_mem, 1) };
                        let copied_noun = unsafe { Cell::from_raw_pointer(cell_new_mem).as_noun() };
                        copied.insert(cell_ptr as u64, copied_noun);
                        unsafe { *dest = copied_noun };
                        unsafe {
                            // copy_stack
                            //     .push((cell.tail(), &mut (*cell_new_mem).tail as *mut Noun));
                            // copy_stack
                            //     .push((cell.head(), &mut (*cell_new_mem).head as *mut Noun));
                            copy_stack
                                .push((cell.tail(), std::ptr::addr_of_mut!((*cell_new_mem).tail)));
                            copy_stack
                                .push((cell.head(), std::ptr::addr_of_mut!((*cell_new_mem).head)));
                        }
                    }
                },
            }
        }
    }

    /// Copy the root noun from this slab into the given NockStack, only leaving references into the PMA
    ///
    /// Note that this consumes the slab: the slab is freed afterwards and the returned root noun
    /// references the stack. Nouns referencing the slab must not be used past this point.
    #[tracing::instrument(skip(self, stack), level = "trace")]
    pub fn copy_to_stack(self, stack: &mut NockStack) -> Noun {
        let mut res = D(0);
        let mut copy_stack = vec![(self.root, &mut res as *mut Noun)];
        while let Some((noun, dest)) = copy_stack.pop() {
            if let Ok(allocated) = noun.as_allocated() {
                if let Some(forward) = unsafe { allocated.forwarding_pointer() } {
                    unsafe { *dest = forward.as_noun() };
                } else {
                    match allocated.as_either() {
                        Either::Left(mut indirect) => {
                            let raw_pointer = unsafe { indirect.to_raw_pointer() };
                            let raw_size = indirect.raw_size();
                            unsafe {
                                let indirect_mem = stack.alloc_indirect(indirect.size());
                                std::ptr::copy_nonoverlapping(raw_pointer, indirect_mem, raw_size);
                                indirect.set_forwarding_pointer(indirect_mem);
                                *dest = IndirectAtom::from_raw_pointer(indirect_mem)
                                    .as_atom()
                                    .as_noun();
                            }
                        }
                        Either::Right(mut cell) => {
                            let raw_pointer = unsafe { cell.to_raw_pointer() };
                            unsafe {
                                let cell_mem = stack.alloc_cell();
                                copy_nonoverlapping(raw_pointer, cell_mem, 1);
                                copy_stack.push((cell.tail(), &mut (*cell_mem).tail as *mut Noun));
                                copy_stack.push((cell.head(), &mut (*cell_mem).head as *mut Noun));
                                cell.set_forwarding_pointer(cell_mem);
                                *dest = Cell::from_raw_pointer(cell_mem).as_noun()
                            }
                        }
                    }
                }
            } else {
                unsafe {
                    *dest = noun;
                } // Direct atom
            }
        }
        res
    }

    /// Set the root of the noun slab.
    ///
    /// Panics if the given root is not in the noun slab or PMA.
    pub fn set_root(&mut self, root: Noun) {
        if let Ok(allocated) = root.as_allocated() {
            match allocated.as_either() {
                Either::Left(indirect) => {
                    let ptr = unsafe { indirect.to_raw_pointer() };
                    let u8_ptr = ptr as *const u8;
                    for slab in &self.slabs {
                        if unsafe { u8_ptr >= slab.0 && u8_ptr < slab.0.add(slab.1.size()) } {
                            self.root = root;
                            return;
                        }
                    }
                    panic!("Set root of NounSlab to noun from outside slab");
                }
                Either::Right(cell) => {
                    let ptr = unsafe { cell.to_raw_pointer() };
                    let u8_ptr = ptr as *const u8;
                    for slab in &self.slabs {
                        if unsafe { u8_ptr >= slab.0 && u8_ptr < slab.0.add(slab.1.size()) } {
                            self.root = root;
                            return;
                        }
                    }
                    panic!("Set root of NounSlab to noun from outside slab");
                }
            }
        }
        self.root = root;
    }

    pub fn jam(&self) -> Bytes {
        let mut backref_map = NounMap::<usize>::new();
        let mut stack = vec![self.root];
        let mut buffer = bitvec![u8, Lsb0; 0; 0];
        while let Some(noun) = stack.pop() {
            if let Some(backref) = backref_map.get(noun) {
                if let Ok(atom) = noun.as_atom() {
                    if met0_u64_to_usize(*backref as u64) < met0_usize(atom) {
                        mat_backref(&mut buffer, *backref);
                    } else {
                        mat_atom(&mut buffer, atom)
                    }
                } else {
                    mat_backref(&mut buffer, *backref);
                }
            } else {
                backref_map.insert(noun, buffer.len());
                match noun.as_either_atom_cell() {
                    Either::Left(atom) => {
                        mat_atom(&mut buffer, atom);
                    }
                    Either::Right(cell) => {
                        buffer.extend_from_bitslice(bits![u8, Lsb0; 1, 0]); // cell tag
                        stack.push(cell.tail());
                        stack.push(cell.head());
                    }
                }
            }
        }
        Bytes::copy_from_slice(buffer.as_raw_slice())
    }
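
    /// Deserialize a jammed noun into this slab and return it; the slab's root is left
    /// unchanged. The tag bits mirror `jam` above: a leading `0` introduces an atom,
    /// `1, 0` a cell, and `1, 1` a backreference.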
    pub fn cue_into(&mut self, jammed: Bytes) -> Result<Noun, CueError> {
        let mut backref_map = IntMap::new();
        let bitslice = jammed.view_bits::<Lsb0>();
        let mut cursor = 0usize;
        let mut res = D(0);
        let mut stack = vec![CueStackEntry::DestinationPointer(&mut res)];
        loop {
            match stack.pop() {
                Some(CueStackEntry::DestinationPointer(dest)) => {
                    let backref = cursor as u64;
                    if bitslice[cursor] {
                        // 1
                        cursor += 1;
                        if bitslice[cursor] {
                            // 1 - backref
                            cursor += 1;
                            let backref = rub_backref(&mut cursor, bitslice)?;
                            if let Some(noun) = backref_map.get(backref as u64) {
                                unsafe {
                                    *dest = *noun;
                                }
                            } else {
                                Err(CueError::BadBackref)?
                            }
                        } else {
                            // 0 - cell
                            cursor += 1;
                            let (cell, cell_mem) = unsafe { Cell::new_raw_mut(self) };
                            unsafe {
                                *dest = cell.as_noun();
                            }
                            unsafe {
                                stack.push(CueStackEntry::BackRef(backref, dest as *const Noun));
                                stack.push(CueStackEntry::DestinationPointer(&mut (*cell_mem).tail));
                                stack.push(CueStackEntry::DestinationPointer(&mut (*cell_mem).head));
                            }
                        }
                    } else {
                        // 0 - atom
                        cursor += 1;
                        unsafe { *dest = rub_atom(self, &mut cursor, bitslice)?.as_noun() };
                        backref_map.insert(backref, unsafe { *dest });
                    }
                }
                Some(CueStackEntry::BackRef(backref, noun_ptr)) => {
                    backref_map.insert(backref, unsafe { *noun_ptr });
                }
                None => {
                    break;
                }
            }
        }
        Ok(res)
    }

    /// Get the root noun
    ///
    /// # Safety: The noun must not be used past the lifetime of the slab.
    pub unsafe fn root(&self) -> &Noun {
        &self.root
    }

    /// Get the root noun
    ///
    /// # Safety: The noun must not be used past the lifetime of the slab.
    pub unsafe fn root_mut(&mut self) -> &mut Noun {
        &mut self.root
    }
}

impl Drop for NounSlab {
    fn drop(&mut self) {
        for slab in self.slabs.drain(..) {
            if !slab.0.is_null() {
                unsafe { std::alloc::dealloc(slab.0, slab.1) };
            }
        }
    }
}
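
// Length-prefix ("mat") encoding shared by mat_backref and mat_atom: for a nonzero
// value of bit-length s, write met0(s) zero bits, a terminating 1, the low
// met0(s) - 1 bits of s (its leading 1 is implied by position), then the s bits of
// the value itself. Zero takes the short forms written explicitly below.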
fn mat_backref(buffer: &mut BitVec<u8, Lsb0>, backref: usize) {
    if backref == 0 {
        buffer.extend_from_bitslice(bits![u8, Lsb0; 1, 1, 1]);
        return;
    }
    let backref_sz = met0_u64_to_usize(backref as u64);
    let backref_sz_sz = met0_u64_to_usize(backref_sz as u64);
    buffer.extend_from_bitslice(bits![u8, Lsb0; 1, 1]); // backref tag
    let buffer_len = buffer.len();
    buffer.resize(buffer_len + backref_sz_sz, false);
    buffer.push(true);
    buffer.extend_from_bitslice(
        &BitSlice::<_, Lsb0>::from_element(&backref_sz)[0..backref_sz_sz - 1],
    );
    buffer.extend_from_bitslice(&BitSlice::<_, Lsb0>::from_element(&backref)[0..backref_sz]);
}

fn mat_atom(buffer: &mut BitVec<u8, Lsb0>, atom: Atom) {
    if unsafe { atom.as_noun().raw_equals(&D(0)) } {
        buffer.extend_from_bitslice(bits![u8, Lsb0; 0, 1]);
        return;
    }
    let atom_sz = met0_usize(atom);
    let atom_sz_sz = met0_u64_to_usize(atom_sz as u64);
    buffer.push(false); // atom tag
    let buffer_len = buffer.len();
    buffer.resize(buffer_len + atom_sz_sz, false);
    buffer.push(true);
    buffer.extend_from_bitslice(&BitSlice::<_, Lsb0>::from_element(&atom_sz)[0..atom_sz_sz - 1]);
    buffer.extend_from_bitslice(&atom.as_bitslice()[0..atom_sz]);
}

fn rub_backref(cursor: &mut usize, buffer: &BitSlice<u8, Lsb0>) -> Result<usize, CueError> {
    if let Some(idx) = buffer[*cursor..].first_one() {
        if idx == 0 {
            *cursor += 1;
            Ok(0)
        } else {
            *cursor += idx + 1;
            let mut sz = 0usize;
            let sz_slice = BitSlice::<_, Lsb0>::from_element_mut(&mut sz);
            if buffer.len() < *cursor + idx - 1 {
                Err(CueError::TruncatedBuffer)?;
            };
            sz_slice[0..idx - 1].clone_from_bitslice(&buffer[*cursor..*cursor + idx - 1]);
            sz_slice.set(idx - 1, true);
            *cursor += idx - 1;
            if sz > size_of::<usize>() << 3 {
                Err(CueError::BackrefTooBig)?;
            }
            if buffer.len() < *cursor + sz {
                Err(CueError::TruncatedBuffer)?;
            }
            let mut backref = 0usize;
            let backref_slice = BitSlice::<_, Lsb0>::from_element_mut(&mut backref);
            backref_slice[0..sz].clone_from_bitslice(&buffer[*cursor..*cursor + sz]);
            *cursor += sz;
            Ok(backref)
        }
    } else {
        Err(CueError::TruncatedBuffer)
    }
}

fn rub_atom(
    slab: &mut NounSlab,
    cursor: &mut usize,
    buffer: &BitSlice<u8, Lsb0>,
) -> Result<Atom, CueError> {
    if let Some(idx) = buffer[*cursor..].first_one() {
        if idx == 0 {
            *cursor += 1;
            unsafe { Ok(DirectAtom::new_unchecked(0).as_atom()) }
        } else {
            *cursor += idx + 1;
            let mut sz = 0usize;
            let sz_slice = BitSlice::<_, Lsb0>::from_element_mut(&mut sz);
            if buffer.len() < *cursor + idx - 1 {
                Err(CueError::TruncatedBuffer)?;
            }
            sz_slice[0..idx - 1].clone_from_bitslice(&buffer[*cursor..*cursor + idx - 1]);
            sz_slice.set(idx - 1, true);
            *cursor += idx - 1;
            if buffer.len() < *cursor + sz {
                Err(CueError::TruncatedBuffer)?;
            }
            if sz < 64 {
                // Direct atom: less than 64 bits
                let mut data = 0u64;
                let atom_slice = BitSlice::<_, Lsb0>::from_element_mut(&mut data);
                atom_slice[0..sz].clone_from_bitslice(&buffer[*cursor..*cursor + sz]);
                *cursor += sz;
                Ok(unsafe { DirectAtom::new_unchecked(data).as_atom() })
            } else {
                // Indirect atom
                let indirect_words = (sz + 63) >> 6; // fast round to 64-bit words
                let (mut indirect, slice) =
                    unsafe { IndirectAtom::new_raw_mut_bitslice(slab, indirect_words) };
                slice[0..sz].clone_from_bitslice(&buffer[*cursor..*cursor + sz]);
                *cursor += sz;
                Ok(unsafe { indirect.normalize_as_atom() })
            }
        }
    } else {
        Err(CueError::TruncatedBuffer)
    }
}

#[derive(Debug, Error)]
pub enum CueError {
    #[error("cue: Bad backref")]
    BadBackref,
    #[error("cue: backref too big")]
    BackrefTooBig,
    #[error("cue: truncated buffer")]
    TruncatedBuffer,
}

/// Slab size from vector index, in 8-byte words
fn idx_to_size(idx: usize) -> usize {
    1 << (2 * idx + 9)
}

/// Inverse of idx_to_size
fn min_idx_for_size(sz: usize) -> usize {
    let mut log2sz = sz.ilog2() as usize;
    let round_sz = 1 << log2sz;
    if round_sz != sz {
        log2sz += 1;
    };
    if log2sz <= 9 {
        0
    } else {
        (log2sz - 9).div_ceil(2)
    }
}

struct NounMap<V>(IntMap<u64, Vec<(Noun, V)>>);

impl<V> NounMap<V> {
    fn new() -> Self {
        NounMap(IntMap::new())
    }

    fn insert(&mut self, key: Noun, value: V) {
        let key_mug = slab_mug(key) as u64;
        if let Some(vec) = self.0.get_mut(key_mug) {
            let mut chain_iter = vec[..].iter_mut();
            if let Some(entry) = chain_iter.find(|entry| slab_noun_equality(&key, &entry.0)) {
                entry.1 = value;
            } else {
                vec.push((key, value))
            }
        } else {
            self.0.insert(key_mug, vec![(key, value)]);
        }
    }

    fn get(&mut self, key: Noun) -> Option<&V> {
        let key_mug = slab_mug(key) as u64;
        if let Some(vec) = self.0.get(key_mug) {
            let mut chain_iter = vec[..].iter();
            if let Some(entry) = chain_iter.find(|entry| slab_noun_equality(&key, &entry.0)) {
                Some(&entry.1)
            } else {
                None
            }
        } else {
            None
        }
    }
}

pub fn slab_equality(a: &NounSlab, b: &NounSlab) -> bool {
    slab_noun_equality(&a.root, &b.root)
}

// Does not unify: slabs are collected all-at-once so there's no point.
pub fn slab_noun_equality(a: &Noun, b: &Noun) -> bool {
    let mut stack = vec![(a, b)];
    loop {
        if let Some((a, b)) = stack.pop() {
            if unsafe { a.raw_equals(b) } {
                continue;
            }
            match (
                a.as_ref_either_direct_allocated(),
                b.as_ref_either_direct_allocated(),
            ) {
                (Either::Right(a_allocated), Either::Right(b_allocated)) => {
                    if let Some(a_mug) = a_allocated.get_cached_mug() {
                        if let Some(b_mug) = b_allocated.get_cached_mug() {
                            if a_mug != b_mug {
                                break false;
                            }
                        }
                    };
                    match (a_allocated.as_ref_either(), b_allocated.as_ref_either()) {
                        (Either::Left(a_indirect), Either::Left(b_indirect)) => {
                            if a_indirect.as_slice() != b_indirect.as_slice() {
                                break false;
                            }
                            continue;
                        }
                        (Either::Right(a_cell), Either::Right(b_cell)) => {
                            // Compare both sides of the cell pair.
                            stack.push((a_cell.tail_ref(), b_cell.tail_ref()));
                            stack.push((a_cell.head_ref(), b_cell.head_ref()));
                            continue;
                        }
                        _ => {
                            break false;
                        }
                    }
                }
                _ => {
                    break false;
                }
            }
        } else {
            break true;
        }
    }
}

fn slab_mug(a: Noun) -> u32 {
    let mut stack = vec![a];
    while let Some(noun) = stack.pop() {
        if let Ok(mut allocated) = noun.as_allocated() {
            if allocated.get_cached_mug().is_none() {
                match allocated.as_either() {
                    Either::Left(indirect) => unsafe {
                        set_mug(&mut allocated, calc_atom_mug_u32(indirect.as_atom()));
                    },
                    Either::Right(cell) => match (get_mug(cell.head()), get_mug(cell.tail())) {
                        (Some(head_mug), Some(tail_mug)) => unsafe {
                            set_mug(&mut allocated, calc_cell_mug_u32(head_mug, tail_mug));
                        },
                        _ => {
                            stack.push(noun);
                            stack.push(cell.tail());
                            stack.push(cell.head());
                        }
                    },
                }
            }
        }
    }
    get_mug(a).expect("Noun should have a mug once mugged.")
}

enum CueStackEntry {
    DestinationPointer(*mut Noun),
    BackRef(u64, *const Noun),
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::AtomExt;
    use bitvec::prelude::*;
    use nockvm::noun::{D, T};
    use nockvm_macros::tas;

    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_jam() {
        let mut slab = NounSlab::new();
        let test_noun = T(
            &mut slab,
            &[D(tas!(b"request")), D(tas!(b"block")), D(tas!(b"by-id")), D(0)],
        );
        slab.set_root(test_noun);
        let jammed: Vec<u8> = slab.jam().to_vec();
        println!("jammed: {:?}", jammed);
        let mut stack = NockStack::new(1000, 0);
        let mut nockvm_jammed: Vec<u8> = nockvm::serialization::jam(&mut stack, test_noun)
            .as_ne_bytes()
            .to_vec();
        let nockvm_suffix: Vec<u8> = nockvm_jammed.split_off(jammed.len());
        println!("nockvm_jammed: {:?}", nockvm_jammed);
        assert_eq!(jammed, nockvm_jammed, "Jammed results should be identical");
        assert!(
            nockvm_suffix.iter().all(|b| { *b == 0 }),
            "Extra bytes in nockvm jam should all be 0"
        );
    }

    #[test]
    fn test_jam_cue_roundtrip() {
        let mut original_slab = NounSlab::new();
        let original_noun = T(&mut original_slab, &[D(5), D(23)]);
        println!("original_noun: {:?}", original_noun);
        original_slab.set_root(original_noun);
        // Jam the original noun
        let jammed: Vec<u8> = original_slab.jam().to_vec();
        // Cue the jammed data into a new slab
        let mut cued_slab = NounSlab::new();
        let cued_noun = cued_slab
            .cue_into(jammed.into())
            .expect("Cue should succeed");
        println!("cued_noun: {:?}", cued_noun);
        // Compare the original and cued nouns
        assert!(
            slab_noun_equality(unsafe { original_slab.root() }, &cued_noun),
            "Original and cued nouns should be equal"
        );
    }

    #[test]
    fn test_complex_noun() {
        let mut slab = NounSlab::new();
        let complex_noun = T(
            &mut slab,
            &[D(tas!(b"request")), D(tas!(b"block")), D(tas!(b"by-id")), D(0)],
        );
        slab.set_root(complex_noun);
        let jammed = slab.jam();
        let mut cued_slab = NounSlab::new();
        let cued_noun = cued_slab.cue_into(jammed).expect("Cue should succeed");
        assert!(
            slab_noun_equality(unsafe { slab.root() }, &cued_noun),
            "Complex nouns should be equal after jam/cue roundtrip"
        );
    }

    #[test]
    fn test_indirect_atoms() {
        let mut slab = NounSlab::new();
        let large_number = u64::MAX as u128 + 1;
        let large_number_bytes = Bytes::from(large_number.to_le_bytes().to_vec());
        let indirect_atom = Atom::from_bytes(&mut slab, &large_number_bytes);
        let noun_with_indirect = T(&mut slab, &[D(1), indirect_atom.as_noun(), D(2)]);
        println!("noun_with_indirect: {:?}", noun_with_indirect);
        slab.set_root(noun_with_indirect);
        let jammed = slab.jam();
        let mut cued_slab = NounSlab::new();
        let cued_noun = cued_slab.cue_into(jammed).expect("Cue should succeed");
        println!("cued_noun: {:?}", cued_noun);
        assert!(
            slab_noun_equality(&noun_with_indirect, &cued_noun),
            "Nouns with indirect atoms should be equal after jam/cue roundtrip"
        );
    }

    #[test]
    fn test_tas_macro() {
        let mut slab = NounSlab::new();
        let tas_noun = T(
            &mut slab,
            &[D(tas!(b"foo")), D(tas!(b"bar")), D(tas!(b"baz"))],
        );
        slab.set_root(tas_noun);
        let jammed = slab.jam();
        let mut cued_slab = NounSlab::new();
        let cued_noun = cued_slab.cue_into(jammed).expect("Cue should succeed");
        assert!(
            slab_noun_equality(unsafe { slab.root() }, &cued_noun),
            "Nouns with tas! macros should be equal after jam/cue roundtrip"
        );
    }

    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_cue_from_file() {
        use bytes::Bytes;
        use std::fs::File;
        use std::io::Read;
        use std::path::Path;
        // Path to the test file
        // For Bazel builds, we use the test-jams directory from the environment
        #[cfg(feature = "bazel_build")]
        let file_path = match std::env::var("TEST_JAMS_DIR") {
            Ok(dir) => format!("{}/cue-test.jam", dir),
            Err(_) => String::from("test-jams/cue-test.jam"),
        };
        // For Cargo builds, we use the regular path
        #[cfg(not(feature = "bazel_build"))]
        let file_path = "test-jams/cue-test.jam";
        // Check if the file exists
        if !Path::new(&file_path).exists() {
            println!("Test file not found at {}, skipping test", file_path);
            return; // Skip the test if the file doesn't exist
        }
        // Read the jammed data from the file
        let mut file = File::open(file_path).expect("Failed to open file");
        let mut jammed_data = Vec::new();
        file.read_to_end(&mut jammed_data)
            .expect("Failed to read file");
        let jammed = Bytes::from(jammed_data);
        // Create a new NounSlab and attempt to cue the data
        let mut slab = NounSlab::new();
        let result = slab.cue_into(jammed);
        // Assert that cue_into does not return an error
        assert!(
            result.is_ok(),
            "cue_into returned an error: {:?}",
            result.err()
        );
    }

    #[test]
    fn test_cyclic_structure() {
        let mut slab = NounSlab::new();
        // Create a jammed representation of a cyclic structure
        // [0 *] where * refers back to the entire cell, i.e. 0b11110001
        let mut jammed = BitVec::<u8, Lsb0>::new();
        jammed.extend_from_bitslice(bits![u8, Lsb0; 1, 1, 1]); // Backref to the entire structure
        jammed.extend_from_bitslice(bits![u8, Lsb0; 1, 0, 0]); // Atom 0
        jammed.extend_from_bitslice(bits![u8, Lsb0; 0, 1]); // Cell
        let jammed_bytes = Bytes::from(jammed.into_vec());
        let result = slab.cue_into(jammed_bytes);
        assert!(
            result.is_err(),
            "Expected error due to cyclic structure, but cue_into completed successfully"
        );
        if let Err(e) = result {
            println!("Error type: {:?}", e);
            assert!(
                matches!(e, CueError::BadBackref),
                "Expected CueError::BadBackref, but got a different error"
            );
        }
    }

    #[test]
    fn test_cue_simple_cell() {
        let mut slab = NounSlab::new();
        // Create a jammed representation of [1 0] by hand
        let mut jammed = BitVec::<u8, Lsb0>::new();
        jammed.extend_from_bitslice(bits![u8, Lsb0; 1, 0, 0, 0, 1, 1, 0, 1]); // 0b10110001
        let jammed_bytes = Bytes::from(jammed.into_vec());
        let result = slab.cue_into(jammed_bytes);
        assert!(result.is_ok(), "cue_into should succeed");
        if let Ok(cued_noun) = result {
            let expected_noun = T(&mut slab, &[D(1), D(0)]);
            assert!(
                slab_noun_equality(&cued_noun, &expected_noun),
                "Cued noun should equal [1 0]"
            );
        }
    }

    #[test]
    fn test_cell_construction_for_noun_slab() {
        let mut slab = NounSlab::new();
        let (cell, cell_mem_ptr) = unsafe { Cell::new_raw_mut(&mut slab) };
        unsafe { assert!(cell_mem_ptr as *const CellMemory == cell.to_raw_pointer()) };
    }

    #[test]
    fn test_noun_slab_copy_into() {
        let mut slab = NounSlab::new();
        let test_noun = T(&mut slab, &[D(5), D(23)]);
        slab.set_root(test_noun);
        let mut copy_slab = NounSlab::new();
        copy_slab.copy_into(test_noun);
    }
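
    // Additional check (a sketch, not from the original suite) of structural equality:
    // identical structures compare equal, while differing heads do not.
    #[test]
    fn test_slab_noun_equality_heads() {
        let mut slab = NounSlab::new();
        let a = T(&mut slab, &[D(1), D(2)]);
        let b = T(&mut slab, &[D(1), D(2)]);
        let c = T(&mut slab, &[D(3), D(2)]);
        assert!(
            slab_noun_equality(&a, &b),
            "structurally equal nouns should compare equal"
        );
        assert!(
            !slab_noun_equality(&a, &c),
            "nouns with different heads should not compare equal"
        );
    }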

    // Fails in Miri
    // #[test]
    // fn test_alloc_cell_for_noun_slab_uninit() {
    //     let mut slab = NounSlab::new();
    //     let cell_ptr = unsafe { slab.alloc_cell() };
    //     let cell: Cell = unsafe { Cell::from_raw_pointer(cell_ptr) };
    //     unsafe { assert_eq!(cell.head().as_raw(), 0) };
    // }

    #[test]
    fn test_alloc_cell_for_noun_slab_set_value() {
        let mut slab = NounSlab::new();
        let mut i = 0;
        while i < 100 {
            let cell_ptr = unsafe { slab.alloc_cell() };
            let cell_memory = CellMemory {
                metadata: 0,
                head: D(i),
                tail: D(i + 1),
            };
            unsafe { (*cell_ptr) = cell_memory };
            i += 1;
            println!("allocation_start: {:?}", slab.allocation_start);
        }
        // let cell_ptr = unsafe { slab.alloc_cell() };
        // // Set the cell_ptr to a value
        // let cell_memory = CellMemory { metadata: 0, head: D(5), tail: D(23) };
        // unsafe { (*cell_ptr) = cell_memory };
        // let cell: Cell = unsafe { Cell::from_raw_pointer(cell_ptr) };
        // unsafe { assert_eq!(cell.head().as_raw(), 5) };
    }

    #[test]
    fn test_nounslab_modify() {
        let mut slab = NounSlab::new();
        slab.modify(|root| vec![D(0), D(tas!(b"bind")), root]);
        let mut test_slab = NounSlab::new();
        assert!(slab_noun_equality(
            &slab.root,
            &T(&mut test_slab, &[D(0), D(tas!(b"bind")), D(0)]),
        ));
        // let peek_res = unsafe { bind_slab.root_owned() };
        // let bind_noun = T(&mut bind_slab, &[D(pid), D(tas!(b"bind")), peek_res]);
    }
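
    // A sketch (not from the original suite) of the slab growth schedule implied by
    // idx_to_size and min_idx_for_size: sizes start at 512 words and grow 4x per
    // index, and min_idx_for_size picks the smallest index whose slab fits the
    // requested word count.
    #[test]
    fn test_slab_size_schedule() {
        assert_eq!(idx_to_size(0), 512);
        assert_eq!(idx_to_size(1), 2048);
        assert_eq!(idx_to_size(2), 8192);
        assert_eq!(min_idx_for_size(1), 0);
        assert_eq!(min_idx_for_size(512), 0);
        assert_eq!(min_idx_for_size(513), 1);
        assert_eq!(min_idx_for_size(2048), 1);
        assert_eq!(min_idx_for_size(2049), 2);
    }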

    // // This test _should_ fail under Miri
    // #[test]
    // #[should_panic(expected = "error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory")]
    // fn test_raw_alloc() {
    //     let layout = Layout::array::<u64>(512).unwrap_or_else(|| panic!("Panicked at {}:{} (git sha: {:?})", file!(), line!(), option_env!("GIT_SHA")));
    //     let slab = unsafe { NounSlab::raw_alloc(layout) };
    //     assert!(!slab.is_null());
    //     // cast doesn't hide it from Miri
    //     let new_slab_u64 = slab as *mut u64;
    //     let _huh = unsafe { *new_slab_u64 };
    //     unsafe { std::alloc::dealloc(slab, layout) };
    // }
  941. }