Skip to main content
Fix for current library versions
Source Link
Anders Kaseorg
  • 40.7k
  • 3
  • 76
  • 149
use ahash::AHasher; use hashbrown::hash_map::{HashMap, RawEntryMut}; use mimalloc::MiMalloc; use nom::bytes::complete::tag; use nom::character::complete::{char, digit1, multispace0}; use nom::combinator::{map, map_res}; use nom::multi::separated_list0; use nom::sequence::{delimited, preceded}; use nom::IResult; use std::collections::VecDeque; use std::convert::TryInto; use std::error::Error; use std::hash::{Hash, Hasher}; use std::io; use std::mem; use std::str::FromStr; #[global_allocator] static GLOBAL: MiMalloc = MiMalloc; #[derive(Debug)] struct Automaton<Set> { size: u32, alphabet: usize, transitions: Vec<Vec<Set>>, initial: Set, accepting: Vec<u32>, } fn parse_vec<'a, T>( item: impl FnMut(&'a str) -> IResult<&'a str, T>, input: &'a str, ) -> IResult<&'a str, Vec<T>> { delimited( char('['), map( separated_list0( preceded(multispace0, char(',')), preceded(multispace0, item), ), |v| v.into_iter().collect(), ), preceded(multispace0, char(']')), )(input) } type Id = u32; type Node = u128; const ID_BITS: u32 = mem::size_of::<Id>() as u32 * 8; const NODE_BITS: u32 = mem::size_of::<Node>() as u32 * 8; const DEGREE: u32 = NODE_BITS / ID_BITS; struct Trie { size: u32, nodes: Vec<Node>, ids: HashMap<Id, ()>, } fn pack(ids: [Id; DEGREE as usize]) -> Node { let mut node = 0; for k in 0..DEGREE { node |= (ids[k as usize] as Node) << ID_BITS * k; } node } fn unpack(node: Node) -> [Id; DEGREE as usize] { let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = (node >> ID_BITS * k) as Id; } ids } fn node_hash(node: Node) -> u64 { let mut hasher = AHasher::default(); node.hash(&mut hasher); hasher.finish() } impl Trie { fn new(real_size: u32) -> Trie { let mut size = NODE_BITS; while size < real_size { size *= DEGREE; } let mut trie = Trie { size, nodes: vec![], ids: HashMap::new(), }; let zero_id = trie.node_id(0); debug_assert_eq!(zero_id, 0); trie } fn node_id(&mut self, node: Node) -> Id { let hash = node_hash(node); let nodes = &mut self.nodes; match self .ids .raw_entry_mut() .from_hash(hash, |&id| nodes[id as usize] == node) { RawEntryMut::Occupied(e) => *e.key(), RawEntryMut::Vacant(e) => { let id: Id = nodes.len().try_into().unwrap(); nodes.push(node); e.insert_with_hasher(hash, id, (), |&id| node_hash(nodes[id as usize])); id } } } fn vec_id(&mut self, low: u32, high: u32, vec: Vec<u32>) -> Id { if vec.is_empty() { 0 } else if high - low <= NODE_BITS { let mut node: Node = 0; for n in vec { node |= 1 << n - low; } self.node_id(node) } else { let step = (high - low) / DEGREE; let mut vecs: [Vec<u32>; DEGREE as usize] = Default::default(); for n in vec { vecs[((n - low) / step) as usize].push(n); } let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = self.vec_id( low + k * step, low + (k + 1) * step, mem::take(&mut vecs[k as usize]), ); } self.node_id(pack(ids)) } } fn parse_set<'a>(&mut self, input: &'a str) -> IResult<&'a str, Id> { let (input, vec) = parse_vec(map_res(digit1, u32::from_str), input)?; Ok((input, self.vec_id(0, self.size, vec))) } fn intersects(&self, size: u32, a: Id, b: Id) -> bool { if a == 0 || b == 0 { false } else { let a_node = self.nodes[a as usize]; let b_node = self.nodes[b as usize]; if size <= NODE_BITS { a_node & b_node != 0 } else { let step = size / DEGREE; let a_ids = unpack(a_node); let b_ids = unpack(b_node); (0..DEGREE).any(|k| self.intersects(step, a_ids[k as usize], b_ids[k as usize])) } } } fn union(&mut self, size: u32, ids: &mut Vec<Id>) -> Id { ids.retain(|&id| id != 0); if ids.len() < 2 { ids.drain(..).next().unwrap_or(0) } else { let mut node; if size <= NODE_BITS { node = 0; for id in ids.drain(..) { node |= self.nodes[id as usize]; } } else { let step = size / DEGREE; let mut vecs: [Vec<Id>; DEGREE as usize] = Default::default(); for vec in &mut vecs { vec.reserve(ids.len()); } for id in ids.drain(..) { let ids1 = unpack(self.nodes[id as usize]); for k in 0..DEGREE { vecs[k as usize].push(ids1[k as usize]); } } let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = self.union(step, &mut vecs[k as usize]); } node = pack(ids) }; self.node_id(node) } } fn for_each(&self, low: u32, high: u32, id: Id, f: &mut impl FnMut(u32)) { if id != 0 { let mut node = self.nodes[id as usize]; if high - low <= NODE_BITS { while node != 0 { let k = node.trailing_zeros(); f(low + k); node &= !(1 << k); } } else { let step = (high - low) / DEGREE; let ids = unpack(node); for k in 0..DEGREE { self.for_each(low + k * step, low + (k + 1) * step, ids[k as usize], f); } } } } } fn parse_nfa(input: &str) -> IResult<&str, (Trie, Automaton<Id>)> { let (input, _) = tag("Automaton")(input)?; let (input, _) = preceded(multispace0, char('('))(input)?; let (input, _) = preceded(multispace0, tag("\"nondet\""))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, size) = preceded(multispace0, map_res(digit1, u32::from_str))(input)?; let mut trie = Trie::new(size); let (input, _) = preceded(multispace0, char(','))(input)?; let (input, alphabet) = preceded(multispace0, map_res(digit1, usize::from_str))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, transitions) = preceded(multispace0, |input| { parse_vec( |input| parse_vec(|input| trie.parse_set(input), input), input, ) })(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, initial) = preceded(multispace0, |input| trie.parse_set(input))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, accepting) = preceded(multispace0, |input| { parse_vec(|input| map_res(digit1, u32::from_str)(input), input) })(input)?; let (input, _) = preceded(multispace0, char(')'))(input)?; Ok(( input, ( trie, Automaton { size, alphabet, transitions, initial, accepting, }, ), )) } struct DFABuilder { nfa_accepting: Id, trie: Trie, set_dstate: HashMap<Id, u32>, queue: VecDeque<Id>, dfa: Automaton<u32>, } impl DFABuilder { fn visit(&mut self, set: Id) -> u32 { let DFABuilder { nfa_accepting, trie, set_dstate, queue, dfa, } = self; *set_dstate.entry(set).or_insert_with(|| { dfa.size += 1; if trie.intersects(trie.size, *nfa_accepting, set) { dfa.accepting.push(dfa.size); } queue.push_back(set); dfa.size }) } } fn nfa_to_dfa(mut trie: Trie, nfa: Automaton<Id>) -> Automaton<u32> { let mut builder = DFABuilder { nfa_accepting: trie.vec_id(0, trie.size, nfa.accepting.clone()), trie, set_dstate: HashMap::new(), queue: VecDeque::new(), dfa: Automaton { size: 0, alphabet: nfa.alphabet, transitions: vec![vec![]; nfa.alphabet], initial: !0, accepting: vec![], }, }; builder.dfa.initial = builder.visit(nfa.initial); let mut sets = Vec::new(); while let Some(set) = builder.queue.pop_front() { for (letter, transition) in nfa.transitions.iter().enumerate() { builder .trie .for_each(0, builder.trie.size, set, &mut |nstate| { sets.push(transition[nstate as usize - 1]) }); let set1 = builder.trie.union(builder.trie.size, &mut sets); debug_assert!(sets.is_empty()); let dstate = builder.visit(set1); builder.dfa.transitions[letter].push(dstate); } } builder.dfa } fn main() -> Result<(), Box<dyn Error>> { let mut line = String::new(); io::stdin().read_line(&mut line)?; let (rest, (trie, nfa)) = delimited(multispace0, parse_nfa, multispace0)(&line).map_err(|e| e.to_ownedto_string())?; if rest != "" { return Err("expected end of input".into()); } let dfa = nfa_to_dfa(trie, nfa); println!( "Automaton(\"det\", {}, {}, {:?}, [{}], {:?})", dfa.size, dfa.alphabet, dfa.transitions, dfa.initial, dfa.accepting ); Ok(()) } 
[package] name = "automaton" version = "0.1.0" authors = ["Anders Kaseorg <[email protected]>"] edition = "2018" [dependencies] nom = "6.02.0-alpha1"1" mimalloc = { version = "0.1.19"26", default-features = false } hashbrown = { version = "0.711.2", features = ["raw"] } ahash = "0.37.3"4" 
use ahash::AHasher; use hashbrown::hash_map::{HashMap, RawEntryMut}; use mimalloc::MiMalloc; use nom::bytes::complete::tag; use nom::character::complete::{char, digit1, multispace0}; use nom::combinator::{map, map_res}; use nom::multi::separated_list0; use nom::sequence::{delimited, preceded}; use nom::IResult; use std::collections::VecDeque; use std::convert::TryInto; use std::error::Error; use std::hash::{Hash, Hasher}; use std::io; use std::mem; use std::str::FromStr; #[global_allocator] static GLOBAL: MiMalloc = MiMalloc; #[derive(Debug)] struct Automaton<Set> { size: u32, alphabet: usize, transitions: Vec<Vec<Set>>, initial: Set, accepting: Vec<u32>, } fn parse_vec<'a, T>( item: impl FnMut(&'a str) -> IResult<&'a str, T>, input: &'a str, ) -> IResult<&'a str, Vec<T>> { delimited( char('['), map( separated_list0( preceded(multispace0, char(',')), preceded(multispace0, item), ), |v| v.into_iter().collect(), ), preceded(multispace0, char(']')), )(input) } type Id = u32; type Node = u128; const ID_BITS: u32 = mem::size_of::<Id>() as u32 * 8; const NODE_BITS: u32 = mem::size_of::<Node>() as u32 * 8; const DEGREE: u32 = NODE_BITS / ID_BITS; struct Trie { size: u32, nodes: Vec<Node>, ids: HashMap<Id, ()>, } fn pack(ids: [Id; DEGREE as usize]) -> Node { let mut node = 0; for k in 0..DEGREE { node |= (ids[k as usize] as Node) << ID_BITS * k; } node } fn unpack(node: Node) -> [Id; DEGREE as usize] { let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = (node >> ID_BITS * k) as Id; } ids } fn node_hash(node: Node) -> u64 { let mut hasher = AHasher::default(); node.hash(&mut hasher); hasher.finish() } impl Trie { fn new(real_size: u32) -> Trie { let mut size = NODE_BITS; while size < real_size { size *= DEGREE; } let mut trie = Trie { size, nodes: vec![], ids: HashMap::new(), }; let zero_id = trie.node_id(0); debug_assert_eq!(zero_id, 0); trie } fn node_id(&mut self, node: Node) -> Id { let hash = node_hash(node); let nodes = &mut self.nodes; match self .ids .raw_entry_mut() .from_hash(hash, |&id| nodes[id as usize] == node) { RawEntryMut::Occupied(e) => *e.key(), RawEntryMut::Vacant(e) => { let id: Id = nodes.len().try_into().unwrap(); nodes.push(node); e.insert_with_hasher(hash, id, (), |&id| node_hash(nodes[id as usize])); id } } } fn vec_id(&mut self, low: u32, high: u32, vec: Vec<u32>) -> Id { if vec.is_empty() { 0 } else if high - low <= NODE_BITS { let mut node: Node = 0; for n in vec { node |= 1 << n - low; } self.node_id(node) } else { let step = (high - low) / DEGREE; let mut vecs: [Vec<u32>; DEGREE as usize] = Default::default(); for n in vec { vecs[((n - low) / step) as usize].push(n); } let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = self.vec_id( low + k * step, low + (k + 1) * step, mem::take(&mut vecs[k as usize]), ); } self.node_id(pack(ids)) } } fn parse_set<'a>(&mut self, input: &'a str) -> IResult<&'a str, Id> { let (input, vec) = parse_vec(map_res(digit1, u32::from_str), input)?; Ok((input, self.vec_id(0, self.size, vec))) } fn intersects(&self, size: u32, a: Id, b: Id) -> bool { if a == 0 || b == 0 { false } else { let a_node = self.nodes[a as usize]; let b_node = self.nodes[b as usize]; if size <= NODE_BITS { a_node & b_node != 0 } else { let step = size / DEGREE; let a_ids = unpack(a_node); let b_ids = unpack(b_node); (0..DEGREE).any(|k| self.intersects(step, a_ids[k as usize], b_ids[k as usize])) } } } fn union(&mut self, size: u32, ids: &mut Vec<Id>) -> Id { ids.retain(|&id| id != 0); if ids.len() < 2 { ids.drain(..).next().unwrap_or(0) } else { let mut node; if size <= NODE_BITS { node = 0; for id in ids.drain(..) { node |= self.nodes[id as usize]; } } else { let step = size / DEGREE; let mut vecs: [Vec<Id>; DEGREE as usize] = Default::default(); for vec in &mut vecs { vec.reserve(ids.len()); } for id in ids.drain(..) { let ids1 = unpack(self.nodes[id as usize]); for k in 0..DEGREE { vecs[k as usize].push(ids1[k as usize]); } } let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = self.union(step, &mut vecs[k as usize]); } node = pack(ids) }; self.node_id(node) } } fn for_each(&self, low: u32, high: u32, id: Id, f: &mut impl FnMut(u32)) { if id != 0 { let mut node = self.nodes[id as usize]; if high - low <= NODE_BITS { while node != 0 { let k = node.trailing_zeros(); f(low + k); node &= !(1 << k); } } else { let step = (high - low) / DEGREE; let ids = unpack(node); for k in 0..DEGREE { self.for_each(low + k * step, low + (k + 1) * step, ids[k as usize], f); } } } } } fn parse_nfa(input: &str) -> IResult<&str, (Trie, Automaton<Id>)> { let (input, _) = tag("Automaton")(input)?; let (input, _) = preceded(multispace0, char('('))(input)?; let (input, _) = preceded(multispace0, tag("\"nondet\""))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, size) = preceded(multispace0, map_res(digit1, u32::from_str))(input)?; let mut trie = Trie::new(size); let (input, _) = preceded(multispace0, char(','))(input)?; let (input, alphabet) = preceded(multispace0, map_res(digit1, usize::from_str))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, transitions) = preceded(multispace0, |input| { parse_vec( |input| parse_vec(|input| trie.parse_set(input), input), input, ) })(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, initial) = preceded(multispace0, |input| trie.parse_set(input))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, accepting) = preceded(multispace0, |input| { parse_vec(|input| map_res(digit1, u32::from_str)(input), input) })(input)?; let (input, _) = preceded(multispace0, char(')'))(input)?; Ok(( input, ( trie, Automaton { size, alphabet, transitions, initial, accepting, }, ), )) } struct DFABuilder { nfa_accepting: Id, trie: Trie, set_dstate: HashMap<Id, u32>, queue: VecDeque<Id>, dfa: Automaton<u32>, } impl DFABuilder { fn visit(&mut self, set: Id) -> u32 { let DFABuilder { nfa_accepting, trie, set_dstate, queue, dfa, } = self; *set_dstate.entry(set).or_insert_with(|| { dfa.size += 1; if trie.intersects(trie.size, *nfa_accepting, set) { dfa.accepting.push(dfa.size); } queue.push_back(set); dfa.size }) } } fn nfa_to_dfa(mut trie: Trie, nfa: Automaton<Id>) -> Automaton<u32> { let mut builder = DFABuilder { nfa_accepting: trie.vec_id(0, trie.size, nfa.accepting.clone()), trie, set_dstate: HashMap::new(), queue: VecDeque::new(), dfa: Automaton { size: 0, alphabet: nfa.alphabet, transitions: vec![vec![]; nfa.alphabet], initial: !0, accepting: vec![], }, }; builder.dfa.initial = builder.visit(nfa.initial); let mut sets = Vec::new(); while let Some(set) = builder.queue.pop_front() { for (letter, transition) in nfa.transitions.iter().enumerate() { builder .trie .for_each(0, builder.trie.size, set, &mut |nstate| { sets.push(transition[nstate as usize - 1]) }); let set1 = builder.trie.union(builder.trie.size, &mut sets); debug_assert!(sets.is_empty()); let dstate = builder.visit(set1); builder.dfa.transitions[letter].push(dstate); } } builder.dfa } fn main() -> Result<(), Box<dyn Error>> { let mut line = String::new(); io::stdin().read_line(&mut line)?; let (rest, (trie, nfa)) = delimited(multispace0, parse_nfa, multispace0)(&line).map_err(|e| e.to_owned())?; if rest != "" { return Err("expected end of input".into()); } let dfa = nfa_to_dfa(trie, nfa); println!( "Automaton(\"det\", {}, {}, {:?}, [{}], {:?})", dfa.size, dfa.alphabet, dfa.transitions, dfa.initial, dfa.accepting ); Ok(()) } 
[package] name = "automaton" version = "0.1.0" authors = ["Anders Kaseorg <[email protected]>"] edition = "2018" [dependencies] nom = "6.0.0-alpha1" mimalloc = { version = "0.1.19", default-features = false } hashbrown = { version = "0.7.2", features = ["raw"] } ahash = "0.3.3" 
use ahash::AHasher; use hashbrown::hash_map::{HashMap, RawEntryMut}; use mimalloc::MiMalloc; use nom::bytes::complete::tag; use nom::character::complete::{char, digit1, multispace0}; use nom::combinator::{map, map_res}; use nom::multi::separated_list0; use nom::sequence::{delimited, preceded}; use nom::IResult; use std::collections::VecDeque; use std::convert::TryInto; use std::error::Error; use std::hash::{Hash, Hasher}; use std::io; use std::mem; use std::str::FromStr; #[global_allocator] static GLOBAL: MiMalloc = MiMalloc; #[derive(Debug)] struct Automaton<Set> { size: u32, alphabet: usize, transitions: Vec<Vec<Set>>, initial: Set, accepting: Vec<u32>, } fn parse_vec<'a, T>( item: impl FnMut(&'a str) -> IResult<&'a str, T>, input: &'a str, ) -> IResult<&'a str, Vec<T>> { delimited( char('['), map( separated_list0( preceded(multispace0, char(',')), preceded(multispace0, item), ), |v| v.into_iter().collect(), ), preceded(multispace0, char(']')), )(input) } type Id = u32; type Node = u128; const ID_BITS: u32 = mem::size_of::<Id>() as u32 * 8; const NODE_BITS: u32 = mem::size_of::<Node>() as u32 * 8; const DEGREE: u32 = NODE_BITS / ID_BITS; struct Trie { size: u32, nodes: Vec<Node>, ids: HashMap<Id, ()>, } fn pack(ids: [Id; DEGREE as usize]) -> Node { let mut node = 0; for k in 0..DEGREE { node |= (ids[k as usize] as Node) << ID_BITS * k; } node } fn unpack(node: Node) -> [Id; DEGREE as usize] { let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = (node >> ID_BITS * k) as Id; } ids } fn node_hash(node: Node) -> u64 { let mut hasher = AHasher::default(); node.hash(&mut hasher); hasher.finish() } impl Trie { fn new(real_size: u32) -> Trie { let mut size = NODE_BITS; while size < real_size { size *= DEGREE; } let mut trie = Trie { size, nodes: vec![], ids: HashMap::new(), }; let zero_id = trie.node_id(0); debug_assert_eq!(zero_id, 0); trie } fn node_id(&mut self, node: Node) -> Id { let hash = node_hash(node); let nodes = &mut self.nodes; match self .ids .raw_entry_mut() .from_hash(hash, |&id| nodes[id as usize] == node) { RawEntryMut::Occupied(e) => *e.key(), RawEntryMut::Vacant(e) => { let id: Id = nodes.len().try_into().unwrap(); nodes.push(node); e.insert_with_hasher(hash, id, (), |&id| node_hash(nodes[id as usize])); id } } } fn vec_id(&mut self, low: u32, high: u32, vec: Vec<u32>) -> Id { if vec.is_empty() { 0 } else if high - low <= NODE_BITS { let mut node: Node = 0; for n in vec { node |= 1 << n - low; } self.node_id(node) } else { let step = (high - low) / DEGREE; let mut vecs: [Vec<u32>; DEGREE as usize] = Default::default(); for n in vec { vecs[((n - low) / step) as usize].push(n); } let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = self.vec_id( low + k * step, low + (k + 1) * step, mem::take(&mut vecs[k as usize]), ); } self.node_id(pack(ids)) } } fn parse_set<'a>(&mut self, input: &'a str) -> IResult<&'a str, Id> { let (input, vec) = parse_vec(map_res(digit1, u32::from_str), input)?; Ok((input, self.vec_id(0, self.size, vec))) } fn intersects(&self, size: u32, a: Id, b: Id) -> bool { if a == 0 || b == 0 { false } else { let a_node = self.nodes[a as usize]; let b_node = self.nodes[b as usize]; if size <= NODE_BITS { a_node & b_node != 0 } else { let step = size / DEGREE; let a_ids = unpack(a_node); let b_ids = unpack(b_node); (0..DEGREE).any(|k| self.intersects(step, a_ids[k as usize], b_ids[k as usize])) } } } fn union(&mut self, size: u32, ids: &mut Vec<Id>) -> Id { ids.retain(|&id| id != 0); if ids.len() < 2 { ids.drain(..).next().unwrap_or(0) } else { let mut node; if size <= NODE_BITS { node = 0; for id in ids.drain(..) { node |= self.nodes[id as usize]; } } else { let step = size / DEGREE; let mut vecs: [Vec<Id>; DEGREE as usize] = Default::default(); for vec in &mut vecs { vec.reserve(ids.len()); } for id in ids.drain(..) { let ids1 = unpack(self.nodes[id as usize]); for k in 0..DEGREE { vecs[k as usize].push(ids1[k as usize]); } } let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = self.union(step, &mut vecs[k as usize]); } node = pack(ids) }; self.node_id(node) } } fn for_each(&self, low: u32, high: u32, id: Id, f: &mut impl FnMut(u32)) { if id != 0 { let mut node = self.nodes[id as usize]; if high - low <= NODE_BITS { while node != 0 { let k = node.trailing_zeros(); f(low + k); node &= !(1 << k); } } else { let step = (high - low) / DEGREE; let ids = unpack(node); for k in 0..DEGREE { self.for_each(low + k * step, low + (k + 1) * step, ids[k as usize], f); } } } } } fn parse_nfa(input: &str) -> IResult<&str, (Trie, Automaton<Id>)> { let (input, _) = tag("Automaton")(input)?; let (input, _) = preceded(multispace0, char('('))(input)?; let (input, _) = preceded(multispace0, tag("\"nondet\""))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, size) = preceded(multispace0, map_res(digit1, u32::from_str))(input)?; let mut trie = Trie::new(size); let (input, _) = preceded(multispace0, char(','))(input)?; let (input, alphabet) = preceded(multispace0, map_res(digit1, usize::from_str))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, transitions) = preceded(multispace0, |input| { parse_vec( |input| parse_vec(|input| trie.parse_set(input), input), input, ) })(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, initial) = preceded(multispace0, |input| trie.parse_set(input))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, accepting) = preceded(multispace0, |input| { parse_vec(|input| map_res(digit1, u32::from_str)(input), input) })(input)?; let (input, _) = preceded(multispace0, char(')'))(input)?; Ok(( input, ( trie, Automaton { size, alphabet, transitions, initial, accepting, }, ), )) } struct DFABuilder { nfa_accepting: Id, trie: Trie, set_dstate: HashMap<Id, u32>, queue: VecDeque<Id>, dfa: Automaton<u32>, } impl DFABuilder { fn visit(&mut self, set: Id) -> u32 { let DFABuilder { nfa_accepting, trie, set_dstate, queue, dfa, } = self; *set_dstate.entry(set).or_insert_with(|| { dfa.size += 1; if trie.intersects(trie.size, *nfa_accepting, set) { dfa.accepting.push(dfa.size); } queue.push_back(set); dfa.size }) } } fn nfa_to_dfa(mut trie: Trie, nfa: Automaton<Id>) -> Automaton<u32> { let mut builder = DFABuilder { nfa_accepting: trie.vec_id(0, trie.size, nfa.accepting.clone()), trie, set_dstate: HashMap::new(), queue: VecDeque::new(), dfa: Automaton { size: 0, alphabet: nfa.alphabet, transitions: vec![vec![]; nfa.alphabet], initial: !0, accepting: vec![], }, }; builder.dfa.initial = builder.visit(nfa.initial); let mut sets = Vec::new(); while let Some(set) = builder.queue.pop_front() { for (letter, transition) in nfa.transitions.iter().enumerate() { builder .trie .for_each(0, builder.trie.size, set, &mut |nstate| { sets.push(transition[nstate as usize - 1]) }); let set1 = builder.trie.union(builder.trie.size, &mut sets); debug_assert!(sets.is_empty()); let dstate = builder.visit(set1); builder.dfa.transitions[letter].push(dstate); } } builder.dfa } fn main() -> Result<(), Box<dyn Error>> { let mut line = String::new(); io::stdin().read_line(&mut line)?; let (rest, (trie, nfa)) = delimited(multispace0, parse_nfa, multispace0)(&line).map_err(|e| e.to_string())?; if rest != "" { return Err("expected end of input".into()); } let dfa = nfa_to_dfa(trie, nfa); println!( "Automaton(\"det\", {}, {}, {:?}, [{}], {:?})", dfa.size, dfa.alphabet, dfa.transitions, dfa.initial, dfa.accepting ); Ok(()) } 
[package] name = "automaton" version = "0.1.0" authors = ["Anders Kaseorg <[email protected]>"] edition = "2018" [dependencies] nom = "6.2.1" mimalloc = { version = "0.1.26", default-features = false } hashbrown = { version = "0.11.2", features = ["raw"] } ahash = "0.7.4" 
Remove unsafe code
Source Link
Anders Kaseorg
  • 40.7k
  • 3
  • 76
  • 149
use ahash::AHasher; use hashbrown::rawhash_map::RawTable; use{HashMap, hashbrown::HashMap;RawEntryMut}; use mimalloc::MiMalloc; use nom::bytes::complete::tag; use nom::character::complete::{char, digit1, multispace0}; use nom::combinator::{map, map_res}; use nom::multi::separated_list0; use nom::sequence::{delimited, preceded}; use nom::IResult; use std::collections::VecDeque; use std::convert::TryInto; use std::error::Error; use std::hash::{Hash, Hasher}; use std::io; use std::mem; use std::str::FromStr; #[global_allocator] static GLOBAL: MiMalloc = MiMalloc; #[derive(Debug)] struct Automaton<Set> { size: u32, alphabet: usize, transitions: Vec<Vec<Set>>, initial: Set, accepting: Vec<u32>, } fn parse_vec<'a, T>( item: impl FnMut(&'a str) -> IResult<&'a str, T>, input: &'a str, ) -> IResult<&'a str, Vec<T>> { delimited( char('['), map( separated_list0( preceded(multispace0, char(',')), preceded(multispace0, item), ), |v| v.into_iter().collect(), ), preceded(multispace0, char(']')), )(input) } type Id = u32; type Node = u128; const ID_BITS: u32 = mem::size_of::<Id>() as u32 * 8; const NODE_BITS: u32 = mem::size_of::<Node>() as u32 * 8; const DEGREE: u32 = NODE_BITS / ID_BITS; struct Trie { size: u32, nodes: Vec<Node>, ids: RawTable<Id>HashMap<Id, ()>, } fn pack(ids: [Id; DEGREE as usize]) -> Node { let mut node = 0; for k in 0..DEGREE { node |= (ids[k as usize] as Node) << ID_BITS * k; } node } fn unpack(node: Node) -> [Id; DEGREE as usize] { let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = (node >> ID_BITS * k) as Id; } ids } fn node_hash(node: Node) -> u64 { let mut hasher = AHasher::default(); node.hash(&mut hasher); hasher.finish() } fn get_hasher<'a>(nodes: &'a [Node]) -> impl 'a + Fn(&'_ u32) -> u64 { move |&id| node_hash(nodes[id as usize]) } impl Trie { fn new(real_size: u32) -> Trie { let mut size = NODE_BITS; while size < real_size { size *= DEGREE; } let mut trie = Trie { size, nodes: vec![], ids: RawTableHashMap::new(), }; let zero_id = trie.node_id(0); debug_assert_eq!(zero_id, 0); trie } fn node_id(&mut self, node: Node) -> Id { let hash = node_hash(node); if let Some(bucket)nodes = &mut self.nodes;  match self .ids .findraw_entry_mut() .from_hash(hash, |&id| self.nodes[id as usize] == node)  { unsafeRawEntryMut::Occupied(e) {=> *bucket*e.as_refkey() }, } else  RawEntryMut::Vacant(e) => {   let id: Id = self.nodes.len().try_into().unwrap(); self. nodes.push(node); self.ids e.insertinsert_with_hasher(hash, id, get_hasher(&self.nodes), |&id| node_hash(nodes[id as usize]));   id } } } fn vec_id(&mut self, low: u32, high: u32, vec: Vec<u32>) -> Id { if vec.is_empty() { 0 } else if high - low <= NODE_BITS { let mut node: Node = 0; for n in vec { node |= 1 << n - low; } self.node_id(node) } else { let step = (high - low) / DEGREE; let mut vecs: [Vec<u32>; DEGREE as usize] = Default::default(); for n in vec { vecs[((n - low) / step) as usize].push(n); } let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = self.vec_id( low + k * step, low + (k + 1) * step, mem::take(&mut vecs[k as usize]), ); } self.node_id(pack(ids)) } } fn parse_set<'a>(&mut self, input: &'a str) -> IResult<&'a str, Id> { let (input, vec) = parse_vec(map_res(digit1, u32::from_str), input)?; Ok((input, self.vec_id(0, self.size, vec))) } fn intersects(&self, size: u32, a: Id, b: Id) -> bool { if a == 0 || b == 0 { false } else { let a_node = self.nodes[a as usize]; let b_node = self.nodes[b as usize]; if size <= NODE_BITS { a_node & b_node != 0 } else { let step = size / DEGREE; let a_ids = unpack(a_node); let b_ids = unpack(b_node); (0..DEGREE).any(|k| self.intersects(step, a_ids[k as usize], b_ids[k as usize])) } } } fn union(&mut self, size: u32, ids: &mut Vec<Id>) -> Id { ids.retain(|&id| id != 0); if ids.len() < 2 { ids.drain(..).next().unwrap_or(0) } else { let mut node; if size <= NODE_BITS { node = 0; for id in ids.drain(..) { node |= self.nodes[id as usize]; } } else { let step = size / DEGREE; let mut vecs: [Vec<Id>; DEGREE as usize] = Default::default(); for vec in &mut vecs { vec.reserve(ids.len()); } for id in ids.drain(..) { let ids1 = unpack(self.nodes[id as usize]); for k in 0..DEGREE { vecs[k as usize].push(ids1[k as usize]); } } let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = self.union(step, &mut vecs[k as usize]); } node = pack(ids) }; self.node_id(node) } } fn for_each(&self, low: u32, high: u32, id: Id, f: &mut impl FnMut(u32)) { if id != 0 { let mut node = self.nodes[id as usize]; if high - low <= NODE_BITS { while node != 0 { let k = node.trailing_zeros(); f(low + k); node &= !(1 << k); } } else { let step = (high - low) / DEGREE; let ids = unpack(node); for k in 0..DEGREE { self.for_each(low + k * step, low + (k + 1) * step, ids[k as usize], f); } } } } } fn parse_nfa(input: &str) -> IResult<&str, (Trie, Automaton<Id>)> { let (input, _) = tag("Automaton")(input)?; let (input, _) = preceded(multispace0, char('('))(input)?; let (input, _) = preceded(multispace0, tag("\"nondet\""))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, size) = preceded(multispace0, map_res(digit1, u32::from_str))(input)?; let mut trie = Trie::new(size); let (input, _) = preceded(multispace0, char(','))(input)?; let (input, alphabet) = preceded(multispace0, map_res(digit1, usize::from_str))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, transitions) = preceded(multispace0, |input| { parse_vec( |input| parse_vec(|input| trie.parse_set(input), input), input, ) })(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, initial) = preceded(multispace0, |input| trie.parse_set(input))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, accepting) = preceded(multispace0, |input| { parse_vec(|input| map_res(digit1, u32::from_str)(input), input) })(input)?; let (input, _) = preceded(multispace0, char(')'))(input)?; Ok(( input, ( trie, Automaton { size, alphabet, transitions, initial, accepting, }, ), )) } struct DFABuilder { nfa_accepting: Id, trie: Trie, set_dstate: HashMap<Id, u32>, queue: VecDeque<Id>, dfa: Automaton<u32>, } impl DFABuilder { fn visit(&mut self, set: Id) -> u32 { let DFABuilder { nfa_accepting, trie, set_dstate, queue, dfa, } = self; *set_dstate.entry(set).or_insert_with(|| { dfa.size += 1; if trie.intersects(trie.size, *nfa_accepting, set) { dfa.accepting.push(dfa.size); } queue.push_back(set); dfa.size }) } } fn nfa_to_dfa(mut trie: Trie, nfa: Automaton<Id>) -> Automaton<u32> { let mut builder = DFABuilder { nfa_accepting: trie.vec_id(0, trie.size, nfa.accepting.clone()), trie, set_dstate: HashMap::new(), queue: VecDeque::new(), dfa: Automaton { size: 0, alphabet: nfa.alphabet, transitions: vec![vec![]; nfa.alphabet], initial: !0, accepting: vec![], }, }; builder.dfa.initial = builder.visit(nfa.initial); let mut sets = Vec::new(); while let Some(set) = builder.queue.pop_front() { for (letter, transition) in nfa.transitions.iter().enumerate() { builder .trie .for_each(0, builder.trie.size, set, &mut |nstate| { sets.push(transition[nstate as usize - 1]) }); let set1 = builder.trie.union(builder.trie.size, &mut sets); debug_assert!(sets.is_empty()); let dstate = builder.visit(set1); builder.dfa.transitions[letter].push(dstate); } } builder.dfa } fn main() -> Result<(), Box<dyn Error>> { let mut line = String::new(); io::stdin().read_line(&mut line)?; let (rest, (trie, nfa)) = delimited(multispace0, parse_nfa, multispace0)(&line).map_err(|e| e.to_owned())?; if rest != "" { return Err("expected end of input".into()); } let dfa = nfa_to_dfa(trie, nfa); println!( "Automaton(\"det\", {}, {}, {:?}, [{}], {:?})", dfa.size, dfa.alphabet, dfa.transitions, dfa.initial, dfa.accepting ); Ok(()) } 
use ahash::AHasher; use hashbrown::raw::RawTable; use hashbrown::HashMap; use mimalloc::MiMalloc; use nom::bytes::complete::tag; use nom::character::complete::{char, digit1, multispace0}; use nom::combinator::{map, map_res}; use nom::multi::separated_list0; use nom::sequence::{delimited, preceded}; use nom::IResult; use std::collections::VecDeque; use std::convert::TryInto; use std::error::Error; use std::hash::{Hash, Hasher}; use std::io; use std::mem; use std::str::FromStr; #[global_allocator] static GLOBAL: MiMalloc = MiMalloc; #[derive(Debug)] struct Automaton<Set> { size: u32, alphabet: usize, transitions: Vec<Vec<Set>>, initial: Set, accepting: Vec<u32>, } fn parse_vec<'a, T>( item: impl FnMut(&'a str) -> IResult<&'a str, T>, input: &'a str, ) -> IResult<&'a str, Vec<T>> { delimited( char('['), map( separated_list0( preceded(multispace0, char(',')), preceded(multispace0, item), ), |v| v.into_iter().collect(), ), preceded(multispace0, char(']')), )(input) } type Id = u32; type Node = u128; const ID_BITS: u32 = mem::size_of::<Id>() as u32 * 8; const NODE_BITS: u32 = mem::size_of::<Node>() as u32 * 8; const DEGREE: u32 = NODE_BITS / ID_BITS; struct Trie { size: u32, nodes: Vec<Node>, ids: RawTable<Id>, } fn pack(ids: [Id; DEGREE as usize]) -> Node { let mut node = 0; for k in 0..DEGREE { node |= (ids[k as usize] as Node) << ID_BITS * k; } node } fn unpack(node: Node) -> [Id; DEGREE as usize] { let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = (node >> ID_BITS * k) as Id; } ids } fn node_hash(node: Node) -> u64 { let mut hasher = AHasher::default(); node.hash(&mut hasher); hasher.finish() } fn get_hasher<'a>(nodes: &'a [Node]) -> impl 'a + Fn(&'_ u32) -> u64 { move |&id| node_hash(nodes[id as usize]) } impl Trie { fn new(real_size: u32) -> Trie { let mut size = NODE_BITS; while size < real_size { size *= DEGREE; } let mut trie = Trie { size, nodes: vec![], ids: RawTable::new(), }; let zero_id = trie.node_id(0); debug_assert_eq!(zero_id, 0); trie } fn node_id(&mut self, node: Node) -> Id { let hash = node_hash(node); if let Some(bucket) = self.ids.find(hash, |&id| self.nodes[id as usize] == node) { unsafe { *bucket.as_ref() } } else { let id: Id = self.nodes.len().try_into().unwrap(); self.nodes.push(node); self.ids.insert(hash, id, get_hasher(&self.nodes)); id } } fn vec_id(&mut self, low: u32, high: u32, vec: Vec<u32>) -> Id { if vec.is_empty() { 0 } else if high - low <= NODE_BITS { let mut node: Node = 0; for n in vec { node |= 1 << n - low; } self.node_id(node) } else { let step = (high - low) / DEGREE; let mut vecs: [Vec<u32>; DEGREE as usize] = Default::default(); for n in vec { vecs[((n - low) / step) as usize].push(n); } let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = self.vec_id( low + k * step, low + (k + 1) * step, mem::take(&mut vecs[k as usize]), ); } self.node_id(pack(ids)) } } fn parse_set<'a>(&mut self, input: &'a str) -> IResult<&'a str, Id> { let (input, vec) = parse_vec(map_res(digit1, u32::from_str), input)?; Ok((input, self.vec_id(0, self.size, vec))) } fn intersects(&self, size: u32, a: Id, b: Id) -> bool { if a == 0 || b == 0 { false } else { let a_node = self.nodes[a as usize]; let b_node = self.nodes[b as usize]; if size <= NODE_BITS { a_node & b_node != 0 } else { let step = size / DEGREE; let a_ids = unpack(a_node); let b_ids = unpack(b_node); (0..DEGREE).any(|k| self.intersects(step, a_ids[k as usize], b_ids[k as usize])) } } } fn union(&mut self, size: u32, ids: &mut Vec<Id>) -> Id { ids.retain(|&id| id != 0); if ids.len() < 2 { ids.drain(..).next().unwrap_or(0) } else { let mut node; if size <= NODE_BITS { node = 0; for id in ids.drain(..) { node |= self.nodes[id as usize]; } } else { let step = size / DEGREE; let mut vecs: [Vec<Id>; DEGREE as usize] = Default::default(); for vec in &mut vecs { vec.reserve(ids.len()); } for id in ids.drain(..) { let ids1 = unpack(self.nodes[id as usize]); for k in 0..DEGREE { vecs[k as usize].push(ids1[k as usize]); } } let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = self.union(step, &mut vecs[k as usize]); } node = pack(ids) }; self.node_id(node) } } fn for_each(&self, low: u32, high: u32, id: Id, f: &mut impl FnMut(u32)) { if id != 0 { let mut node = self.nodes[id as usize]; if high - low <= NODE_BITS { while node != 0 { let k = node.trailing_zeros(); f(low + k); node &= !(1 << k); } } else { let step = (high - low) / DEGREE; let ids = unpack(node); for k in 0..DEGREE { self.for_each(low + k * step, low + (k + 1) * step, ids[k as usize], f); } } } } } fn parse_nfa(input: &str) -> IResult<&str, (Trie, Automaton<Id>)> { let (input, _) = tag("Automaton")(input)?; let (input, _) = preceded(multispace0, char('('))(input)?; let (input, _) = preceded(multispace0, tag("\"nondet\""))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, size) = preceded(multispace0, map_res(digit1, u32::from_str))(input)?; let mut trie = Trie::new(size); let (input, _) = preceded(multispace0, char(','))(input)?; let (input, alphabet) = preceded(multispace0, map_res(digit1, usize::from_str))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, transitions) = preceded(multispace0, |input| { parse_vec( |input| parse_vec(|input| trie.parse_set(input), input), input, ) })(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, initial) = preceded(multispace0, |input| trie.parse_set(input))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, accepting) = preceded(multispace0, |input| { parse_vec(|input| map_res(digit1, u32::from_str)(input), input) })(input)?; let (input, _) = preceded(multispace0, char(')'))(input)?; Ok(( input, ( trie, Automaton { size, alphabet, transitions, initial, accepting, }, ), )) } struct DFABuilder { nfa_accepting: Id, trie: Trie, set_dstate: HashMap<Id, u32>, queue: VecDeque<Id>, dfa: Automaton<u32>, } impl DFABuilder { fn visit(&mut self, set: Id) -> u32 { let DFABuilder { nfa_accepting, trie, set_dstate, queue, dfa, } = self; *set_dstate.entry(set).or_insert_with(|| { dfa.size += 1; if trie.intersects(trie.size, *nfa_accepting, set) { dfa.accepting.push(dfa.size); } queue.push_back(set); dfa.size }) } } fn nfa_to_dfa(mut trie: Trie, nfa: Automaton<Id>) -> Automaton<u32> { let mut builder = DFABuilder { nfa_accepting: trie.vec_id(0, trie.size, nfa.accepting.clone()), trie, set_dstate: HashMap::new(), queue: VecDeque::new(), dfa: Automaton { size: 0, alphabet: nfa.alphabet, transitions: vec![vec![]; nfa.alphabet], initial: !0, accepting: vec![], }, }; builder.dfa.initial = builder.visit(nfa.initial); let mut sets = Vec::new(); while let Some(set) = builder.queue.pop_front() { for (letter, transition) in nfa.transitions.iter().enumerate() { builder .trie .for_each(0, builder.trie.size, set, &mut |nstate| { sets.push(transition[nstate as usize - 1]) }); let set1 = builder.trie.union(builder.trie.size, &mut sets); debug_assert!(sets.is_empty()); let dstate = builder.visit(set1); builder.dfa.transitions[letter].push(dstate); } } builder.dfa } fn main() -> Result<(), Box<dyn Error>> { let mut line = String::new(); io::stdin().read_line(&mut line)?; let (rest, (trie, nfa)) = delimited(multispace0, parse_nfa, multispace0)(&line).map_err(|e| e.to_owned())?; if rest != "" { return Err("expected end of input".into()); } let dfa = nfa_to_dfa(trie, nfa); println!( "Automaton(\"det\", {}, {}, {:?}, [{}], {:?})", dfa.size, dfa.alphabet, dfa.transitions, dfa.initial, dfa.accepting ); Ok(()) } 
use ahash::AHasher; use hashbrown::hash_map::{HashMap, RawEntryMut}; use mimalloc::MiMalloc; use nom::bytes::complete::tag; use nom::character::complete::{char, digit1, multispace0}; use nom::combinator::{map, map_res}; use nom::multi::separated_list0; use nom::sequence::{delimited, preceded}; use nom::IResult; use std::collections::VecDeque; use std::convert::TryInto; use std::error::Error; use std::hash::{Hash, Hasher}; use std::io; use std::mem; use std::str::FromStr; #[global_allocator] static GLOBAL: MiMalloc = MiMalloc; #[derive(Debug)] struct Automaton<Set> { size: u32, alphabet: usize, transitions: Vec<Vec<Set>>, initial: Set, accepting: Vec<u32>, } fn parse_vec<'a, T>( item: impl FnMut(&'a str) -> IResult<&'a str, T>, input: &'a str, ) -> IResult<&'a str, Vec<T>> { delimited( char('['), map( separated_list0( preceded(multispace0, char(',')), preceded(multispace0, item), ), |v| v.into_iter().collect(), ), preceded(multispace0, char(']')), )(input) } type Id = u32; type Node = u128; const ID_BITS: u32 = mem::size_of::<Id>() as u32 * 8; const NODE_BITS: u32 = mem::size_of::<Node>() as u32 * 8; const DEGREE: u32 = NODE_BITS / ID_BITS; struct Trie { size: u32, nodes: Vec<Node>, ids: HashMap<Id, ()>, } fn pack(ids: [Id; DEGREE as usize]) -> Node { let mut node = 0; for k in 0..DEGREE { node |= (ids[k as usize] as Node) << ID_BITS * k; } node } fn unpack(node: Node) -> [Id; DEGREE as usize] { let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = (node >> ID_BITS * k) as Id; } ids } fn node_hash(node: Node) -> u64 { let mut hasher = AHasher::default(); node.hash(&mut hasher); hasher.finish() } impl Trie { fn new(real_size: u32) -> Trie { let mut size = NODE_BITS; while size < real_size { size *= DEGREE; } let mut trie = Trie { size, nodes: vec![], ids: HashMap::new(), }; let zero_id = trie.node_id(0); debug_assert_eq!(zero_id, 0); trie } fn node_id(&mut self, node: Node) -> Id { let hash = node_hash(node); let nodes = &mut self.nodes;  match self .ids .raw_entry_mut() .from_hash(hash, |&id| nodes[id as usize] == node)  { RawEntryMut::Occupied(e) => *e.key(),   RawEntryMut::Vacant(e) => {   let id: Id = nodes.len().try_into().unwrap();  nodes.push(node);  e.insert_with_hasher(hash, id, (), |&id| node_hash(nodes[id as usize]));   id } } } fn vec_id(&mut self, low: u32, high: u32, vec: Vec<u32>) -> Id { if vec.is_empty() { 0 } else if high - low <= NODE_BITS { let mut node: Node = 0; for n in vec { node |= 1 << n - low; } self.node_id(node) } else { let step = (high - low) / DEGREE; let mut vecs: [Vec<u32>; DEGREE as usize] = Default::default(); for n in vec { vecs[((n - low) / step) as usize].push(n); } let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = self.vec_id( low + k * step, low + (k + 1) * step, mem::take(&mut vecs[k as usize]), ); } self.node_id(pack(ids)) } } fn parse_set<'a>(&mut self, input: &'a str) -> IResult<&'a str, Id> { let (input, vec) = parse_vec(map_res(digit1, u32::from_str), input)?; Ok((input, self.vec_id(0, self.size, vec))) } fn intersects(&self, size: u32, a: Id, b: Id) -> bool { if a == 0 || b == 0 { false } else { let a_node = self.nodes[a as usize]; let b_node = self.nodes[b as usize]; if size <= NODE_BITS { a_node & b_node != 0 } else { let step = size / DEGREE; let a_ids = unpack(a_node); let b_ids = unpack(b_node); (0..DEGREE).any(|k| self.intersects(step, a_ids[k as usize], b_ids[k as usize])) } } } fn union(&mut self, size: u32, ids: &mut Vec<Id>) -> Id { ids.retain(|&id| id != 0); if ids.len() < 2 { ids.drain(..).next().unwrap_or(0) } else { let mut node; if size <= NODE_BITS { node = 0; for id in ids.drain(..) { node |= self.nodes[id as usize]; } } else { let step = size / DEGREE; let mut vecs: [Vec<Id>; DEGREE as usize] = Default::default(); for vec in &mut vecs { vec.reserve(ids.len()); } for id in ids.drain(..) { let ids1 = unpack(self.nodes[id as usize]); for k in 0..DEGREE { vecs[k as usize].push(ids1[k as usize]); } } let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = self.union(step, &mut vecs[k as usize]); } node = pack(ids) }; self.node_id(node) } } fn for_each(&self, low: u32, high: u32, id: Id, f: &mut impl FnMut(u32)) { if id != 0 { let mut node = self.nodes[id as usize]; if high - low <= NODE_BITS { while node != 0 { let k = node.trailing_zeros(); f(low + k); node &= !(1 << k); } } else { let step = (high - low) / DEGREE; let ids = unpack(node); for k in 0..DEGREE { self.for_each(low + k * step, low + (k + 1) * step, ids[k as usize], f); } } } } } fn parse_nfa(input: &str) -> IResult<&str, (Trie, Automaton<Id>)> { let (input, _) = tag("Automaton")(input)?; let (input, _) = preceded(multispace0, char('('))(input)?; let (input, _) = preceded(multispace0, tag("\"nondet\""))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, size) = preceded(multispace0, map_res(digit1, u32::from_str))(input)?; let mut trie = Trie::new(size); let (input, _) = preceded(multispace0, char(','))(input)?; let (input, alphabet) = preceded(multispace0, map_res(digit1, usize::from_str))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, transitions) = preceded(multispace0, |input| { parse_vec( |input| parse_vec(|input| trie.parse_set(input), input), input, ) })(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, initial) = preceded(multispace0, |input| trie.parse_set(input))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, accepting) = preceded(multispace0, |input| { parse_vec(|input| map_res(digit1, u32::from_str)(input), input) })(input)?; let (input, _) = preceded(multispace0, char(')'))(input)?; Ok(( input, ( trie, Automaton { size, alphabet, transitions, initial, accepting, }, ), )) } struct DFABuilder { nfa_accepting: Id, trie: Trie, set_dstate: HashMap<Id, u32>, queue: VecDeque<Id>, dfa: Automaton<u32>, } impl DFABuilder { fn visit(&mut self, set: Id) -> u32 { let DFABuilder { nfa_accepting, trie, set_dstate, queue, dfa, } = self; *set_dstate.entry(set).or_insert_with(|| { dfa.size += 1; if trie.intersects(trie.size, *nfa_accepting, set) { dfa.accepting.push(dfa.size); } queue.push_back(set); dfa.size }) } } fn nfa_to_dfa(mut trie: Trie, nfa: Automaton<Id>) -> Automaton<u32> { let mut builder = DFABuilder { nfa_accepting: trie.vec_id(0, trie.size, nfa.accepting.clone()), trie, set_dstate: HashMap::new(), queue: VecDeque::new(), dfa: Automaton { size: 0, alphabet: nfa.alphabet, transitions: vec![vec![]; nfa.alphabet], initial: !0, accepting: vec![], }, }; builder.dfa.initial = builder.visit(nfa.initial); let mut sets = Vec::new(); while let Some(set) = builder.queue.pop_front() { for (letter, transition) in nfa.transitions.iter().enumerate() { builder .trie .for_each(0, builder.trie.size, set, &mut |nstate| { sets.push(transition[nstate as usize - 1]) }); let set1 = builder.trie.union(builder.trie.size, &mut sets); debug_assert!(sets.is_empty()); let dstate = builder.visit(set1); builder.dfa.transitions[letter].push(dstate); } } builder.dfa } fn main() -> Result<(), Box<dyn Error>> { let mut line = String::new(); io::stdin().read_line(&mut line)?; let (rest, (trie, nfa)) = delimited(multispace0, parse_nfa, multispace0)(&line).map_err(|e| e.to_owned())?; if rest != "" { return Err("expected end of input".into()); } let dfa = nfa_to_dfa(trie, nfa); println!( "Automaton(\"det\", {}, {}, {:?}, [{}], {:?})", dfa.size, dfa.alphabet, dfa.transitions, dfa.initial, dfa.accepting ); Ok(()) } 
Check for Id overflow
Source Link
Anders Kaseorg
  • 40.7k
  • 3
  • 76
  • 149
use ahash::AHasher; use hashbrown::raw::RawTable; use hashbrown::HashMap; use mimalloc::MiMalloc; use nom::bytes::complete::tag; use nom::character::complete::{char, digit1, multispace0}; use nom::combinator::{map, map_res}; use nom::multi::separated_list0; use nom::sequence::{delimited, preceded}; use nom::IResult; use std::collections::VecDeque; use std::convert::TryInto; use std::error::Error; use std::hash::{Hash, Hasher}; use std::io; use std::mem; use std::str::FromStr; #[global_allocator] static GLOBAL: MiMalloc = MiMalloc; #[derive(Debug)] struct Automaton<Set> { size: u32, alphabet: usize, transitions: Vec<Vec<Set>>, initial: Set, accepting: Vec<u32>, } fn parse_vec<'a, T>( item: impl FnMut(&'a str) -> IResult<&'a str, T>, input: &'a str, ) -> IResult<&'a str, Vec<T>> { delimited( char('['), map( separated_list0( preceded(multispace0, char(',')), preceded(multispace0, item), ), |v| v.into_iter().collect(), ), preceded(multispace0, char(']')), )(input) } type Id = u32; type Node = u128; const ID_BITS: u32 = mem::size_of::<Id>() as u32 * 8; const NODE_BITS: u32 = mem::size_of::<Node>() as u32 * 8; const DEGREE: u32 = NODE_BITS / ID_BITS; struct Trie { size: u32, nodes: Vec<Node>, ids: RawTable<Id>, } fn pack(ids: [Id; DEGREE as usize]) -> Node { let mut node = 0; for k in 0..DEGREE { node |= (ids[k as usize] as Node) << ID_BITS * k; } node } fn unpack(node: Node) -> [Id; DEGREE as usize] { let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = (node >> ID_BITS * k) as Id; } ids } fn node_hash(node: Node) -> u64 { let mut hasher = AHasher::default(); node.hash(&mut hasher); hasher.finish() } fn get_hasher<'a>(nodes: &'a [Node]) -> impl 'a + Fn(&'_ u32) -> u64 { move |&id| node_hash(nodes[id as usize]) } impl Trie { fn new(real_size: u32) -> Trie { let mut size = NODE_BITS; while size < real_size { size *= DEGREE; } let mut trie = Trie { size, nodes: vec![], ids: RawTable::new(), }; let zero_id = trie.node_id(0); debug_assert_eq!(zero_id, 0); trie } fn node_id(&mut self, node: Node) -> Id { let hash = node_hash(node); if let Some(bucket) = self.ids.find(hash, |&id| self.nodes[id as usize] == node) { unsafe { *bucket.as_ref() } } else { let id: Id = self.nodes.len() as Id;.try_into().unwrap(); self.nodes.push(node); self.ids.insert(hash, id, get_hasher(&self.nodes)); id } } fn vec_id(&mut self, low: u32, high: u32, vec: Vec<u32>) -> Id { if vec.is_empty() { 0 } else if high - low <= NODE_BITS { let mut node: Node = 0; for n in vec { node |= 1 << n - low; } self.node_id(node) } else { let step = (high - low) / DEGREE; let mut vecs: [Vec<u32>; DEGREE as usize] = Default::default(); for n in vec { vecs[((n - low) / step) as usize].push(n); } let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = self.vec_id( low + k * step, low + (k + 1) * step, mem::take(&mut vecs[k as usize]), ); } self.node_id(pack(ids)) } } fn parse_set<'a>(&mut self, input: &'a str) -> IResult<&'a str, Id> { let (input, vec) = parse_vec(map_res(digit1, u32::from_str), input)?; Ok((input, self.vec_id(0, self.size, vec))) } fn intersects(&self, size: u32, a: Id, b: Id) -> bool { if a == 0 || b == 0 { false } else { let a_node = self.nodes[a as usize]; let b_node = self.nodes[b as usize]; if size <= NODE_BITS { a_node & b_node != 0 } else { let step = size / DEGREE; let a_ids = unpack(a_node); let b_ids = unpack(b_node); (0..DEGREE).any(|k| self.intersects(step, a_ids[k as usize], b_ids[k as usize])) } } } fn union(&mut self, size: u32, ids: &mut Vec<Id>) -> Id { ids.retain(|&id| id != 0); if ids.len() < 2 { ids.drain(..).next().unwrap_or(0) } else { let mut node; if size <= NODE_BITS { node = 0; for id in ids.drain(..) { node |= self.nodes[id as usize]; } } else { let step = size / DEGREE; let mut vecs: [Vec<Id>; DEGREE as usize] = Default::default(); for vec in &mut vecs { vec.reserve(ids.len()); } for id in ids.drain(..) { let ids1 = unpack(self.nodes[id as usize]); for k in 0..DEGREE { vecs[k as usize].push(ids1[k as usize]); } } let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = self.union(step, &mut vecs[k as usize]); } node = pack(ids) }; self.node_id(node) } } fn for_each(&self, low: u32, high: u32, id: Id, f: &mut impl FnMut(u32)) { if id != 0 { let mut node = self.nodes[id as usize]; if high - low <= NODE_BITS { while node != 0 { let k = node.trailing_zeros(); f(low + k); node &= !(1 << k); } } else { let step = (high - low) / DEGREE; let ids = unpack(node); for k in 0..DEGREE { self.for_each(low + k * step, low + (k + 1) * step, ids[k as usize], f); } } } } } fn parse_nfa(input: &str) -> IResult<&str, (Trie, Automaton<Id>)> { let (input, _) = tag("Automaton")(input)?; let (input, _) = preceded(multispace0, char('('))(input)?; let (input, _) = preceded(multispace0, tag("\"nondet\""))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, size) = preceded(multispace0, map_res(digit1, u32::from_str))(input)?; let mut trie = Trie::new(size); let (input, _) = preceded(multispace0, char(','))(input)?; let (input, alphabet) = preceded(multispace0, map_res(digit1, usize::from_str))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, transitions) = preceded(multispace0, |input| { parse_vec( |input| parse_vec(|input| trie.parse_set(input), input), input, ) })(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, initial) = preceded(multispace0, |input| trie.parse_set(input))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, accepting) = preceded(multispace0, |input| { parse_vec(|input| map_res(digit1, u32::from_str)(input), input) })(input)?; let (input, _) = preceded(multispace0, char(')'))(input)?; Ok(( input, ( trie, Automaton { size, alphabet, transitions, initial, accepting, }, ), )) } struct DFABuilder { nfa_accepting: Id, trie: Trie, set_dstate: HashMap<Id, u32>, queue: VecDeque<Id>, dfa: Automaton<u32>, } impl DFABuilder { fn visit(&mut self, set: Id) -> u32 { let DFABuilder { nfa_accepting, trie, set_dstate, queue, dfa, } = self; *set_dstate.entry(set).or_insert_with(|| { dfa.size += 1; if trie.intersects(trie.size, *nfa_accepting, set) { dfa.accepting.push(dfa.size); } queue.push_back(set); dfa.size }) } } fn nfa_to_dfa(mut trie: Trie, nfa: Automaton<Id>) -> Automaton<u32> { let mut builder = DFABuilder { nfa_accepting: trie.vec_id(0, trie.size, nfa.accepting.clone()), trie, set_dstate: HashMap::new(), queue: VecDeque::new(), dfa: Automaton { size: 0, alphabet: nfa.alphabet, transitions: vec![vec![]; nfa.alphabet], initial: !0, accepting: vec![], }, }; builder.dfa.initial = builder.visit(nfa.initial); let mut sets = Vec::new(); while let Some(set) = builder.queue.pop_front() { for (letter, transition) in nfa.transitions.iter().enumerate() { builder .trie .for_each(0, builder.trie.size, set, &mut |nstate| { sets.push(transition[nstate as usize - 1]) }); let set1 = builder.trie.union(builder.trie.size, &mut sets); debug_assert!(sets.is_empty()); let dstate = builder.visit(set1); builder.dfa.transitions[letter].push(dstate); } } builder.dfa } fn main() -> Result<(), Box<dyn Error>> { let mut line = String::new(); io::stdin().read_line(&mut line)?; let (rest, (trie, nfa)) = delimited(multispace0, parse_nfa, multispace0)(&line).map_err(|e| e.to_owned())?; if rest != "" { return Err("expected end of input".into()); } let dfa = nfa_to_dfa(trie, nfa); println!( "Automaton(\"det\", {}, {}, {:?}, [{}], {:?})", dfa.size, dfa.alphabet, dfa.transitions, dfa.initial, dfa.accepting ); Ok(()) } 
use ahash::AHasher; use hashbrown::raw::RawTable; use hashbrown::HashMap; use mimalloc::MiMalloc; use nom::bytes::complete::tag; use nom::character::complete::{char, digit1, multispace0}; use nom::combinator::{map, map_res}; use nom::multi::separated_list0; use nom::sequence::{delimited, preceded}; use nom::IResult; use std::collections::VecDeque; use std::error::Error; use std::hash::{Hash, Hasher}; use std::io; use std::mem; use std::str::FromStr; #[global_allocator] static GLOBAL: MiMalloc = MiMalloc; #[derive(Debug)] struct Automaton<Set> { size: u32, alphabet: usize, transitions: Vec<Vec<Set>>, initial: Set, accepting: Vec<u32>, } fn parse_vec<'a, T>( item: impl FnMut(&'a str) -> IResult<&'a str, T>, input: &'a str, ) -> IResult<&'a str, Vec<T>> { delimited( char('['), map( separated_list0( preceded(multispace0, char(',')), preceded(multispace0, item), ), |v| v.into_iter().collect(), ), preceded(multispace0, char(']')), )(input) } type Id = u32; type Node = u128; const ID_BITS: u32 = mem::size_of::<Id>() as u32 * 8; const NODE_BITS: u32 = mem::size_of::<Node>() as u32 * 8; const DEGREE: u32 = NODE_BITS / ID_BITS; struct Trie { size: u32, nodes: Vec<Node>, ids: RawTable<Id>, } fn pack(ids: [Id; DEGREE as usize]) -> Node { let mut node = 0; for k in 0..DEGREE { node |= (ids[k as usize] as Node) << ID_BITS * k; } node } fn unpack(node: Node) -> [Id; DEGREE as usize] { let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = (node >> ID_BITS * k) as Id; } ids } fn node_hash(node: Node) -> u64 { let mut hasher = AHasher::default(); node.hash(&mut hasher); hasher.finish() } fn get_hasher<'a>(nodes: &'a [Node]) -> impl 'a + Fn(&'_ u32) -> u64 { move |&id| node_hash(nodes[id as usize]) } impl Trie { fn new(real_size: u32) -> Trie { let mut size = NODE_BITS; while size < real_size { size *= DEGREE; } let mut trie = Trie { size, nodes: vec![], ids: RawTable::new(), }; let zero_id = trie.node_id(0); debug_assert_eq!(zero_id, 0); trie } fn node_id(&mut self, node: Node) -> Id { let hash = node_hash(node); if let Some(bucket) = self.ids.find(hash, |&id| self.nodes[id as usize] == node) { unsafe { *bucket.as_ref() } } else { let id = self.nodes.len() as Id; self.nodes.push(node); self.ids.insert(hash, id, get_hasher(&self.nodes)); id } } fn vec_id(&mut self, low: u32, high: u32, vec: Vec<u32>) -> Id { if vec.is_empty() { 0 } else if high - low <= NODE_BITS { let mut node: Node = 0; for n in vec { node |= 1 << n - low; } self.node_id(node) } else { let step = (high - low) / DEGREE; let mut vecs: [Vec<u32>; DEGREE as usize] = Default::default(); for n in vec { vecs[((n - low) / step) as usize].push(n); } let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = self.vec_id( low + k * step, low + (k + 1) * step, mem::take(&mut vecs[k as usize]), ); } self.node_id(pack(ids)) } } fn parse_set<'a>(&mut self, input: &'a str) -> IResult<&'a str, Id> { let (input, vec) = parse_vec(map_res(digit1, u32::from_str), input)?; Ok((input, self.vec_id(0, self.size, vec))) } fn intersects(&self, size: u32, a: Id, b: Id) -> bool { if a == 0 || b == 0 { false } else { let a_node = self.nodes[a as usize]; let b_node = self.nodes[b as usize]; if size <= NODE_BITS { a_node & b_node != 0 } else { let step = size / DEGREE; let a_ids = unpack(a_node); let b_ids = unpack(b_node); (0..DEGREE).any(|k| self.intersects(step, a_ids[k as usize], b_ids[k as usize])) } } } fn union(&mut self, size: u32, ids: &mut Vec<Id>) -> Id { ids.retain(|&id| id != 0); if ids.len() < 2 { ids.drain(..).next().unwrap_or(0) } else { let mut node; if size <= NODE_BITS { node = 0; for id in ids.drain(..) { node |= self.nodes[id as usize]; } } else { let step = size / DEGREE; let mut vecs: [Vec<Id>; DEGREE as usize] = Default::default(); for vec in &mut vecs { vec.reserve(ids.len()); } for id in ids.drain(..) { let ids1 = unpack(self.nodes[id as usize]); for k in 0..DEGREE { vecs[k as usize].push(ids1[k as usize]); } } let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = self.union(step, &mut vecs[k as usize]); } node = pack(ids) }; self.node_id(node) } } fn for_each(&self, low: u32, high: u32, id: Id, f: &mut impl FnMut(u32)) { if id != 0 { let mut node = self.nodes[id as usize]; if high - low <= NODE_BITS { while node != 0 { let k = node.trailing_zeros(); f(low + k); node &= !(1 << k); } } else { let step = (high - low) / DEGREE; let ids = unpack(node); for k in 0..DEGREE { self.for_each(low + k * step, low + (k + 1) * step, ids[k as usize], f); } } } } } fn parse_nfa(input: &str) -> IResult<&str, (Trie, Automaton<Id>)> { let (input, _) = tag("Automaton")(input)?; let (input, _) = preceded(multispace0, char('('))(input)?; let (input, _) = preceded(multispace0, tag("\"nondet\""))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, size) = preceded(multispace0, map_res(digit1, u32::from_str))(input)?; let mut trie = Trie::new(size); let (input, _) = preceded(multispace0, char(','))(input)?; let (input, alphabet) = preceded(multispace0, map_res(digit1, usize::from_str))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, transitions) = preceded(multispace0, |input| { parse_vec( |input| parse_vec(|input| trie.parse_set(input), input), input, ) })(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, initial) = preceded(multispace0, |input| trie.parse_set(input))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, accepting) = preceded(multispace0, |input| { parse_vec(|input| map_res(digit1, u32::from_str)(input), input) })(input)?; let (input, _) = preceded(multispace0, char(')'))(input)?; Ok(( input, ( trie, Automaton { size, alphabet, transitions, initial, accepting, }, ), )) } struct DFABuilder { nfa_accepting: Id, trie: Trie, set_dstate: HashMap<Id, u32>, queue: VecDeque<Id>, dfa: Automaton<u32>, } impl DFABuilder { fn visit(&mut self, set: Id) -> u32 { let DFABuilder { nfa_accepting, trie, set_dstate, queue, dfa, } = self; *set_dstate.entry(set).or_insert_with(|| { dfa.size += 1; if trie.intersects(trie.size, *nfa_accepting, set) { dfa.accepting.push(dfa.size); } queue.push_back(set); dfa.size }) } } fn nfa_to_dfa(mut trie: Trie, nfa: Automaton<Id>) -> Automaton<u32> { let mut builder = DFABuilder { nfa_accepting: trie.vec_id(0, trie.size, nfa.accepting.clone()), trie, set_dstate: HashMap::new(), queue: VecDeque::new(), dfa: Automaton { size: 0, alphabet: nfa.alphabet, transitions: vec![vec![]; nfa.alphabet], initial: !0, accepting: vec![], }, }; builder.dfa.initial = builder.visit(nfa.initial); let mut sets = Vec::new(); while let Some(set) = builder.queue.pop_front() { for (letter, transition) in nfa.transitions.iter().enumerate() { builder .trie .for_each(0, builder.trie.size, set, &mut |nstate| { sets.push(transition[nstate as usize - 1]) }); let set1 = builder.trie.union(builder.trie.size, &mut sets); debug_assert!(sets.is_empty()); let dstate = builder.visit(set1); builder.dfa.transitions[letter].push(dstate); } } builder.dfa } fn main() -> Result<(), Box<dyn Error>> { let mut line = String::new(); io::stdin().read_line(&mut line)?; let (rest, (trie, nfa)) = delimited(multispace0, parse_nfa, multispace0)(&line).map_err(|e| e.to_owned())?; if rest != "" { return Err("expected end of input".into()); } let dfa = nfa_to_dfa(trie, nfa); println!( "Automaton(\"det\", {}, {}, {:?}, [{}], {:?})", dfa.size, dfa.alphabet, dfa.transitions, dfa.initial, dfa.accepting ); Ok(()) } 
use ahash::AHasher; use hashbrown::raw::RawTable; use hashbrown::HashMap; use mimalloc::MiMalloc; use nom::bytes::complete::tag; use nom::character::complete::{char, digit1, multispace0}; use nom::combinator::{map, map_res}; use nom::multi::separated_list0; use nom::sequence::{delimited, preceded}; use nom::IResult; use std::collections::VecDeque; use std::convert::TryInto; use std::error::Error; use std::hash::{Hash, Hasher}; use std::io; use std::mem; use std::str::FromStr; #[global_allocator] static GLOBAL: MiMalloc = MiMalloc; #[derive(Debug)] struct Automaton<Set> { size: u32, alphabet: usize, transitions: Vec<Vec<Set>>, initial: Set, accepting: Vec<u32>, } fn parse_vec<'a, T>( item: impl FnMut(&'a str) -> IResult<&'a str, T>, input: &'a str, ) -> IResult<&'a str, Vec<T>> { delimited( char('['), map( separated_list0( preceded(multispace0, char(',')), preceded(multispace0, item), ), |v| v.into_iter().collect(), ), preceded(multispace0, char(']')), )(input) } type Id = u32; type Node = u128; const ID_BITS: u32 = mem::size_of::<Id>() as u32 * 8; const NODE_BITS: u32 = mem::size_of::<Node>() as u32 * 8; const DEGREE: u32 = NODE_BITS / ID_BITS; struct Trie { size: u32, nodes: Vec<Node>, ids: RawTable<Id>, } fn pack(ids: [Id; DEGREE as usize]) -> Node { let mut node = 0; for k in 0..DEGREE { node |= (ids[k as usize] as Node) << ID_BITS * k; } node } fn unpack(node: Node) -> [Id; DEGREE as usize] { let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = (node >> ID_BITS * k) as Id; } ids } fn node_hash(node: Node) -> u64 { let mut hasher = AHasher::default(); node.hash(&mut hasher); hasher.finish() } fn get_hasher<'a>(nodes: &'a [Node]) -> impl 'a + Fn(&'_ u32) -> u64 { move |&id| node_hash(nodes[id as usize]) } impl Trie { fn new(real_size: u32) -> Trie { let mut size = NODE_BITS; while size < real_size { size *= DEGREE; } let mut trie = Trie { size, nodes: vec![], ids: RawTable::new(), }; let zero_id = trie.node_id(0); debug_assert_eq!(zero_id, 0); trie } fn node_id(&mut self, node: Node) -> Id { let hash = node_hash(node); if let Some(bucket) = self.ids.find(hash, |&id| self.nodes[id as usize] == node) { unsafe { *bucket.as_ref() } } else { let id: Id = self.nodes.len().try_into().unwrap(); self.nodes.push(node); self.ids.insert(hash, id, get_hasher(&self.nodes)); id } } fn vec_id(&mut self, low: u32, high: u32, vec: Vec<u32>) -> Id { if vec.is_empty() { 0 } else if high - low <= NODE_BITS { let mut node: Node = 0; for n in vec { node |= 1 << n - low; } self.node_id(node) } else { let step = (high - low) / DEGREE; let mut vecs: [Vec<u32>; DEGREE as usize] = Default::default(); for n in vec { vecs[((n - low) / step) as usize].push(n); } let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = self.vec_id( low + k * step, low + (k + 1) * step, mem::take(&mut vecs[k as usize]), ); } self.node_id(pack(ids)) } } fn parse_set<'a>(&mut self, input: &'a str) -> IResult<&'a str, Id> { let (input, vec) = parse_vec(map_res(digit1, u32::from_str), input)?; Ok((input, self.vec_id(0, self.size, vec))) } fn intersects(&self, size: u32, a: Id, b: Id) -> bool { if a == 0 || b == 0 { false } else { let a_node = self.nodes[a as usize]; let b_node = self.nodes[b as usize]; if size <= NODE_BITS { a_node & b_node != 0 } else { let step = size / DEGREE; let a_ids = unpack(a_node); let b_ids = unpack(b_node); (0..DEGREE).any(|k| self.intersects(step, a_ids[k as usize], b_ids[k as usize])) } } } fn union(&mut self, size: u32, ids: &mut Vec<Id>) -> Id { ids.retain(|&id| id != 0); if ids.len() < 2 { ids.drain(..).next().unwrap_or(0) } else { let mut node; if size <= NODE_BITS { node = 0; for id in ids.drain(..) { node |= self.nodes[id as usize]; } } else { let step = size / DEGREE; let mut vecs: [Vec<Id>; DEGREE as usize] = Default::default(); for vec in &mut vecs { vec.reserve(ids.len()); } for id in ids.drain(..) { let ids1 = unpack(self.nodes[id as usize]); for k in 0..DEGREE { vecs[k as usize].push(ids1[k as usize]); } } let mut ids = [0; DEGREE as usize]; for k in 0..DEGREE { ids[k as usize] = self.union(step, &mut vecs[k as usize]); } node = pack(ids) }; self.node_id(node) } } fn for_each(&self, low: u32, high: u32, id: Id, f: &mut impl FnMut(u32)) { if id != 0 { let mut node = self.nodes[id as usize]; if high - low <= NODE_BITS { while node != 0 { let k = node.trailing_zeros(); f(low + k); node &= !(1 << k); } } else { let step = (high - low) / DEGREE; let ids = unpack(node); for k in 0..DEGREE { self.for_each(low + k * step, low + (k + 1) * step, ids[k as usize], f); } } } } } fn parse_nfa(input: &str) -> IResult<&str, (Trie, Automaton<Id>)> { let (input, _) = tag("Automaton")(input)?; let (input, _) = preceded(multispace0, char('('))(input)?; let (input, _) = preceded(multispace0, tag("\"nondet\""))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, size) = preceded(multispace0, map_res(digit1, u32::from_str))(input)?; let mut trie = Trie::new(size); let (input, _) = preceded(multispace0, char(','))(input)?; let (input, alphabet) = preceded(multispace0, map_res(digit1, usize::from_str))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, transitions) = preceded(multispace0, |input| { parse_vec( |input| parse_vec(|input| trie.parse_set(input), input), input, ) })(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, initial) = preceded(multispace0, |input| trie.parse_set(input))(input)?; let (input, _) = preceded(multispace0, char(','))(input)?; let (input, accepting) = preceded(multispace0, |input| { parse_vec(|input| map_res(digit1, u32::from_str)(input), input) })(input)?; let (input, _) = preceded(multispace0, char(')'))(input)?; Ok(( input, ( trie, Automaton { size, alphabet, transitions, initial, accepting, }, ), )) } struct DFABuilder { nfa_accepting: Id, trie: Trie, set_dstate: HashMap<Id, u32>, queue: VecDeque<Id>, dfa: Automaton<u32>, } impl DFABuilder { fn visit(&mut self, set: Id) -> u32 { let DFABuilder { nfa_accepting, trie, set_dstate, queue, dfa, } = self; *set_dstate.entry(set).or_insert_with(|| { dfa.size += 1; if trie.intersects(trie.size, *nfa_accepting, set) { dfa.accepting.push(dfa.size); } queue.push_back(set); dfa.size }) } } fn nfa_to_dfa(mut trie: Trie, nfa: Automaton<Id>) -> Automaton<u32> { let mut builder = DFABuilder { nfa_accepting: trie.vec_id(0, trie.size, nfa.accepting.clone()), trie, set_dstate: HashMap::new(), queue: VecDeque::new(), dfa: Automaton { size: 0, alphabet: nfa.alphabet, transitions: vec![vec![]; nfa.alphabet], initial: !0, accepting: vec![], }, }; builder.dfa.initial = builder.visit(nfa.initial); let mut sets = Vec::new(); while let Some(set) = builder.queue.pop_front() { for (letter, transition) in nfa.transitions.iter().enumerate() { builder .trie .for_each(0, builder.trie.size, set, &mut |nstate| { sets.push(transition[nstate as usize - 1]) }); let set1 = builder.trie.union(builder.trie.size, &mut sets); debug_assert!(sets.is_empty()); let dstate = builder.visit(set1); builder.dfa.transitions[letter].push(dstate); } } builder.dfa } fn main() -> Result<(), Box<dyn Error>> { let mut line = String::new(); io::stdin().read_line(&mut line)?; let (rest, (trie, nfa)) = delimited(multispace0, parse_nfa, multispace0)(&line).map_err(|e| e.to_owned())?; if rest != "" { return Err("expected end of input".into()); } let dfa = nfa_to_dfa(trie, nfa); println!( "Automaton(\"det\", {}, {}, {:?}, [{}], {:?})", dfa.size, dfa.alphabet, dfa.transitions, dfa.initial, dfa.accepting ); Ok(()) } 
Ran case 15
Source Link
Anders Kaseorg
  • 40.7k
  • 3
  • 76
  • 149
Loading
Source Link
Anders Kaseorg
  • 40.7k
  • 3
  • 76
  • 149
Loading