diff --git a/src/accountant_skel.rs b/src/accountant_skel.rs index 527e57a9f0d2f6..de3a189b9d34e4 100644 --- a/src/accountant_skel.rs +++ b/src/accountant_skel.rs @@ -3,22 +3,22 @@ //! in flux. Clients should use AccountantStub to interact with it. use accountant::Accountant; -use bincode::{deserialize, serialize}; +use bincode::{deserialize, serialize, serialize_into}; use ecdsa; use entry::Entry; use event::Event; use hash::Hash; use historian::Historian; use packet; -use packet::SharedPackets; +use serde_json; +use packet::{SharedPackets, BLOB_SIZE}; use rayon::prelude::*; use recorder::Signal; use result::Result; -use serde_json; use signature::PublicKey; use std::cmp::max; use std::collections::VecDeque; -use std::io::Write; +use std::io::{Cursor, Write}; use std::net::{SocketAddr, UdpSocket}; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::mpsc::{channel, Receiver, Sender, SyncSender}; @@ -27,16 +27,16 @@ use std::thread::{spawn, JoinHandle}; use std::time::Duration; use streamer; use transaction::Transaction; - -use subscribers; - -pub struct AccountantSkel { - acc: Accountant, - last_id: Hash, - writer: W, - historian_input: SyncSender, +use crdt::{Crdt, ReplicatedData}; +use std::collections::LinkedList; +use std::mem::size_of; + +pub struct AccountantSkel { + acc: Mutex, + last_id: Mutex, + historian_input: Mutex>, historian: Historian, - entry_info_subscribers: Vec, + entry_info_subscribers: Mutex>, } #[cfg_attr(feature = "cargo-clippy", allow(large_enum_variant))] @@ -70,6 +70,8 @@ impl Request { } } +type SharedSkel = Arc; + #[derive(Serialize, Deserialize, Debug)] pub enum Response { Balance { key: PublicKey, val: Option }, @@ -77,68 +79,175 @@ pub enum Response { LastId { id: Hash }, } -impl AccountantSkel { +impl AccountantSkel { /// Create a new AccountantSkel that wraps the given Accountant. pub fn new( acc: Accountant, last_id: Hash, - writer: W, historian_input: SyncSender, historian: Historian, ) -> Self { AccountantSkel { - acc, - last_id, - writer, - historian_input, + acc: Mutex::new(acc), + last_id: Mutex::new(last_id), + entry_info_subscribers: Mutex::new(vec![]), + historian_input: Mutex::new(historian_input), historian, - entry_info_subscribers: vec![], } } - fn notify_entry_info_subscribers(&mut self, entry: &Entry) { + fn notify_entry_info_subscribers(obj: &SharedSkel, entry: &Entry) { // TODO: No need to bind(). let socket = UdpSocket::bind("127.0.0.1:0").expect("bind"); - for addr in &self.entry_info_subscribers { + trace!("taking read lock"); + let addrs = obj.entry_info_subscribers.lock().unwrap().clone(); + trace!("dropping read lock"); + for addr in addrs { let entry_info = EntryInfo { id: entry.id, num_hashes: entry.num_hashes, num_events: entry.events.len() as u64, }; let data = serialize(&Response::EntryInfo(entry_info)).expect("serialize EntryInfo"); + trace!("sending to {}", addr); let _res = socket.send_to(&data, addr); + trace!("done"); + } + } + + fn update_entry(obj: &SharedSkel, writer: &Arc>, entry: &Entry) { + trace!("update_entry"); + let mut last_id_l = obj.last_id.lock().unwrap(); + *last_id_l = entry.id; + trace!("update_entry 3"); + obj.acc.lock().unwrap().register_entry_id(&last_id_l); + drop(last_id_l); + trace!("update_entry 4"); + writeln!( + writer.lock().unwrap(), + "{}", + serde_json::to_string(&entry).unwrap() + ).unwrap(); + trace!("dropping write lock"); + Self::notify_entry_info_subscribers(obj, &entry); + trace!("done entry_info notify"); + } + + fn receive_to_list( + obj: &SharedSkel, + writer: &Arc>, + max: usize, + ) -> Result> { + //trace!("receive_to_list entry"); + //TODO implement a serialize for channel that does this without allocations + let mut num = 0; + let mut l = LinkedList::new(); + let entry = obj.historian + .output + .lock() + .unwrap() + .recv_timeout(Duration::new(1, 0))?; + trace!("obj.write 1 {:?}", entry); + Self::update_entry(obj, writer, &entry); + trace!("obj.write 1.end"); + l.push_back(entry); + while let Ok(entry) = obj.historian.receive() { + trace!("obj.write 2"); + Self::update_entry(obj, writer, &entry); + trace!("obj.write 2.end"); + l.push_back(entry); + num += 1; + if num == max { + break; + } + trace!("num: {}", num); } + //trace!("receive_to_list exit"); + Ok(l) } /// Process any Entry items that have been published by the Historian. - pub fn sync(&mut self) -> Hash { - while let Ok(entry) = self.historian.output.try_recv() { - self.last_id = entry.id; - self.acc.register_entry_id(&self.last_id); - writeln!(self.writer, "{}", serde_json::to_string(&entry).unwrap()).unwrap(); - self.notify_entry_info_subscribers(&entry); + /// continuosly broadcast blobs of entries out + fn run_sync( + obj: SharedSkel, + broadcast: &streamer::BlobSender, + blob_recycler: &packet::BlobRecycler, + writer: &Arc>, + exit: Arc, + ) -> Result<()> { + let max = BLOB_SIZE / size_of::(); + let mut q = VecDeque::new(); + let mut count = 0; + trace!("max: {}", max); + while let Ok(list) = Self::receive_to_list(&obj, writer, max) { + trace!("New blobs? {} {}", count, list.len()); + let b = blob_recycler.allocate(); + let pos = { + let mut bd = b.write().unwrap(); + let mut out = Cursor::new(bd.data_mut()); + serialize_into(&mut out, &list).expect("failed to serialize output"); + out.position() as usize + }; + assert!(pos < BLOB_SIZE); + b.write().unwrap().set_size(pos); + q.push_back(b); + count += 1; + if exit.load(Ordering::Relaxed) { + break; + } + } + if !q.is_empty() { + broadcast.send(q)?; } - self.last_id + Ok(()) + } + + pub fn sync_service( + obj: SharedSkel, + exit: Arc, + broadcast: streamer::BlobSender, + blob_recycler: packet::BlobRecycler, + writer: Arc>, + ) -> JoinHandle<()> { + spawn(move || loop { + let e = Self::run_sync( + obj.clone(), + &broadcast, + &blob_recycler, + &writer, + exit.clone(), + ); + if e.is_err() && exit.load(Ordering::Relaxed) { + break; + } + }) } /// Process Request items sent by clients. pub fn process_request( - &mut self, + &self, msg: Request, rsp_addr: SocketAddr, ) -> Option<(Response, SocketAddr)> { match msg { Request::GetBalance { key } => { - let val = self.acc.get_balance(&key); + let val = self.acc.lock().unwrap().get_balance(&key); Some((Response::Balance { key, val }, rsp_addr)) } - Request::GetLastId => Some((Response::LastId { id: self.sync() }, rsp_addr)), + Request::GetLastId => Some(( + Response::LastId { + id: *self.last_id.lock().unwrap(), + }, + rsp_addr, + )), Request::Transaction(_) => unreachable!(), Request::Subscribe { subscriptions } => { for subscription in subscriptions { match subscription { - Subscription::EntryInfo => self.entry_info_subscribers.push(rsp_addr), + Subscription::EntryInfo => { + self.entry_info_subscribers.lock().unwrap().push(rsp_addr) + } } } None @@ -214,22 +323,25 @@ impl AccountantSkel { } fn process_packets( - &mut self, + &self, req_vers: Vec<(Request, SocketAddr, u8)>, ) -> Result> { + trace!("partitioning"); let (trs, reqs) = Self::partition_requests(req_vers); // Process the transactions in parallel and then log the successful ones. - for result in self.acc.process_verified_transactions(trs) { + for result in self.acc.lock().unwrap().process_verified_transactions(trs) { if let Ok(tr) = result { self.historian_input + .lock() + .unwrap() .send(Signal::Event(Event::Transaction(tr)))?; } } // Let validators know they should not attempt to process additional // transactions in parallel. - self.historian_input.send(Signal::Tick)?; + self.historian_input.lock().unwrap().send(Signal::Tick)?; // Process the remaining requests serially. let rsps = reqs.into_iter() @@ -268,39 +380,44 @@ impl AccountantSkel { } fn process( - obj: &Arc>>, + obj: &SharedSkel, verified_receiver: &Receiver)>>, - blob_sender: &streamer::BlobSender, + responder_sender: &streamer::BlobSender, packet_recycler: &packet::PacketRecycler, blob_recycler: &packet::BlobRecycler, ) -> Result<()> { let timer = Duration::new(1, 0); let mms = verified_receiver.recv_timeout(timer)?; + trace!("got some messages: {}", mms.len()); for (msgs, vers) in mms { let reqs = Self::deserialize_packets(&msgs.read().unwrap()); let req_vers = reqs.into_iter() .zip(vers) .filter_map(|(req, ver)| req.map(|(msg, addr)| (msg, addr, ver))) - .filter(|x| x.0.verify()) + .filter(|x| { + let v = x.0.verify(); + trace!("v:{} x:{:?}", v, x); + v + }) .collect(); - let rsps = obj.lock().unwrap().process_packets(req_vers)?; + trace!("process_packets"); + let rsps = obj.process_packets(req_vers)?; + trace!("done process_packets"); let blobs = Self::serialize_responses(rsps, blob_recycler)?; + trace!("sending blobs: {}", blobs.len()); if !blobs.is_empty() { //don't wake up the other side if there is nothing - blob_sender.send(blobs)?; + responder_sender.send(blobs)?; } packet_recycler.recycle(msgs); - - // Write new entries to the ledger and notify subscribers. - obj.lock().unwrap().sync(); } - + trace!("done responding"); Ok(()) } /// Process verified blobs, already in order /// Respond with a signed hash of the state fn replicate_state( - obj: &Arc>>, + obj: &SharedSkel, verified_receiver: &streamer::BlobReceiver, blob_recycler: &packet::BlobRecycler, ) -> Result<()> { @@ -310,11 +427,11 @@ impl AccountantSkel { let blob = msgs.read().unwrap(); let entries: Vec = deserialize(&blob.data()[..blob.meta.size]).unwrap(); for entry in entries { - obj.lock().unwrap().acc.register_entry_id(&entry.id); + obj.acc.lock().unwrap().register_entry_id(&entry.id); - obj.lock() + obj.acc + .lock() .unwrap() - .acc .process_verified_events(entry.events)?; } //TODO respond back to leader with hash of the state @@ -328,25 +445,35 @@ impl AccountantSkel { /// Create a UDP microservice that forwards messages the given AccountantSkel. /// This service is the network leader /// Set `exit` to shutdown its threads. - pub fn serve( - obj: &Arc>>, - addr: &str, + pub fn serve( + obj: &SharedSkel, + me: ReplicatedData, + serve: UdpSocket, + gossip: UdpSocket, exit: Arc, + writer: W, ) -> Result>> { - let read = UdpSocket::bind(addr)?; + let crdt = Arc::new(RwLock::new(Crdt::new(me))); + let t_gossip = Crdt::gossip(crdt.clone(), exit.clone()); + let t_listen = Crdt::listen(crdt.clone(), gossip, exit.clone()); + // make sure we are on the same interface - let mut local = read.local_addr()?; + let mut local = serve.local_addr()?; local.set_port(0); - let write = UdpSocket::bind(local)?; + let respond_socket = UdpSocket::bind(local.clone())?; let packet_recycler = packet::PacketRecycler::default(); let blob_recycler = packet::BlobRecycler::default(); let (packet_sender, packet_receiver) = channel(); let t_receiver = - streamer::receiver(read, exit.clone(), packet_recycler.clone(), packet_sender)?; - let (blob_sender, blob_receiver) = channel(); - let t_responder = - streamer::responder(write, exit.clone(), blob_recycler.clone(), blob_receiver); + streamer::receiver(serve, exit.clone(), packet_recycler.clone(), packet_sender)?; + let (responder_sender, responder_receiver) = channel(); + let t_responder = streamer::responder( + respond_socket, + exit.clone(), + blob_recycler.clone(), + responder_receiver, + ); let (verified_sender, verified_receiver) = channel(); let exit_ = exit.clone(); @@ -357,32 +484,58 @@ impl AccountantSkel { } }); + let (broadcast_sender, broadcast_receiver) = channel(); + + let broadcast_socket = UdpSocket::bind(local)?; + let t_broadcast = streamer::broadcaster( + broadcast_socket, + exit.clone(), + crdt.clone(), + blob_recycler.clone(), + broadcast_receiver, + ); + + let t_sync = Self::sync_service( + obj.clone(), + exit.clone(), + broadcast_sender, + blob_recycler.clone(), + Arc::new(Mutex::new(writer)), + ); + let skel = obj.clone(); let t_server = spawn(move || loop { let e = Self::process( - &skel, + &mut skel.clone(), &verified_receiver, - &blob_sender, + &responder_sender, &packet_recycler, &blob_recycler, ); if e.is_err() { - // Assume this was a timeout, so sync any empty entries. - skel.lock().unwrap().sync(); - if exit.load(Ordering::Relaxed) { break; } } }); - Ok(vec![t_receiver, t_responder, t_server, t_verifier]) + Ok(vec![ + t_receiver, + t_responder, + t_server, + t_verifier, + t_sync, + t_gossip, + t_listen, + t_broadcast, + ]) } /// This service receives messages from a leader in the network and processes the transactions /// on the accountant state. /// # Arguments /// * `obj` - The accountant state. - /// * `rsubs` - The subscribers. + /// * `me` - my configuration + /// * `leader` - leader configuration /// * `exit` - The exit signal. /// # Remarks /// The pipeline is constructed as follows: @@ -396,13 +549,21 @@ impl AccountantSkel { /// 4. process the transaction state machine /// 5. respond with the hash of the state back to the leader pub fn replicate( - obj: &Arc>>, - rsubs: subscribers::Subscribers, + obj: &SharedSkel, + me: ReplicatedData, + gossip: UdpSocket, + replicate: UdpSocket, + leader: ReplicatedData, exit: Arc, ) -> Result>> { - let read = UdpSocket::bind(rsubs.me.addr)?; + let crdt = Arc::new(RwLock::new(Crdt::new(me))); + crdt.write().unwrap().set_leader(leader.id); + crdt.write().unwrap().insert(leader); + let t_gossip = Crdt::gossip(crdt.clone(), exit.clone()); + let t_listen = Crdt::listen(crdt.clone(), gossip, exit.clone()); + // make sure we are on the same interface - let mut local = read.local_addr()?; + let mut local = replicate.local_addr()?; local.set_port(0); let write = UdpSocket::bind(local)?; @@ -411,26 +572,26 @@ impl AccountantSkel { let t_blob_receiver = streamer::blob_receiver( exit.clone(), blob_recycler.clone(), - read, + replicate, blob_sender.clone(), )?; let (window_sender, window_receiver) = channel(); let (retransmit_sender, retransmit_receiver) = channel(); - let subs = Arc::new(RwLock::new(rsubs)); let t_retransmit = streamer::retransmitter( write, exit.clone(), - subs.clone(), + crdt.clone(), blob_recycler.clone(), retransmit_receiver, ); + //TODO //the packets coming out of blob_receiver need to be sent to the GPU and verified //then sent to the window, which does the erasure coding reconstruction let t_window = streamer::window( exit.clone(), - subs, + crdt, blob_recycler.clone(), blob_receiver, window_sender, @@ -444,7 +605,14 @@ impl AccountantSkel { break; } }); - Ok(vec![t_blob_receiver, t_retransmit, t_window, t_server]) + Ok(vec![ + t_blob_receiver, + t_retransmit, + t_window, + t_server, + t_gossip, + t_listen, + ]) } } @@ -490,19 +658,19 @@ mod tests { use std::net::{SocketAddr, UdpSocket}; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::mpsc::sync_channel; - use std::sync::{Arc, Mutex}; + use std::sync::{Arc, RwLock}; use std::thread::sleep; use std::time::Duration; use transaction::Transaction; - - use chrono::prelude::*; - use entry; - use event::Event; - use hash::{hash, Hash}; - use std::collections::VecDeque; - use std::sync::mpsc::channel; + use crdt::ReplicatedData; use streamer; - use subscribers::{Node, Subscribers}; + use std::sync::mpsc::channel; + use std::collections::VecDeque; + use hash::{hash, Hash}; + use event::Event; + use entry; + use chrono::prelude::*; + use crdt::Crdt; #[test] fn test_layout() { @@ -540,7 +708,7 @@ mod tests { let rsp_addr: SocketAddr = "0.0.0.0:0".parse().expect("socket address"); let (input, event_receiver) = sync_channel(10); let historian = Historian::new(event_receiver, &mint.last_id(), None); - let mut skel = AccountantSkel::new(acc, mint.last_id(), sink(), input, historian); + let skel = AccountantSkel::new(acc, mint.last_id(), input, historian); // Process a batch that includes a transaction that receives two tokens. let alice = KeyPair::new(); @@ -554,9 +722,13 @@ mod tests { assert!(skel.process_packets(req_vers).is_ok()); // Collect the ledger and feed it to a new accountant. - skel.historian_input.send(Signal::Tick).unwrap(); + skel.historian_input + .lock() + .unwrap() + .send(Signal::Tick) + .unwrap(); drop(skel.historian_input); - let entries: Vec = skel.historian.output.iter().collect(); + let entries: Vec = skel.historian.output.lock().unwrap().iter().collect(); // Assert the user holds one token, not two. If the server only output one // entry, then the second transaction will be rejected, because it drives @@ -570,45 +742,50 @@ mod tests { #[test] fn test_accountant_bad_sig() { - let serve_port = 9002; - let send_port = 9003; - let addr = format!("127.0.0.1:{}", serve_port); - let send_addr = format!("127.0.0.1:{}", send_port); + let (leader_data, leader_gossip, _, leader_serve) = test_node(); let alice = Mint::new(10_000); let acc = Accountant::new(&alice); let bob_pubkey = KeyPair::new().pubkey(); let exit = Arc::new(AtomicBool::new(false)); let (input, event_receiver) = sync_channel(10); let historian = Historian::new(event_receiver, &alice.last_id(), Some(30)); - let acc = Arc::new(Mutex::new(AccountantSkel::new( - acc, - alice.last_id(), + let acc_skel = Arc::new(AccountantSkel::new(acc, alice.last_id(), input, historian)); + let serve_addr = leader_serve.local_addr().unwrap(); + let threads = AccountantSkel::serve( + &acc_skel, + leader_data, + leader_serve, + leader_gossip, + exit.clone(), sink(), - input, - historian, - ))); - let _threads = AccountantSkel::serve(&acc, &addr, exit.clone()).unwrap(); + ).unwrap(); sleep(Duration::from_millis(300)); - let socket = UdpSocket::bind(send_addr).unwrap(); + let socket = UdpSocket::bind("127.0.0.1:0").unwrap(); socket.set_read_timeout(Some(Duration::new(5, 0))).unwrap(); + let mut acc_stub = AccountantStub::new(serve_addr, socket); + let last_id = acc_stub.get_last_id().wait().unwrap(); - let mut acc = AccountantStub::new(&addr, socket); - let last_id = acc.get_last_id().wait().unwrap(); + trace!("doing stuff"); let tr = Transaction::new(&alice.keypair(), bob_pubkey, 500, last_id); - let _sig = acc.transfer_signed(tr).unwrap(); + let _sig = acc_stub.transfer_signed(tr).unwrap(); - let last_id = acc.get_last_id().wait().unwrap(); + let last_id = acc_stub.get_last_id().wait().unwrap(); let mut tr2 = Transaction::new(&alice.keypair(), bob_pubkey, 501, last_id); tr2.data.tokens = 502; tr2.data.plan = Plan::new_payment(502, bob_pubkey); - let _sig = acc.transfer_signed(tr2).unwrap(); + let _sig = acc_stub.transfer_signed(tr2).unwrap(); - assert_eq!(acc.get_balance(&bob_pubkey).wait().unwrap(), 500); + assert_eq!(acc_stub.get_balance(&bob_pubkey).wait().unwrap(), 500); + trace!("exiting"); exit.store(true, Ordering::Relaxed); + trace!("joining threads"); + for t in threads { + t.join().unwrap(); + } } use std::sync::{Once, ONCE_INIT}; @@ -623,21 +800,45 @@ mod tests { }); } + fn test_node() -> (ReplicatedData, UdpSocket, UdpSocket, UdpSocket) { + let gossip = UdpSocket::bind("127.0.0.1:0").unwrap(); + let replicate = UdpSocket::bind("127.0.0.1:0").unwrap(); + let serve = UdpSocket::bind("127.0.0.1:0").unwrap(); + let pubkey = KeyPair::new().pubkey(); + let d = ReplicatedData::new( + pubkey, + gossip.local_addr().unwrap(), + replicate.local_addr().unwrap(), + serve.local_addr().unwrap(), + ); + (d, gossip, replicate, serve) + } + + /// Test that mesasge sent from leader to target1 and repliated to target2 #[test] fn test_replicate() { setup(); - let leader_sock = UdpSocket::bind("127.0.0.1:0").expect("bind"); - let leader_addr = leader_sock.local_addr().unwrap(); - let me_addr = "127.0.0.1:9010".parse().unwrap(); - let target_peer_sock = UdpSocket::bind("127.0.0.1:0").expect("bind"); - let target_peer_addr = target_peer_sock.local_addr().unwrap(); - let source_peer_sock = UdpSocket::bind("127.0.0.1:0").expect("bind"); + let (leader_data, leader_gossip, _, leader_serve) = test_node(); + let (target1_data, target1_gossip, target1_replicate, _) = test_node(); + let (target2_data, target2_gossip, target2_replicate, _) = test_node(); let exit = Arc::new(AtomicBool::new(false)); - let node_me = Node::new([0, 0, 0, 0, 0, 0, 0, 1], 10, me_addr); - let node_subs = vec![Node::new([0, 0, 0, 0, 0, 0, 0, 2], 8, target_peer_addr); 1]; - let node_leader = Node::new([0, 0, 0, 0, 0, 0, 0, 3], 20, leader_addr); - let subs = Subscribers::new(node_me, node_leader, &node_subs); + //start crdt_leader + let mut crdt_l = Crdt::new(leader_data.clone()); + crdt_l.set_leader(leader_data.id); + + let cref_l = Arc::new(RwLock::new(crdt_l)); + let t_l_gossip = Crdt::gossip(cref_l.clone(), exit.clone()); + let t_l_listen = Crdt::listen(cref_l, leader_gossip, exit.clone()); + + //start crdt2 + let mut crdt2 = Crdt::new(target2_data.clone()); + crdt2.insert(leader_data.clone()); + crdt2.set_leader(leader_data.id); + let leader_id = leader_data.id; + let cref2 = Arc::new(RwLock::new(crdt2)); + let t2_gossip = Crdt::gossip(cref2.clone(), exit.clone()); + let t2_listen = Crdt::listen(cref2, target2_gossip, exit.clone()); // setup some blob services to send blobs into the socket // to simulate the source peer and get blobs out of the socket to @@ -648,12 +849,14 @@ mod tests { let t_receiver = streamer::blob_receiver( exit.clone(), recv_recycler.clone(), - target_peer_sock, + target2_replicate, s_reader, ).unwrap(); + + // simulate leader sending messages let (s_responder, r_responder) = channel(); let t_responder = streamer::responder( - source_peer_sock, + leader_serve, exit.clone(), resp_recycler.clone(), r_responder, @@ -664,15 +867,16 @@ mod tests { let acc = Accountant::new(&alice); let (input, event_receiver) = sync_channel(10); let historian = Historian::new(event_receiver, &alice.last_id(), Some(30)); - let acc = Arc::new(Mutex::new(AccountantSkel::new( - acc, - alice.last_id(), - sink(), - input, - historian, - ))); - - let _threads = AccountantSkel::replicate(&acc, subs, exit.clone()).unwrap(); + let acc = Arc::new(AccountantSkel::new(acc, alice.last_id(), input, historian)); + let replicate_addr = target1_data.replicate_addr; + let threads = AccountantSkel::replicate( + &acc, + target1_data, + target1_gossip, + target1_replicate, + leader_data, + exit.clone(), + ).unwrap(); let mut alice_ref_balance = starting_balance; let mut msgs = VecDeque::new(); @@ -685,10 +889,11 @@ mod tests { let b_ = b.clone(); let mut w = b.write().unwrap(); w.set_index(i).unwrap(); + w.set_id(leader_id).unwrap(); let tr0 = Event::new_timestamp(&bob_keypair, Utc::now()); let entry0 = entry::create_entry(&cur_hash, i, vec![tr0]); - acc.lock().unwrap().acc.register_entry_id(&cur_hash); + acc.acc.lock().unwrap().register_entry_id(&cur_hash); cur_hash = hash(&cur_hash); let tr1 = Transaction::new( @@ -697,11 +902,11 @@ mod tests { transfer_amount, cur_hash, ); - acc.lock().unwrap().acc.register_entry_id(&cur_hash); + acc.acc.lock().unwrap().register_entry_id(&cur_hash); cur_hash = hash(&cur_hash); let entry1 = entry::create_entry(&cur_hash, i + num_blobs, vec![Event::Transaction(tr1)]); - acc.lock().unwrap().acc.register_entry_id(&cur_hash); + acc.acc.lock().unwrap().register_entry_id(&cur_hash); cur_hash = hash(&cur_hash); alice_ref_balance -= transfer_amount; @@ -710,7 +915,7 @@ mod tests { w.data_mut()[..serialized_entry.len()].copy_from_slice(&serialized_entry); w.set_size(serialized_entry.len()); - w.meta.set_addr(&me_addr); + w.meta.set_addr(&replicate_addr); drop(w); msgs.push_back(b_); } @@ -726,25 +931,31 @@ mod tests { msgs.push(msg); } - let alice_balance = acc.lock() + let alice_balance = acc.acc + .lock() .unwrap() - .acc .get_balance(&alice.keypair().pubkey()) .unwrap(); assert_eq!(alice_balance, alice_ref_balance); - let bob_balance = acc.lock() + let bob_balance = acc.acc + .lock() .unwrap() - .acc .get_balance(&bob_keypair.pubkey()) .unwrap(); assert_eq!(bob_balance, starting_balance - alice_ref_balance); exit.store(true, Ordering::Relaxed); + for t in threads { + t.join().expect("join"); + } + t2_gossip.join().expect("join"); + t2_listen.join().expect("join"); t_receiver.join().expect("join"); t_responder.join().expect("join"); + t_l_gossip.join().expect("join"); + t_l_listen.join().expect("join"); } - } #[cfg(all(feature = "unstable", test))] @@ -805,7 +1016,7 @@ mod bench { let (input, event_receiver) = sync_channel(10); let historian = Historian::new(event_receiver, &mint.last_id(), None); - let mut skel = AccountantSkel::new(acc, mint.last_id(), sink(), input, historian); + let mut skel = AccountantSkel::new(acc, mint.last_id(), input, historian); let now = Instant::now(); assert!(skel.process_packets(req_vers).is_ok()); diff --git a/src/accountant_stub.rs b/src/accountant_stub.rs index d03866099cbe77..10bdd12e8ee3f5 100644 --- a/src/accountant_stub.rs +++ b/src/accountant_stub.rs @@ -10,11 +10,11 @@ use hash::Hash; use signature::{KeyPair, PublicKey, Signature}; use std::collections::HashMap; use std::io; -use std::net::UdpSocket; +use std::net::{SocketAddr, UdpSocket}; use transaction::Transaction; pub struct AccountantStub { - pub addr: String, + pub addr: SocketAddr, pub socket: UdpSocket, last_id: Option, num_events: u64, @@ -25,9 +25,9 @@ impl AccountantStub { /// Create a new AccountantStub that will interface with AccountantSkel /// over `socket`. To receive responses, the caller must bind `socket` /// to a public address before invoking AccountantStub methods. - pub fn new(addr: &str, socket: UdpSocket) -> Self { + pub fn new(addr: SocketAddr, socket: UdpSocket) -> Self { let stub = AccountantStub { - addr: addr.to_string(), + addr: addr, socket, last_id: None, num_events: 0, @@ -166,32 +166,36 @@ mod tests { use std::io::sink; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::mpsc::sync_channel; - use std::sync::{Arc, Mutex}; + use std::sync::Arc; use std::thread::sleep; use std::time::Duration; + use crdt::ReplicatedData; // TODO: Figure out why this test sometimes hangs on TravisCI. #[test] fn test_accountant_stub() { - let addr = "127.0.0.1:9000"; - let send_addr = "127.0.0.1:9001"; + let gossip = UdpSocket::bind("0.0.0.0:0").unwrap(); + let serve = UdpSocket::bind("0.0.0.0:0").unwrap(); + let addr = serve.local_addr().unwrap(); + let pubkey = KeyPair::new().pubkey(); + let d = ReplicatedData::new( + pubkey, + gossip.local_addr().unwrap(), + "0.0.0.0:0".parse().unwrap(), + serve.local_addr().unwrap(), + ); + let alice = Mint::new(10_000); let acc = Accountant::new(&alice); let bob_pubkey = KeyPair::new().pubkey(); let exit = Arc::new(AtomicBool::new(false)); let (input, event_receiver) = sync_channel(10); let historian = Historian::new(event_receiver, &alice.last_id(), Some(30)); - let acc = Arc::new(Mutex::new(AccountantSkel::new( - acc, - alice.last_id(), - sink(), - input, - historian, - ))); - let _threads = AccountantSkel::serve(&acc, addr, exit.clone()).unwrap(); + let acc = Arc::new(AccountantSkel::new(acc, alice.last_id(), input, historian)); + let threads = AccountantSkel::serve(&acc, d, serve, gossip, exit.clone(), sink()).unwrap(); sleep(Duration::from_millis(300)); - let socket = UdpSocket::bind(send_addr).unwrap(); + let socket = UdpSocket::bind("0.0.0.0:0").unwrap(); socket.set_read_timeout(Some(Duration::new(5, 0))).unwrap(); let mut acc = AccountantStub::new(addr, socket); @@ -200,5 +204,8 @@ mod tests { .unwrap(); assert_eq!(acc.get_balance(&bob_pubkey).wait().unwrap(), 500); exit.store(true, Ordering::Relaxed); + for t in threads { + t.join().unwrap(); + } } } diff --git a/src/bin/client-demo.rs b/src/bin/client-demo.rs index 50f2e8a2ecfb87..d996c6e9c1a689 100644 --- a/src/bin/client-demo.rs +++ b/src/bin/client-demo.rs @@ -83,7 +83,7 @@ fn main() { }); let socket = UdpSocket::bind(&send_addr).unwrap(); - let mut acc = AccountantStub::new(&addr, socket); + let mut acc = AccountantStub::new(addr.parse().unwrap(), socket); println!("Get last ID..."); let last_id = acc.get_last_id().wait().unwrap(); @@ -122,7 +122,7 @@ fn main() { println!("Transferring 1 unit {} times...", trs.len()); let send_addr = "0.0.0.0:0"; let socket = UdpSocket::bind(send_addr).unwrap(); - let acc = AccountantStub::new(&addr, socket); + let acc = AccountantStub::new(addr.parse().unwrap(), socket); for tr in trs { acc.transfer_signed(tr.clone()).unwrap(); } diff --git a/src/bin/historian-demo.rs b/src/bin/historian-demo.rs index 010391cbab3d88..134e7950f5d04e 100644 --- a/src/bin/historian-demo.rs +++ b/src/bin/historian-demo.rs @@ -28,7 +28,7 @@ fn main() { let hist = Historian::new(event_receiver, &seed, Some(10)); create_ledger(&input, &seed).expect("send error"); drop(input); - let entries: Vec = hist.output.iter().collect(); + let entries: Vec = hist.output.lock().unwrap().iter().collect(); for entry in &entries { println!("{:?}", entry); } diff --git a/src/bin/testnode.rs b/src/bin/testnode.rs index 6a6d8ac9ea1c0e..e37d32093445d2 100644 --- a/src/bin/testnode.rs +++ b/src/bin/testnode.rs @@ -11,12 +11,15 @@ use solana::accountant_skel::AccountantSkel; use solana::entry::Entry; use solana::event::Event; use solana::historian::Historian; +use solana::signature::{KeyPair, KeyPairUtil}; +use solana::crdt::ReplicatedData; use std::env; use std::io::{stdin, stdout, Read}; use std::process::exit; use std::sync::atomic::AtomicBool; use std::sync::mpsc::sync_channel; -use std::sync::{Arc, Mutex}; +use std::sync::Arc; +use std::net::UdpSocket; fn print_usage(program: &str, opts: Options) { let mut brief = format!("Usage: cat | {} [options]\n\n", program); @@ -49,7 +52,9 @@ fn main() { if matches.opt_present("p") { port = matches.opt_str("p").unwrap().parse().expect("port"); } - let addr = format!("0.0.0.0:{}", port); + let serve_addr = format!("0.0.0.0:{}", port); + let gossip_addr = format!("0.0.0.0:{}", port + 1); + let replicate_addr = format!("0.0.0.0:{}", port + 2); if stdin_isatty() { eprintln!("nothing found on stdin, expected a log file"); @@ -99,15 +104,20 @@ fn main() { let (input, event_receiver) = sync_channel(10_000); let historian = Historian::new(event_receiver, &last_id, Some(1000)); let exit = Arc::new(AtomicBool::new(false)); - let skel = Arc::new(Mutex::new(AccountantSkel::new( - acc, - last_id, - stdout(), - input, - historian, - ))); - let threads = AccountantSkel::serve(&skel, &addr, exit.clone()).unwrap(); - eprintln!("Ready. Listening on {}", addr); + let skel = Arc::new(AccountantSkel::new(acc, last_id, input, historian)); + let serve_sock = UdpSocket::bind(&serve_addr).unwrap(); + let gossip_sock = UdpSocket::bind(&gossip_addr).unwrap(); + let replicate_sock = UdpSocket::bind(&replicate_addr).unwrap(); + let pubkey = KeyPair::new().pubkey(); + let d = ReplicatedData::new( + pubkey, + gossip_sock.local_addr().unwrap(), + replicate_sock.local_addr().unwrap(), + serve_sock.local_addr().unwrap(), + ); + let threads = + AccountantSkel::serve(&skel, d, serve_sock, gossip_sock, exit.clone(), stdout()).unwrap(); + eprintln!("Ready. Listening on {}", serve_addr); for t in threads { t.join().expect("join"); } diff --git a/src/crdt.rs b/src/crdt.rs index b7742d5cd309b2..60c2ee98dc5b91 100644 --- a/src/crdt.rs +++ b/src/crdt.rs @@ -1,15 +1,24 @@ //! The `crdt` module defines a data structure that is shared by all the nodes in the network over -//! a gossip control plane. The goal is to share small bits of of-chain information and detect and +//! a gossip control plane. The goal is to share small bits of off-chain information and detect and //! repair partitions. //! //! This CRDT only supports a very limited set of types. A map of PublicKey -> Versioned Struct. //! The last version is always picked durring an update. +//! +//! The network is arranged in layers: +//! +//! * layer 0 - Leader. +//! * layer 1 - As many nodes as we can fit +//! * layer 2 - Everyone else, if layer 1 is `2^10`, layer 2 should be able to fit `2^20` number of nodes. +//! +//! Accountant needs to provide an interface for us to query the stake weight use bincode::{deserialize, serialize}; use byteorder::{LittleEndian, ReadBytesExt}; use hash::Hash; -use result::Result; +use result::{Error, Result}; use ring::rand::{SecureRandom, SystemRandom}; +use rayon::prelude::*; use signature::{PublicKey, Signature}; use std::collections::HashMap; use std::io::Cursor; @@ -18,20 +27,21 @@ use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, RwLock}; use std::thread::{sleep, spawn, JoinHandle}; use std::time::Duration; +use packet::SharedBlob; /// Structure to be replicated by the network #[derive(Serialize, Deserialize, Clone)] pub struct ReplicatedData { - id: PublicKey, + pub id: PublicKey, sig: Signature, /// should always be increasing version: u64, /// address to connect to for gossip - gossip_addr: SocketAddr, + pub gossip_addr: SocketAddr, /// address to connect to for replication - replicate_addr: SocketAddr, + pub replicate_addr: SocketAddr, /// address to connect to when this node is leader - lead_addr: SocketAddr, + pub serve_addr: SocketAddr, /// current leader identity current_leader_id: PublicKey, /// last verified hash that was submitted to the leader @@ -41,15 +51,19 @@ pub struct ReplicatedData { } impl ReplicatedData { - pub fn new(id: PublicKey, gossip_addr: SocketAddr) -> ReplicatedData { - let daddr = "0.0.0.0:0".parse().unwrap(); + pub fn new( + id: PublicKey, + gossip_addr: SocketAddr, + replicate_addr: SocketAddr, + serve_addr: SocketAddr, + ) -> ReplicatedData { ReplicatedData { id, sig: Signature::default(), version: 0, gossip_addr, - replicate_addr: daddr, - lead_addr: daddr, + replicate_addr, + serve_addr, current_leader_id: PublicKey::default(), last_verified_hash: Hash::default(), last_verified_count: 0, @@ -78,7 +92,7 @@ pub struct Crdt { /// The value of the remote update index that i have last seen /// This Node will ask external nodes for updates since the value in this list remote: HashMap, - update_index: u64, + pub update_index: u64, me: PublicKey, timeout: Duration, } @@ -109,23 +123,117 @@ impl Crdt { g.table.insert(me.id, me); g } - pub fn import(&mut self, v: &ReplicatedData) { - // TODO check that last_verified types are always increasing - // TODO probably an error or attack - if self.me != v.id { - self.insert(v); - } + pub fn my_data(&self) -> &ReplicatedData { + &self.table[&self.me] + } + pub fn leader_data(&self) -> &ReplicatedData { + &self.table[&self.table[&self.me].current_leader_id] + } + + pub fn set_leader(&mut self, key: PublicKey) -> () { + let mut me = self.my_data().clone(); + me.current_leader_id = key; + me.version += 1; + self.insert(me); } - pub fn insert(&mut self, v: &ReplicatedData) { + + pub fn insert(&mut self, v: ReplicatedData) { + // TODO check that last_verified types are always increasing if self.table.get(&v.id).is_none() || (v.version > self.table[&v.id].version) { + //somehow we signed a message for our own identity with a higher version that + // we have stored ourselves + trace!("me: {:?}", self.me[0]); + trace!("v.id: {:?}", v.id[0]); trace!("insert! {}", v.version); self.update_index += 1; - let _ = self.table.insert(v.id, v.clone()); + let _ = self.table.insert(v.id.clone(), v.clone()); let _ = self.local.insert(v.id, self.update_index); } else { - trace!("INSERT FAILED {}", v.version); + trace!( + "INSERT FAILED new.version: {} me.version: {}", + v.version, + self.table[&v.id].version + ); } } + + /// broadcast messages from the leader to layer 1 nodes + /// # Remarks + /// We need to avoid having obj locked while doing any io, such as the `send_to` + pub fn broadcast( + obj: &Arc>, + blobs: &Vec, + s: &UdpSocket, + transmit_index: &mut u64, + ) -> Result<()> { + let (me, table): (ReplicatedData, Vec) = { + // copy to avoid locking durring IO + let robj = obj.read().unwrap(); + let cloned_table: Vec = robj.table.values().cloned().collect(); + (robj.table[&robj.me].clone(), cloned_table) + }; + let errs: Vec<_> = table + .iter() + .enumerate() + .cycle() + .zip(blobs.iter()) + .map(|((i, v), b)| { + if me.id == v.id { + return Ok(0); + } + // only leader should be broadcasting + assert!(me.current_leader_id != v.id); + let mut blob = b.write().unwrap(); + blob.set_index(*transmit_index + i as u64) + .expect("set_index"); + s.send_to(&blob.data[..blob.meta.size], &v.replicate_addr) + }) + .collect(); + for e in errs { + trace!("retransmit result {:?}", e); + match e { + Err(e) => return Err(Error::IO(e)), + _ => (), + } + *transmit_index += 1; + } + Ok(()) + } + + /// retransmit messages from the leader to layer 1 nodes + /// # Remarks + /// We need to avoid having obj locked while doing any io, such as the `send_to` + pub fn retransmit(obj: &Arc>, blob: &SharedBlob, s: &UdpSocket) -> Result<()> { + let (me, table): (ReplicatedData, Vec) = { + // copy to avoid locking durring IO + let s = obj.read().unwrap(); + (s.table[&s.me].clone(), s.table.values().cloned().collect()) + }; + let rblob = blob.read().unwrap(); + let errs: Vec<_> = table + .par_iter() + .map(|v| { + if me.id == v.id { + return Ok(0); + } + if me.current_leader_id == v.id { + trace!("skip retransmit to leader{:?}", v.id); + return Ok(0); + } + trace!("retransmit blob to {}", v.replicate_addr); + s.send_to(&rblob.data[..rblob.meta.size], &v.replicate_addr) + }) + .collect(); + for e in errs { + trace!("retransmit result {:?}", e); + match e { + Err(e) => return Err(Error::IO(e)), + _ => (), + } + } + Ok(()) + } + fn random() -> u64 { let rnd = SystemRandom::new(); let mut buf = [0u8; 8]; @@ -134,7 +242,7 @@ impl Crdt { rdr.read_u64::().unwrap() } fn get_updates_since(&self, v: u64) -> (PublicKey, u64, Vec) { - trace!("get updates since {}", v); + //trace!("get updates since {}", v); let data = self.table .values() .filter(|x| self.local[&x.id] > v) @@ -147,10 +255,9 @@ impl Crdt { /// Create a random gossip request /// # Returns - /// (A,B,C) - /// * A - Remote gossip address - /// * B - My gossip address - /// * C - Remote update index to request updates since + /// (A,B) + /// * A - Address to send to + /// * B - RequestUpdates protocol message fn gossip_request(&self) -> (SocketAddr, Protocol) { let n = (Self::random() as usize) % self.table.len(); trace!("random {:?} {}", &self.me[0..1], n); @@ -186,7 +293,7 @@ impl Crdt { // TODO we need to punish/spam resist here // sig verify the whole update and slash anyone who sends a bad update for v in data { - self.import(&v); + self.insert(v.clone()); } *self.remote.entry(from).or_insert(update_index) = update_index; } @@ -222,7 +329,7 @@ impl Crdt { let rsp = serialize(&Protocol::ReceiveUpdates(from, ups, data))?; trace!("send_to {}", addr); //TODO verify reqdata belongs to sender - obj.write().unwrap().import(&reqdata); + obj.write().unwrap().insert(reqdata); sock.send_to(&rsp, addr).unwrap(); trace!("send_to done!"); } @@ -258,6 +365,31 @@ mod test { use std::sync::{Arc, RwLock}; use std::thread::{sleep, JoinHandle}; use std::time::Duration; + use rayon::iter::*; + use packet::Blob; + use logger; + + fn test_node() -> (Crdt, UdpSocket, UdpSocket, UdpSocket) { + let gossip = UdpSocket::bind("0.0.0.0:0").unwrap(); + let replicate = UdpSocket::bind("0.0.0.0:0").unwrap(); + let serve = UdpSocket::bind("0.0.0.0:0").unwrap(); + let pubkey = KeyPair::new().pubkey(); + let d = ReplicatedData::new( + pubkey, + gossip.local_addr().unwrap(), + replicate.local_addr().unwrap(), + serve.local_addr().unwrap(), + ); + let crdt = Crdt::new(d); + trace!( + "id: {} gossip: {} replicate: {} serve: {}", + crdt.my_data().id[0], + gossip.local_addr().unwrap(), + replicate.local_addr().unwrap(), + serve.local_addr().unwrap(), + ); + (crdt, gossip, replicate, serve) + } /// Test that the network converges. /// Run until every node in the network has a full ReplicatedData set. @@ -271,12 +403,9 @@ mod test { let exit = Arc::new(AtomicBool::new(false)); let listen: Vec<_> = (0..num) .map(|_| { - let listener = UdpSocket::bind("0.0.0.0:0").unwrap(); - let pubkey = KeyPair::new().pubkey(); - let d = ReplicatedData::new(pubkey, listener.local_addr().unwrap()); - let crdt = Crdt::new(d); + let (crdt, gossip, _, _) = test_node(); let c = Arc::new(RwLock::new(crdt)); - let l = Crdt::listen(c.clone(), listener, exit.clone()); + let l = Crdt::listen(c.clone(), gossip, exit.clone()); (c, l) }) .collect(); @@ -332,7 +461,7 @@ mod test { let yv = listen[y].0.read().unwrap(); let mut d = yv.table[&yv.me].clone(); d.version = 0; - xv.insert(&d); + xv.insert(d); } }); } @@ -349,7 +478,7 @@ mod test { let yv = listen[y].0.read().unwrap(); let mut d = yv.table[&yv.me].clone(); d.version = 0; - xv.insert(&d); + xv.insert(d); } }); } @@ -357,16 +486,89 @@ mod test { /// Test that insert drops messages that are older #[test] fn insert_test() { - let mut d = ReplicatedData::new(KeyPair::new().pubkey(), "127.0.0.1:1234".parse().unwrap()); + let mut d = ReplicatedData::new( + KeyPair::new().pubkey(), + "127.0.0.1:1234".parse().unwrap(), + "127.0.0.1:1235".parse().unwrap(), + "127.0.0.1:1236".parse().unwrap(), + ); assert_eq!(d.version, 0); let mut crdt = Crdt::new(d.clone()); assert_eq!(crdt.table[&d.id].version, 0); d.version = 2; - crdt.insert(&d); + crdt.insert(d.clone()); assert_eq!(crdt.table[&d.id].version, 2); d.version = 1; - crdt.insert(&d); + crdt.insert(d.clone()); assert_eq!(crdt.table[&d.id].version, 2); } + #[test] + pub fn test_crdt_retransmit() { + logger::setup(); + trace!("c1:"); + let (mut c1, s1, r1, e1) = test_node(); + trace!("c2:"); + let (mut c2, s2, r2, _) = test_node(); + trace!("c3:"); + let (mut c3, s3, r3, _) = test_node(); + let c1_id = c1.my_data().id; + c1.set_leader(c1_id); + + c2.insert(c1.my_data().clone()); + c3.insert(c1.my_data().clone()); + + c2.set_leader(c1.my_data().id); + c3.set_leader(c1.my_data().id); + + let exit = Arc::new(AtomicBool::new(false)); + + // Create listen threads + let a1 = Arc::new(RwLock::new(c1)); + let t1 = Crdt::listen(a1.clone(), s1, exit.clone()); + + let a2 = Arc::new(RwLock::new(c2)); + let t2 = Crdt::listen(a2.clone(), s2, exit.clone()); + + let a3 = Arc::new(RwLock::new(c3)); + let t3 = Crdt::listen(a3.clone(), s3, exit.clone()); + + // Create gossip threads + let t1_gossip = Crdt::gossip(a1.clone(), exit.clone()); + let t2_gossip = Crdt::gossip(a2.clone(), exit.clone()); + let t3_gossip = Crdt::gossip(a3.clone(), exit.clone()); + + //wait to converge + trace!("waitng to converge:"); + let mut done = false; + for _ in 0..10 { + done = a1.read().unwrap().table.len() == 3 && a2.read().unwrap().table.len() == 3 + && a3.read().unwrap().table.len() == 3; + if done { + break; + } + sleep(Duration::new(1, 0)); + } + assert!(done); + let mut b = Blob::default(); + b.meta.size = 10; + Crdt::retransmit(&a1, &Arc::new(RwLock::new(b)), &e1).unwrap(); + let res: Vec<_> = [r1, r2, r3] + .into_par_iter() + .map(|s| { + let mut b = Blob::default(); + s.set_read_timeout(Some(Duration::new(1, 0))).unwrap(); + let res = s.recv_from(&mut b.data); + res.is_err() //true if failed to receive the retransmit packet + }) + .collect(); + //true if failed receive the retransmit packet, r2, and r3 should succeed + //r1 was the sender, so it should fail to receive the packet + assert_eq!(res, [true, false, false]); + exit.store(true, Ordering::Relaxed); + let threads = vec![t1, t2, t3, t1_gossip, t2_gossip, t3_gossip]; + for t in threads.into_iter() { + t.join().unwrap(); + } + } } diff --git a/src/erasure.rs b/src/erasure.rs index b8480a73d7529a..12b4223bb99a5d 100644 --- a/src/erasure.rs +++ b/src/erasure.rs @@ -153,7 +153,7 @@ pub fn decode_blocks(data: &mut [&mut [u8]], coding: &[&[u8]], erasures: &[i32]) // Generate coding blocks in window from consumed to consumed+NUM_DATA pub fn generate_coding( re: &BlobRecycler, - window: &mut Vec>, + window: &mut Vec, consumed: usize, ) -> Result<()> { let mut data_blobs = Vec::new(); @@ -179,7 +179,7 @@ pub fn generate_coding( let coding_end = consumed + NUM_CODED; for i in coding_start..coding_end { let n = i % window.len(); - window[n] = Some(re.allocate()); + window[n] = re.allocate(); coding_blobs.push(window[n].clone().unwrap()); } for b in &coding_blobs { @@ -272,7 +272,6 @@ pub fn recover( mod test { use erasure; use packet::{BlobRecycler, SharedBlob, PACKET_DATA_SIZE}; - extern crate env_logger; #[test] pub fn test_coding() { diff --git a/src/historian.rs b/src/historian.rs index 0d56b1deab9a07..beff6801e06609 100644 --- a/src/historian.rs +++ b/src/historian.rs @@ -4,12 +4,13 @@ use entry::Entry; use hash::Hash; use recorder::{ExitReason, Recorder, Signal}; -use std::sync::mpsc::{sync_channel, Receiver, SyncSender}; +use std::sync::mpsc::{sync_channel, Receiver, SyncSender, TryRecvError}; use std::thread::{spawn, JoinHandle}; use std::time::Instant; +use std::sync::{Arc, Mutex}; pub struct Historian { - pub output: Receiver, + pub output: Arc>>, pub thread_hdl: JoinHandle, } @@ -22,7 +23,11 @@ impl Historian { let (entry_sender, output) = sync_channel(10_000); let thread_hdl = Historian::create_recorder(*start_hash, ms_per_tick, event_receiver, entry_sender); - Historian { output, thread_hdl } + let loutput = Arc::new(Mutex::new(output)); + Historian { + output: loutput, + thread_hdl, + } } /// A background thread that will continue tagging received Event messages and @@ -46,6 +51,10 @@ impl Historian { } }) } + + pub fn receive(self: &Self) -> Result { + self.output.lock().unwrap().try_recv() + } } #[cfg(test)] @@ -67,9 +76,9 @@ mod tests { sleep(Duration::new(0, 1_000_000)); input.send(Signal::Tick).unwrap(); - let entry0 = hist.output.recv().unwrap(); - let entry1 = hist.output.recv().unwrap(); - let entry2 = hist.output.recv().unwrap(); + let entry0 = hist.output.lock().unwrap().recv().unwrap(); + let entry1 = hist.output.lock().unwrap().recv().unwrap(); + let entry2 = hist.output.lock().unwrap().recv().unwrap(); assert_eq!(entry0.num_hashes, 0); assert_eq!(entry1.num_hashes, 0); @@ -105,7 +114,7 @@ mod tests { sleep(Duration::from_millis(300)); input.send(Signal::Tick).unwrap(); drop(input); - let entries: Vec = hist.output.iter().collect(); + let entries: Vec = hist.output.lock().unwrap().iter().collect(); assert!(entries.len() > 1); // Ensure the ID is not the seed. diff --git a/src/lib.rs b/src/lib.rs index 7a316f9ade8c64..3d32b7ff79124b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,6 +8,7 @@ pub mod entry; #[cfg(feature = "erasure")] pub mod erasure; pub mod event; +pub mod logger; pub mod hash; pub mod historian; pub mod ledger; @@ -18,7 +19,6 @@ pub mod recorder; pub mod result; pub mod signature; pub mod streamer; -pub mod subscribers; pub mod transaction; extern crate bincode; extern crate byteorder; diff --git a/src/logger.rs b/src/logger.rs new file mode 100644 index 00000000000000..88bcc911bf666f --- /dev/null +++ b/src/logger.rs @@ -0,0 +1,11 @@ +use std::sync::{Once, ONCE_INIT}; +extern crate env_logger; + +static INIT: Once = ONCE_INIT; + +/// Setup function that is only run once, even if called multiple times. +pub fn setup() { + INIT.call_once(|| { + let _ = env_logger::init(); + }); +} diff --git a/src/packet.rs b/src/packet.rs index c4b09eb56edd2f..c7ec88fc8f321f 100644 --- a/src/packet.rs +++ b/src/packet.rs @@ -7,6 +7,8 @@ use std::io; use std::mem::size_of; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, UdpSocket}; use std::sync::{Arc, Mutex, RwLock}; +use bincode::{deserialize, serialize}; +use signature::PublicKey; pub type SharedPackets = Arc>; pub type SharedBlob = Arc>; @@ -14,7 +16,7 @@ pub type PacketRecycler = Recycler; pub type BlobRecycler = Recycler; pub const NUM_PACKETS: usize = 1024 * 8; -const BLOB_SIZE: usize = 64 * 1024; +pub const BLOB_SIZE: usize = 64 * 1024; pub const PACKET_DATA_SIZE: usize = 256; pub const NUM_BLOBS: usize = (NUM_PACKETS * PACKET_DATA_SIZE) / BLOB_SIZE; @@ -211,28 +213,40 @@ impl Packets { } } -const BLOB_INDEX_SIZE: usize = size_of::(); +const BLOB_INDEX_END: usize = size_of::(); +const BLOB_ID_END: usize = BLOB_INDEX_END + size_of::() + size_of::(); impl Blob { pub fn get_index(&self) -> Result { - let mut rdr = io::Cursor::new(&self.data[0..BLOB_INDEX_SIZE]); + let mut rdr = io::Cursor::new(&self.data[0..BLOB_INDEX_END]); let r = rdr.read_u64::()?; Ok(r) } pub fn set_index(&mut self, ix: u64) -> Result<()> { let mut wtr = vec![]; wtr.write_u64::(ix)?; - self.data[..BLOB_INDEX_SIZE].clone_from_slice(&wtr); + self.data[..BLOB_INDEX_END].clone_from_slice(&wtr); Ok(()) } + + pub fn get_id(&self) -> Result { + let e = deserialize(&self.data[BLOB_INDEX_END..BLOB_ID_END])?; + Ok(e) + } + pub fn set_id(&mut self, id: PublicKey) -> Result<()> { + let wtr = serialize(&id)?; + self.data[BLOB_INDEX_END..BLOB_ID_END].clone_from_slice(&wtr); + Ok(()) + } + pub fn data(&self) -> &[u8] { - &self.data[BLOB_INDEX_SIZE..] + &self.data[BLOB_ID_END..] } pub fn data_mut(&mut self) -> &mut [u8] { - &mut self.data[BLOB_INDEX_SIZE..] + &mut self.data[BLOB_ID_END..] } pub fn set_size(&mut self, size: usize) { - self.meta.size = size + BLOB_INDEX_SIZE; + self.meta.size = size + BLOB_ID_END; } pub fn recv_from(re: &BlobRecycler, socket: &UdpSocket) -> Result> { let mut v = VecDeque::new(); diff --git a/src/streamer.rs b/src/streamer.rs index 43e6f2ac353216..e9106c7261a4cb 100644 --- a/src/streamer.rs +++ b/src/streamer.rs @@ -8,7 +8,9 @@ use std::sync::mpsc; use std::sync::{Arc, RwLock}; use std::thread::{spawn, JoinHandle}; use std::time::Duration; -use subscribers::Subscribers; +use crdt::Crdt; +#[cfg(feature = "erasure")] +use erasure; pub type PacketReceiver = mpsc::Receiver; pub type PacketSender = mpsc::Sender; @@ -99,17 +101,14 @@ pub fn blob_receiver( if exit.load(Ordering::Relaxed) { break; } - let ret = recv_blobs(&recycler, &sock, &s); - if ret.is_err() { - break; - } + let _ = recv_blobs(&recycler, &sock, &s); }); Ok(t) } fn recv_window( window: &mut Vec>, - subs: &Arc>, + crdt: &Arc>, recycler: &BlobRecycler, consumed: &mut usize, r: &BlobReceiver, @@ -118,24 +117,25 @@ fn recv_window( ) -> Result<()> { let timer = Duration::new(1, 0); let mut dq = r.recv_timeout(timer)?; + let leader_id = crdt.read().unwrap().leader_data().id; while let Ok(mut nq) = r.try_recv() { dq.append(&mut nq) } { //retransmit all leader blocks let mut retransmitq = VecDeque::new(); - let rsubs = subs.read().unwrap(); for b in &dq { let p = b.read().unwrap(); //TODO this check isn't safe against adverserial packets //we need to maintain a sequence window trace!( - "idx: {} addr: {:?} leader: {:?}", + "idx: {} addr: {:?} id: {:?} leader: {:?}", p.get_index().unwrap(), + p.get_id().unwrap(), p.meta.addr(), - rsubs.leader.addr + leader_id ); - if p.meta.addr() == rsubs.leader.addr { + if p.get_id().unwrap() == leader_id { //TODO //need to copy the retransmited blob //otherwise we get into races with which thread @@ -195,7 +195,7 @@ fn recv_window( pub fn window( exit: Arc, - subs: Arc>, + crdt: Arc>, recycler: BlobRecycler, r: BlobReceiver, s: BlobSender, @@ -210,7 +210,7 @@ pub fn window( } let _ = recv_window( &mut window, - &subs, + &crdt, &recycler, &mut consumed, &r, @@ -221,8 +221,57 @@ pub fn window( }) } +fn broadcast( + crdt: &Arc>, + recycler: &BlobRecycler, + r: &BlobReceiver, + sock: &UdpSocket, + transmit_index: &mut u64, +) -> Result<()> { + let timer = Duration::new(1, 0); + let mut dq = r.recv_timeout(timer)?; + while let Ok(mut nq) = r.try_recv() { + dq.append(&mut nq); + } + let mut blobs = dq.into_iter().collect(); + /// appends codes to the list of blobs allowing us to reconstruct the stream + #[cfg(feature = "erasure")] + erasure::generate_codes(blobs); + Crdt::broadcast(crdt, &blobs, &sock, transmit_index)?; + while let Some(b) = blobs.pop() { + recycler.recycle(b); + } + Ok(()) +} + +/// Service to broadcast messages from the leader to layer 1 nodes. +/// See `crdt` for network layer definitions. +/// # Arguments +/// * `sock` - Socket to send from. +/// * `exit` - Boolean to signal system exit. +/// * `crdt` - CRDT structure +/// * `recycler` - Blob recycler. +/// * `r` - Receive channel for blobs to be retransmitted to all the layer 1 nodes. +pub fn broadcaster( + sock: UdpSocket, + exit: Arc, + crdt: Arc>, + recycler: BlobRecycler, + r: BlobReceiver, +) -> JoinHandle<()> { + spawn(move || { + let mut transmit_index = 0; + loop { + if exit.load(Ordering::Relaxed) { + break; + } + let _ = broadcast(&crdt, &recycler, &r, &sock, &mut transmit_index); + } + }) +} + fn retransmit( - subs: &Arc>, + crdt: &Arc>, recycler: &BlobRecycler, r: &BlobReceiver, sock: &UdpSocket, @@ -233,10 +282,8 @@ fn retransmit( dq.append(&mut nq); } { - let wsubs = subs.read().unwrap(); for b in &dq { - let mut mb = b.write().unwrap(); - wsubs.retransmit(&mut mb, sock)?; + Crdt::retransmit(&crdt, b, sock)?; } } while let Some(b) = dq.pop_front() { @@ -246,26 +293,29 @@ fn retransmit( } /// Service to retransmit messages from the leader to layer 1 nodes. -/// See `subscribers` for network layer definitions. +/// See `crdt` for network layer definitions. /// # Arguments /// * `sock` - Socket to read from. Read timeout is set to 1. /// * `exit` - Boolean to signal system exit. -/// * `subs` - Shared Subscriber structure. This structure needs to be updated and popualted by -/// the accountant. +/// * `crdt` - This structure needs to be updated and populated by the accountant and via gossip. /// * `recycler` - Blob recycler. /// * `r` - Receive channel for blobs to be retransmitted to all the layer 1 nodes. pub fn retransmitter( sock: UdpSocket, exit: Arc, - subs: Arc>, + crdt: Arc>, recycler: BlobRecycler, r: BlobReceiver, ) -> JoinHandle<()> { - spawn(move || loop { - if exit.load(Ordering::Relaxed) { - break; + spawn(move || { + trace!("retransmitter started"); + loop { + if exit.load(Ordering::Relaxed) { + break; + } + let _ = retransmit(&crdt, &recycler, &r, &sock); } - let _ = retransmit(&subs, &recycler, &r, &sock); + trace!("exiting retransmitter"); }) } @@ -356,7 +406,7 @@ mod bench { let time = elapsed.as_secs() * 10000000000 + elapsed.subsec_nanos() as u64; let ftime = (time as f64) / 10000000000f64; let fcount = (end_val - start_val) as f64; - println!("performance: {:?}", fcount / ftime); + trace!("performance: {:?}", fcount / ftime); exit.store(true, Ordering::Relaxed); t_reader.join()?; t_producer1.join()?; @@ -384,14 +434,18 @@ mod test { use std::time::Duration; use streamer::{blob_receiver, receiver, responder, retransmitter, window, BlobReceiver, PacketReceiver}; - use subscribers::{Node, Subscribers}; + use crdt::{Crdt, ReplicatedData}; + use signature::KeyPair; + use signature::KeyPairUtil; + use logger; + use std::thread::sleep; fn get_msgs(r: PacketReceiver, num: &mut usize) { for _t in 0..5 { let timer = Duration::new(1, 0); match r.recv_timeout(timer) { Ok(m) => *num += m.read().unwrap().packets.len(), - e => println!("error {:?}", e), + e => info!("error {:?}", e), } if *num == 10 { break; @@ -445,7 +499,7 @@ mod test { } *num += m.len(); } - e => println!("error {:?}", e), + e => info!("error {:?}", e), } if *num == 10 { break; @@ -455,15 +509,23 @@ mod test { #[test] pub fn window_send_test() { + let pubkey_me = KeyPair::new().pubkey(); let read = UdpSocket::bind("127.0.0.1:0").expect("bind"); let addr = read.local_addr().unwrap(); let send = UdpSocket::bind("127.0.0.1:0").expect("bind"); + let serve = UdpSocket::bind("127.0.0.1:0").expect("bind"); let exit = Arc::new(AtomicBool::new(false)); - let subs = Arc::new(RwLock::new(Subscribers::new( - Node::default(), - Node::new([0; 8], 0, send.local_addr().unwrap()), - &[], - ))); + let rep_data = ReplicatedData::new( + pubkey_me, + read.local_addr().unwrap(), + send.local_addr().unwrap(), + serve.local_addr().unwrap(), + ); + let mut crdt_me = Crdt::new(rep_data); + let me_id = crdt_me.my_data().id; + crdt_me.set_leader(me_id); + let subs = Arc::new(RwLock::new(crdt_me)); + let resp_recycler = BlobRecycler::default(); let (s_reader, r_reader) = channel(); let t_receiver = @@ -487,6 +549,7 @@ mod test { let b_ = b.clone(); let mut w = b.write().unwrap(); w.set_index(i).unwrap(); + w.set_id(me_id).unwrap(); assert_eq!(i, w.get_index().unwrap()); w.meta.size = PACKET_DATA_SIZE; w.meta.set_addr(&addr); @@ -507,43 +570,102 @@ mod test { t_window.join().expect("join"); } + fn test_node() -> (Arc>, UdpSocket, UdpSocket, UdpSocket) { + let gossip = UdpSocket::bind("127.0.0.1:0").unwrap(); + let replicate = UdpSocket::bind("127.0.0.1:0").unwrap(); + let serve = UdpSocket::bind("127.0.0.1:0").unwrap(); + let pubkey = KeyPair::new().pubkey(); + let d = ReplicatedData::new( + pubkey, + gossip.local_addr().unwrap(), + replicate.local_addr().unwrap(), + serve.local_addr().unwrap(), + ); + let crdt = Crdt::new(d); + trace!( + "id: {} gossip: {} replicate: {} serve: {}", + crdt.my_data().id[0], + gossip.local_addr().unwrap(), + replicate.local_addr().unwrap(), + serve.local_addr().unwrap(), + ); + (Arc::new(RwLock::new(crdt)), gossip, replicate, serve) + } + #[test] + //retransmit from leader to replicate target pub fn retransmit() { - let read = UdpSocket::bind("127.0.0.1:0").expect("bind"); - let send = UdpSocket::bind("127.0.0.1:0").expect("bind"); + logger::setup(); + trace!("here"); let exit = Arc::new(AtomicBool::new(false)); - let subs = Arc::new(RwLock::new(Subscribers::new( - Node::default(), - Node::default(), - &[Node::new([0; 8], 1, read.local_addr().unwrap())], - ))); + let (crdt_leader, sock_gossip_leader, _, sock_leader) = test_node(); + let (crdt_target, sock_gossip_target, sock_replicate_target, _) = test_node(); + let leader_data = crdt_leader.read().unwrap().my_data().clone(); + crdt_leader.write().unwrap().insert(leader_data.clone()); + crdt_leader.write().unwrap().set_leader(leader_data.id); + let crdt_leader_g_t = Crdt::gossip(crdt_leader.clone(), exit.clone()); + let crdt_leader_l_t = Crdt::listen(crdt_leader.clone(), sock_gossip_leader, exit.clone()); + + crdt_target.write().unwrap().insert(leader_data.clone()); + crdt_target.write().unwrap().set_leader(leader_data.id); + let crdt_target_g_t = Crdt::gossip(crdt_target.clone(), exit.clone()); + let crdt_target_l_t = Crdt::listen(crdt_target.clone(), sock_gossip_target, exit.clone()); + //leader retransmitter let (s_retransmit, r_retransmit) = channel(); let blob_recycler = BlobRecycler::default(); - let saddr = send.local_addr().unwrap(); + let saddr = sock_leader.local_addr().unwrap(); let t_retransmit = retransmitter( - send, + sock_leader, exit.clone(), - subs, + crdt_leader.clone(), blob_recycler.clone(), r_retransmit, ); + + //target receiver + let (s_blob_receiver, r_blob_receiver) = channel(); + let t_receiver = blob_receiver( + exit.clone(), + blob_recycler.clone(), + sock_replicate_target, + s_blob_receiver, + ).unwrap(); + for _ in 0..10 { + let done = crdt_target.read().unwrap().update_index == 2 + && crdt_leader.read().unwrap().update_index == 2; + if done { + break; + } + let timer = Duration::new(1, 0); + sleep(timer); + } + + //send the data through let mut bq = VecDeque::new(); let b = blob_recycler.allocate(); b.write().unwrap().meta.size = 10; bq.push_back(b); s_retransmit.send(bq).unwrap(); - let (s_blob_receiver, r_blob_receiver) = channel(); - let t_receiver = - blob_receiver(exit.clone(), blob_recycler.clone(), read, s_blob_receiver).unwrap(); - let mut oq = r_blob_receiver.recv().unwrap(); + let timer = Duration::new(5, 0); + trace!("Waiting for timeout"); + let mut oq = r_blob_receiver.recv_timeout(timer).unwrap(); assert_eq!(oq.len(), 1); let o = oq.pop_front().unwrap(); let ro = o.read().unwrap(); assert_eq!(ro.meta.size, 10); assert_eq!(ro.meta.addr(), saddr); exit.store(true, Ordering::Relaxed); - t_receiver.join().expect("join"); - t_retransmit.join().expect("join"); + let threads = vec![ + t_receiver, + t_retransmit, + crdt_target_g_t, + crdt_target_l_t, + crdt_leader_g_t, + crdt_leader_l_t, + ]; + for t in threads { + t.join().unwrap(); + } } } diff --git a/src/subscribers.rs b/src/subscribers.rs deleted file mode 100644 index f0b271c43960b5..00000000000000 --- a/src/subscribers.rs +++ /dev/null @@ -1,149 +0,0 @@ -//! The `subscribers` module defines data structures to keep track of nodes on the network. -//! The network is arranged in layers: -//! -//! * layer 0 - Leader. -//! * layer 1 - As many nodes as we can fit to quickly get reliable `2/3+1` finality -//! * layer 2 - Everyone else, if layer 1 is `2^10`, layer 2 should be able to fit `2^20` number of nodes. -//! -//! It's up to the external state machine to keep this updated. -use packet::Blob; -use rayon::prelude::*; -use result::{Error, Result}; -use std::net::{SocketAddr, UdpSocket}; - -use std::fmt; - -#[derive(Clone, PartialEq)] -pub struct Node { - pub id: [u64; 8], - pub weight: u64, - pub addr: SocketAddr, -} - -//sockaddr doesn't implement default -impl Default for Node { - fn default() -> Node { - Node { - id: [0; 8], - weight: 0, - addr: "0.0.0.0:0".parse().unwrap(), - } - } -} - -impl Node { - pub fn new(id: [u64; 8], weight: u64, addr: SocketAddr) -> Node { - Node { id, weight, addr } - } - fn key(&self) -> i64 { - (self.weight as i64).checked_neg().unwrap() - } -} - -impl fmt::Debug for Node { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Node {{ weight: {} addr: {} }}", self.weight, self.addr) - } -} - -pub struct Subscribers { - data: Vec, - pub me: Node, - pub leader: Node, -} - -impl Subscribers { - pub fn new(me: Node, leader: Node, network: &[Node]) -> Subscribers { - let mut h = Subscribers { - data: vec![], - me: me.clone(), - leader: leader.clone(), - }; - h.insert(&[me, leader]); - h.insert(network); - h - } - - /// retransmit messages from the leader to layer 1 nodes - pub fn retransmit(&self, blob: &mut Blob, s: &UdpSocket) -> Result<()> { - let errs: Vec<_> = self.data - .par_iter() - .map(|i| { - if self.me == *i { - return Ok(0); - } - if self.leader == *i { - return Ok(0); - } - trace!("retransmit blob to {}", i.addr); - s.send_to(&blob.data[..blob.meta.size], &i.addr) - }) - .collect(); - for e in errs { - trace!("retransmit result {:?}", e); - match e { - Err(e) => return Err(Error::IO(e)), - _ => (), - } - } - Ok(()) - } - pub fn insert(&mut self, ns: &[Node]) { - self.data.extend_from_slice(ns); - self.data.sort_by_key(Node::key); - } -} - -#[cfg(test)] -mod test { - use packet::Blob; - use rayon::prelude::*; - use std::net::UdpSocket; - use std::time::Duration; - use subscribers::{Node, Subscribers}; - - #[test] - pub fn subscriber() { - let mut me = Node::default(); - me.weight = 10; - let mut leader = Node::default(); - leader.weight = 11; - let mut s = Subscribers::new(me, leader, &[]); - assert_eq!(s.data.len(), 2); - assert_eq!(s.data[0].weight, 11); - assert_eq!(s.data[1].weight, 10); - let mut n = Node::default(); - n.weight = 12; - s.insert(&[n]); - assert_eq!(s.data.len(), 3); - assert_eq!(s.data[0].weight, 12); - } - #[test] - pub fn retransmit() { - let s1 = UdpSocket::bind("127.0.0.1:0").expect("bind"); - let s2 = UdpSocket::bind("127.0.0.1:0").expect("bind"); - let s3 = UdpSocket::bind("127.0.0.1:0").expect("bind"); - let n1 = Node::new([0; 8], 0, s1.local_addr().unwrap()); - let n2 = Node::new([0; 8], 0, s2.local_addr().unwrap()); - let mut s = Subscribers::new(n1.clone(), n2.clone(), &[]); - let n3 = Node::new([0; 8], 0, s3.local_addr().unwrap()); - s.insert(&[n3]); - let mut b = Blob::default(); - b.meta.size = 10; - let s4 = UdpSocket::bind("127.0.0.1:0").expect("bind"); - s.retransmit(&mut b, &s4).unwrap(); - let res: Vec<_> = [s1, s2, s3] - .into_par_iter() - .map(|s| { - let mut b = Blob::default(); - s.set_read_timeout(Some(Duration::new(1, 0))).unwrap(); - s.recv_from(&mut b.data).is_err() - }) - .collect(); - assert_eq!(res, [true, true, false]); - let mut n4 = Node::default(); - n4.addr = "255.255.255.255:1".parse().unwrap(); - s.insert(&[n4]); - assert!(s.retransmit(&mut b, &s4).is_err()); - } -}