From b4228aa74f6ef4720167236cb072b84d94aa6d2a Mon Sep 17 00:00:00 2001 From: murilo ijanc Date: Fri, 27 Mar 2026 21:54:25 -0300 Subject: Add chunked paste support for content up to 1.44 MB Large pastes are split into 8 KiB chunks on the client side, each stored separately in a dedicated chunks/ directory. A version-2 manifest paste lists the chunk hashes and is announced to the DHT; chunks replicate via periodic republish with per-put throttling to avoid rate-limit bans. - New PUTC/PUTM protocol commands for chunks and manifests - Client-side chunking avoids O(n^2) base58 on large content - HTTP handler reassembles chunks directly from store - DHT sync routes incoming chunks to chunks/ directory - Republish interval reduced to 5 min with 200ms throttle - tp.1 updated with new 1.44 MB limit --- src/ops.rs | 167 ++++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 142 insertions(+), 25 deletions(-) (limited to 'src/ops.rs') diff --git a/src/ops.rs b/src/ops.rs index f45b9ab..2d6b016 100644 --- a/src/ops.rs +++ b/src/ops.rs @@ -9,7 +9,7 @@ use tesseras_dht::Node; use crate::base58; use crate::crypto; -use crate::paste::{MAX_PASTE_SIZE, Paste}; +use crate::paste::{CHUNK_SIZE, FORMAT_VERSION_CHUNKED, MAX_PASTE_SIZE, Paste}; use crate::store::PasteStore; /// Timeout for blocking DHT lookups. @@ -39,6 +39,52 @@ impl std::fmt::Display for PasteError { } } +/// Parse a manifest content into a list of chunk hashes. +fn parse_manifest(content: &[u8]) -> Option<Vec<[u8; 32]>> { + if content.len() < 2 { + return None; + } + let count = u16::from_be_bytes([content[0], content[1]]) as usize; + if content.len() != 2 + count * 32 { + return None; + } + let mut hashes = Vec::with_capacity(count); + for i in 0..count { + let start = 2 + i * 32; + let mut hash = [0u8; 32]; + hash.copy_from_slice(&content[start..start + 32]); + hashes.push(hash); + } + Some(hashes) +} + +/// Fetch paste data from local store (pastes + chunks), +/// falling back to a DHT lookup. 
+fn fetch_paste_data( + node: &mut Node, + store: &PasteStore, + hash: &[u8], +) -> Result<Vec<u8>, PasteError> { + if let Some(local) = store.get_paste(hash) { + return Ok(local); + } + if let Some(local) = store.get_chunk(hash) { + return Ok(local); + } + let vals = node.get_blocking(hash, OP_TIMEOUT); + if vals.is_empty() { + return Err(PasteError::NotFound); + } + match vals.iter().find(|v| { + Paste::from_bytes(v) + .map(|p| Paste::content_key(&p.content) == *hash) + .unwrap_or(false) + }) { + Some(v) => Ok(v.clone()), + None => Err(PasteError::NotFound), + } +} + /// Decode the hash portion of a key string ("hash#enckey" or "hash"). /// Returns the 32-byte hash. fn parse_hash(key_str: &str) -> Result<Vec<u8>, PasteError> { @@ -50,8 +96,11 @@ fn parse_hash(key_str: &str) -> Result<Vec<u8>, PasteError> { Ok(hash) } -/// Store a paste. If `encrypt` is true, encrypts the content and -/// returns "hash#enckey" in base58. Otherwise returns just the hash. +/// Store a single paste. If `encrypt` is true, encrypts the content +/// and returns "hash#enckey" in base58. Otherwise returns just the hash. +/// +/// For content larger than [`CHUNK_SIZE`], use the chunked protocol +/// (PUTC + PUTM) from the client instead. pub fn put_paste( node: &mut Node, store: &PasteStore, @@ -94,8 +143,61 @@ pub fn put_paste( } } +/// Store a single chunk in the chunks directory. +/// Returns the chunk hash in base58. +pub fn put_chunk( + store: &PasteStore, + content: &[u8], + ttl_secs: u64, +) -> Result<String, PasteError> { + if content.len() > CHUNK_SIZE { + return Err(PasteError::TooLarge); + } + let paste = Paste::new(content.to_vec(), ttl_secs); + let serialized = paste.to_bytes(); + let hash = Paste::content_key(&paste.content); + store + .put_chunk(&hash, &serialized) + .map_err(|e| PasteError::Internal(e.to_string()))?; + Ok(base58::encode(&hash)) +} + +/// Store a chunked-paste manifest (version 2). +/// The manifest is announced to the DHT immediately; +/// chunks are replicated via the periodic republish cycle. 
+pub fn put_manifest( + node: &mut Node, + store: &PasteStore, + content: &[u8], + ttl_secs: u64, +) -> Result<String, PasteError> { + if parse_manifest(content).is_none() { + return Err(PasteError::Internal("invalid manifest".into())); + } + let manifest = Paste { + version: FORMAT_VERSION_CHUNKED, + content: content.to_vec(), + created_at: std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs(), + ttl_secs, + }; + let serialized = manifest.to_bytes(); + let hash = Paste::content_key(&manifest.content); + store + .put_paste(&hash, &serialized) + .map_err(|e| PasteError::Internal(e.to_string()))?; + let dht_ttl = std::cmp::min(ttl_secs, u16::MAX as u64) as u16; + node.put(&hash, &serialized, dht_ttl, false); + let hash_b58 = base58::encode(&hash); + log::info!("put: stored manifest {hash_b58}"); + Ok(hash_b58) +} + /// Retrieve a paste by key ("hash#enckey" or bare "hash"). /// Tries local store first, then falls back to a blocking DHT lookup. +/// Transparently reassembles chunked (version-2) pastes. pub fn get_paste( node: &mut Node, store: &PasteStore, @@ -111,31 +213,29 @@ pub fn get_paste( return Err(PasteError::InvalidKey); } - let data = if let Some(local) = store.get_paste(&hash) { - local - } else { - let vals = node.get_blocking(&hash, OP_TIMEOUT); - if vals.is_empty() { - return Err(PasteError::NotFound); - } - // Verify DHT result: the content hash must match the - // requested key to prevent a malicious node from - // injecting arbitrary data. - match vals.iter().find(|v| { - Paste::from_bytes(v) - .map(|p| Paste::content_key(&p.content) == *hash) - .unwrap_or(false) - }) { - Some(v) => v.clone(), - None => return Err(PasteError::NotFound), - } - }; - + let data = fetch_paste_data(node, store, &hash)?; let paste = Paste::from_bytes(&data).ok_or(PasteError::InvalidKey)?; + if paste.is_expired() && !store.is_pinned(&hash) { return Err(PasteError::Expired); } + // Reassemble chunked paste (version 2 = manifest). 
+ let content = if paste.version == FORMAT_VERSION_CHUNKED { + let chunk_hashes = parse_manifest(&paste.content) + .ok_or(PasteError::Internal("corrupt manifest".into()))?; + let mut assembled = Vec::new(); + for chunk_hash in &chunk_hashes { + let chunk_data = fetch_paste_data(node, store, chunk_hash)?; + let chunk_paste = Paste::from_bytes(&chunk_data) + .ok_or(PasteError::Internal("corrupt chunk".into()))?; + assembled.extend_from_slice(&chunk_paste.content); + } + assembled + } else { + paste.content + }; + if let Some(kb58) = enc_key_b58 { let key_bytes = base58::decode(kb58).ok_or(PasteError::InvalidKey)?; if key_bytes.len() != crypto::KEY_SIZE { @@ -143,22 +243,39 @@ pub fn get_paste( } let mut key = [0u8; crypto::KEY_SIZE]; key.copy_from_slice(&key_bytes); - crypto::decrypt(&key, &paste.content) + crypto::decrypt(&key, &content) .ok_or(PasteError::DecryptionFailed) } else { - Ok(paste.content) + Ok(content) } } /// Delete a paste from local store and the DHT. /// Creates a block marker so the paste is not /// re-imported from the DHT by sync. +/// For chunked pastes, also deletes all chunks. pub fn delete_paste( node: &mut Node, store: &PasteStore, key_str: &str, ) -> Result<(), PasteError> { let hash = parse_hash(key_str)?; + + // If this is a chunked manifest, delete its chunks too. + if let Some(data) = store.get_paste(&hash) { + if let Some(paste) = Paste::from_bytes(&data) { + if paste.version == FORMAT_VERSION_CHUNKED { + if let Some(chunk_hashes) = parse_manifest(&paste.content) { + for chunk_hash in &chunk_hashes { + store.block(chunk_hash); + store.remove_chunk(chunk_hash); + node.delete(chunk_hash); + } + } + } + } + } + store.block(&hash); store.remove_paste(&hash); store.unpin(&hash).ok(); -- cgit v1.2.3