use std::{collections::BTreeMap, io::Write};
use bytes::{BufMut, Bytes, BytesMut};
use once_cell::sync::Lazy;
use crate::{
metadata::Manifest,
v2::{
signature::SignatureState,
write::{volumes::VolumeParts, DirEntry, Directory, FileEntry},
Checksum, ChecksumAlgorithm, Index, IndexEntry, Signature, SignatureAlgorithm,
SignatureError, Span, Tag,
},
PathSegment, Version,
};
/// The fixed preamble placed at the very start of every file this writer
/// produces: the crate's magic bytes immediately followed by the bytes of
/// [`Version::V2`].
///
/// The value is built on first access and cloned into each new writer.
static HEADER: Lazy<Bytes> = Lazy::new(|| {
    let mut preamble = BytesMut::with_capacity(8);
    preamble.put_slice(&crate::MAGIC);
    preamble.put_slice(&Version::V2.0);
    preamble.freeze()
});
/// A writer whose type parameter `S` records which stage of the output has
/// been reached.
///
/// In this file the stages are [`WritingManifest`], then [`WritingAtoms`],
/// then [`WritingVolumes`]; each `write_*` step consumes the writer and
/// returns one in the next stage.
#[derive(Debug)]
#[must_use = "A Writer is a state machine and should be run to completion"]
pub struct Writer<S> {
/// Algorithm passed to [`Section::new`] for every section this writer creates.
checksum_algorithm: ChecksumAlgorithm,
/// Stage-specific data accumulated so far.
state: S,
}
impl Writer<WritingManifest> {
    /// Start a new writer in its first stage.
    ///
    /// `checksum_algorithm` is remembered and applied to every section
    /// written later on.
    pub fn new(checksum_algorithm: ChecksumAlgorithm) -> Self {
        let state = WritingManifest {
            header: HEADER.clone(),
        };

        Self {
            checksum_algorithm,
            state,
        }
    }
}
impl Default for Writer<WritingManifest> {
    /// Equivalent to [`Writer::new`] with [`ChecksumAlgorithm::Sha256`].
    fn default() -> Self {
        Self::new(ChecksumAlgorithm::Sha256)
    }
}
impl<S> Writer<S> {
    /// Move the writer into another stage by transforming its state with
    /// `map`, carrying the checksum algorithm across unchanged.
    fn map_state<S2>(self, map: impl FnOnce(S) -> S2) -> Writer<S2> {
        Writer {
            checksum_algorithm: self.checksum_algorithm,
            state: map(self.state),
        }
    }
}
impl Writer<WritingManifest> {
pub fn write_manifest(self, manifest: &Manifest) -> Result<Writer<WritingAtoms>, WriteError> {
self.write_cbor_manifest(manifest)
}
pub fn write_cbor_manifest(
self,
manifest: &impl serde::Serialize,
) -> Result<Writer<WritingAtoms>, WriteError> {
let data = serde_cbor::to_vec(manifest)?;
Ok(self.write_raw_manifest(data))
}
pub fn write_raw_manifest(self, manifest: impl Into<Bytes>) -> Writer<WritingAtoms> {
let manifest = Section::new(Tag::Manifest, manifest, self.checksum_algorithm);
self.map_state(|state| state.with_manifest(manifest))
}
}
impl Writer<WritingAtoms> {
    /// Write the atoms section and advance to the volumes stage.
    ///
    /// The atoms are wrapped in a single flat [`Directory`] (every entry
    /// becomes a [`DirEntry::File`]) and serialized through [`VolumeParts`].
    ///
    /// # Errors
    ///
    /// Propagates any failure reported by [`VolumeParts::serialize`].
    pub fn write_atoms(
        self,
        atoms: BTreeMap<PathSegment, FileEntry<'_>>,
    ) -> Result<Writer<WritingVolumes>, WriteError> {
        let root = Directory {
            children: atoms
                .into_iter()
                .map(|(name, file)| (name, DirEntry::File(file)))
                .collect(),
        };

        let payload = VolumeParts::serialize(root)?.atoms();
        let atoms_section = Section::new(Tag::Atoms, payload, self.checksum_algorithm);

        Ok(self.map_state(|previous| previous.with_atoms(atoms_section)))
    }
}
impl Writer<WritingVolumes> {
    /// Serialize `volume` and register it under `name`.
    ///
    /// # Errors
    ///
    /// - Propagates any failure reported by [`VolumeParts::serialize`].
    /// - Returns [`WriteError::DuplicateVolume`] if a volume called `name`
    ///   has already been written. The previously written volume is left
    ///   untouched in that case.
    pub fn write_volume(&mut self, name: &str, volume: Directory<'_>) -> Result<(), WriteError> {
        let volume = VolumeParts::serialize(volume)?.volume(name);

        // Check *before* inserting: `BTreeMap::insert` replaces an existing
        // value, which would swap out the earlier volume even though the
        // call reports an error.
        if self.state.volumes.contains_key(name) {
            return Err(WriteError::DuplicateVolume(name.to_string()));
        }

        let section = Section::new(Tag::Volume, volume, self.checksum_algorithm);
        self.state.volumes.insert(name.to_string(), section);

        Ok(())
    }

    /// Builder-style variant of [`Writer::write_volume`] that takes and
    /// returns the writer by value.
    ///
    /// # Errors
    ///
    /// Same as [`Writer::write_volume`]; the writer is dropped on error.
    pub fn with_volume(mut self, name: &str, volume: Directory<'_>) -> Result<Self, WriteError> {
        self.write_volume(name, volume)?;
        Ok(self)
    }

    /// Lay out every section, compute the signature and index, and return
    /// the complete file as one contiguous buffer.
    ///
    /// The output is the header followed by each section returned from
    /// `final_layout`, in order.
    ///
    /// # Errors
    ///
    /// Returns [`WriteError::Signature`] or [`WriteError::Cbor`] if computing
    /// the signature or encoding the index fails.
    pub fn finish(self, signature_algorithm: SignatureAlgorithm) -> Result<Bytes, WriteError> {
        let Writer {
            state,
            checksum_algorithm,
        } = self;
        let sections = final_layout(&state, signature_algorithm, checksum_algorithm)?;

        // Reserve room for the header as well as the sections so the buffer
        // never has to grow while it is being filled.
        let total_length = state.header.len()
            + sections
                .iter()
                .map(Section::serialized_length)
                .sum::<usize>();
        let mut buffer = BytesMut::with_capacity(total_length);

        buffer.extend_from_slice(&state.header);
        for section in &sections {
            section.write_to(&mut buffer);
        }

        Ok(buffer.freeze())
    }
}
/// Produce the final, ordered list of sections: the index section first,
/// followed by the sections returned from
/// `calculate_layout_and_initial_index`.
///
/// The index stores where every other section lives, but those positions
/// depend on how long the index section itself is. To break that cycle the
/// index is first built with provisional spans, its encoded size is measured,
/// and a fixed amount of space (the estimate plus 20%) is reserved for it.
/// The spans are then shifted by `with_offset` and the encoded index is
/// zero-padded up to the reserved size.
///
/// # Errors
///
/// Returns an error if the signature cannot be computed or if CBOR encoding
/// of the index fails.
fn final_layout(
    state: &WritingVolumes,
    signature_algorithm: SignatureAlgorithm,
    checksum_algorithm: ChecksumAlgorithm,
) -> Result<Vec<Section>, WriteError> {
    // Same quantity `Section::serialized_length` adds on top of the payload.
    let section_header_length = std::mem::size_of::<Tag>() + std::mem::size_of::<u64>();

    let (provisional_index, body_sections) =
        calculate_layout_and_initial_index(state, signature_algorithm)?;

    let estimated_index_section_length = section_header_length + cbor_len(&provisional_index)?;
    // Shifting the spans can make their encoded integers longer, so leave
    // some slack on top of the estimate.
    let allocated_index_section_length =
        estimated_index_section_length + estimated_index_section_length / 5;

    // Where the first non-index section begins: after the file header, the
    // index payload, and the index section's own tag + length prefix.
    let offset_from_start =
        state.header.len() + allocated_index_section_length + section_header_length;
    let index: Index = provisional_index.with_offset(offset_from_start);

    let mut index_payload = serde_cbor::to_vec(&index)?;
    // NOTE(review): this check is compiled out without debug assertions, in
    // which case the `resize` below would silently truncate an oversized
    // index instead of padding it.
    debug_assert!(
        index_payload.len() < allocated_index_section_length,
        "Insufficient padding was allocated. Expected {} < {}. This is a bug.",
        index_payload.len(),
        allocated_index_section_length
    );
    index_payload.resize(allocated_index_section_length, 0);

    let mut sections = Vec::with_capacity(body_sections.len() + 1);
    sections.push(Section::new(Tag::Index, index_payload, checksum_algorithm));
    sections.extend(body_sections);

    Ok(sections)
}
/// Measure how many bytes `value` occupies when encoded as CBOR, without
/// keeping the encoded bytes around.
///
/// # Errors
///
/// Returns whatever error `serde_cbor` reports while encoding `value`.
fn cbor_len(value: &impl serde::Serialize) -> Result<usize, serde_cbor::Error> {
    /// A sink that throws away everything written to it and only remembers
    /// how many bytes it was given.
    struct ByteCounter(usize);

    impl Write for ByteCounter {
        fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
            let accepted = buf.len();
            self.0 += accepted;
            Ok(accepted)
        }

        fn flush(&mut self) -> std::io::Result<()> {
            // Nothing is buffered, so there is nothing to flush.
            Ok(())
        }
    }

    let mut counter = ByteCounter(0);
    serde_cbor::to_writer(&mut counter, value)?;

    let ByteCounter(total) = counter;
    Ok(total)
}
/// Arrange the manifest, atoms and volume sections one after another and
/// build the matching [`Index`].
///
/// Sections are laid out in the order manifest, atoms, then each volume in
/// the map's iteration order. Every section is also fed to the signature
/// computation in that same order. The spans in the returned index are
/// measured from the start of the first section (offset 0), not from the
/// start of the file.
///
/// # Errors
///
/// Returns a [`SignatureError`] if updating or finalising the signature fails.
fn calculate_layout_and_initial_index(
    state: &WritingVolumes,
    signature_algorithm: SignatureAlgorithm,
) -> Result<(Index, Vec<Section>), SignatureError> {
    let mut layout = LayoutState {
        bytes_written: 0,
        sections: Vec::new(),
        signature_state: signature_algorithm.begin(),
    };

    let manifest = layout.push(state.manifest.clone())?;
    let atoms = layout.push(state.atoms.clone())?;

    // Stops at the first volume whose section cannot be added.
    let volumes = state
        .volumes
        .iter()
        .map(|(name, section)| {
            layout
                .push(section.clone())
                .map(|entry| (name.clone(), entry))
        })
        .collect::<Result<BTreeMap<_, _>, SignatureError>>()?;

    let (sections, signature) = layout.finish()?;

    Ok((
        Index {
            manifest,
            atoms,
            volumes,
            signature,
        },
        sections,
    ))
}
/// Accumulator used while deciding where each section goes.
#[derive(Debug)]
struct LayoutState {
/// Combined serialized length of every section pushed so far; this is also
/// the start offset of the next section.
bytes_written: usize,
/// In-progress signature that every pushed section is fed into.
signature_state: SignatureState,
/// The pushed sections, in push order.
sections: Vec<Section>,
}
impl LayoutState {
    /// Append `section` to the layout and return the index entry describing it.
    ///
    /// The section is fed to the signature computation, then placed
    /// immediately after the previously pushed section. The returned span
    /// starts at the running byte count and covers the section's full
    /// serialized length.
    ///
    /// # Errors
    ///
    /// Returns a [`SignatureError`] if the signature state rejects the
    /// section; in that case the layout is left unchanged.
    fn push(&mut self, section: Section) -> Result<IndexEntry, SignatureError> {
        let checksum = section.checksum.clone();
        self.signature_state.update(&section)?;

        let start = self.bytes_written;
        self.bytes_written += section.serialized_length();
        let span = Span::new(start, self.bytes_written - start);

        self.sections.push(section);
        Ok(IndexEntry { span, checksum })
    }

    /// Finalise the signature and hand back the sections in push order.
    ///
    /// # Errors
    ///
    /// Returns a [`SignatureError`] if the signature cannot be finalised.
    fn finish(self) -> Result<(Vec<Section>, Signature), SignatureError> {
        let LayoutState {
            signature_state,
            sections,
            ..
        } = self;
        let signature = signature_state.finish()?;
        Ok((sections, signature))
    }
}
/// First writer stage: nothing but the file header exists yet, and the
/// manifest is the next thing to be written.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub struct WritingManifest {
/// Bytes emitted at the very start of the finished file.
header: Bytes,
}
impl WritingManifest {
    /// Advance to the atoms stage, keeping the header and recording the
    /// manifest section.
    fn with_manifest(self, manifest: Section) -> WritingAtoms {
        WritingAtoms {
            header: self.header,
            manifest,
        }
    }
}
/// Second writer stage: the manifest has been recorded and the atoms are the
/// next thing to be written.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub struct WritingAtoms {
/// Bytes emitted at the very start of the finished file.
header: Bytes,
/// The already-written manifest section.
manifest: Section,
}
impl WritingAtoms {
    /// Advance to the volumes stage, keeping the header and manifest,
    /// recording the atoms section, and starting with no volumes.
    fn with_atoms(self, atoms: Section) -> WritingVolumes {
        WritingVolumes {
            header: self.header,
            manifest: self.manifest,
            atoms,
            volumes: BTreeMap::new(),
        }
    }
}
/// One tagged chunk of the output file, together with the checksum of its
/// payload.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct Section {
/// Identifies what kind of section this is; written as a single byte.
pub(crate) tag: Tag,
/// The section's payload (everything after the tag and length prefix).
pub(crate) data: Bytes,
/// Checksum of `data`; not written by `write_to`.
pub(crate) checksum: Checksum,
}
impl Section {
    /// Build a section from its tag and payload, computing the payload's
    /// checksum with the given algorithm.
    pub(crate) fn new(tag: Tag, data: impl Into<Bytes>, checksum: ChecksumAlgorithm) -> Self {
        let data: Bytes = data.into();

        Section {
            tag,
            checksum: checksum.calculate(&data),
            data,
        }
    }

    /// Number of bytes [`Section::write_to`] is expected to emit: room for
    /// the tag and a `u64` length, plus the payload itself.
    fn serialized_length(&self) -> usize {
        let prefix_length = std::mem::size_of::<Tag>() + std::mem::size_of::<u64>();
        prefix_length + self.data.len()
    }

    /// Append this section to `buffer` as: tag byte, payload length as a
    /// little-endian `u64`, then the payload. The checksum is not written.
    ///
    /// # Panics
    ///
    /// Panics if the payload length does not fit in a `u64`.
    pub(crate) fn write_to(&self, buffer: &mut BytesMut) {
        buffer.put_u8(self.tag.as_u8());
        buffer.put_u64_le(self.data.len().try_into().unwrap());
        buffer.put_slice(&self.data);
    }
}
/// Final writer stage: manifest and atoms are recorded, and any number of
/// uniquely named volumes may be added before finishing.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub struct WritingVolumes {
/// Bytes emitted at the very start of the finished file.
header: Bytes,
/// The already-written manifest section.
manifest: Section,
/// The already-written atoms section.
atoms: Section,
/// Volume sections written so far, keyed by volume name.
volumes: BTreeMap<String, Section>,
}
/// Everything that can go wrong while building a file with [`Writer`].
#[derive(Debug, thiserror::Error)]
pub enum WriteError {
/// An underlying I/O operation failed; displayed as the wrapped error.
#[error(transparent)]
Io(#[from] std::io::Error),
/// The signature over the sections could not be computed.
#[error("Unable to calculate the signature")]
Signature(#[from] SignatureError),
/// A value (such as the manifest or index) could not be encoded as CBOR.
#[error("Serializing to CBOR failed")]
Cbor(#[from] serde_cbor::Error),
/// A volume with this name had already been written.
#[error("Attempted to write multiple volumes with the name, \"{_0}\"")]
DuplicateVolume(String),
}
// Unit tests pinning the exact bytes produced at each writer stage.
#[cfg(test)]
mod tests {
use crate::{
utils::sha256,
v2::{Checksum, ChecksumAlgorithm, Index, IndexEntry, Signature, Span},
};
use super::*;
// A freshly created writer holds only the header: magic bytes followed by
// the V2 version.
#[test]
fn the_header_section_is_correct() {
let Writer {
state: WritingManifest { header },
..
} = Writer::new(ChecksumAlgorithm::None);
assert_bytes_eq!(
header,
bytes! {
crate::MAGIC,
Version::V2,
}
);
}
// Writing a manifest stores its CBOR encoding as the section payload and
// checksums that payload with the writer's algorithm.
#[test]
fn write_a_basic_manifest_section() {
let writer = Writer {
state: WritingManifest {
header: HEADER.clone(),
},
checksum_algorithm: ChecksumAlgorithm::Sha256,
};
let manifest = Manifest {
entrypoint: Some("python".to_string()),
..Default::default()
};
let Writer { state, .. } = writer.write_manifest(&manifest).unwrap();
let expected = serde_cbor::to_vec(&manifest).unwrap();
assert_eq!(
state,
WritingAtoms {
header: HEADER.clone(),
manifest: Section {
tag: Tag::Manifest,
checksum: ChecksumAlgorithm::Sha256.calculate(&expected),
data: expected.into(),
}
},
)
}
// An empty atoms map is serialized like an empty directory; the manifest
// and header are carried over untouched.
#[test]
fn the_atoms_section_is_a_kind_of_volume() {
let manifest = Section::new(Tag::Manifest, Bytes::new(), ChecksumAlgorithm::None);
let writer = Writer {
state: WritingAtoms {
header: HEADER.clone(),
manifest: manifest.clone(),
},
checksum_algorithm: ChecksumAlgorithm::None,
};
let Writer { state, .. } = writer.write_atoms(BTreeMap::new()).unwrap();
// Expected payload of the atoms section for an empty directory.
let expected = bytes! {
9_u64.to_le_bytes(),
Tag::Directory,
0_u64.to_le_bytes(),
0_u64.to_le_bytes(),
};
assert_bytes_eq!(state.atoms.data, expected);
assert_eq!(
state,
WritingVolumes {
header: HEADER.clone(),
manifest,
atoms: Section {
tag: Tag::Atoms,
checksum: Checksum::none(),
data: expected,
},
volumes: BTreeMap::new(),
}
)
}
// End-to-end check: default manifest, no atoms, and one volume containing a
// single file, compared byte-for-byte against a hand-assembled file.
#[test]
fn create_simple_webc_file() -> Result<(), Box<dyn std::error::Error>> {
let manifest = Manifest::default();
let mut writer = Writer::new(ChecksumAlgorithm::Sha256)
.write_manifest(&manifest)?
.write_atoms(BTreeMap::new())?;
writer.write_volume("first", dir_map!("a" => b"Hello, World!"))?;
let webc = writer.finish(SignatureAlgorithm::None)?;
// Each expected section is: tag byte, little-endian u64 payload length,
// then the payload.
let manifest_section = bytes! {
Tag::Manifest,
1_u64.to_le_bytes(),
// The default manifest is expected to encode as a single byte: an
// empty CBOR map.
[0xa0],
};
let atoms_section = bytes! {
Tag::Atoms,
25_u64.to_le_bytes(),
9_u64.to_le_bytes(),
Tag::Directory,
0_u64.to_le_bytes(),
0_u64.to_le_bytes(),
};
let first_volume_section = bytes! {
Tag::Volume,
117_u64.to_le_bytes(),
5_u64.to_le_bytes(),
"first",
75_u64.to_le_bytes(),
Tag::Directory,
17_u64.to_le_bytes(),
26_u64.to_le_bytes(),
1_u64.to_le_bytes(),
"a",
Tag::File,
0_u64.to_le_bytes(),
13_u64.to_le_bytes(),
sha256("Hello, World!"),
13_u64.to_le_bytes(),
"Hello, World!",
};
// Spans are positions within the finished file (checked by the slicing
// assertions at the end). Checksums cover only the payload, so the
// 9-byte tag + length prefix is skipped with `[9..]`.
let index = Index {
manifest: IndexEntry {
span: Span::new(433, 10),
checksum: Checksum::sha256(sha256(&manifest_section[9..])),
},
atoms: IndexEntry {
span: Span::new(443, 34),
checksum: Checksum::sha256(sha256(&atoms_section[9..])),
},
volumes: [(
"first".to_string(),
IndexEntry {
span: Span::new(477, 126),
checksum: Checksum::sha256(sha256(&first_volume_section[9..])),
},
)]
.into_iter()
.collect(),
signature: Signature::none(),
};
// The index payload is the CBOR-encoded index followed by zero padding up
// to the reserved size (416 bytes in total here).
let index_section = bytes! {
Tag::Index,
416_u64.to_le_bytes(),
serde_cbor::to_vec(&index).unwrap(),
[0_u8; 73],
};
// The whole file: header, then index, manifest, atoms and volume sections.
assert_bytes_eq!(
webc,
bytes! {
crate::MAGIC,
Version::V2,
index_section,
manifest_section,
atoms_section,
first_volume_section,
}
);
// Slicing the file with each recorded span must yield that exact section.
assert_bytes_eq!(&webc[index.manifest.span], manifest_section);
assert_bytes_eq!(&webc[index.atoms.span], atoms_section);
assert_bytes_eq!(&webc[index.volumes["first"].span], first_volume_section);
Ok(())
}
}