use std::{
collections::BTreeMap,
fmt,
fs::File,
io::{BufReader, Read, Write},
path::{Path, PathBuf},
};
use bytes::{BufMut, Bytes, BytesMut};
use sha2::{Digest, Sha256};
use crate::{
readable_bytes,
v2::{Span, Tag},
PathSegment,
};
/// The two byte sections produced when a [`Directory`] tree is serialized.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct VolumeParts {
/// The header section: directory and file entries written by the serializer.
pub(crate) header: Bytes,
/// The data section: the contents of every file, appended in the order the
/// files were serialized.
pub(crate) data: Bytes,
}
impl VolumeParts {
    /// Serialize a [`Directory`] tree into its header and data sections.
    ///
    /// # Errors
    ///
    /// Returns any I/O error raised while the contents of a file are read.
    pub(crate) fn serialize(dir: Directory<'_>) -> Result<Self, std::io::Error> {
        Serializer::default().serialize(dir)
    }

    /// Encode these parts as a named volume.
    ///
    /// The output is three length-prefixed sections, in this order: the
    /// `name`, the header, then the data. Every length is a little-endian
    /// `u64`.
    pub(crate) fn volume(&self, name: &str) -> Bytes {
        const LENGTH_FIELD: usize = std::mem::size_of::<u64>();

        let capacity = self.header.len() + self.data.len() + name.len() + 3 * LENGTH_FIELD;
        let mut out = BytesMut::with_capacity(capacity);

        for section in [name.as_bytes(), &self.header[..], &self.data[..]] {
            out.put_u64_le(u64::try_from(section.len()).unwrap());
            out.put_slice(section);
        }

        out.freeze()
    }

    /// Encode these parts without a name.
    ///
    /// The output is the length-prefixed header followed by the
    /// length-prefixed data. Every length is a little-endian `u64`.
    pub(crate) fn atoms(&self) -> Bytes {
        const LENGTH_FIELD: usize = std::mem::size_of::<u64>();

        let capacity = self.header.len() + self.data.len() + 2 * LENGTH_FIELD;
        let mut out = BytesMut::with_capacity(capacity);

        for section in [&self.header[..], &self.data[..]] {
            out.put_u64_le(u64::try_from(section.len()).unwrap());
            out.put_slice(section);
        }

        out.freeze()
    }
}
/// Accumulates the header and data sections while a directory tree is being
/// serialized.
#[derive(Debug, Default, Clone, PartialEq)]
struct Serializer {
/// Directory and file entries written so far.
header: BytesMut,
/// File contents written so far.
data: BytesMut,
}
impl Serializer {
    /// Serialize `dir` and everything below it, returning the finished
    /// sections.
    ///
    /// # Errors
    ///
    /// Returns any I/O error raised while the contents of a file are read.
    fn serialize(mut self, dir: Directory<'_>) -> Result<VolumeParts, std::io::Error> {
        self.serialize_directory(dir)?;
        Ok(VolumeParts {
            header: self.header.freeze(),
            data: self.data.freeze(),
        })
    }

    /// Serialize a single entry, dispatching on whether it is a directory or
    /// a file, and return the span of the header entry that was written.
    fn serialize_dir_entry(&mut self, dir_entry: DirEntry<'_>) -> Result<Span, std::io::Error> {
        match dir_entry {
            DirEntry::Dir(d) => self.serialize_directory(d),
            DirEntry::File(f) => self.serialize_file(f),
        }
    }

    /// Write a directory entry to the header, followed by all of its
    /// children.
    ///
    /// The entry consists of the directory tag, the length (little-endian
    /// `u64`) of the entry table, and the entry table itself. Each row of the
    /// table is `offset (u64 LE) | name length (u64 LE) | name`, where
    /// `offset` is the position in the header at which that child's own entry
    /// starts.
    ///
    /// The returned span covers the tag, length field and entry table, but
    /// not the children that follow.
    fn serialize_directory(&mut self, dir: Directory<'_>) -> Result<Span, std::io::Error> {
        // Placeholder for fields whose value is only known later. Every
        // placeholder is overwritten before this function returns `Ok`.
        const DUMMY_U64: [u8; std::mem::size_of::<u64>()] =
            [0xde, 0xad, 0xbe, 0xef, 0xba, 0xad, 0xc0, 0xde];

        let overall_start = self.header.len();
        self.header.put_u8(Tag::Directory.as_u8());
        let directory_length_ix = self.header.len();
        self.header.put_slice(&DUMMY_U64);

        // First pass: write the entry table, remembering where each child's
        // offset placeholder lives. The positions are stored in key order,
        // which is the same order `into_values()` yields the children in
        // below, so no name-keyed lookup is required.
        let entries_start = self.header.len();
        let mut offset_fields = Vec::with_capacity(dir.children.len());
        for name in dir.children.keys() {
            offset_fields.push(self.header.len());
            self.header.put_slice(&DUMMY_U64);
            self.header.put_u64_le(u64::try_from(name.len()).unwrap());
            self.header.put_slice(name.as_bytes());
        }
        let end = self.header.len();
        let span = Span::new(overall_start, end - overall_start);

        // The table is complete, so its length can now be patched in.
        let length = u64::try_from(end - entries_start).unwrap().to_le_bytes();
        self.header[directory_length_ix..directory_length_ix + length.len()]
            .copy_from_slice(&length);

        // Second pass: serialize each child and patch its offset into the
        // matching row of the entry table.
        for (entry, offset_field) in dir.children.into_values().zip(offset_fields) {
            let Span { start, .. } = self.serialize_dir_entry(entry)?;
            let offset = u64::try_from(start).unwrap().to_le_bytes();
            self.header[offset_field..offset_field + offset.len()].copy_from_slice(&offset);
        }

        Ok(span)
    }

    /// Append a file's contents to the data section and write its entry to
    /// the header.
    ///
    /// The header entry is the file tag, the start and end positions of the
    /// contents within the data section (both little-endian `u64`), and the
    /// SHA-256 checksum of the contents.
    ///
    /// # Errors
    ///
    /// Returns any I/O error raised while the file's contents are read.
    fn serialize_file(&mut self, file: FileEntry<'_>) -> Result<Span, std::io::Error> {
        let data_start = self.data.len();
        // Hash the contents while they are copied into the data section.
        let mut cs = Sha256ChecksumWriter::new(BufMut::writer(&mut self.data));
        file.write_to(&mut cs)?;
        let checksum = cs.finish();
        let data_end = self.data.len();

        let start = self.header.len();
        self.header.put_u8(Tag::File.as_u8());
        self.header.put_u64_le(u64::try_from(data_start).unwrap());
        self.header.put_u64_le(u64::try_from(data_end).unwrap());
        self.header.put_slice(&checksum);
        let end = self.header.len();

        Ok(Span::new(start, end - start))
    }
}
/// A writer adapter that forwards bytes to `writer` while feeding the bytes
/// that were written into a SHA-256 hasher.
struct Sha256ChecksumWriter<W> {
/// The wrapped writer that receives the bytes.
writer: W,
/// The running SHA-256 state.
state: Sha256,
}
impl<W> Sha256ChecksumWriter<W> {
    /// Wrap `writer`, starting from a fresh hash state.
    fn new(writer: W) -> Self {
        let state = Sha256::default();
        Self { writer, state }
    }

    /// Consume the adapter and return the SHA-256 digest of the bytes hashed
    /// so far.
    fn finish(self) -> [u8; 32] {
        let digest = self.state.finalize();
        digest.into()
    }
}
impl<W: Write> Write for Sha256ChecksumWriter<W> {
    /// Forward `buf` to the inner writer and hash exactly the bytes it
    /// accepted.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let bytes_written = self.writer.write(buf)?;
        // Only the accepted prefix is hashed: after a short write the caller
        // passes the remainder again, and it must not be hashed twice.
        self.state.update(&buf[..bytes_written]);
        Ok(bytes_written)
    }

    /// Flush the inner writer.
    ///
    /// The hasher itself has nothing to flush, but the wrapped writer may be
    /// buffering, so the request has to be passed on instead of dropped.
    fn flush(&mut self) -> std::io::Result<()> {
        self.writer.flush()
    }
}
/// A directory in the tree to be serialized.
#[derive(Debug, Default)]
pub struct Directory<'a> {
/// The directory's entries, keyed by name. Being a `BTreeMap`, they are
/// iterated in key order.
pub children: BTreeMap<PathSegment, DirEntry<'a>>,
}
impl Directory<'static> {
    /// Recursively load a [`Directory`] tree from the directory at
    /// `directory`.
    ///
    /// Sub-directories are loaded eagerly via a recursive call. Every other
    /// entry becomes a [`FileEntry`] created with [`FileEntry::from_path`].
    /// Entries whose name is not valid UTF-8 are skipped.
    ///
    /// # Errors
    ///
    /// Returns an error if the directory or one of its entries cannot be
    /// read, or (with [`std::io::ErrorKind::InvalidInput`]) if an entry's
    /// name cannot be parsed as a [`PathSegment`].
    pub fn from_path(directory: impl AsRef<Path>) -> Result<Self, std::io::Error> {
        let directory = directory.as_ref();
        let mut children: BTreeMap<PathSegment, DirEntry<'_>> = BTreeMap::new();

        for entry in directory.read_dir()? {
            let entry = entry?;

            // The entry's bare file name is exactly the part of its path
            // relative to `directory`.
            let file_name = entry.file_name();
            let name = match file_name.to_str() {
                // File names come from the filesystem, so a name that isn't a
                // valid segment is reported as an error rather than a panic.
                // The parse error is rendered with `Debug` because that is
                // the only formatting it is known to support here.
                Some(s) => s.parse::<PathSegment>().map_err(|e| {
                    std::io::Error::new(
                        std::io::ErrorKind::InvalidInput,
                        format!("\"{s}\" is not a valid path segment: {e:?}"),
                    )
                })?,
                None => continue,
            };

            let path = entry.path();
            let child = if entry.file_type()?.is_dir() {
                DirEntry::Dir(Directory::from_path(&path)?)
            } else {
                DirEntry::File(FileEntry::from_path(path)?)
            };
            children.insert(name, child);
        }

        Ok(Directory { children })
    }
}
/// A single entry inside a [`Directory`].
#[derive(Debug)]
pub enum DirEntry<'a> {
/// A nested directory.
Dir(Directory<'a>),
/// A file.
File(FileEntry<'a>),
}
impl<'a> From<Directory<'a>> for DirEntry<'a> {
fn from(value: Directory<'a>) -> Self {
DirEntry::Dir(value)
}
}
impl<'a, F> From<F> for DirEntry<'a>
where
FileEntry<'a>: From<F>,
{
fn from(value: F) -> Self {
DirEntry::File(value.into())
}
}
/// The contents of a file to be serialized.
pub enum FileEntry<'a> {
/// Contents borrowed from a byte slice.
Borrowed(&'a [u8]),
/// Contents held in an owned [`Bytes`] buffer.
Owned(Bytes),
/// Contents that are read from a reader when the entry is written out.
Reader(Box<dyn Read>),
}
impl<'a> fmt::Debug for FileEntry<'a> {
    /// Format the entry as a tuple named after its variant.
    ///
    /// In-memory contents are rendered through `readable_bytes`; a reader
    /// has no field because its contents are not available without reading.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            FileEntry::Borrowed(b) => {
                let mut tuple = f.debug_tuple("Borrowed");
                tuple.field(&readable_bytes::readable_bytes(b));
                tuple.finish()
            }
            FileEntry::Owned(b) => {
                let mut tuple = f.debug_tuple("Owned");
                tuple.field(&readable_bytes::readable_bytes(b));
                tuple.finish()
            }
            FileEntry::Reader(_) => f.debug_tuple("Reader").finish(),
        }
    }
}
impl FileEntry<'_> {
    /// Create an entry whose contents are read from the file at `path`.
    ///
    /// The file is not opened here; that happens on the first read. The
    /// path's metadata is queried up front, so a path that cannot be
    /// inspected is reported immediately.
    ///
    /// # Errors
    ///
    /// Returns the error from querying the path's metadata.
    pub fn from_path(path: impl Into<PathBuf>) -> Result<Self, std::io::Error> {
        /// A reader that opens its file on the first call to `read()`.
        struct LazyReader {
            path: PathBuf,
            reader: Option<BufReader<File>>,
        }

        impl Read for LazyReader {
            fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
                if self.reader.is_none() {
                    let file = File::open(&self.path)?;
                    self.reader = Some(BufReader::new(file));
                }

                match self.reader.as_mut() {
                    Some(open) => open.read(buf),
                    None => unreachable!("the reader was initialised above"),
                }
            }
        }

        let path = path.into();
        // Only the success of the query matters; the metadata is not used.
        let _metadata = path.metadata()?;

        let lazy = LazyReader { path, reader: None };
        Ok(FileEntry::Reader(Box::new(lazy)))
    }

    /// Write the entire contents of this entry to `writer`.
    ///
    /// # Errors
    ///
    /// Returns any error raised by `writer` or, for a reader-backed entry,
    /// by the underlying reader.
    fn write_to(self, mut writer: impl Write) -> Result<(), std::io::Error> {
        match self {
            FileEntry::Borrowed(slice) => writer.write_all(slice),
            FileEntry::Owned(bytes) => writer.write_all(&bytes),
            // The number of bytes copied is not needed by callers.
            FileEntry::Reader(mut reader) => {
                std::io::copy(&mut reader, &mut writer).map(|_copied| ())
            }
        }
    }
}
impl<'a> From<&'a [u8]> for FileEntry<'a> {
fn from(value: &'a [u8]) -> Self {
FileEntry::Borrowed(value)
}
}
impl<'a, const N: usize> From<&'a [u8; N]> for FileEntry<'a> {
fn from(value: &'a [u8; N]) -> Self {
FileEntry::Borrowed(&value[..])
}
}
impl From<Vec<u8>> for FileEntry<'_> {
fn from(value: Vec<u8>) -> Self {
Bytes::from(value).into()
}
}
impl<const N: usize> From<[u8; N]> for FileEntry<'_> {
    /// Copy the array into an owned buffer used as the file's contents.
    fn from(value: [u8; N]) -> Self {
        let owned: Vec<u8> = value.to_vec();
        Self::from(owned)
    }
}
impl From<Bytes> for FileEntry<'_> {
fn from(value: Bytes) -> Self {
FileEntry::Owned(value)
}
}
// Byte-level tests for the serialized header and data sections.
//
// Sizes used throughout the expected values: a directory entry starts with a
// 1-byte tag and an 8-byte table length; each table row is an 8-byte offset,
// an 8-byte name length and the name; a file entry is a 1-byte tag, two
// 8-byte positions and a 32-byte checksum (49 bytes in total).
#[cfg(test)]
mod tests {
use crate::utils::{length_field, sha256};
use super::*;
// An empty directory is just the tag and a zero table length; no data.
#[test]
fn write_empty_volume() {
let dir = Directory::default();
let VolumeParts { header, data } = VolumeParts::serialize(dir).unwrap();
assert_bytes_eq!(
header,
bytes! {
Tag::Directory,
0_u64.to_le_bytes(),
}
);
assert_bytes_eq!(
data,
bytes! {
}
);
}
// A single file: one table row, followed by the file's entry.
#[test]
fn volume_with_single_file() {
let file3_txt = b"Hello, World!";
let dir = dir_map! {
"file3.txt" => file3_txt,
};
let VolumeParts { header, data } = VolumeParts::serialize(dir).unwrap();
assert_bytes_eq!(
header,
bytes! {
Tag::Directory,
// Table length: 8 (offset) + 8 (name length) + 9 (name).
25_u64.to_le_bytes(),
// The file entry starts right after the table: 1 + 8 + 25.
34_u64.to_le_bytes(),
length_field("file3.txt"),
"file3.txt",
Tag::File,
0_u64.to_le_bytes(),
length_field(file3_txt),
sha256(file3_txt),
}
);
assert_bytes_eq!(data, file3_txt);
}
// Children are emitted in key order ("file1.txt", "file2.txt", "xyz.txt"),
// not in the order they were listed.
#[test]
fn volume_that_just_contains_files() {
let dir = dir_map! {
"file1.txt" => FileEntry::Borrowed(b"first"),
"xyz.txt" => FileEntry::Borrowed(b"second"),
"file2.txt" => FileEntry::Borrowed(b"third"),
};
let VolumeParts { header, data } = VolumeParts::serialize(dir).unwrap();
assert_bytes_eq!(
header,
bytes! {
Tag::Directory,
// Table length: 25 + 25 + 23.
73_u64.to_le_bytes(),
// First file entry: 1 + 8 + 73; each later one is 49 bytes further on.
82_u64.to_le_bytes(),
length_field("file1.txt"),
"file1.txt",
131_u64.to_le_bytes(),
length_field("file2.txt"),
"file2.txt",
180_u64.to_le_bytes(),
length_field("xyz.txt"),
"xyz.txt",
Tag::File,
0_u64.to_le_bytes(),
5_u64.to_le_bytes(),
sha256("first"),
Tag::File,
5_u64.to_le_bytes(),
10_u64.to_le_bytes(),
sha256("third"),
Tag::File,
10_u64.to_le_bytes(),
16_u64.to_le_bytes(),
sha256("second"),
}
);
assert_bytes_eq!(data, b"firstthirdsecond");
}
// A directory containing one empty directory.
#[test]
fn header_with_single_directory() {
let dir = dir_map! {
"root" => dir_map!(),
};
let VolumeParts { header, .. } = VolumeParts::serialize(dir).unwrap();
let expected = bytes! {
Tag::Directory,
// Table length: 8 + 8 + 4.
20_u64.to_le_bytes(),
// "root" starts at 1 + 8 + 20.
29_u64.to_le_bytes(),
length_field("root"),
"root",
Tag::Directory,
0_u64.to_le_bytes(),
};
assert_bytes_eq!(header, expected);
}
// Two levels of nesting, with no files and therefore no data.
#[test]
fn volume_with_nested_empty_directories() {
let dir = dir_map! {
"root" => dir_map! {
"nested" => dir_map! { },
},
};
let VolumeParts { header, data } = VolumeParts::serialize(dir).unwrap();
assert_bytes_eq!(
header,
bytes! {
Tag::Directory,
20_u64.to_le_bytes(),
29_u64.to_le_bytes(),
length_field("root"),
"root",
Tag::Directory,
// Table length: 8 + 8 + 6.
22_u64.to_le_bytes(),
// "nested" starts at 29 + 1 + 8 + 22.
60_u64.to_le_bytes(),
length_field("nested"),
"nested",
Tag::Directory,
0_u64.to_le_bytes(),
}
);
assert!(data.is_empty());
}
// A mix of nested directories and files. Each directory's entry is followed
// by the complete subtree of each of its children, in key order.
#[test]
fn kitchen_sink() {
let xyz_txt = [0xaa; 10];
let file1_txt = [0xbb; 5];
let file2_txt = [0xcc; 8];
let file3_txt = [0xdd; 2];
let dir = dir_map! {
"a" => dir_map! {
"b" => dir_map! {
"xyz.txt" => &xyz_txt,
"file1.txt" => &file1_txt,
},
"c" => dir_map! {
"d" => dir_map!(),
"file2.txt" => &file2_txt,
},
},
"file3.txt" => &file3_txt,
};
let VolumeParts { header, data } = VolumeParts::serialize(dir).unwrap();
assert_bytes_eq!(
header,
bytes! {
// Root directory.
Tag::Directory,
42_u64.to_le_bytes(),
51_u64.to_le_bytes(),
length_field("a"),
"a",
358_u64.to_le_bytes(),
length_field("file3.txt"),
"file3.txt",
// "a", at offset 51.
Tag::Directory,
34_u64.to_le_bytes(),
94_u64.to_le_bytes(),
length_field("b"),
"b",
249_u64.to_le_bytes(),
length_field("c"),
"c",
// "a/b", at offset 94.
Tag::Directory,
48_u64.to_le_bytes(),
151_u64.to_le_bytes(),
length_field("file1.txt"),
"file1.txt",
200_u64.to_le_bytes(),
length_field("xyz.txt"),
"xyz.txt",
// "a/b/file1.txt", at offset 151.
Tag::File,
0_u64.to_le_bytes(),
5_u64.to_le_bytes(),
sha256(file1_txt),
// "a/b/xyz.txt", at offset 200.
Tag::File,
5_u64.to_le_bytes(),
15_u64.to_le_bytes(),
sha256(xyz_txt),
// "a/c", at offset 249.
Tag::Directory,
42_u64.to_le_bytes(),
300_u64.to_le_bytes(),
length_field("d"),
"d",
309_u64.to_le_bytes(),
length_field("file2.txt"),
"file2.txt",
// "a/c/d", at offset 300.
Tag::Directory,
0_u64.to_le_bytes(),
// "a/c/file2.txt", at offset 309.
Tag::File,
15_u64.to_le_bytes(),
23_u64.to_le_bytes(),
sha256(file2_txt),
// "file3.txt", at offset 358.
Tag::File,
23_u64.to_le_bytes(),
25_u64.to_le_bytes(),
sha256(file3_txt),
}
);
assert_bytes_eq!(
data,
[file1_txt.as_slice(), &xyz_txt, &file2_txt, &file3_txt].concat()
);
}
// `Directory::from_path` should produce the same tree as one built in memory.
#[test]
fn load_files_from_directory() {
let temp = tempfile::tempdir().unwrap();
let to = temp.path().join("path").join("to");
let first = to.join("first.txt");
let second = to.join("second.md");
std::fs::create_dir_all(&to).unwrap();
std::fs::write(first, "first".as_bytes()).unwrap();
std::fs::write(second, "# Second".as_bytes()).unwrap();
let dir = Directory::from_path(temp.path()).unwrap();
let expected = dir_map! {
"path" => dir_map! {
"to" => dir_map! {
"first.txt" => b"first",
"second.md" => b"# Second",
}
}
};
assert_directories_match(dir, expected);
}
// Recursively assert that two directories have the same keys and that the
// entries under each key match. Panics if a key is a directory on one side
// and a file on the other.
fn assert_directories_match(mut left: Directory<'_>, mut right: Directory<'_>) {
let left_keys: Vec<_> = left.children.keys().cloned().collect();
let right_keys: Vec<_> = right.children.keys().cloned().collect();
assert_eq!(left_keys, right_keys);
for key in &left_keys {
match (
left.children.remove(key).unwrap(),
right.children.remove(key).unwrap(),
) {
(DirEntry::Dir(left), DirEntry::Dir(right)) => {
assert_directories_match(left, right)
}
(DirEntry::File(left), DirEntry::File(right)) => {
assert_files_match(left, right, key)
}
(DirEntry::Dir(_), DirEntry::File(_)) | (DirEntry::File(_), DirEntry::Dir(_)) => {
panic!()
}
}
}
}
// Assert that two file entries yield the same bytes when written out,
// regardless of which `FileEntry` variant backs each of them.
fn assert_files_match(left: FileEntry<'_>, right: FileEntry<'_>, key: &str) {
let mut left_buffer = Vec::new();
left.write_to(&mut left_buffer).unwrap();
let mut right_buffer = Vec::new();
right.write_to(&mut right_buffer).unwrap();
assert_bytes_eq!(
left_buffer,
right_buffer,
"Entries for \"{key}\" don't match"
);
}
}