1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
use anyhow::{Context, Result};
use pancake_engine_common::fs_utils::{self, AntiCollisionParentDir, NamePattern};
use pancake_engine_common::{SSTable, WritableMemLog};
use pancake_types::{serde::OptDatum, types::Serializable};
use std::path::Path;

const LOG_FILE_NAME: &str = "commit_log.kv";
const SSTABLES_DIR_NAME: &str = "sstables";

/// An LSMTree is an abstraction of a sorted dictionary.
///
/// ### API:
///
/// The exposed operations are: `put one`, `get one`, `get range`.
///
/// Values are immutable. They cannot be modified in-place, and must be replaced.
///
/// ### Internals:
///
/// One [`WritableMemLog`] holds the most recently inserted `{key: value}` in a sorted in-memory table.
///
/// The [`WritableMemLog`] is occasionally flushed into an [`SSTable`].
///
/// Multiple [`SSTable`]s are occasionally compacted into one [`SSTable`].
///
/// ### Querying:
///
/// A `put` operation accesses the Memtable of the [`WritableMemLog`] only.
///
/// A `get` operation generally accesses the [`WritableMemLog`] and all [`SSTable`]s.
///
/// When the same key exists in multiple internal tables, only the result from the newest table is retrieved.
pub struct LSMTree<K, V> {
    memlog: WritableMemLog<K, OptDatum<V>>,

    /// From older to newer.
    sstables: Vec<SSTable<K, OptDatum<V>>>,

    sstables_dir: AntiCollisionParentDir,
}

impl<K, V> LSMTree<K, V>
where
    K: Serializable + Ord + Clone,
    OptDatum<V>: Serializable,
{
    pub fn load_or_new<P: AsRef<Path>>(lsm_dir_path: P) -> Result<Self> {
        let log_file_path = lsm_dir_path.as_ref().join(LOG_FILE_NAME);
        let sstables_dir_path = lsm_dir_path.as_ref().join(SSTABLES_DIR_NAME);
        fs_utils::create_dir_all(&sstables_dir_path)?;

        let memlog = WritableMemLog::load_or_new(log_file_path)?;

        let mut sstable_file_paths = vec![];
        let sstables_dir = AntiCollisionParentDir::load_or_new(
            sstables_dir_path,
            NamePattern::new("", ".kv"),
            |child_path, res_child_num| -> Result<()> {
                let child_num = res_child_num.with_context(|| {
                    format!("An sstables dir contains an unexpected child path {child_path:?}")
                })?;

                sstable_file_paths.push((child_path, child_num));

                Ok(())
            },
        )?;

        sstable_file_paths.sort_by_key(|(_child_path, child_num)| *child_num);
        let sstables = sstable_file_paths
            .into_iter()
            .map(|(child_path, _child_num)| SSTable::load(child_path))
            .collect::<Result<Vec<_>>>()?;

        Ok(Self {
            memlog,
            sstables,
            sstables_dir,
        })
    }
}

mod gc;
mod opers;