use anyhow::Result;
use clap::Parser;
use lmdb::Cursor;
use lmdb::Transaction;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::io::BufRead;
use std::io::Read;
use std::io::Write;
use std::path::PathBuf;

// Subcommands accepted on the command line; `main` dispatches on the parsed
// variant. The `///` comments on the variants are left as they are because
// clap's derive uses doc comments as the help text shown to the user.
#[derive(clap::Parser)]
enum Action {
	// `directory` is passed to `std::fs::read_dir`; its entries are indexed
	// and an encrypted copy of each is written under `encrypted/`.
	/// Index a given directory
	Index { directory: String },
	// `query` is hashed exactly as typed (no case folding or trimming) and
	// looked up in the local keyword table.
	/// Perform a query and output the list of files containing the word
	Query { query: String },
	// Prints the number of records in both LMDB databases.
	/// Output database stats (number of keywords)
	Stats,
	// `filename` is resolved relative to the `encrypted/` directory and the
	// decrypted content is written to standard output.
	/// Decrypt a file from storage
	Decrypt { filename: String },
}

// Top-level command-line options, parsed in `main` with `Options::parse()`.
// The only content is the subcommand to run.
#[derive(clap::Parser)]
struct Options {
	/// action to perform
	#[clap(subcommand)]
	action: Action,
}

// Locally-stored keyword information: the value kept in the `sse_keywords`
// database under the BLAKE3 hash of a word (MessagePack-encoded by `put`).
#[derive(Serialize, Deserialize, Debug)]
struct KeywordData {
	// Number of index entries written for this keyword; incremented once per
	// indexed file that contains the word and used as the upper bound when a
	// query enumerates the CSP records.
	numfiles: u64,
	// Counter mixed into the per-keyword key derivation. Nothing in this file
	// changes it after it is initialised to 0.
	// NOTE(review): presumably meant to be bumped after each search — confirm.
	numsearch: u64,
}

// Search result to be stored on CSP (presumably "cloud service provider" —
// confirm): the plaintext of one `csp_keywords` record. It is
// MessagePack-encoded and sealed with the secret key before being written.
#[derive(Serialize, Deserialize, Debug)]
struct SearchResult {
	// Name of the indexed file (final path component only); this is what a
	// query prints and what the `Decrypt` subcommand expects as its argument.
	filename: String,
	// Value of the keyword's `numfiles` counter at the time this record was
	// inserted.
	numfiles: u64,
}

// helper to perform a key-value query
fn get<V: for<'a> Deserialize<'a>>(transaction: &impl lmdb::Transaction, db: lmdb::Database, key: &impl AsRef<[u8]>) -> Result<Option<V>> {
	let e = transaction.get(db, key);
	let bytes = match e {
		Err(lmdb::Error::NotFound) => return Ok(None),
		smthelse => smthelse?,
	};
	let decoded = rmp_serde::from_slice(bytes)?;
	Ok(Some(decoded))
}

// Key-value insertion helper.
//
// Encodes `value` as MessagePack and stores it under `key` in `db` using the
// given write transaction and no special write flags. The caller is
// responsible for committing the transaction.
fn put(transaction: &mut lmdb::RwTransaction, db: lmdb::Database, key: &impl AsRef<[u8]>, value: &impl Serialize) -> Result<()> {
	let payload = rmp_serde::to_vec(value)?;
	let no_flags = lmdb::WriteFlags::empty();
	Ok(transaction.put(db, key, &payload, no_flags)?)
}

fn main() -> Result<()> {
	// parse command-line options
	let options = Options::parse();

	// generate or load a secret key
	let secret_key = if let Ok(stuff) = std::fs::read("secret.key") {
		orion::aead::SecretKey::from_slice(&stuff)?
	} else {
		let sk = orion::aead::SecretKey::default();
		std::fs::write("secret.key", sk.unprotected_as_bytes())?;
		sk
	};

	// create database directories
	std::fs::create_dir_all("db")?;
	std::fs::create_dir_all("encrypted")?;

	// open LMDB databases
	let path: PathBuf = "db".into();
	let env = lmdb::Environment::new().set_max_dbs(5).set_map_size(10e12 as usize).open(&path)?;
	let csp_keywords = env.create_db(Some("csp_keywords"), Default::default())?;
	let sse_keywords = env.create_db(Some("sse_keywords"), Default::default())?;

	match options.action {
		Action::Index { directory } => {
			// a regular expression to match words
			let word_regex = regex::Regex::new(r"\w[\w']+\w|\w\w|\w")?;
			// make a stop list into set for faster checking
			let stop_set: std::collections::HashSet<&str> = STOP_WORDS.into();

			// read the files from given directory
			for filename_r in std::fs::read_dir(directory)? {
				let filename = filename_r?;
				let path = filename.path();

				// initialize word set to later process every word once
				let mut word_set = HashSet::new();

				// read the file line by line
				let mut bufreader = std::io::BufReader::new(std::fs::File::open(&path)?);
				loop {
					let mut line = vec![];
					let n = bufreader.read_until(0xA, &mut line)?;
					if n == 0 {
						break;
					};
					let line_str = String::from_utf8_lossy(&line);
					// extract the words using regex
					for word in word_regex.find_iter(&line_str) {
						let word_str = word.as_str();
						if stop_set.contains(word_str) {
							continue;
						};
						// ensure the word is in the set
						word_set.insert(word_str.to_string());
					}
				}

				// store encrypted version of the file in encrypted/
				let mut file = std::fs::File::open(&path)?;
				let (mut sealer, nonce) = orion::aead::streaming::StreamSealer::new(&secret_key)?;
				let mut pathbuf = std::path::PathBuf::new();
				pathbuf.push("encrypted");
				pathbuf.push(filename.file_name());
				let mut encrypted_file = std::fs::File::create(&pathbuf)?;

				// write the nonce
				encrypted_file.write_all(nonce.as_ref())?;
				let mut buffer = vec![];

				// read file and encrypt it as a single sealed chunk
				file.read_to_end(&mut buffer)?;
				let sealed_chunk = sealer.seal_chunk(&mut buffer, &orion::aead::streaming::StreamTag::Message)?;

				// write the ciphertext to encrypted/
				encrypted_file.write_all(&sealed_chunk)?;

				// insert discovered words into database
				let mut tx = env.begin_rw_txn()?;
				for word in word_set {
					let keyword = blake3::hash(word.as_bytes()).as_bytes().clone();

					let mut keyword_data = get(&tx, sse_keywords, &keyword)?.unwrap_or_else(|| KeywordData { numfiles: 0, numsearch: 0 });
					keyword_data.numfiles += 1;

					let mut hasher = blake3::Hasher::new();
					hasher.update(&keyword);
					hasher.update(format!("{}", keyword_data.numsearch).as_bytes());
					let keyword_key = hasher.finalize().as_bytes().clone();

					let mut hasher = blake3::Hasher::new();
					hasher.update(&keyword_key);
					hasher.update(format!("{}", keyword_data.numfiles).as_bytes());
					let keyword_address = hasher.finalize().as_bytes().clone();

					let search_result = SearchResult {
						filename: filename.file_name().into_string().unwrap(),
						numfiles: keyword_data.numfiles,
					};
					let keyvalue = orion::aead::seal(&secret_key, &rmp_serde::to_vec(&search_result)?)?;

					// insert (updated) keyword record
					put(&mut tx, sse_keywords, &keyword, &keyword_data)?;

					// insert a CSP keyword record
					put(&mut tx, csp_keywords, &keyword_address, &keyvalue)?;
				}
				tx.commit()?;
			}
		}
		Action::Decrypt { filename } => {
			// open an encrypted file
			let mut pathbuf = PathBuf::new();
			pathbuf.push("encrypted");
			pathbuf.push(filename);
			let mut encrypted_file = std::fs::File::open(&pathbuf)?;

			// read nonce
			let mut nonce_slice = [0; 24];
			encrypted_file.read_exact(&mut nonce_slice)?;

			// decrypt sealed data
			let mut opener = orion::aead::streaming::StreamOpener::new(&secret_key, &orion::aead::streaming::Nonce::from_slice(&nonce_slice)?)?;
			let mut bytes = vec![];
			encrypted_file.read_to_end(&mut bytes)?;
			let (open_chunk, _tag) = opener.open_chunk(&bytes)?;

			// write the file to STDOUT
			std::io::stdout().write_all(&open_chunk)?;
		}
		Action::Stats => {
			let tx = env.begin_ro_txn()?;

			// display the records counts
			let count = tx.open_ro_cursor(sse_keywords)?.iter_start().count();
			println!("{} keywords", count);
			let count = tx.open_ro_cursor(csp_keywords)?.iter_start().count();
			println!("{} CSP keywords", count);
		}
		Action::Query { query } => {
			// start query timer
			let now = std::time::Instant::now();
			let tx = env.begin_ro_txn()?;

			// acquire local keyword data
			let keyword = blake3::hash(query.as_bytes()).as_bytes().clone();

			let keyword_data_o: Option<KeywordData> = get(&tx, sse_keywords, &keyword)?;
			if let Some(keyword_data) = keyword_data_o {
				// for every inserted file, lookup keyword in CSP database
				for numfile in 1..keyword_data.numfiles + 1 {
					let mut hasher = blake3::Hasher::new();
					hasher.update(&keyword);
					hasher.update(format!("{}", keyword_data.numsearch).as_bytes());
					let keyword_key = hasher.finalize().as_bytes().clone();

					let mut hasher = blake3::Hasher::new();
					hasher.update(&keyword_key);
					hasher.update(format!("{}", numfile).as_bytes());
					let keyword_address = hasher.finalize().as_bytes().clone();

					let keyvalue_env: Vec<u8> = get(&tx, csp_keywords, &keyword_address)?.unwrap();
					let keyvalue_b = orion::aead::open(&secret_key, &keyvalue_env)?;
					let search_result: SearchResult = rmp_serde::from_slice(&keyvalue_b)?;
					println!("{}", search_result.filename);
				}
			}
			// display query time
			let elapsed_time = now.elapsed();
			println!("query took {} microseconds.", elapsed_time.as_micros());
		}
	};
	// ensure DB is synchronized to disk before exiting
	env.sync(true)?;

	Ok(())
}

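// Stop words: words that are never indexed. Matching against this list is
// exact and case-sensitive, so only the lowercase spellings below are skipped.
// The list is attributed to InnoDB.
// NOTE(review): its content looks like MySQL's default MyISAM full-text
// stopword list rather than InnoDB's much shorter built-in one — confirm.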
const STOP_WORDS: [&str; 543] = [
	"a's",
	"able",
	"about",
	"above",
	"according",
	"accordingly",
	"across",
	"actually",
	"after",
	"afterwards",
	"again",
	"against",
	"ain't",
	"all",
	"allow",
	"allows",
	"almost",
	"alone",
	"along",
	"already",
	"also",
	"although",
	"always",
	"am",
	"among",
	"amongst",
	"an",
	"and",
	"another",
	"any",
	"anybody",
	"anyhow",
	"anyone",
	"anything",
	"anyway",
	"anyways",
	"anywhere",
	"apart",
	"appear",
	"appreciate",
	"appropriate",
	"are",
	"aren't",
	"around",
	"as",
	"aside",
	"ask",
	"asking",
	"associated",
	"at",
	"available",
	"away",
	"awfully",
	"be",
	"became",
	"because",
	"become",
	"becomes",
	"becoming",
	"been",
	"before",
	"beforehand",
	"behind",
	"being",
	"believe",
	"below",
	"beside",
	"besides",
	"best",
	"better",
	"between",
	"beyond",
	"both",
	"brief",
	"but",
	"by",
	"c'mon",
	"c's",
	"came",
	"can",
	"can't",
	"cannot",
	"cant",
	"cause",
	"causes",
	"certain",
	"certainly",
	"changes",
	"clearly",
	"co",
	"com",
	"come",
	"comes",
	"concerning",
	"consequently",
	"consider",
	"considering",
	"contain",
	"containing",
	"contains",
	"corresponding",
	"could",
	"couldn't",
	"course",
	"currently",
	"definitely",
	"described",
	"despite",
	"did",
	"didn't",
	"different",
	"do",
	"does",
	"doesn't",
	"doing",
	"don't",
	"done",
	"down",
	"downwards",
	"during",
	"each",
	"edu",
	"eg",
	"eight",
	"either",
	"else",
	"elsewhere",
	"enough",
	"entirely",
	"especially",
	"et",
	"etc",
	"even",
	"ever",
	"every",
	"everybody",
	"everyone",
	"everything",
	"everywhere",
	"ex",
	"exactly",
	"example",
	"except",
	"far",
	"few",
	"fifth",
	"first",
	"five",
	"followed",
	"following",
	"follows",
	"for",
	"former",
	"formerly",
	"forth",
	"four",
	"from",
	"further",
	"furthermore",
	"get",
	"gets",
	"getting",
	"given",
	"gives",
	"go",
	"goes",
	"going",
	"gone",
	"got",
	"gotten",
	"greetings",
	"had",
	"hadn't",
	"happens",
	"hardly",
	"has",
	"hasn't",
	"have",
	"haven't",
	"having",
	"he",
	"he's",
	"hello",
	"help",
	"hence",
	"her",
	"here",
	"here's",
	"hereafter",
	"hereby",
	"herein",
	"hereupon",
	"hers",
	"herself",
	"hi",
	"him",
	"himself",
	"his",
	"hither",
	"hopefully",
	"how",
	"howbeit",
	"however",
	"i'd",
	"i'll",
	"i'm",
	"i've",
	"ie",
	"if",
	"ignored",
	"immediate",
	"in",
	"inasmuch",
	"inc",
	"indeed",
	"indicate",
	"indicated",
	"indicates",
	"inner",
	"insofar",
	"instead",
	"into",
	"inward",
	"is",
	"isn't",
	"it",
	"it'd",
	"it'll",
	"it's",
	"its",
	"itself",
	"just",
	"keep",
	"keeps",
	"kept",
	"know",
	"known",
	"knows",
	"last",
	"lately",
	"later",
	"latter",
	"latterly",
	"least",
	"less",
	"lest",
	"let",
	"let's",
	"like",
	"liked",
	"likely",
	"little",
	"look",
	"looking",
	"looks",
	"ltd",
	"mainly",
	"many",
	"may",
	"maybe",
	"me",
	"mean",
	"meanwhile",
	"merely",
	"might",
	"more",
	"moreover",
	"most",
	"mostly",
	"much",
	"must",
	"my",
	"myself",
	"name",
	"namely",
	"nd",
	"near",
	"nearly",
	"necessary",
	"need",
	"needs",
	"neither",
	"never",
	"nevertheless",
	"new",
	"next",
	"nine",
	"no",
	"nobody",
	"non",
	"none",
	"noone",
	"nor",
	"normally",
	"not",
	"nothing",
	"novel",
	"now",
	"nowhere",
	"obviously",
	"of",
	"off",
	"often",
	"oh",
	"ok",
	"okay",
	"old",
	"on",
	"once",
	"one",
	"ones",
	"only",
	"onto",
	"or",
	"other",
	"others",
	"otherwise",
	"ought",
	"our",
	"ours",
	"ourselves",
	"out",
	"outside",
	"over",
	"overall",
	"own",
	"particular",
	"particularly",
	"per",
	"perhaps",
	"placed",
	"please",
	"plus",
	"possible",
	"presumably",
	"probably",
	"provides",
	"que",
	"quite",
	"qv",
	"rather",
	"rd",
	"re",
	"really",
	"reasonably",
	"regarding",
	"regardless",
	"regards",
	"relatively",
	"respectively",
	"right",
	"said",
	"same",
	"saw",
	"say",
	"saying",
	"says",
	"second",
	"secondly",
	"see",
	"seeing",
	"seem",
	"seemed",
	"seeming",
	"seems",
	"seen",
	"self",
	"selves",
	"sensible",
	"sent",
	"serious",
	"seriously",
	"seven",
	"several",
	"shall",
	"she",
	"should",
	"shouldn't",
	"since",
	"six",
	"so",
	"some",
	"somebody",
	"somehow",
	"someone",
	"something",
	"sometime",
	"sometimes",
	"somewhat",
	"somewhere",
	"soon",
	"sorry",
	"specified",
	"specify",
	"specifying",
	"still",
	"sub",
	"such",
	"sup",
	"sure",
	"t's",
	"take",
	"taken",
	"tell",
	"tends",
	"th",
	"than",
	"thank",
	"thanks",
	"thanx",
	"that",
	"that's",
	"thats",
	"the",
	"their",
	"theirs",
	"them",
	"themselves",
	"then",
	"thence",
	"there",
	"there's",
	"thereafter",
	"thereby",
	"therefore",
	"therein",
	"theres",
	"thereupon",
	"these",
	"they",
	"they'd",
	"they'll",
	"they're",
	"they've",
	"think",
	"third",
	"this",
	"thorough",
	"thoroughly",
	"those",
	"though",
	"three",
	"through",
	"throughout",
	"thru",
	"thus",
	"to",
	"together",
	"too",
	"took",
	"toward",
	"towards",
	"tried",
	"tries",
	"truly",
	"try",
	"trying",
	"twice",
	"two",
	"un",
	"under",
	"unfortunately",
	"unless",
	"unlikely",
	"until",
	"unto",
	"up",
	"upon",
	"us",
	"use",
	"used",
	"useful",
	"uses",
	"using",
	"usually",
	"value",
	"various",
	"very",
	"via",
	"viz",
	"vs",
	"want",
	"wants",
	"was",
	"wasn't",
	"way",
	"we",
	"we'd",
	"we'll",
	"we're",
	"we've",
	"welcome",
	"well",
	"went",
	"were",
	"weren't",
	"what",
	"what's",
	"whatever",
	"when",
	"whence",
	"whenever",
	"where",
	"where's",
	"whereafter",
	"whereas",
	"whereby",
	"wherein",
	"whereupon",
	"wherever",
	"whether",
	"which",
	"while",
	"whither",
	"who",
	"who's",
	"whoever",
	"whole",
	"whom",
	"whose",
	"why",
	"will",
	"willing",
	"wish",
	"with",
	"within",
	"without",
	"won't",
	"wonder",
	"would",
	"wouldn't",
	"yes",
	"yet",
	"you",
	"you'd",
	"you'll",
	"you're",
	"you've",
	"your",
	"yours",
	"yourself",
	"yourselves",
	"zero",
];
