diff --git a/Cargo.lock b/Cargo.lock index e5461b0..0107e21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -95,6 +95,26 @@ dependencies = [ "windows-link", ] +[[package]] +name = "const_format" +version = "0.2.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7faa7469a93a566e9ccc1c73fe783b4a65c274c5ace346038dca9c39fe0030ad" +dependencies = [ + "const_format_proc_macros", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + [[package]] name = "core-foundation-sys" version = "0.8.6" @@ -119,6 +139,7 @@ version = "0.5.0" dependencies = [ "assert_cmd", "chrono", + "const_format", "diff", "itoa", "predicates", @@ -420,6 +441,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = "wait-timeout" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index 29331c3..2ca5938 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ path = "src/main.rs" [dependencies] chrono = "0.4.38" +const_format = "0.2.35" diff = "0.1.13" itoa = "1.0.11" regex = "1.10.4" @@ -26,7 +27,7 @@ unicode-width = "0.2.0" pretty_assertions = "1.4.0" assert_cmd = "2.0.14" predicates = "3.1.0" -tempfile = "3.10.1" +tempfile = "3.26.0" [profile.release] lto = "thin" @@ -40,3 +41,10 @@ panic = "abort" [profile.dist] inherits = "release" lto = "thin" + +[features] +# default = ["cmp_bytes_limit_128_bit"] +# allows to set the --bytes limit from u64 to u128, if limits larger than Exabyte are required. +cmp_bytes_limit_128_bit = [] +# instead of limiting to KiB, MiB, etc, one can write kib, mib, Mb or whatever case. +allow_case_insensitive_byte_units = [] diff --git a/fuzz/fuzz_targets/fuzz_side.rs b/fuzz/fuzz_targets/fuzz_side.rs index 8a69c07..4212dc7 100644 --- a/fuzz/fuzz_targets/fuzz_side.rs +++ b/fuzz/fuzz_targets/fuzz_side.rs @@ -2,11 +2,10 @@ #[macro_use] extern crate libfuzzer_sys; -use diffutilslib::side_diff; +use diffutilslib::side_diff::{self, Params}; use std::fs::File; use std::io::Write; -use diffutilslib::params::Params; fuzz_target!(|x: (Vec, Vec, /* usize, usize */ bool)| { let (original, new, /* width, tabsize, */ expand) = x; @@ -39,4 +38,4 @@ fuzz_target!(|x: (Vec, Vec, /* usize, usize */ bool)| { .unwrap() .write_all(&output_buf) .unwrap(); -}); \ No newline at end of file +}); diff --git a/src/arg_parser.rs b/src/arg_parser.rs new file mode 100644 index 0000000..76dddf4 --- /dev/null +++ b/src/arg_parser.rs @@ -0,0 +1,716 @@ +//! This is a generic parser for params/options. +//! +//! The concept is to have this generic parser, which will parse e.g. 'cmp --options' or 'diff --options'. \ +//! For the parser to know which options are possible, they must be given as a list of AppOptions. +//! The AppOptions are known at compile time so 'static is used for all these. +use std::{ffi::OsString, fmt::Display, iter::Peekable}; + +use crate::cmp::Bytes; + +pub type ResultParamsGenParse = Result; +type ResultBytesParse = Result; + +pub const TEXT_COPYRIGHT: &str = r#"Copyright (C) 2026 ? +Licenses: MIT License, Apache License 2.0 . +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law."#; + +// TODO finalize text +pub const TEXT_HELP_FOOTER: &str = r#" +This utility is part of the Rust uutils project: https://github.com/uutils/. +Report bugs here: https://github.com/uutils/diffutils/issues. +"#; + +pub const OPT_HELP: AppOption = AppOption { + long_name: "help", + short: None, + has_arg: false, +}; +pub const OPT_VERSION: AppOption = AppOption { + long_name: "version", + short: Some('v'), + has_arg: false, +}; + +/// This contains the args/options the app allows. They must be all of const value. +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct AppOption { + /// long name of option + pub long_name: &'static str, + pub short: Option, + pub has_arg: bool, + // pub arg_default: Option<&'static str>, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ParsedOption { + pub app_option: &'static AppOption, + pub arg_for_option: Option, + pub name_type_used: OptionNameTypeUsed, +} + +impl ParsedOption { + #[allow(unused)] + pub fn new( + app_option: &'static AppOption, + arg_for_option: String, + name_type_used: OptionNameTypeUsed, + ) -> Self { + Self { + app_option, + arg_for_option: Some(arg_for_option), + name_type_used, + } + } + + pub fn new_none(app_option: &'static AppOption, name_type_used: OptionNameTypeUsed) -> Self { + Self { + app_option, + arg_for_option: None, + name_type_used, + } + } + + pub fn check_add_arg>( + &mut self, + opts: &mut Peekable, + ) -> Result<(), ArgParserError> { + // argument missing + if self.app_option.has_arg { + if self.arg_for_option.is_none() { + // take following argument if it is not an option + if let Some(arg) = opts.peek() { + let arg = arg.to_string_lossy(); + if !arg.starts_with('-') { + self.arg_for_option = Some(arg.to_string()); + _ = opts.next(); + } + } + if self.arg_for_option.is_none() { + return Err(ArgParserError::ArgForOptionMissing(self.clone())); + } + } + } else { + // argument allowed? + if self.arg_for_option.is_some() { + return Err(ArgParserError::ArgForOptionNotAllowed(self.clone())); + } + } + + Ok(()) + } + + pub fn arg_for_option_or_empty_string(&self) -> String { + match &self.arg_for_option { + Some(s) => s.clone(), + None => String::new(), + } + } + + pub fn short_char_or_empty_string(&self) -> String { + match self.app_option.short { + Some(c) => format!(" ('-{c}')"), + None => String::new(), + } + } +} + +impl Default for ParsedOption { + fn default() -> Self { + Self { + app_option: &AppOption { + long_name: "dummy", + short: None, + has_arg: false, + }, + arg_for_option: None, + name_type_used: OptionNameTypeUsed::LongName, + } + } +} + +/// To differentiate the user input, did he use -s or --silent. +/// While this is technically no difference, the error message may vary. +#[derive(Debug, Default, Clone, Copy, PartialEq)] +pub enum OptionNameTypeUsed { + #[default] + LongName, + ShortName, +} + +/// Contains all parser errors and their text messages. +/// +/// First argument is always the exe name ('cmp'). \ +#[derive(Debug, PartialEq)] +pub enum ArgParserError { + /// When the long option is abbreviated, but does not have a unique match. + /// (ambiguous option, possible options) + AmbiguousOption(String, Vec<&'static AppOption>), + + /// : option '--silent' doesn't allow an argument + /// (wrong option) + ArgForOptionNotAllowed(ParsedOption), + + /// (option, short or long name used) + ArgForOptionMissing(ParsedOption), + + /// as parameter missing. + NoExecutable, + + /// Non-existent single dash option. + /// (unidentified option) + InvalidOption(String), + + /// executable (e.g. cmp) but no args for it + NoOperand(DiffUtility), + + // /// TODO Two dashes '--' without option not allowed. GNU cmp has somewhat undefined behavior, this is cleaner. + // OptionUndefined(String), + /// Non-existent double dash option. This is unrecognized because the name can be abbreviated. + /// (unrecognized option) + UnrecognizedOption(String), +} + +impl ArgParserError { + // writes the error message and adds the help hint "Try ' --help' for more information." + pub fn write_err( + f: &mut std::fmt::Formatter<'_>, + exe_name: &str, + msg: &str, + ) -> Result<(), std::fmt::Error> { + write!( + f, + "{exe_name}: {msg}\n{exe_name}: Try '{exe_name} --help' for more information." + ) + } +} + +impl Display for ArgParserError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self { + ArgParserError::AmbiguousOption(param, possible_opts) => { + // create list of possible options + let mut list = Vec::new(); + for opt in possible_opts { + list.push("'--".to_string() + opt.long_name + "'"); + } + write!( + f, + "{}", + &format!( + "option '{param}' is ambiguous; possibilities: {}", + list.join(" ") + ), + ) + } + + ArgParserError::ArgForOptionNotAllowed(opt) => write!( + f, + "{}", + &format!( + "option '{}' doesn't allow an argument", + opt.app_option.long_name + ), + ), + ArgParserError::ArgForOptionMissing(opt) => { + write!( + f, + "{}", + &format!( + "option '--{}'{} requires an argument", + opt.app_option.long_name, + opt.short_char_or_empty_string() + ), + ) + } + ArgParserError::NoExecutable => { + write!(f, "Expected utility name as second argument, got nothing.") + } + ArgParserError::InvalidOption(opt) => { + write!(f, "{}", &format!("invalid option '{opt}'")) + } + ArgParserError::NoOperand(exe_name) => { + write!(f, "{}", &format!("missing operand after '{exe_name}'"),) + } + ArgParserError::UnrecognizedOption(opt) => { + write!(f, "{}", &format!("unrecognized option '{opt}'")) + } + } + } +} + +#[allow(unused)] // required for cmp +pub enum ParseBytesError { + NoValue, + PosOverflow, + InvalidNumber, + InvalidUnit, +} + +#[derive(Debug)] +pub struct ArgParser { + pub util: DiffUtility, + // #[deprecated(note = "use util")] + // pub executable: OsString, + pub options_parsed: Vec, + pub operands: Vec, // pub arg_options: &'a [ArgOption], +} + +impl ArgParser { + pub fn parse_params>( + app_options: &'static [AppOption], + mut args: Peekable, + ) -> ResultParamsGenParse { + // This cannot happen here, it is an error of the main before calling this module. + let Some(name_executable) = args.next() else { + return Err(ArgParserError::NoExecutable); + }; + let util = match DiffUtility::try_from(&name_executable) { + Ok(u) => u, + Err(_) => return Err(ArgParserError::NoExecutable), + }; + let mut arg_parser = Self { + util, + // executable: name_executable, + options_parsed: Vec::new(), + operands: Vec::new(), + }; + // sdiff options begin with ‘-’, so normally from-file and to-file may not begin with ‘-’. + // However, -- as an argument by itself treats the remaining arguments as file names even if they begin with ‘-’. + // You may not use - as an input file. + // read next param as file name, here we generally use read as operand + let mut is_double_dash = false; + + while let Some(param_os) = args.next() { + let mut param = param_os.to_string_lossy().to_string(); + // dbg!(¶m); + let mut ci = param.char_indices().peekable(); + // is param? + let (_, c0) = ci.next().expect("Param must have at least one char!"); + if c0 == '-' && !is_double_dash { + // check 2nd char + match ci.next() { + Some((_, c1)) => { + let mut p_opt = ParsedOption::default(); + // has 3rd char + match ci.peek() { + Some((pos_c2, _c2)) => { + if c1 == '-' { + // long option, e.g. --bytes + + // Find argument for some options, either '=' or following arg. + // This also shortens param to the name. + if let Some(p) = param[*pos_c2..].find('=') { + // only --bytes and --ignore-initial must have bytes, else error + // reduce param to option and + // return bytes without = sign. + p_opt.arg_for_option = + Some(param.split_off(p + *pos_c2)[1..].to_string()); + } + + let possible_opts = Self::identify_options_from_partial_text( + // allow partial option descriptors + app_options, + ¶m[2..], + ); + match possible_opts.len() { + 0 => { + return Err(ArgParserError::UnrecognizedOption(param)); + } + + 1 => p_opt.app_option = *possible_opts.first().unwrap(), + + _ => { + return Err(ArgParserError::AmbiguousOption( + param, + possible_opts, + )); + } + } + + // identified unique option + if *p_opt.app_option == OPT_HELP { + arg_parser.options_parsed = vec![ParsedOption { + app_option: &OPT_HELP, + arg_for_option: None, + name_type_used: OptionNameTypeUsed::LongName, + }]; + arg_parser.operands.clear(); + + return Ok(arg_parser); + } + if *p_opt.app_option == OPT_VERSION { + arg_parser.set_version(OptionNameTypeUsed::LongName); + return Ok(arg_parser); + } + p_opt.name_type_used = OptionNameTypeUsed::LongName; + p_opt.check_add_arg(&mut args)?; + arg_parser.options_parsed.push(p_opt); + } else { + // -MultiSingleChar, e.g. -bl or option with bytes -n200 + let mut c = c1; + let mut pos = 1; + loop { + match app_options.iter().find(|o| o.short == Some(c)) { + Some(opt) => { + if *opt == OPT_VERSION { + // return only version + arg_parser + .set_version(OptionNameTypeUsed::ShortName); + return Ok(arg_parser); + } + + if opt.has_arg { + // take rest of the string as arg + let arg_for_option = if param.len() > pos + 1 { + Some(param[pos + 1..].to_string()) + } else { + args.next().map(|arg| { + arg.to_string_lossy().to_string() + }) + }; + match arg_for_option { + Some(_) => { + arg_parser + .options_parsed + .push(ParsedOption { + app_option: opt, + arg_for_option, + name_type_used: + OptionNameTypeUsed::ShortName, + }); + break; + } + None => return Err( + ArgParserError::ArgForOptionMissing( + ParsedOption::new_none( + opt, + OptionNameTypeUsed::ShortName, + ), + ), + ), + } + } else { + arg_parser.options_parsed.push(ParsedOption { + app_option: opt, + arg_for_option: None, + name_type_used: + OptionNameTypeUsed::ShortName, + }); + } + } + None => { + return Err(ArgParserError::InvalidOption(param)) + } + } + match ci.next() { + Some((p, cx)) => { + c = cx; + pos = p + } + None => break, + } + } + } + } + None => { + // single short options, e.g. -b. + match app_options.iter().find(|opt| { + if let Some(c) = opt.short { + c == c1 + } else { + false + } + }) { + Some(opt) => { + // identified unique option + if *opt == OPT_VERSION { + // return only version + arg_parser.set_version(OptionNameTypeUsed::ShortName); + return Ok(arg_parser); + } + p_opt.app_option = opt; + p_opt.name_type_used = OptionNameTypeUsed::ShortName; + p_opt.check_add_arg(&mut args)?; + arg_parser.options_parsed.push(p_opt); + } + None => { + if c1 == '-' { + is_double_dash = true + } else { + return Err(ArgParserError::InvalidOption(param)); + } + } + } + } + } + } + None => { + // single dash '-', this is for file as StandardInput + arg_parser.operands.push(param_os); + } + } + } else { + // Operand, not an option with - or -- + arg_parser.operands.push(param_os); + } + } + + if arg_parser.operands.is_empty() { + return Err(ArgParserError::NoOperand( + // arg_parser.executable.to_string_lossy().to_string(), + arg_parser.util, + )); + } + + Ok(arg_parser) + } + + pub fn add_copyright(text: &str) -> String { + format!("{text}\n{TEXT_COPYRIGHT}") + } + + pub fn identify_options_from_partial_text( + app_options: &'static [AppOption], + opt: &str, + ) -> Vec<&'static AppOption> { + let l = opt.len(); + let v: Vec<&'static AppOption> = app_options + .iter() + .filter(|&it| it.long_name.len() >= l && &it.long_name[0..l] == opt) + // .copied() + .collect(); + + v + } + + #[allow(unused)] + pub fn is_help(&self) -> bool { + self.options_parsed + .iter() + .any(|opt| *opt.app_option == OPT_HELP) + } + + #[allow(unused)] + pub fn is_version(&self) -> bool { + self.options_parsed + .iter() + .any(|opt| *opt.app_option == OPT_VERSION) + } + + /// Parses a number as defined in . \ + /// e.g. 1024 or 1KiB + #[allow(unused)] // required for cmp + pub fn parse_bytes(bytes: &str) -> ResultBytesParse { + if bytes.is_empty() { + return Err(ParseBytesError::NoValue); + } + + let multiplier: Bytes; + let n = match bytes.find(|b: char| !b.is_ascii_digit()) { + Some(pos) => { + if pos == 0 { + return Err(ParseBytesError::InvalidNumber); + } + multiplier = Self::parse_number_unit(&bytes[pos..])?; + &bytes[0..pos] + } + None => { + multiplier = 1; + bytes + } + }; + + // return value + match n.parse::() { + Ok(num) => { + if multiplier == 1 { + Ok(num) + } else { + match num.checked_mul(multiplier) { + Some(r) => Ok(r), + None => Err(ParseBytesError::PosOverflow), + } + } + } + // This is an additional error message not present in GNU DiffUtils. + Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => { + Err(ParseBytesError::PosOverflow) + } + Err(_) => Err(ParseBytesError::InvalidNumber), + } + } + + /// Returns a multiplier depending on the given unit, e.g. 'KiB' -> 1024, + /// which then can be used to calculate the final number of bytes. + /// Following GNU documentation: https://www.gnu.org/software/diffutils/manual/html_node/cmp-Options.html + #[cfg(not(feature = "allow_case_insensitive_byte_units"))] + #[allow(unused)] // required for cmp + pub fn parse_number_unit(unit: &str) -> ResultBytesParse { + // Note that GNU cmp advertises supporting up to Y, but fails if you try + // to actually use anything beyond E. + let multiplier = match unit { + "kB" | "KB" => 1_000, + "k" | "K" | "KiB" | "kiB" => 1_024, + "MB" => 1_000_000, + "M" | "MiB" => 1_048_576, + "GB" => 1_000_000_000, + "G" | "GiB" => 1_073_741_824, + + "TB" => 1_000_000_000_000, + "T" | "TiB" => 1_099_511_627_776, + "PB" => 1_000_000_000_000_000, + "P" | "PiB" => 1_125_899_906_842_624, + "EB" => 1_000_000_000_000_000_000, + "E" | "EiB" => 1_152_921_504_606_846_976, + + // Everything above EiB cannot fit into u64. + // GNU cmp just returns an invalid bytes value + #[cfg(feature = "cmp_bytes_limit_128_bit")] + "ZB" => 1_000_000_000_000_000_000_000, + #[cfg(feature = "cmp_bytes_limit_128_bit")] + "Z" | "ZiB" => 1_180_591_620_717_411_303_424, + #[cfg(feature = "cmp_bytes_limit_128_bit")] + "YB" => 1_000_000_000_000_000_000_000_000, + #[cfg(feature = "cmp_bytes_limit_128_bit")] + "Y" | "YiB" => 1_208_925_819_614_629_174_706_176, + _ => { + return Err(ParseBytesError::InvalidUnit); + } + }; + + Ok(multiplier) + } + + /// Returns a multiplier depending on the given unit, e.g. 'KiB' -> 1024, + /// which then can be used to calculate the final number of bytes. + /// Following GNU documentation: https://www.gnu.org/software/diffutils/manual/html_node/cmp-Options.html + #[cfg(feature = "allow_case_insensitive_byte_units")] + pub fn parse_number_unit(unit: &str) -> ResultBytesParse { + // Note that GNU cmp advertises supporting up to Y, but fails if you try + // to actually use anything beyond E. + let unit = unit.to_owned().to_ascii_lowercase(); + // .to_ascii_lowercase().as_str(); + let multiplier = match unit.as_str() { + "kb" => 1_000, + "k" | "kib" => 1_024, + "mb" => 1_000_000, + "m" | "mib" => 1_048_576, + "gb" => 1_000_000_000, + "g" | "gib" => 1_073_741_824, + + "tb" => 1_000_000_000_000, + "t" | "tib" => 1_099_511_627_776, + "pb" => 1_000_000_000_000_000, + "p" | "pib" => 1_125_899_906_842_624, + "eb" => 1_000_000_000_000_000_000, + "e" | "eib" => 1_152_921_504_606_846_976, + + // Everything above EiB cannot fit into u64. + // GNU cmp just returns an invalid bytes value + #[cfg(feature = "cmp_bytes_limit_128_bit")] + "zb" => 1_000_000_000_000_000_000_000, + #[cfg(feature = "cmp_bytes_limit_128_bit")] + "z" | "zib" => 1_180_591_620_717_411_303_424, + #[cfg(feature = "cmp_bytes_limit_128_bit")] + "yb" => 1_000_000_000_000_000_000_000_000, + #[cfg(feature = "cmp_bytes_limit_128_bit")] + "y" | "yib" => 1_208_925_819_614_629_174_706_176, + _ => { + return Err(ParseBytesError::InvalidUnit); + } + }; + + Ok(multiplier) + } + + fn set_version(&mut self, name_type_used: OptionNameTypeUsed) { + self.options_parsed = vec![ParsedOption { + app_option: &OPT_VERSION, + arg_for_option: None, + name_type_used, + }]; + self.operands.clear(); + } +} + +/// Differentiates the utilities included in DiffUtil +/// and allows easy output of the name with format!("{diff_util}"). +#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] +#[allow(dead_code)] +pub enum DiffUtility { + Cmp, + #[default] + Diff, + Diff3, + Patch, + SDiff, +} + +impl DiffUtility { + /// Backward compatibility to old param.executable + #[allow(unused)] + pub fn executable(&self) -> OsString { + self.to_os_string() + } + + #[allow(unused)] + pub fn to_os_string(self) -> OsString { + OsString::from(self.to_string()) + } +} + +impl TryFrom<&OsString> for DiffUtility { + type Error = DiffUtilityError; + + fn try_from(util_name: &OsString) -> Result { + match util_name.to_str() { + Some("cmp") => Ok(DiffUtility::Cmp), + Some("diff") => Ok(DiffUtility::Diff), + // Some("diff3") => Ok(DiffUtility::Diff3), + // Some("patch") => Ok(DiffUtility::Patch), + Some("sdiff") => Ok(DiffUtility::SDiff), + Some("diff3") => Err(DiffUtilityError::NotYetSupported("diff3".to_string())), + Some("patch") => Err(DiffUtilityError::NotYetSupported("patch".to_string())), + // Some("sdiff") => Err(DiffUtilityError::NotYetSupported("sdiff".to_string())), + Some(name) => Err(DiffUtilityError::NameNotRecognized(name.to_string())), + None => Err(DiffUtilityError::Nothing), + } + } +} + +impl Display for DiffUtility { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let name = match self { + DiffUtility::Cmp => "cmp", + DiffUtility::Diff => "diff", + DiffUtility::Diff3 => "diff3", + DiffUtility::SDiff => "sdiff", + DiffUtility::Patch => "patch", + }; + write!(f, "{name}") + } +} + +#[derive(Debug)] +pub enum DiffUtilityError { + NameNotRecognized(String), + NotYetSupported(String), + Nothing, +} + +impl Display for DiffUtilityError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + DiffUtilityError::NameNotRecognized(name) => { + write!(f, "{name}: utility name not recognized") + } + DiffUtilityError::NotYetSupported(name) => { + write!(f, "{name}: utility not yet supported") + } + DiffUtilityError::Nothing => { + write!(f, "Expected utility name as second argument, got nothing.") + } + } + } +} diff --git a/src/cmp.rs b/src/cmp.rs index 1db866b..442eb02 100644 --- a/src/cmp.rs +++ b/src/cmp.rs @@ -20,15 +20,23 @@ use std::os::unix::fs::MetadataExt; #[cfg(target_os = "windows")] use std::os::windows::fs::MetadataExt; +/// for --bytes, so really large number limits can be expressed, like 1Y. +#[cfg(not(feature = "cmp_bytes_limit_128_bit"))] +pub type Bytes = u64; +#[cfg(feature = "cmp_bytes_limit_128_bit")] +pub type Bytes = u128; +// ignore initial is currently limited to u64, as take(skip) is used. +pub type IgnInit = u64; + #[derive(Clone, Debug, Default, Eq, PartialEq)] pub struct Params { executable: OsString, from: OsString, to: OsString, print_bytes: bool, - skip_a: Option, - skip_b: Option, - max_bytes: Option, + skip_a: Option, + skip_b: Option, + max_bytes: Option, verbose: bool, quiet: bool, } @@ -66,13 +74,13 @@ pub fn parse_params>(mut opts: Peekable) -> Resu }; let executable_str = executable.to_string_lossy().to_string(); - let parse_skip = |param: &str, skip_desc: &str| -> Result { + let parse_skip = |param: &str, skip_desc: &str| -> Result { let suffix_start = param .find(|b: char| !b.is_ascii_digit()) .unwrap_or(param.len()); - let mut num = match param[..suffix_start].parse::() { + let mut num = match param[..suffix_start].parse::() { Ok(num) => num, - Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX, + Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => IgnInit::MAX, Err(_) => { return Err(format!( "{executable_str}: invalid --ignore-initial value '{skip_desc}'" @@ -83,33 +91,24 @@ pub fn parse_params>(mut opts: Peekable) -> Resu if suffix_start != param.len() { // Note that GNU cmp advertises supporting up to Y, but fails if you try // to actually use anything beyond E. - let multiplier: usize = match ¶m[suffix_start..] { + let multiplier: IgnInit = match ¶m[suffix_start..] { "kB" => 1_000, "K" => 1_024, "MB" => 1_000_000, "M" => 1_048_576, "GB" => 1_000_000_000, "G" => 1_073_741_824, - // This only generates a warning when compiling for target_pointer_width < 64 - #[allow(unused_variables)] - suffix @ ("TB" | "T" | "PB" | "P" | "EB" | "E") => { - #[cfg(target_pointer_width = "64")] - match suffix { - "TB" => 1_000_000_000_000, - "T" => 1_099_511_627_776, - "PB" => 1_000_000_000_000_000, - "P" => 1_125_899_906_842_624, - "EB" => 1_000_000_000_000_000_000, - "E" => 1_152_921_504_606_846_976, - _ => unreachable!(), - } - #[cfg(not(target_pointer_width = "64"))] - usize::MAX - } - "ZB" => usize::MAX, // 1_000_000_000_000_000_000_000, - "Z" => usize::MAX, // 1_180_591_620_717_411_303_424, - "YB" => usize::MAX, // 1_000_000_000_000_000_000_000_000, - "Y" => usize::MAX, // 1_208_925_819_614_629_174_706_176, + "TB" => 1_000_000_000_000, + "T" => 1_099_511_627_776, + "PB" => 1_000_000_000_000_000, + "P" => 1_125_899_906_842_624, + "EB" => 1_000_000_000_000_000_000, + "E" => 1_152_921_504_606_846_976, + // TODO setting usize:MAX does not mimic GNU cmp behavior, it should be an error. + "ZB" => IgnInit::MAX, // 1_000_000_000_000_000_000_000, + "Z" => IgnInit::MAX, // 1_180_591_620_717_411_303_424, + "YB" => IgnInit::MAX, // 1_000_000_000_000_000_000_000_000, + "Y" => IgnInit::MAX, // 1_208_925_819_614_629_174_706_176, _ => { return Err(format!( "{executable_str}: invalid --ignore-initial value '{skip_desc}'" @@ -119,7 +118,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu num = match num.overflowing_mul(multiplier) { (n, false) => n, - _ => usize::MAX, + _ => IgnInit::MAX, } } @@ -173,9 +172,10 @@ pub fn parse_params>(mut opts: Peekable) -> Resu let (_, arg) = param_str.split_once('=').unwrap(); arg.to_string() }; - let max_bytes = match max_bytes.parse::() { + let max_bytes = match max_bytes.parse::() { Ok(num) => num, - Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX, + // TODO limit to MAX is dangerous, this should become an error like in GNU cmp. + Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => Bytes::MAX, Err(_) => { return Err(format!( "{executable_str}: invalid --bytes value '{max_bytes}'" @@ -233,7 +233,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu } // Do as GNU cmp, and completely disable printing if we are - // outputing to /dev/null. + // outputting to /dev/null. #[cfg(not(target_os = "windows"))] if is_stdout_dev_null() { params.quiet = true; @@ -285,7 +285,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu fn prepare_reader( path: &OsString, - skip: &Option, + skip: &Option, params: &Params, ) -> Result, String> { let mut reader: Box = if path == "-" { @@ -304,6 +304,8 @@ fn prepare_reader( }; if let Some(skip) = skip { + // cast as u64 must remain, because value of IgnInit data type could be changed. + #[allow(clippy::unnecessary_cast)] if let Err(e) = io::copy(&mut reader.by_ref().take(*skip as u64), &mut io::sink()) { return Err(format_failure_to_read_input_file( ¶ms.executable, @@ -326,7 +328,7 @@ pub fn cmp(params: &Params) -> Result { let mut from = prepare_reader(¶ms.from, ¶ms.skip_a, params)?; let mut to = prepare_reader(¶ms.to, ¶ms.skip_b, params)?; - let mut offset_width = params.max_bytes.unwrap_or(usize::MAX); + let mut offset_width = params.max_bytes.unwrap_or(Bytes::MAX); if let (Ok(a_meta), Ok(b_meta)) = (fs::metadata(¶ms.from), fs::metadata(¶ms.to)) { #[cfg(not(target_os = "windows"))] @@ -341,7 +343,7 @@ pub fn cmp(params: &Params) -> Result { return Ok(Cmp::Different); } - let smaller = cmp::min(a_size, b_size) as usize; + let smaller = cmp::min(a_size, b_size) as Bytes; offset_width = cmp::min(smaller, offset_width); } @@ -350,8 +352,8 @@ pub fn cmp(params: &Params) -> Result { // Capacity calc: at_byte width + 2 x 3-byte octal numbers + 2 x 4-byte value + 4 spaces let mut output = Vec::::with_capacity(offset_width + 3 * 2 + 4 * 2 + 4); - let mut at_byte = 1; - let mut at_line = 1; + let mut at_byte: Bytes = 1; + let mut at_line: u64 = 1; let mut start_of_line = true; let mut stdout = BufWriter::new(io::stdout().lock()); let mut compare = Cmp::Equal; @@ -401,8 +403,8 @@ pub fn cmp(params: &Params) -> Result { if from_buf[..consumed] == to_buf[..consumed] { let last = from_buf[..consumed].last().unwrap(); - at_byte += consumed; - at_line += from_buf[..consumed].iter().filter(|&c| *c == b'\n').count(); + at_byte += consumed as Bytes; + at_line += (from_buf[..consumed].iter().filter(|&c| *c == b'\n').count()) as u64; start_of_line = *last == b'\n'; @@ -590,7 +592,7 @@ fn format_visible_byte(byte: u8) -> String { fn format_verbose_difference( from_byte: u8, to_byte: u8, - at_byte: usize, + at_byte: Bytes, offset_width: usize, output: &mut Vec, params: &Params, @@ -655,7 +657,7 @@ fn format_verbose_difference( } #[inline] -fn report_eof(at_byte: usize, at_line: usize, start_of_line: bool, eof_on: &str, params: &Params) { +fn report_eof(at_byte: Bytes, at_line: u64, start_of_line: bool, eof_on: &str, params: &Params) { if params.quiet { return; } @@ -707,7 +709,7 @@ fn is_posix_locale() -> bool { } #[inline] -fn report_difference(from_byte: u8, to_byte: u8, at_byte: usize, at_line: usize, params: &Params) { +fn report_difference(from_byte: u8, to_byte: u8, at_byte: Bytes, at_line: u64, params: &Params) { if params.quiet { return; } @@ -804,7 +806,7 @@ mod tests { from: os("foo"), to: os("bar"), skip_a: Some(1), - skip_b: Some(usize::MAX), + skip_b: Some(IgnInit::MAX), ..Default::default() }), parse_params( @@ -982,7 +984,7 @@ mod tests { executable: os("cmp"), from: os("foo"), to: os("bar"), - max_bytes: Some(usize::MAX), + max_bytes: Some(Bytes::MAX), ..Default::default() }), parse_params( @@ -999,6 +1001,7 @@ mod tests { ); // Failure case + // TODO This is actually fine in GNU cmp. --bytes does not have a unit parser yet. assert_eq!( Err("cmp: invalid --bytes value '1K'".to_string()), parse_params( @@ -1044,8 +1047,8 @@ mod tests { executable: os("cmp"), from: os("foo"), to: os("bar"), - skip_a: Some(usize::MAX), - skip_b: Some(usize::MAX), + skip_a: Some(IgnInit::MAX), + skip_b: Some(IgnInit::MAX), ..Default::default() }), parse_params( @@ -1116,8 +1119,12 @@ mod tests { .enumerate() { let values = [ - 1_000usize.checked_pow((i + 1) as u32).unwrap_or(usize::MAX), - 1024usize.checked_pow((i + 1) as u32).unwrap_or(usize::MAX), + (1_000 as IgnInit) + .checked_pow((i + 1) as u32) + .unwrap_or(IgnInit::MAX), + (1024 as IgnInit) + .checked_pow((i + 1) as u32) + .unwrap_or(IgnInit::MAX), ]; for (j, v) in values.iter().enumerate() { assert_eq!( diff --git a/src/diff.rs b/src/diff.rs index f4c0614..0b01335 100644 --- a/src/diff.rs +++ b/src/diff.rs @@ -4,12 +4,9 @@ // files that was distributed with this source code. use crate::params::{parse_params, Format}; -use crate::utils::report_failure_to_read_input_file; -use crate::{context_diff, ed_diff, normal_diff, side_diff, unified_diff}; +use crate::{context_diff, ed_diff, normal_diff, side_diff, unified_diff, utils}; use std::env::ArgsOs; -use std::ffi::OsString; -use std::fs; -use std::io::{self, stdout, Read, Write}; +use std::io::{self, stdout, Write}; use std::iter::Peekable; use std::process::{exit, ExitCode}; @@ -40,35 +37,16 @@ pub fn main(opts: Peekable) -> ExitCode { return ExitCode::SUCCESS; } - // read files - fn read_file_contents(filepath: &OsString) -> io::Result> { - if filepath == "-" { - let mut content = Vec::new(); - io::stdin().read_to_end(&mut content).and(Ok(content)) - } else { - fs::read(filepath) - } - } - let mut io_error = false; - let from_content = match read_file_contents(¶ms.from) { - Ok(from_content) => from_content, + let (from_content, to_content) = match utils::read_both_files(¶ms.from, ¶ms.to) { + Ok(contents) => contents, Err(e) => { - report_failure_to_read_input_file(¶ms.executable, ¶ms.from, &e); - io_error = true; - vec![] - } - }; - let to_content = match read_file_contents(¶ms.to) { - Ok(to_content) => to_content, - Err(e) => { - report_failure_to_read_input_file(¶ms.executable, ¶ms.to, &e); - io_error = true; - vec![] + eprintln!( + "{}", + utils::format_failure_to_read_input_files(¶ms.executable, &e) + ); + return ExitCode::from(2); } }; - if io_error { - return ExitCode::from(2); - } // run diff let result: Vec = match params.format { @@ -81,7 +59,7 @@ pub fn main(opts: Peekable) -> ExitCode { }), Format::SideBySide => { let mut output = stdout().lock(); - side_diff::diff(&from_content, &to_content, &mut output, ¶ms) + side_diff::diff(&from_content, &to_content, &mut output, &(¶ms).into()) } }; if params.brief && !result.is_empty() { diff --git a/src/lib.rs b/src/lib.rs index 342b01c..8dfa3cf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,11 @@ +pub mod arg_parser; pub mod cmp; pub mod context_diff; pub mod ed_diff; pub mod macros; pub mod normal_diff; pub mod params; +pub mod sdiff; pub mod side_diff; pub mod unified_diff; pub mod utils; diff --git a/src/main.rs b/src/main.rs index b7c2712..266a2be 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,6 +11,7 @@ use std::{ process::ExitCode, }; +mod arg_parser; mod cmp; mod context_diff; mod diff; @@ -18,6 +19,7 @@ mod ed_diff; mod macros; mod normal_diff; mod params; +mod sdiff; mod side_diff; mod unified_diff; mod utils; @@ -70,8 +72,9 @@ fn main() -> ExitCode { }; match util_name.to_str() { - Some("diff") => diff::main(args), Some("cmp") => cmp::main(args), + Some("diff") => diff::main(args), + Some("sdiff") => sdiff::main(args), Some(name) => { eprintln!("{name}: utility not supported"); ExitCode::from(2) diff --git a/src/sdiff.rs b/src/sdiff.rs new file mode 100644 index 0000000..7aca692 --- /dev/null +++ b/src/sdiff.rs @@ -0,0 +1,119 @@ +//! This module holds the core compare logic of sdiff. +pub mod params_sdiff; +pub mod params_sdiff_def; + +use std::{ + env::ArgsOs, + fmt::Display, + io::{stdout, Write}, + iter::Peekable, + process::ExitCode, +}; + +use crate::{ + sdiff::{params_sdiff::ParamsSdiff, params_sdiff_def::ParamsSdiffOk}, + side_diff, utils, +}; + +pub const EXE_NAME: &str = "sdiff"; + +/// Entry into sdiff. +/// +/// Param options, e.g. 'sdiff file1.txt file2.txt -bd n2000kB'. \ +/// sdiff options as documented at +/// +/// Exit codes are documented at +/// https://www.gnu.org/software/diffutils/manual/html_node/Invoking-sdiff.html \ +/// Exit status is 0 if inputs are identical, 1 if different, 2 in error case. +pub fn main(opts: Peekable) -> ExitCode { + let params = match ParamsSdiff::parse_params(opts) { + Ok(res) => match res { + ParamsSdiffOk::Info(info) => { + println!("{info}"); + return ExitCode::from(0); + } + ParamsSdiffOk::ParamsSdiff(params) => params, + }, + Err(e) => { + eprintln!("{e}"); + return ExitCode::from(2); + } + }; + + if params.from == "-" && params.to == "-" + || same_file::is_same_file(¶ms.from, ¶ms.to).unwrap_or(false) + { + return ExitCode::SUCCESS; + } + + match sdiff(¶ms) { + Ok(SdiffOk::Equal) => ExitCode::SUCCESS, + Ok(SdiffOk::Different) => ExitCode::from(1), + Err(e) => { + // if !params.silent { + eprintln!("{e}"); + // } + ExitCode::from(2) + } + } +} + +#[derive(Debug)] +pub enum SdiffOk { + Different, + Equal, +} + +/// Errors of core sdiff functionality. +/// To centralize error messages and make it easier to use in a lib. +#[derive(Debug, PartialEq)] +pub enum SdiffError { + OutputError(String), + // (msg) + ReadFileError(String), +} + +impl Display for SdiffError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SdiffError::OutputError(msg) => write!(f, "{msg}"), + SdiffError::ReadFileError(msg) => write!(f, "{msg}"), + } + } +} + +/// This is the main function to compare the files. \ +/// Files are limited to u64 bytes and u64 lines. +/// TODO sdiff is missing a number of options, currently implemented: +/// * expand_tabs +/// * tabsize +/// * width +pub fn sdiff(params: &ParamsSdiff) -> Result { + let (from_content, to_content) = match utils::read_both_files(¶ms.from, ¶ms.to) { + Ok(contents) => contents, + // Err((filepath, error)) => { + Err(errors) => { + let msg = utils::format_failure_to_read_input_files(¶ms.util.executable(), &errors); + return Err(SdiffError::ReadFileError(msg)); + } + }; + + // run diff + let mut output = stdout().lock(); + let result = side_diff::diff(&from_content, &to_content, &mut output, ¶ms.into()); + + match std::io::stdout().write_all(&result) { + Ok(_) => { + if result.is_empty() { + Ok(SdiffOk::Equal) + } else { + Ok(SdiffOk::Different) + } + } + Err(e) => Err(SdiffError::OutputError(e.to_string())), + } + + // println!("\nsdiff does not compare files yet."); + // println!("{:?} or {:?}?", SdiffOk::Different, SdiffOk::Equal); + // Ok(SdiffOk::Equal) +} diff --git a/src/sdiff/params_sdiff.rs b/src/sdiff/params_sdiff.rs new file mode 100644 index 0000000..27f3074 --- /dev/null +++ b/src/sdiff/params_sdiff.rs @@ -0,0 +1,324 @@ +//! This module contains the Parser for sdiff arguments. +//! +//! All option definitions, output texts and the Error handling is in [super::params_sdiff_def]. +use std::{ffi::OsString, iter::Peekable}; + +use crate::{ + arg_parser::{ArgParser, ArgParserError, DiffUtility, ParsedOption, OPT_HELP, OPT_VERSION}, + sdiff::params_sdiff_def::*, +}; + +/// Holds the given command line arguments except "--version" and "--help". +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct ParamsSdiff { + /// Identifier + pub util: DiffUtility, + pub from: OsString, + pub to: OsString, + /// --diff-program=PROGRAM use PROGRAM to compare files + pub diff_program: Option, + /// -t, --expand-tabs expand tabs to spaces in output + pub expand_tabs: bool, + /// --help display this help and exit + pub help: bool, + /// -W, --ignore-all-space ignore all white space + pub ignore_all_space: bool, + /// -B, --ignore-blank-lines ignore changes whose lines are all blank + pub ignore_blank_lines: bool, + /// -i, --ignore-case consider upper- and lower-case to be the same + pub ignore_case: bool, + /// -I, --ignore-matching-lines=REGEXP ignore changes all whose lines match REGEXP + pub ignore_matching_lines: Option, + /// -b, --ignore-space-change ignore changes in the amount of white space + pub ignore_space_change: bool, + /// -E, --ignore-tab-expansion ignore changes due to tab expansion + pub ignore_tab_expansion: bool, + /// -Z, --ignore-trailing-space ignore white space at line end + pub ignore_trailing_space: bool, + /// -l, --left-column output only the left column of common lines + pub left_column: bool, + /// -d, --minimal try hard to find a smaller set of changes + pub minimal: bool, + /// -o, --output=FILE operate interactively, sending output to FILE + pub output: Option, + /// -H, --speed-large-files assume large files, many scattered small changes + pub speed_large_files: bool, + /// --strip-trailing-cr strip trailing carriage return on input + pub strip_trailing_cr: bool, + /// -s, --suppress-common-lines do not output common lines + pub suppress_common_lines: bool, + /// --tabsize=NUM tab stops at every NUM (default 8) print columns + pub tabsize: usize, + /// -a, --text treat all files as text + pub text: bool, + /// -v, --version output version information and exit + pub version: bool, + /// -w, --width=NUM output at most NUM (default 130) print columns + pub width: usize, +} + +impl ParamsSdiff { + pub fn parse_params>(opts: Peekable) -> ResultParamsSdiffParse { + let p_gen = ArgParser::parse_params(&ARG_OPTIONS, opts)?; + Self::try_from(&p_gen) + } + + fn try_from(parser: &ArgParser) -> ResultParamsSdiffParse { + let mut params = Self { + util: DiffUtility::SDiff, + ..Default::default() + }; + + // set options + for parsed_option in &parser.options_parsed { + dbg!(parsed_option); + match *parsed_option.app_option { + OPT_DIFF_PROGRAM => params.diff_program = parsed_option.arg_for_option.clone(), + OPT_EXPAND_TABS => params.expand_tabs = true, + OPT_HELP => return Ok(ParamsSdiffOk::Info(ArgParser::add_copyright(TEXT_HELP))), + OPT_IGNORE_ALL_SPACE => params.ignore_all_space = true, + OPT_IGNORE_BLANK_LINES => params.ignore_blank_lines = true, + OPT_IGNORE_CASE => params.ignore_case = true, + OPT_IGNORE_MATCHING_LINES => { + params.ignore_matching_lines = parsed_option.arg_for_option.clone() + } + OPT_IGNORE_SPACE_CHANGE => params.ignore_space_change = true, + OPT_IGNORE_TAB_EXPANSION => params.ignore_tab_expansion = true, + OPT_IGNORE_TRAILING_SPACE => params.ignore_trailing_space = true, + OPT_LEFT_COLUMN => params.left_column = true, + OPT_MINIMAL => params.minimal = true, + OPT_OUTPUT => params.output = parsed_option.arg_for_option.clone(), + OPT_SPEED_LARGE_FILES => params.speed_large_files = true, + OPT_STRIP_TRAILING_CR => params.strip_trailing_cr = true, + OPT_SUPPRESS_COMMON_LINES => params.suppress_common_lines = true, + OPT_TABSIZE => { + params.set_tabsize(parsed_option)?; + } + OPT_TEXT => params.text = true, + OPT_VERSION => return Ok(ParamsSdiffOk::Info(TEXT_VERSION.to_string())), + OPT_WIDTH => { + params.set_width(parsed_option)?; + } + + // This is not an error, but a todo. Unfortunately an Enum is not possible. + _ => todo!("Err Option: {}", parsed_option.app_option.long_name), + } + } + + // set operands + match parser.operands.len() { + 0 => { + return Err(ParamsSdiffError::ArgParserError(ArgParserError::NoOperand( + params.util, + ))) + } + // If only file_1 is set, then file_2 defaults to '-', so it reads from StandardInput. + 1 => { + params.from = parser.operands[0].clone(); + params.to = OsString::from("-"); + } + 2 => { + params.from = parser.operands[0].clone(); + params.to = parser.operands[1].clone(); + } + _ => { + return Err(ParamsSdiffError::ExtraOperand( + parser.operands[2].to_string_lossy().to_string(), + )); + } + } + + // // Do as GNU sdiff, and completely disable printing if we are + // // outputting to /dev/null. + // #[cfg(not(target_os = "windows"))] + // if crate::sdiff::is_stdout_dev_null() { + // params.silent = true; + // params.verbose = false; + // params.print_bytes = false; + // } + + // dbg!(¶ms); + Ok(ParamsSdiffOk::ParamsSdiff(params)) + } + + pub fn set_tabsize(&mut self, parsed_option: &ParsedOption) -> Result { + let tab_size = parsed_option.arg_for_option.clone().unwrap_or_default(); + let t = match tab_size.parse::() { + Ok(w) => w, + Err(_) => return Err(ParamsSdiffError::InvalidNumber(parsed_option.clone())), + }; + self.tabsize = t; + + Ok(t) + } + + pub fn set_width(&mut self, parsed_option: &ParsedOption) -> Result { + let width = parsed_option.arg_for_option.clone().unwrap_or_default(); + let w = match width.parse::() { + Ok(w) => w, + Err(_) => return Err(ParamsSdiffError::InvalidNumber(parsed_option.clone())), + }; + self.width = w; + + Ok(w) + } +} + +impl Default for ParamsSdiff { + fn default() -> Self { + Self { + util: DiffUtility::SDiff, + from: Default::default(), + to: Default::default(), + diff_program: Default::default(), + expand_tabs: Default::default(), + help: Default::default(), + ignore_all_space: Default::default(), + ignore_blank_lines: Default::default(), + ignore_case: Default::default(), + ignore_matching_lines: Default::default(), + ignore_space_change: Default::default(), + ignore_tab_expansion: Default::default(), + ignore_trailing_space: Default::default(), + left_column: Default::default(), + minimal: Default::default(), + output: Default::default(), + speed_large_files: Default::default(), + strip_trailing_cr: Default::default(), + suppress_common_lines: Default::default(), + tabsize: 8, + text: Default::default(), + version: Default::default(), + width: 130, + } + } +} + +// Usually assert is used like assert_eq(test result, expected result). +#[cfg(test)] +mod tests { + use super::*; + // use crate::arg_parser::OPT_VERSION; + + pub const TEXT_HELP_HINT: &str = "Try 'sdiff --help' for more information."; + + fn os(s: &str) -> OsString { + OsString::from(s) + } + + /// Simplify call of parser, just pass a normal string like in the Terminal. + fn parse(args: &str) -> ResultParamsSdiffParse { + let mut o = Vec::new(); + for arg in args.split(' ') { + o.push(os(arg)); + } + let p = o.into_iter().peekable(); + + ParamsSdiff::parse_params(p) + } + + fn res_ok(params: ParamsSdiff) -> ResultParamsSdiffParse { + Ok(ParamsSdiffOk::ParamsSdiff(params)) + } + + #[test] + fn positional() { + // file_1 and file_2 given + assert_eq!( + parse("sdiff foo bar"), + res_ok(ParamsSdiff { + util: DiffUtility::SDiff, + from: os("foo"), + to: os("bar"), + ..Default::default() + }), + ); + + // file_1 only + assert_eq!( + parse("sdiff foo"), + res_ok(ParamsSdiff { + util: DiffUtility::SDiff, + from: os("foo"), + to: os("-"), + ..Default::default() + }), + ); + + // double dash without operand + // Test fails as this behavior is not replicated. + // assert_eq!( + // parse_params("sdiff foo -- --help"), + // res_ok(ParamsSdiff { + // util: DiffUtility::SDiff, + // file_1: os("foo"), + // file_2: os("--help"), + // ..Default::default() + // }), + // ); + + // Err: too many operands + assert_eq!( + parse("sdiff foo bar extra"), + Err(ParamsSdiffError::ExtraOperand("extra".to_string())), + ); + + // Err: no arguments + assert_eq!( + parse("sdiff"), + Err(ParamsSdiffError::ArgParserError(ArgParserError::NoOperand( + DiffUtility::SDiff + ))) + ); + } + + #[test] + fn execution_modes() { + // Test all options + // I^A is at the end of the single options, forcing '^A' as argument for 'I'. + // --wi is abbreviated and uses equal sign + // diff-program uses next arg + // -O uses next arg + let params = ParamsSdiff { + util: DiffUtility::SDiff, + from: os("foo"), + to: os("bar"), + diff_program: Some("prg".to_string()), + expand_tabs: true, + help: false, + ignore_all_space: true, + ignore_blank_lines: true, + ignore_case: true, + ignore_matching_lines: Some("^A".to_string()), + ignore_space_change: true, + ignore_tab_expansion: true, + ignore_trailing_space: true, + left_column: true, + minimal: true, + output: Some("out".to_string()), + speed_large_files: true, + strip_trailing_cr: true, + suppress_common_lines: true, + tabsize: 2, + text: true, + version: false, + width: 150, + }; + assert_eq!( + parse( + "sdiff foo bar -iEZbWBalstdHI^A --wi=150 --diff-program prg -o out --strip --tab=2" + ), + res_ok(params.clone()) + ); + + // negative value + let r = parse("sdiff foo bar --tab=-2"); + match r { + Ok(_) => assert!(false, "Should not be Ok."), + Err(e) => assert_eq!( + e.to_string(), + format!("sdiff: invalid argument '-2' for '--tabsize'\nsdiff: {TEXT_HELP_HINT}") + ), + } + } +} diff --git a/src/sdiff/params_sdiff_def.rs b/src/sdiff/params_sdiff_def.rs new file mode 100644 index 0000000..3c7e9f0 --- /dev/null +++ b/src/sdiff/params_sdiff_def.rs @@ -0,0 +1,234 @@ +//! This module holds all definitions, text and error messages for sdiff. +use std::fmt::Display; + +use const_format::concatcp; + +use crate::{ + arg_parser::{ + AppOption, ArgParserError, ParsedOption, OPT_HELP, OPT_VERSION, TEXT_HELP_FOOTER, + }, + sdiff::{params_sdiff::ParamsSdiff, EXE_NAME}, + // sdiff::{Bytes, IgnInit, EXE_NAME}, +}; + +pub type ResultParamsSdiffParse = Result; + +// AppOptions for sdiff +pub(super) const OPT_DIFF_PROGRAM: AppOption = AppOption { + long_name: "diff-program", + short: None, + has_arg: true, +}; +pub(super) const OPT_EXPAND_TABS: AppOption = AppOption { + long_name: "expand-tabs", + short: Some('t'), + has_arg: false, +}; +pub(super) const OPT_IGNORE_ALL_SPACE: AppOption = AppOption { + long_name: "ignore-all-space", + short: Some('W'), + has_arg: false, +}; +pub(super) const OPT_IGNORE_BLANK_LINES: AppOption = AppOption { + long_name: "ignore-blank-lines", + short: Some('B'), + has_arg: false, +}; +pub(super) const OPT_IGNORE_CASE: AppOption = AppOption { + long_name: "ignore-case", + short: Some('i'), + has_arg: false, +}; +pub(super) const OPT_IGNORE_MATCHING_LINES: AppOption = AppOption { + long_name: "ignore-matching-lines", + short: Some('I'), + has_arg: true, +}; +pub(super) const OPT_IGNORE_SPACE_CHANGE: AppOption = AppOption { + long_name: "ignore-space-change", + short: Some('b'), + has_arg: false, +}; +pub(super) const OPT_IGNORE_TAB_EXPANSION: AppOption = AppOption { + long_name: "ignore-tab-expansion", + short: Some('E'), + has_arg: false, +}; +pub(super) const OPT_IGNORE_TRAILING_SPACE: AppOption = AppOption { + long_name: "ignore-trailing-space", + short: Some('Z'), + has_arg: false, +}; +pub(super) const OPT_LEFT_COLUMN: AppOption = AppOption { + long_name: "left-column", + short: Some('l'), + has_arg: false, +}; +pub(super) const OPT_MINIMAL: AppOption = AppOption { + long_name: "minimal", + short: Some('d'), + has_arg: false, +}; +pub(super) const OPT_OUTPUT: AppOption = AppOption { + long_name: "output", + short: Some('o'), + has_arg: true, +}; +pub(super) const OPT_SPEED_LARGE_FILES: AppOption = AppOption { + long_name: "speed-large-files", + short: Some('H'), + has_arg: false, +}; +pub(super) const OPT_STRIP_TRAILING_CR: AppOption = AppOption { + long_name: "strip-trailing-cr", + short: None, + has_arg: false, +}; +pub(super) const OPT_SUPPRESS_COMMON_LINES: AppOption = AppOption { + long_name: "suppress-common-lines", + short: Some('s'), + has_arg: false, +}; +pub(super) const OPT_TABSIZE: AppOption = AppOption { + long_name: "tabsize", + short: None, + has_arg: true, +}; +pub(super) const OPT_TEXT: AppOption = AppOption { + long_name: "text", + short: Some('a'), + has_arg: false, +}; +pub(super) const OPT_WIDTH: AppOption = AppOption { + long_name: "width", + short: Some('w'), + has_arg: true, +}; + +// Array for ParamsGen +pub(super) const ARG_OPTIONS: [AppOption; 20] = [ + OPT_DIFF_PROGRAM, + OPT_EXPAND_TABS, + OPT_HELP, + OPT_IGNORE_ALL_SPACE, + OPT_IGNORE_BLANK_LINES, + OPT_IGNORE_CASE, + OPT_IGNORE_MATCHING_LINES, + OPT_IGNORE_SPACE_CHANGE, + OPT_IGNORE_TAB_EXPANSION, + OPT_IGNORE_TRAILING_SPACE, + OPT_LEFT_COLUMN, + OPT_MINIMAL, + OPT_OUTPUT, + OPT_SPEED_LARGE_FILES, + OPT_STRIP_TRAILING_CR, + OPT_SUPPRESS_COMMON_LINES, + OPT_TABSIZE, + OPT_TEXT, + OPT_VERSION, + OPT_WIDTH, +]; + +// TODO Help text rewrite, this is copyrighted by GNU +pub const TEXT_HELP: &str = concatcp!( + r#" +Usage: sdiff [OPTION]... FILE1 FILE2 +sdiff is a tool which allows to compare two text files for differences. +It outputs the differences in a side-by-side view. +Use 'diff' for a row-by-row view. +Use 'cmp' to compare binary files + +Options: + -o, --output=FILE operate interactively, sending output to FILE + + -i, --ignore-case consider upper- and lower-case to be the same + -E, --ignore-tab-expansion ignore changes due to tab expansion + -Z, --ignore-trailing-space ignore white space at line end + -b, --ignore-space-change ignore changes in the amount of white space + -W, --ignore-all-space ignore all white space + -B, --ignore-blank-lines ignore changes whose lines are all blank + -I, --ignore-matching-lines=RE ignore changes all whose lines match RE + --strip-trailing-cr strip trailing carriage return on input + -a, --text treat all files as text + + -w, --width=NUM output at most NUM (default 130) print columns + -l, --left-column output only the left column of common lines + -s, --suppress-common-lines do not output common lines + + -t, --expand-tabs expand tabs to spaces in output + --tabsize=NUM tab stops at every NUM (default 8) print columns + + -d, --minimal try hard to find a smaller set of changes + -H, --speed-large-files assume large files, many scattered small changes + --diff-program=PROGRAM use PROGRAM to compare files + + --help display this help and exit + -v, --version output version information and exit + +If a FILE is '-', read operating system's standard input. +Exit status is 0 if inputs are identical, 1 if different, 2 in error case. +"#, + TEXT_HELP_FOOTER +); + +// TODO Version text, possibly centralized. +pub const TEXT_VERSION: &str = concat!("sdiff (Rust DiffUtils) ", env!("CARGO_PKG_VERSION"),); + +/// Success return type for parsing of params. +/// +/// Successful parsing will return ParamsSdiff, \ +/// '-- help' und '--version' will return an [ParamsSdiffInfo] enum, \ +/// Error will be returned as [ParamsSdiffError] in the function Result. +#[derive(Debug, PartialEq)] +pub enum ParamsSdiffOk { + Info(String), + ParamsSdiff(ParamsSdiff), +} + +/// Contains all parser errors and their text messages. +/// This allows centralized maintenance. +#[derive(Debug, PartialEq)] +pub enum ParamsSdiffError { + /// Bubbled up error + ArgParserError(ArgParserError), + + /// number argument incorrect + InvalidNumber(ParsedOption), + + // (param argument) + // WidthInvalid(String), + /// Having 3 operands or more + /// (wrong operand) + ExtraOperand(String), +} + +impl From for ParamsSdiffError { + fn from(err: ArgParserError) -> Self { + Self::ArgParserError(err) + } +} + +impl Display for ParamsSdiffError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // Writes the error message, adds sdiff: and the --help information. + fn write_err(f: &mut std::fmt::Formatter<'_>, msg: &str) -> Result<(), std::fmt::Error> { + ArgParserError::write_err(f, EXE_NAME, msg) + } + + // TODO Different error messages for Short and Long name calls? Generally error messages do not attempt to be GNU compatible. + match self { + ParamsSdiffError::ArgParserError(e) => write_err(f, &e.to_string()), + ParamsSdiffError::ExtraOperand(opt) => write_err(f, &format!("extra operand '{opt}'")), + ParamsSdiffError::InvalidNumber(opt) => write_err( + f, + &format!( + "invalid argument '{}' for '--{}'{}", + opt.arg_for_option_or_empty_string(), + opt.app_option.long_name, + opt.short_char_or_empty_string(), + ), + ), + // ParamsSdiffError::WidthInvalid(param) => write_err(f, &format!("invalid '{param}'")), + } + } +} diff --git a/src/side_diff.rs b/src/side_diff.rs index 56953d2..0224fb5 100644 --- a/src/side_diff.rs +++ b/src/side_diff.rs @@ -8,7 +8,7 @@ use diff::Result; use std::{io::Write, vec}; use unicode_width::UnicodeWidthStr; -use crate::params::Params; +use crate::sdiff::params_sdiff::ParamsSdiff; const GUTTER_WIDTH_MIN: usize = 3; @@ -98,6 +98,34 @@ impl Config { } } +/// Params for side_diff, so the functions can be used by multiple modules (diff and sdiff) +#[derive(Default)] +pub struct Params { + pub expand_tabs: bool, + pub tabsize: usize, + pub width: usize, +} + +impl From<&crate::params::Params> for Params { + fn from(param: &crate::params::Params) -> Self { + Self { + expand_tabs: param.expand_tabs, + tabsize: param.tabsize, + width: param.width, + } + } +} + +impl From<&ParamsSdiff> for Params { + fn from(param: &ParamsSdiff) -> Self { + Self { + expand_tabs: param.expand_tabs, + tabsize: param.tabsize, + width: param.width, + } + } +} + fn format_tabs_and_spaces( from: usize, to: usize, diff --git a/src/utils.rs b/src/utils.rs index daca18d..be1bc20 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -4,7 +4,8 @@ // files that was distributed with this source code. use regex::Regex; -use std::{ffi::OsString, io::Write}; +use std::io::{self, Error, Read, Write}; +use std::{ffi::OsString, fs}; use unicode_width::UnicodeWidthStr; /// Replace tabs by spaces in the input line. @@ -87,15 +88,61 @@ pub fn format_failure_to_read_input_file( ) } -pub fn report_failure_to_read_input_file( +/// Formats the error messages of both files. +pub fn format_failure_to_read_input_files( executable: &OsString, - filepath: &OsString, - error: &std::io::Error, -) { - eprintln!( - "{}", - format_failure_to_read_input_file(executable, filepath, error) + errors: &[(OsString, Error)], +) -> String { + let mut msg = format_failure_to_read_input_file( + executable, + &errors[0].0, // filepath, + &errors[0].1, // &error, ); + if errors.len() > 1 { + msg.push('\n'); + msg.push_str(&format_failure_to_read_input_file( + executable, + &errors[1].0, // filepath, + &errors[1].1, // &error, + )); + } + + msg +} + +pub fn read_file_contents(filepath: &OsString) -> io::Result> { + if filepath == "-" { + let mut content = Vec::new(); + io::stdin().read_to_end(&mut content).and(Ok(content)) + } else { + fs::read(filepath) + } +} + +pub type ResultReadBothFiles = Result<(Vec, Vec), Vec<(OsString, Error)>>; +/// Reads both files and returns the files or a list of errors, as both files can produce a separate error. +pub fn read_both_files(from: &OsString, to: &OsString) -> ResultReadBothFiles { + let mut read_errors = Vec::new(); + let from_content = match read_file_contents(from).map_err(|e| (from.clone(), e)) { + Ok(r) => r, + Err(e) => { + read_errors.push(e); + Vec::new() + } + }; + let to_content = match read_file_contents(to).map_err(|e| (to.clone(), e)) { + Ok(r) => r, + Err(e) => { + read_errors.push(e); + Vec::new() + } + }; + + if read_errors.is_empty() { + Ok((from_content, to_content)) + } else { + Err(read_errors) + } } #[cfg(test)] diff --git a/tests/integration.rs b/tests/integration.rs index c9db571..26b621a 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -32,6 +32,7 @@ mod common { "Expected utility name as second argument, got nothing.\n", )); + // TODO sdiff, which correctly returns "unrecognized option: '--foobar'" (single quote instead of double quote, so diff and cmp need fix.) for subcmd in ["diff", "cmp"] { let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg(subcmd); @@ -59,7 +60,7 @@ mod common { #[cfg(windows)] let error_message = "The system cannot find the file specified."; - for subcmd in ["diff", "cmp"] { + for subcmd in ["diff", "cmp", "sdiff"] { let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg(subcmd); cmd.arg(&nopath).arg(file.path()); @@ -81,18 +82,22 @@ mod common { ": {}: {error_message}\n", &nopath.as_os_str().to_string_lossy() ))); - } - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("diff"); - cmd.arg(&nopath).arg(&nopath); - cmd.assert().code(predicate::eq(2)).failure().stderr( - predicate::str::contains(format!( - ": {}: {error_message}\n", - &nopath.as_os_str().to_string_lossy() - )) - .count(2), - ); + // TODO test fails for cmp + if subcmd == "cmp" { + continue; + } + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg(subcmd); + cmd.arg(&nopath).arg(&nopath); + cmd.assert().code(predicate::eq(2)).failure().stderr( + predicate::str::contains(format!( + ": {}: {error_message}\n", + &nopath.as_os_str().to_string_lossy() + )) + .count(2), + ); + } Ok(()) } @@ -890,3 +895,27 @@ mod cmp { Ok(()) } } + +mod sdiff { + use super::*; + + #[test] + fn differences() -> Result<(), Box> { + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo\n".as_bytes())?; + + let mut file2 = NamedTempFile::new()?; + file2.write_all("bar\n".as_bytes())?; + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("diff"); + cmd.arg(file1.path()).arg(file2.path()); + + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::str::is_empty().not()); + + Ok(()) + } +}