Merge changes I90e72a20,I65e551dd into main am: d84a5b9d74
Original change: https://android-review.googlesource.com/c/platform/development/+/3495938
Change-Id: I6a6dbc48c9538c1c2fdd2cbb2c0f986deb81cfab
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
diff --git a/tools/external_crates/crate_tool/Cargo.toml b/tools/external_crates/crate_tool/Cargo.toml
index 5f27d90..2266995 100644
--- a/tools/external_crates/crate_tool/Cargo.toml
+++ b/tools/external_crates/crate_tool/Cargo.toml
@@ -27,7 +27,7 @@
checksum = { path = "../checksum" }
crate_config = { path = "../crate_config" }
google_metadata = { path = "../google_metadata"}
-license_checker = { path = "../license_checker", features = ["fuzzy_content_match"] }
+license_checker = { path = "../license_checker" }
name_and_version = { path = "../name_and_version" }
repo_config = { path = "../repo_config" }
rooted_path = { path = "../rooted_path" }
diff --git a/tools/external_crates/license_checker/Android.bp b/tools/external_crates/license_checker/Android.bp
index 96a6d49..abe5df8 100644
--- a/tools/external_crates/license_checker/Android.bp
+++ b/tools/external_crates/license_checker/Android.bp
@@ -17,7 +17,9 @@
edition: "2021",
rustlibs: [
"libglob",
+ "libitertools",
"libspdx",
+ "libtextdistance",
"libthiserror",
],
}
@@ -36,7 +38,9 @@
edition: "2021",
rustlibs: [
"libglob",
+ "libitertools",
"libspdx",
+ "libtextdistance",
"libthiserror",
],
}
diff --git a/tools/external_crates/license_checker/Cargo.toml b/tools/external_crates/license_checker/Cargo.toml
index dcc542e..aa9b7fd 100644
--- a/tools/external_crates/license_checker/Cargo.toml
+++ b/tools/external_crates/license_checker/Cargo.toml
@@ -5,10 +5,7 @@
[dependencies]
glob = "0.3"
-itertools = { version = "0.11", optional = true }
+itertools = "0.14"
spdx = "0.10"
-textdistance = { version = "1.1.1", optional = true }
-thiserror = "1.0"
-
-[features]
-fuzzy_content_match = ["dep:textdistance", "dep:itertools"]
\ No newline at end of file
+textdistance = "1.1.1"
+thiserror = "1.0"
\ No newline at end of file
diff --git a/tools/external_crates/license_checker/src/file_classifier/content_classifier.rs b/tools/external_crates/license_checker/src/file_classifier/content_classifier.rs
index 2f6b700..90854ed 100644
--- a/tools/external_crates/license_checker/src/file_classifier/content_classifier.rs
+++ b/tools/external_crates/license_checker/src/file_classifier/content_classifier.rs
@@ -12,11 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#[cfg(feature = "fuzzy_content_match")]
use itertools::Itertools;
use spdx::{LicenseReq, Licensee};
use std::sync::LazyLock;
-#[cfg(feature = "fuzzy_content_match")]
use textdistance::str::ratcliff_obershelp;
fn strip_punctuation(text: &str) -> String {
@@ -32,7 +30,7 @@
processed.trim().to_string()
}
-pub(crate) fn classify_license_file_contents(contents: &str) -> Vec<LicenseReq> {
+pub(super) fn classify_license_file_contents(contents: &str) -> Vec<LicenseReq> {
let contents = strip_punctuation(contents);
// Exact match
@@ -42,13 +40,14 @@
matches.push(req.clone());
}
}
- if !matches.is_empty() {
- return matches;
- }
+ matches
+}
+
+pub(super) fn classify_license_file_contents_fuzzy(contents: &str) -> Option<LicenseReq> {
+ let contents = strip_punctuation(contents);
// Fuzzy match. This is expensive, so start with licenses that are closest in length to the file,
// and only return a single match at most.
- #[cfg(feature = "fuzzy_content_match")]
for (req, required_text) in LICENSE_CONTENT_CLASSIFICATION.iter().sorted_by(|a, b| {
let mut ra = a.1.len() as f32 / contents.len() as f32;
let mut rb = b.1.len() as f32 / contents.len() as f32;
@@ -62,12 +61,11 @@
}) {
let similarity = ratcliff_obershelp(contents.as_str(), required_text);
if similarity > 0.95 {
- matches.push(req.clone());
- break;
+ return Some(req.clone());
}
}
- matches
+ None
}
static LICENSE_CONTENT_CLASSIFICATION: LazyLock<Vec<(LicenseReq, String)>> = LazyLock::new(|| {
@@ -80,6 +78,7 @@
("BSD-2-Clause", include_str!("licenses/BSD-2-Clause.txt")),
("BSD-3-Clause", include_str!("licenses/BSD-3-Clause.txt")),
("Unicode-3.0", include_str!("licenses/Unicode-3.0.txt")),
+ ("Unicode-DFS-2016", include_str!("licenses/Unicode-DFS-2016.txt")),
("Unlicense", include_str!("licenses/Unlicense.txt")),
("Zlib", include_str!("licenses/Zlib.txt")),
("OpenSSL", include_str!("licenses/OpenSSL.txt")),
@@ -124,12 +123,13 @@
);
}
- #[cfg(feature = "fuzzy_content_match")]
#[test]
fn test_classify_fuzzy() {
+ assert!(classify_license_file_contents(include_str!("testdata/BSD-3-Clause-bindgen.txt"))
+ .is_empty());
assert_eq!(
- classify_license_file_contents(include_str!("testdata/BSD-3-Clause-bindgen.txt")),
- vec![Licensee::parse("BSD-3-Clause").unwrap().into_req()]
+ classify_license_file_contents_fuzzy(include_str!("testdata/BSD-3-Clause-bindgen.txt")),
+ Some(Licensee::parse("BSD-3-Clause").unwrap().into_req())
);
}
diff --git a/tools/external_crates/license_checker/src/file_classifier/licenses/Unicode-DFS-2016.txt b/tools/external_crates/license_checker/src/file_classifier/licenses/Unicode-DFS-2016.txt
new file mode 100644
index 0000000..e92e9b5
--- /dev/null
+++ b/tools/external_crates/license_checker/src/file_classifier/licenses/Unicode-DFS-2016.txt
@@ -0,0 +1,14 @@
+NOTICE TO USER: Carefully read the following legal agreement. BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright © 1991-2016 Unicode, Inc. All rights reserved. Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of the Unicode data files and any associated documentation (the "Data Files") or Unicode software and any associated documentation (the "Software") to deal in the Data Files or Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, and/or sell copies of the Data Files or Software, and to permit persons to whom the Data Files or Software are furnished to do so, provided that either
+
+ (a) this copyright and permission notice appear with all copies of the Data Files or Software, or
+ (b) this copyright and permission notice appear in associated Documentation.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder shall not be used in advertising or otherwise to promote the sale, use or other dealings in these Data Files or Software without prior written authorization of the copyright holder.
\ No newline at end of file
diff --git a/tools/external_crates/license_checker/src/file_classifier/mod.rs b/tools/external_crates/license_checker/src/file_classifier/mod.rs
index ebdf82d..6e40f65 100644
--- a/tools/external_crates/license_checker/src/file_classifier/mod.rs
+++ b/tools/external_crates/license_checker/src/file_classifier/mod.rs
@@ -50,6 +50,7 @@
#[allow(dead_code)]
by_inexact_name: Option<InexactLicenseType>,
by_content: OnceCell<Vec<LicenseReq>>,
+ by_content_fuzzy: OnceCell<Option<LicenseReq>>,
}
impl Classifier {
@@ -63,6 +64,7 @@
by_name,
by_inexact_name,
by_content: OnceCell::new(),
+ by_content_fuzzy: OnceCell::new(),
}
}
pub fn by_name(&self) -> Option<&LicenseReq> {
@@ -78,4 +80,12 @@
classify_license_file_contents(&contents)
})
}
+ pub fn by_content_fuzzy(&self) -> Option<&LicenseReq> {
+ self.by_content_fuzzy
+ .get_or_init(|| {
+ let contents = read_to_string(self.crate_path.join(&self.file_path)).unwrap();
+ content_classifier::classify_license_file_contents_fuzzy(&contents)
+ })
+ .as_ref()
+ }
}
diff --git a/tools/external_crates/license_checker/src/lib.rs b/tools/external_crates/license_checker/src/lib.rs
index 6d00e5b..dcbafa1 100644
--- a/tools/external_crates/license_checker/src/lib.rs
+++ b/tools/external_crates/license_checker/src/lib.rs
@@ -87,10 +87,12 @@
state.unsatisfied =
expression_parser::evaluate_license_expr(crate_name, cargo_toml_license)?.required;
- let possible_license_files = license_file_finder::find_license_files(crate_path)?;
+ let possible_license_files = license_file_finder::find_license_files(crate_path)?
+ .into_iter()
+ .map(|f| (f.clone(), Classifier::new(crate_path, f)))
+ .collect::<BTreeMap<_, _>>();
- for file in &possible_license_files {
- let classifier = Classifier::new(crate_path, file.clone());
+ for (file, classifier) in &possible_license_files {
if let Some(req) = classifier.by_name() {
if state.unsatisfied.remove(req) {
state.satisfied.insert(req.clone(), file.clone());
@@ -104,5 +106,21 @@
}
}
+ if !state.unsatisfied.is_empty() {
+ for (file, classifier) in &possible_license_files {
+ if classifier.by_name().is_some() || !classifier.by_content().is_empty() {
+ continue;
+ }
+ if let Some(req) = classifier.by_content_fuzzy() {
+ if state.unsatisfied.remove(req) {
+ state.satisfied.insert(req.clone(), file.clone());
+ if state.unsatisfied.is_empty() {
+ break;
+ }
+ }
+ }
+ }
+ }
+
Ok(state)
}