cargo/util/canonical_url.rs
1use crate::util::errors::CargoResult;
2use std::hash::{self, Hash};
3use url::Url;
4
/// A newtype wrapper around `Url` which represents a "canonical" version of an
/// original URL.
///
/// A "canonical" url is only intended for internal comparison purposes in
/// Cargo. It's to help paper over mistakes such as depending on
/// `github.com/foo/bar` vs `github.com/foo/bar.git`. This is **only** for
/// internal purposes within Cargo and is not meant as a way to read back the
/// underlying string value of the `Url` it contains. This is intentional,
/// because all fetching should still happen within the context of the original
/// URL.
///
/// The wrapped `Url` is private; the only accessor is
/// `raw_canonicalized_url`, which is itself documented as being for internal
/// data structures and hashes only.
///
/// Equality and ordering are derived, so they delegate to the wrapped `Url`.
/// `Hash` is implemented by hand (rather than derived) over the URL's string
/// form.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct CanonicalUrl(Url);
17
18impl CanonicalUrl {
19 pub fn new(url: &Url) -> CargoResult<CanonicalUrl> {
20 let mut url = url.clone();
21
22 // cannot-be-a-base-urls (e.g., `github.com:rust-lang/rustfmt.git`)
23 // are not supported.
24 if url.cannot_be_a_base() {
25 anyhow::bail!(
26 "invalid url `{}`: cannot-be-a-base-URLs are not supported",
27 url
28 )
29 }
30
31 // Strip a trailing slash.
32 if url.path().ends_with('/') {
33 url.path_segments_mut().unwrap().pop_if_empty();
34 }
35
36 // Perform further canonicalization specific to git registries, which
37 // do not contain a `+` specifier.
38 if !url.scheme().contains('+') {
39 // For GitHub URLs specifically, just lower-case everything. GitHub
40 // treats both the same, but they hash differently, and we're gonna be
41 // hashing them. This wants a more general solution, and also we're
42 // almost certainly not using the same case conversion rules that GitHub
43 // does. (See issue #84)
44 if url.host_str() == Some("github.com") {
45 url = format!("https{}", &url[url::Position::AfterScheme..])
46 .parse()
47 .unwrap();
48 let path = url.path().to_lowercase();
49 url.set_path(&path);
50 }
51
52 // Repos can generally be accessed with or without `.git` extension.
53 let needs_chopping = url.path().ends_with(".git");
54 if needs_chopping {
55 let last = {
56 let last = url.path_segments().unwrap().next_back().unwrap();
57 last[..last.len() - 4].to_owned()
58 };
59 url.path_segments_mut().unwrap().pop().push(&last);
60 }
61 }
62
63 Ok(CanonicalUrl(url))
64 }
65
66 /// Returns the raw canonicalized URL, although beware that this should
67 /// never be used/displayed/etc, it should only be used for internal data
68 /// structures and hashes and such.
69 pub fn raw_canonicalized_url(&self) -> &Url {
70 &self.0
71 }
72}
73
74// See comment in `source_id.rs` for why we explicitly use `as_str()` here.
75impl Hash for CanonicalUrl {
76 fn hash<S: hash::Hasher>(&self, into: &mut S) {
77 self.0.as_str().hash(into);
78 }
79}