1use anyhow::{Context, Result};
2use std::fs::File;
3use std::io::{Read, Seek, SeekFrom};
4use std::path::Path;
5
6use crate::core::{
7 ArchiveKind,
8 network::{installer_filename, is_zip_path},
9};
10
/// Byte signatures and marker names consulted when probing downloaded payloads.
mod signatures {
    /// Leading bytes that classify a payload as MSI.
    pub const MSI: [u8; 8] = *b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1";
    /// `PK\x03\x04` ZIP prefix.
    pub const ZIP_LOCAL: [u8; 4] = *b"PK\x03\x04";
    /// `PK\x05\x06` ZIP prefix.
    pub const ZIP_EMPTY: [u8; 4] = *b"PK\x05\x06";
    /// `PK\x07\x08` ZIP prefix.
    pub const ZIP_SPANNED: [u8; 4] = *b"PK\x07\x08";
    /// Leading bytes that classify a payload as 7z.
    pub const SEVEN_ZIP: [u8; 6] = *b"7z\xBC\xAF\x27\x1C";
    /// Leading bytes that classify a payload as gzip.
    pub const GZIP: [u8; 3] = *b"\x1F\x8B\x08";
    /// Byte offset at which `TAR_MAGIC` is looked for.
    pub const TAR_OFFSET: usize = 257;
    /// The ASCII text `ustar`.
    pub const TAR_MAGIC: [u8; 5] = *b"ustar";
    /// The ASCII text `MSCF`, used to recognise CAB payloads.
    pub const CAB: [u8; 4] = *b"MSCF";
    /// First of the two accepted RAR prefixes.
    pub const RAR4: [u8; 7] = *b"Rar!\x1A\x07\x00";
    /// Second of the two accepted RAR prefixes.
    pub const RAR5: [u8; 8] = *b"Rar!\x1A\x07\x01\x00";
    /// Lower-cased, `/`-separated entry names that mark a ZIP as an MSIX package.
    pub const MSIX_MARKERS: [&str; 2] = [
        "appxmanifest.xml",
        "appxmetadata/appxbundlemanifest.xml",
    ];
}
25
/// Number of leading payload bytes read for signature probing; also the
/// initial capacity of the probe buffer.
const PROBE_HEADER_BYTES: usize = 512;
31
32#[derive(Debug, Clone, Copy, PartialEq, Eq)]
33pub(crate) enum PayloadKind {
34 Raw,
35 Archive(ArchiveKind),
36 Cab,
37}
38
39#[derive(Debug, Clone, Copy, PartialEq, Eq)]
40pub(crate) enum DetectedArtifactKind {
41 Msi,
42 Msix,
43 Archive(ArchiveKind),
44 Cab,
45}
46
47pub(crate) fn classify_payload(url: &str) -> PayloadKind {
48 if is_zip_path(url) {
49 return PayloadKind::Archive(ArchiveKind::Zip);
50 }
51
52 let file_name = installer_filename(url).to_ascii_lowercase();
53
54 if file_name.ends_with(".cab") {
55 return PayloadKind::Cab;
56 }
57
58 archive_kind_from_file_name(&file_name).map_or(PayloadKind::Raw, PayloadKind::Archive)
59}
60
61pub(crate) fn probe_downloaded_artifact_kind(path: &Path) -> Result<Option<DetectedArtifactKind>> {
62 let mut file = File::open(path)
63 .with_context(|| format!("failed to open downloaded payload {}", path.display()))?;
64 let mut limited_reader = file.by_ref().take(PROBE_HEADER_BYTES as u64);
65 let buffer = read_probe_bytes(&mut limited_reader)?;
66 file.seek(SeekFrom::Start(0))
67 .with_context(|| format!("failed to rewind downloaded payload {}", path.display()))?;
68
69 match classify_probe_bytes(&buffer) {
70 Some(DetectedArtifactKind::Archive(ArchiveKind::Zip)) => try_probe_as_msix(file),
71 detected => Ok(detected),
72 }
73}
74
75pub(crate) fn archive_kind_for_url(url: &str) -> Option<ArchiveKind> {
76 if is_zip_path(url) {
77 return Some(ArchiveKind::Zip);
78 }
79
80 let file_name = installer_filename(url).to_ascii_lowercase();
81 archive_kind_from_file_name(&file_name)
82}
83
84fn archive_kind_from_file_name(file_name: &str) -> Option<ArchiveKind> {
85 if file_name.ends_with(".tar.gz")
86 || file_name.ends_with(".tgz")
87 || file_name.ends_with(".tbz2")
88 || file_name.ends_with(".tar.bz2")
89 || file_name.ends_with(".tar")
90 {
91 Some(ArchiveKind::Tar)
92 } else if file_name.ends_with(".gz") {
93 Some(ArchiveKind::Gzip)
94 } else if file_name.ends_with(".7z") {
95 Some(ArchiveKind::SevenZip)
96 } else if file_name.ends_with(".rar") {
97 Some(ArchiveKind::Rar)
98 } else {
99 None
100 }
101}
102
103fn classify_probe_bytes(bytes: &[u8]) -> Option<DetectedArtifactKind> {
104 if is_zip_signature(bytes) {
105 return Some(DetectedArtifactKind::Archive(ArchiveKind::Zip));
106 }
107
108 if is_msi_signature(bytes) {
109 return Some(DetectedArtifactKind::Msi);
110 }
111
112 if is_cab_signature(bytes) {
113 return Some(DetectedArtifactKind::Cab);
114 }
115
116 if is_gzip_signature(bytes) {
117 return Some(DetectedArtifactKind::Archive(ArchiveKind::Gzip));
118 }
119
120 if is_tar_signature(bytes) {
121 return Some(DetectedArtifactKind::Archive(ArchiveKind::Tar));
122 }
123
124 if is_seven_zip_signature(bytes) {
125 return Some(DetectedArtifactKind::Archive(ArchiveKind::SevenZip));
126 }
127
128 if is_rar_signature(bytes) {
129 return Some(DetectedArtifactKind::Archive(ArchiveKind::Rar));
130 }
131
132 None
133}
134
135fn try_probe_as_msix(file: File) -> Result<Option<DetectedArtifactKind>> {
136 let mut archive = match zip::ZipArchive::new(file) {
137 Ok(archive) => archive,
138 Err(_) => return Ok(Some(DetectedArtifactKind::Archive(ArchiveKind::Zip))),
139 };
140
141 if zip_archive_looks_like_msix(&mut archive).unwrap_or(false) {
142 return Ok(Some(DetectedArtifactKind::Msix));
143 }
144
145 Ok(Some(DetectedArtifactKind::Archive(ArchiveKind::Zip)))
146}
147
148fn zip_archive_looks_like_msix<R: Read + Seek>(archive: &mut zip::ZipArchive<R>) -> Result<bool> {
149 for index in 0..archive.len() {
150 let entry = archive.by_index(index).with_context(|| {
151 format!(
152 "failed to read ZIP entry {} while probing MSIX markers",
153 index
154 )
155 })?;
156
157 let normalized_name = entry.name().replace('\\', "/").to_ascii_lowercase();
158 if signatures::MSIX_MARKERS
159 .iter()
160 .any(|marker| normalized_name == *marker)
161 {
162 return Ok(true);
163 }
164 }
165
166 Ok(false)
167}
168
169fn read_probe_bytes<R: Read>(reader: &mut R) -> Result<Vec<u8>> {
170 let mut buffer = Vec::with_capacity(PROBE_HEADER_BYTES);
171 reader
172 .read_to_end(&mut buffer)
173 .context("failed to read probe bytes")?;
174
175 Ok(buffer)
176}
177
178fn is_msi_signature(bytes: &[u8]) -> bool {
179 bytes.starts_with(&signatures::MSI)
180}
181
182fn is_cab_signature(bytes: &[u8]) -> bool {
183 bytes.starts_with(&signatures::CAB)
184}
185
186fn is_zip_signature(bytes: &[u8]) -> bool {
187 bytes.starts_with(&signatures::ZIP_LOCAL)
188 || bytes.starts_with(&signatures::ZIP_EMPTY)
189 || bytes.starts_with(&signatures::ZIP_SPANNED)
190}
191
192fn is_seven_zip_signature(bytes: &[u8]) -> bool {
193 bytes.starts_with(&signatures::SEVEN_ZIP)
194}
195
196fn is_gzip_signature(bytes: &[u8]) -> bool {
197 bytes.starts_with(&signatures::GZIP)
198}
199
200fn is_tar_signature(bytes: &[u8]) -> bool {
201 bytes
202 .get(signatures::TAR_OFFSET..signatures::TAR_OFFSET + signatures::TAR_MAGIC.len())
203 .is_some_and(|magic| magic == signatures::TAR_MAGIC)
204}
205
206fn is_rar_signature(bytes: &[u8]) -> bool {
207 bytes.starts_with(&signatures::RAR4) || bytes.starts_with(&signatures::RAR5)
208}
209
#[cfg(test)]
mod tests {
    use super::{
        DetectedArtifactKind, PayloadKind, archive_kind_for_url, classify_payload,
        classify_probe_bytes, probe_downloaded_artifact_kind, read_probe_bytes,
    };
    use crate::core::ArchiveKind;
    use std::fs;
    use std::io::{self, Read, Write};
    use tempfile::NamedTempFile;
    use zip::ZipWriter;
    use zip::write::SimpleFileOptions;

    /// Reader that yields at most `chunk_size` bytes per `read` call, used to
    /// exercise short-read handling.
    struct ChunkedReader<'a> {
        bytes: &'a [u8],
        chunk_size: usize,
        offset: usize,
    }

    impl Read for ChunkedReader<'_> {
        fn read(&mut self, buffer: &mut [u8]) -> io::Result<usize> {
            // An exhausted reader has nothing left and reports zero bytes.
            let remaining = self.bytes.get(self.offset..).unwrap_or(&[]);
            let count = remaining.len().min(self.chunk_size).min(buffer.len());

            buffer[..count].copy_from_slice(&remaining[..count]);
            self.offset += count;

            Ok(count)
        }
    }

    #[test]
    fn classifies_zip_payloads_before_portable_fallback() {
        let kind = classify_payload("https://example.invalid/tool.zip?token=123#fragment");

        assert_eq!(kind, PayloadKind::Archive(ArchiveKind::Zip));
    }

    #[test]
    fn classifies_non_archive_payloads_as_raw() {
        let kind = classify_payload("https://example.invalid/tool.exe");

        assert_eq!(kind, PayloadKind::Raw);
    }

    #[test]
    fn classifies_cab_payloads_as_cab() {
        let kind = classify_payload("https://example.invalid/tool.cab");

        assert_eq!(kind, PayloadKind::Cab);
    }

    #[test]
    fn classifies_tar_family_payloads_as_archive() {
        let tar_urls = [
            "https://example.invalid/tool.tar.gz",
            "https://example.invalid/tool.tgz",
            "https://example.invalid/tool.tbz2",
            "https://example.invalid/tool.tar.bz2",
        ];

        for url in tar_urls {
            assert_eq!(
                classify_payload(url),
                PayloadKind::Archive(ArchiveKind::Tar),
                "{url}"
            );
        }
    }

    #[test]
    fn classifies_gzip_payloads_as_archive() {
        let kind = archive_kind_for_url("https://example.invalid/tool.gz");

        assert_eq!(kind, Some(ArchiveKind::Gzip));
    }

    #[test]
    fn classifies_other_archive_formats_as_archive() {
        let expectations = [
            ("https://example.invalid/tool.7z", ArchiveKind::SevenZip),
            ("https://example.invalid/tool.rar", ArchiveKind::Rar),
        ];

        for (url, expected) in expectations {
            assert_eq!(archive_kind_for_url(url), Some(expected), "{url}");
        }
    }

    #[test]
    fn probes_msi_signatures() {
        let header = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1];

        assert_eq!(
            classify_probe_bytes(&header),
            Some(DetectedArtifactKind::Msi)
        );
    }

    #[test]
    fn probes_cab_signatures() {
        let detected = classify_probe_bytes(b"MSCFcab payload");

        assert_eq!(detected, Some(DetectedArtifactKind::Cab));
    }

    #[test]
    fn probes_zip_signatures() {
        let detected = classify_probe_bytes(b"PK\x03\x04rest");

        assert_eq!(
            detected,
            Some(DetectedArtifactKind::Archive(ArchiveKind::Zip))
        );
    }

    #[test]
    fn classifies_empty_probe_bytes_as_none() {
        assert!(classify_probe_bytes(&[]).is_none());
    }

    #[test]
    fn classifies_partial_probe_bytes_as_none() {
        let truncated_headers: [&[u8]; 2] = [&[0xD0], b"PK\x03"];

        for header in truncated_headers {
            assert_eq!(classify_probe_bytes(header), None);
        }
    }

    #[test]
    fn probes_msix_like_zip_packages() {
        let temp_file = NamedTempFile::new().expect("temp file");

        {
            let zip_target = fs::File::create(temp_file.path()).expect("create zip file");
            let mut writer = ZipWriter::new(zip_target);

            writer
                .start_file("AppxManifest.xml", SimpleFileOptions::default())
                .expect("start msix manifest entry");
            writer
                .write_all(b"<Package />")
                .expect("write msix manifest");
            writer.finish().expect("finish msix zip");
        }

        let detected = probe_downloaded_artifact_kind(temp_file.path()).expect("probe msix zip");

        assert_eq!(detected, Some(DetectedArtifactKind::Msix));
    }

    #[test]
    fn read_probe_bytes_collects_short_reads() {
        let mut one_byte_at_a_time = ChunkedReader {
            bytes: b"header-bytes",
            chunk_size: 1,
            offset: 0,
        };

        let collected = read_probe_bytes(&mut one_byte_at_a_time).expect("read bytes");

        assert_eq!(collected, b"header-bytes");
    }

    #[test]
    fn probes_tar_signatures() {
        let mut header = vec![0u8; 512];
        header[257..262].copy_from_slice(b"ustar");

        let detected = classify_probe_bytes(&header);

        assert_eq!(
            detected,
            Some(DetectedArtifactKind::Archive(ArchiveKind::Tar))
        );
    }
}