mirror of
https://github.com/kharonsec/garage.git
synced 2026-04-25 20:44:55 +02:00
Support streaming of gzip content involving multiple Content-Encoding headers (#1369)
## Problem
`hugo deploy` is broken with Garage on recent hugo versions when using gzip matchers
## Why?
We don't support multi-value headers correctly, in this case this specific headers combination:
```
Content-Encoding: gzip
Content-Encoding: aws-chunked
```
is interpreted as:
```
Content-Encoding: gzip
```
instead of:
```
Content-Encoding: gzip,aws-chunked
```
It fails both 1. the signature check and 2. the streaming check.
## Proposed fix
- Taking into account multi-value headers when building Canonical Request (validated with hugo deploy + AWS SDK v2)
- Taking into account multi-value headers (both comma separated and HeaderEntry separated) when removing `aws-chunked` (validated with hugo deploy + AWS SDK v2)
## Full explanation
Currently, `hugo deploy` on version `hugo v0.152.2` or more recent uses AWS SDK v2 only and supports for sending gzipped content.
That's configured with a matcher like that:
```yaml
deployment:
matchers:
- pattern: "^.+\\.(woff2|woff|svg|ttf|otf|eot|js|css)$"
cacheControl: "max-age=31536000, no-transform, public"
gzip: true # <-------- here
```
Also, with SDK v2, hugo is streaming all of its files.
Thus, it sends that kind of requests:
```python
Request {
method: PUT,
uri: /sebou/pagefind/pagefind.js?x-id=PutObject,
version: HTTP/1.1,
headers: {
"host": "localhost",
"user-agent": "aws-sdk-go-v2/1.39.2 ua/2.1 os/linux lang/go#1.25.6 md/GOOS#linux md/GOARCH#amd64 api/s3#1.84.0 ft/s3-transfer m/E,G,Z,g",
"content-length": "10026",
"accept-encoding": "identity",
"amz-sdk-invocation-id": "aed6df34-a67c-4bab-b63b-2b3777b751a0",
"amz-sdk-request": "attempt=1; max=3",
"authorization": "AWS4-HMAC-SHA256 Credential=GKxxxxx/20260227/garage/s3/aws4_request, SignedHeaders=accept-encoding;amz-sdk-invocation-id;amz-sdk-request;cache-control;content-encoding;content-length;content-type;host;x-amz-content-sha256;x-amz-date;x-amz-decoded-content-length;x-amz-meta-md5chksum;x-amz-trailer, Signature=76cd9b77f693ca89c2e6dd2a4dc55f83d4a82eca0f563d9d095ff96076f7b057",
"cache-control": "max-age=31536000, no-transform, public",
"content-encoding": "gzip", # <---- see here 1st instance of Content-Encoding
"content-encoding": "aws-chunked", # <---- 2nd instance of Content-Encoding
"content-type": "text/javascript",
"via": "2.0 Caddy",
"x-amz-content-sha256": "STREAMING-UNSIGNED-PAYLOAD-TRAILER",
"x-amz-date": "20260227T132212Z",
"x-amz-decoded-content-length": "9982",
"x-amz-meta-md5chksum": "aad88ac0bf704e91584b8d9ad9796670",
"x-amz-trailer": "x-amz-checksum-crc32",
"x-forwarded-for": "::1",
"x-forwarded-host": "localhost",
"x-forwarded-proto": "https"
},
body: Body(Streaming)
}
```
But our canonical request function only calls `HeaderMap.get()` that returns only the 1st value and not `HeaderMap.get_all()` that returns all the values for a header.
Leading to the following invalid `CanonicalRequest` value:
```python
PUT
/sebou/pagefind/pagefind.js
x-id=PutObject
accept-encoding:identity
amz-sdk-invocation-id:aed6df34-a67c-4bab-b63b-2b3777b751a0
amz-sdk-request:attempt=1; max=3
cache-control:max-age=31536000, no-transform, public
content-encoding:gzip # <----- see here, we kept only gzip and dropped aws-chunked
content-length:10026
content-type:text/javascript
host:localhost
x-amz-content-sha256:STREAMING-UNSIGNED-PAYLOAD-TRAILER
x-amz-date:20260227T132212Z
x-amz-decoded-content-length:9982
x-amz-meta-md5chksum:aad88ac0bf704e91584b8d9ad9796670
x-amz-trailer:x-amz-checksum-crc32
accept-encoding;amz-sdk-invocation-id;amz-sdk-request;cache-control;content-encoding;content-length;content-type;host;x-amz-content-sha256;x-amz-date;x-amz-decoded-content-length;x-amz-meta-md5chksum;x-amz-trailer
```
Amazon is crystal clear that, instead of dropping the other values, we should concatenate them with a comma:

https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html#create-canonical-request
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1369
Reviewed-by: Alex <lx@deuxfleurs.fr>
Co-authored-by: Quentin Dufour <quentin@deuxfleurs.fr>
Co-committed-by: Quentin Dufour <quentin@deuxfleurs.fr>
This commit is contained in:
@@ -81,7 +81,7 @@ fn parse_x_amz_content_sha256(header: Option<&str>) -> Result<ContentSha256Heade
|
||||
_ => {
|
||||
return Err(Error::bad_request(
|
||||
"invalid or unsupported x-amz-content-sha256",
|
||||
))
|
||||
));
|
||||
}
|
||||
};
|
||||
Ok(ContentSha256Header::StreamingPayload { trailer, signed })
|
||||
@@ -357,11 +357,18 @@ pub fn canonical_request(
|
||||
let canonical_header_string = signed_headers
|
||||
.iter()
|
||||
.map(|name| {
|
||||
let value = headers
|
||||
.get(name)
|
||||
let all_values = headers.get_all(name);
|
||||
let mut iter_values = all_values.iter();
|
||||
let base_value = iter_values
|
||||
.next()
|
||||
.ok_or_bad_request(format!("signed header `{}` is not present", name))?;
|
||||
let value = std::str::from_utf8(value.as_bytes())?;
|
||||
Ok(format!("{}:{}", name.as_str(), value.trim()))
|
||||
let mut built_string = std::str::from_utf8(base_value.as_bytes())?.to_string();
|
||||
for extend_value in iter_values {
|
||||
let extend_string = std::str::from_utf8(extend_value.as_bytes())?;
|
||||
built_string.push(',');
|
||||
built_string.push_str(extend_string);
|
||||
}
|
||||
Ok(format!("{}:{}", name.as_str(), built_string.trim()))
|
||||
})
|
||||
.collect::<Result<Vec<String>, Error>>()?
|
||||
.join("\n");
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::iter::FromIterator;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Mutex;
|
||||
|
||||
@@ -5,7 +6,7 @@ use chrono::{DateTime, NaiveDateTime, TimeZone, Utc};
|
||||
use futures::prelude::*;
|
||||
use futures::task;
|
||||
use hmac::Mac;
|
||||
use http::header::{HeaderMap, HeaderValue, CONTENT_ENCODING};
|
||||
use http::header::{Entry, HeaderMap, HeaderValue, CONTENT_ENCODING};
|
||||
use hyper::body::{Bytes, Frame, Incoming as IncomingBody};
|
||||
use hyper::Request;
|
||||
|
||||
@@ -42,15 +43,52 @@ pub fn parse_streaming_body(
|
||||
// Remove the aws-chunked component in the content-encoding: header
|
||||
// Note: this header is not properly sent by minio client, so don't fail
|
||||
// if it is absent from the request.
|
||||
if let Some(content_encoding) = req.headers_mut().remove(CONTENT_ENCODING) {
|
||||
if let Some(rest) = content_encoding.as_bytes().strip_prefix(b"aws-chunked,") {
|
||||
req.headers_mut()
|
||||
.insert(CONTENT_ENCODING, HeaderValue::from_bytes(rest).unwrap());
|
||||
} else if content_encoding != "aws-chunked" {
|
||||
return Err(Error::bad_request(
|
||||
"content-encoding does not contain aws-chunked for STREAMING-*-PAYLOAD",
|
||||
));
|
||||
let mut original_content_encoding = vec![];
|
||||
if let Entry::Occupied(content_encoding) = req.headers_mut().entry(CONTENT_ENCODING) {
|
||||
// 1. collect headers
|
||||
let (_, vals) = content_encoding.remove_entry_mult();
|
||||
original_content_encoding = Vec::from_iter(vals);
|
||||
}
|
||||
let mut header_initialized = false;
|
||||
let mut chunked_found = false;
|
||||
for enc_val in original_content_encoding.iter() {
|
||||
// 2. clean each header value and reinject it.
|
||||
let mut rebuilt_val = vec![];
|
||||
for part in enc_val.as_bytes().split(|c| *c == b',') {
|
||||
let trimmed_part = part.trim_ascii();
|
||||
if trimmed_part == b"aws-chunked" {
|
||||
chunked_found = true;
|
||||
continue;
|
||||
}
|
||||
if !rebuilt_val.is_empty() {
|
||||
rebuilt_val.push(b',');
|
||||
}
|
||||
rebuilt_val.extend_from_slice(trimmed_part);
|
||||
}
|
||||
|
||||
if rebuilt_val.is_empty() {
|
||||
// skip empty headers
|
||||
continue;
|
||||
}
|
||||
|
||||
if !header_initialized {
|
||||
req.headers_mut().insert(
|
||||
CONTENT_ENCODING,
|
||||
HeaderValue::from_bytes(&rebuilt_val).unwrap(),
|
||||
);
|
||||
header_initialized = true;
|
||||
} else {
|
||||
req.headers_mut().append(
|
||||
CONTENT_ENCODING,
|
||||
HeaderValue::from_bytes(&rebuilt_val).unwrap(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if !original_content_encoding.is_empty() && !chunked_found {
|
||||
return Err(Error::bad_request(
|
||||
"content-encoding does not contain aws-chunked for STREAMING-*-PAYLOAD",
|
||||
));
|
||||
}
|
||||
|
||||
// If trailer header is announced, add the calculation of the requested checksum
|
||||
@@ -480,7 +518,7 @@ where
|
||||
continue;
|
||||
}
|
||||
Some(Err(e)) => {
|
||||
return Poll::Ready(Some(Err(StreamingPayloadError::Stream(e))))
|
||||
return Poll::Ready(Some(Err(StreamingPayloadError::Stream(e))));
|
||||
}
|
||||
None => {
|
||||
return Poll::Ready(Some(Err(StreamingPayloadError::message(
|
||||
@@ -490,7 +528,7 @@ where
|
||||
}
|
||||
}
|
||||
Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => {
|
||||
return Poll::Ready(Some(Err(e)))
|
||||
return Poll::Ready(Some(Err(e)));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user