Support streaming of gzip content involving multiple Content-Encoding headers (#1369)

## Problem

`hugo deploy` is broken with Garage on recent hugo versions when using gzip matchers

## Why?

We don't support multi-value headers correctly, in this case this specific headers combination:

```
Content-Encoding: gzip
Content-Encoding: aws-chunked
```

is interpreted as:

```
Content-Encoding: gzip
```

instead of:

```
Content-Encoding: gzip,aws-chunked
```

It fails both 1. the signature check and 2. the streaming check.

## Proposed fix

 - Taking into account multi-value headers when building Canonical Request (validated with hugo deploy + AWS SDK v2)
 - Taking into account multi-value headers (both comma separated and HeaderEntry separated) when removing `aws-chunked` (validated with hugo deploy + AWS SDK v2)

## Full explanation

Currently, `hugo deploy` on version `hugo v0.152.2` or more recent uses AWS SDK v2 only and supports for sending gzipped content.
That's configured with a matcher like that:

```yaml
deployment:
  matchers:
    - pattern: "^.+\\.(woff2|woff|svg|ttf|otf|eot|js|css)$"
      cacheControl: "max-age=31536000, no-transform, public"
      gzip: true  # <-------- here
```

Also, with SDK v2, hugo is streaming all of its files.
Thus, it sends that kind of requests:

```python
Request {
  method: PUT,
  uri: /sebou/pagefind/pagefind.js?x-id=PutObject,
  version: HTTP/1.1,
  headers: {
    "host": "localhost",
    "user-agent": "aws-sdk-go-v2/1.39.2 ua/2.1 os/linux lang/go#1.25.6 md/GOOS#linux md/GOARCH#amd64 api/s3#1.84.0 ft/s3-transfer m/E,G,Z,g",
    "content-length": "10026",
    "accept-encoding": "identity",
    "amz-sdk-invocation-id": "aed6df34-a67c-4bab-b63b-2b3777b751a0",
    "amz-sdk-request": "attempt=1; max=3",
    "authorization": "AWS4-HMAC-SHA256 Credential=GKxxxxx/20260227/garage/s3/aws4_request, SignedHeaders=accept-encoding;amz-sdk-invocation-id;amz-sdk-request;cache-control;content-encoding;content-length;content-type;host;x-amz-content-sha256;x-amz-date;x-amz-decoded-content-length;x-amz-meta-md5chksum;x-amz-trailer, Signature=76cd9b77f693ca89c2e6dd2a4dc55f83d4a82eca0f563d9d095ff96076f7b057",
    "cache-control": "max-age=31536000, no-transform, public",
    "content-encoding": "gzip",                                           # <---- see here 1st instance of Content-Encoding
    "content-encoding": "aws-chunked",                                    # <---- 2nd instance of Content-Encoding
    "content-type": "text/javascript",
    "via": "2.0 Caddy",
    "x-amz-content-sha256": "STREAMING-UNSIGNED-PAYLOAD-TRAILER",
    "x-amz-date": "20260227T132212Z",
    "x-amz-decoded-content-length": "9982",
    "x-amz-meta-md5chksum": "aad88ac0bf704e91584b8d9ad9796670",
    "x-amz-trailer": "x-amz-checksum-crc32",
    "x-forwarded-for": "::1",
    "x-forwarded-host": "localhost",
    "x-forwarded-proto": "https"
  },
  body: Body(Streaming)
}
```

But our canonical request function only calls `HeaderMap.get()` that returns only the 1st value and not `HeaderMap.get_all()` that returns all the values for a header.
Leading to the following invalid `CanonicalRequest` value:

```python
PUT
/sebou/pagefind/pagefind.js
x-id=PutObject
accept-encoding:identity
amz-sdk-invocation-id:aed6df34-a67c-4bab-b63b-2b3777b751a0
amz-sdk-request:attempt=1; max=3
cache-control:max-age=31536000, no-transform, public
content-encoding:gzip                                                             # <----- see here, we kept only gzip and dropped aws-chunked
content-length:10026
content-type:text/javascript
host:localhost
x-amz-content-sha256:STREAMING-UNSIGNED-PAYLOAD-TRAILER
x-amz-date:20260227T132212Z
x-amz-decoded-content-length:9982
x-amz-meta-md5chksum:aad88ac0bf704e91584b8d9ad9796670
x-amz-trailer:x-amz-checksum-crc32

accept-encoding;amz-sdk-invocation-id;amz-sdk-request;cache-control;content-encoding;content-length;content-type;host;x-amz-content-sha256;x-amz-date;x-amz-decoded-content-length;x-amz-meta-md5chksum;x-amz-trailer
```

Amazon is crystal clear that, instead of dropping the other values, we should concatenate them with a comma:

![20260227_17h26m20s_grim](/attachments/e3edf7bf-7dff-43d7-80d9-cf276ae94ed5)

https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html#create-canonical-request
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/1369
Reviewed-by: Alex <lx@deuxfleurs.fr>
Co-authored-by: Quentin Dufour <quentin@deuxfleurs.fr>
Co-committed-by: Quentin Dufour <quentin@deuxfleurs.fr>
This commit is contained in:
Quentin Dufour
2026-02-27 18:02:31 +00:00
committed by Alex
parent 668dfea4e2
commit f796df8c34
2 changed files with 61 additions and 16 deletions

View File

@@ -81,7 +81,7 @@ fn parse_x_amz_content_sha256(header: Option<&str>) -> Result<ContentSha256Heade
_ => {
return Err(Error::bad_request(
"invalid or unsupported x-amz-content-sha256",
))
));
}
};
Ok(ContentSha256Header::StreamingPayload { trailer, signed })
@@ -357,11 +357,18 @@ pub fn canonical_request(
let canonical_header_string = signed_headers
.iter()
.map(|name| {
let value = headers
.get(name)
let all_values = headers.get_all(name);
let mut iter_values = all_values.iter();
let base_value = iter_values
.next()
.ok_or_bad_request(format!("signed header `{}` is not present", name))?;
let value = std::str::from_utf8(value.as_bytes())?;
Ok(format!("{}:{}", name.as_str(), value.trim()))
let mut built_string = std::str::from_utf8(base_value.as_bytes())?.to_string();
for extend_value in iter_values {
let extend_string = std::str::from_utf8(extend_value.as_bytes())?;
built_string.push(',');
built_string.push_str(extend_string);
}
Ok(format!("{}:{}", name.as_str(), built_string.trim()))
})
.collect::<Result<Vec<String>, Error>>()?
.join("\n");

View File

@@ -1,3 +1,4 @@
use std::iter::FromIterator;
use std::pin::Pin;
use std::sync::Mutex;
@@ -5,7 +6,7 @@ use chrono::{DateTime, NaiveDateTime, TimeZone, Utc};
use futures::prelude::*;
use futures::task;
use hmac::Mac;
use http::header::{HeaderMap, HeaderValue, CONTENT_ENCODING};
use http::header::{Entry, HeaderMap, HeaderValue, CONTENT_ENCODING};
use hyper::body::{Bytes, Frame, Incoming as IncomingBody};
use hyper::Request;
@@ -42,15 +43,52 @@ pub fn parse_streaming_body(
// Remove the aws-chunked component in the content-encoding: header
// Note: this header is not properly sent by minio client, so don't fail
// if it is absent from the request.
if let Some(content_encoding) = req.headers_mut().remove(CONTENT_ENCODING) {
if let Some(rest) = content_encoding.as_bytes().strip_prefix(b"aws-chunked,") {
req.headers_mut()
.insert(CONTENT_ENCODING, HeaderValue::from_bytes(rest).unwrap());
} else if content_encoding != "aws-chunked" {
return Err(Error::bad_request(
"content-encoding does not contain aws-chunked for STREAMING-*-PAYLOAD",
));
let mut original_content_encoding = vec![];
if let Entry::Occupied(content_encoding) = req.headers_mut().entry(CONTENT_ENCODING) {
// 1. collect headers
let (_, vals) = content_encoding.remove_entry_mult();
original_content_encoding = Vec::from_iter(vals);
}
let mut header_initialized = false;
let mut chunked_found = false;
for enc_val in original_content_encoding.iter() {
// 2. clean each header value and reinject it.
let mut rebuilt_val = vec![];
for part in enc_val.as_bytes().split(|c| *c == b',') {
let trimmed_part = part.trim_ascii();
if trimmed_part == b"aws-chunked" {
chunked_found = true;
continue;
}
if !rebuilt_val.is_empty() {
rebuilt_val.push(b',');
}
rebuilt_val.extend_from_slice(trimmed_part);
}
if rebuilt_val.is_empty() {
// skip empty headers
continue;
}
if !header_initialized {
req.headers_mut().insert(
CONTENT_ENCODING,
HeaderValue::from_bytes(&rebuilt_val).unwrap(),
);
header_initialized = true;
} else {
req.headers_mut().append(
CONTENT_ENCODING,
HeaderValue::from_bytes(&rebuilt_val).unwrap(),
);
}
}
if !original_content_encoding.is_empty() && !chunked_found {
return Err(Error::bad_request(
"content-encoding does not contain aws-chunked for STREAMING-*-PAYLOAD",
));
}
// If trailer header is announced, add the calculation of the requested checksum
@@ -480,7 +518,7 @@ where
continue;
}
Some(Err(e)) => {
return Poll::Ready(Some(Err(StreamingPayloadError::Stream(e))))
return Poll::Ready(Some(Err(StreamingPayloadError::Stream(e))));
}
None => {
return Poll::Ready(Some(Err(StreamingPayloadError::message(
@@ -490,7 +528,7 @@ where
}
}
Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => {
return Poll::Ready(Some(Err(e)))
return Poll::Ready(Some(Err(e)));
}
};