diff --git a/autotest/gcore/vsicurl.py b/autotest/gcore/vsicurl.py index 45746a20f57b..6e0a3c08e7e2 100755 --- a/autotest/gcore/vsicurl.py +++ b/autotest/gcore/vsicurl.py @@ -233,11 +233,139 @@ def server(): webserver.server_stop(process, port) +############################################################################### +# Test regular redirection + + +@pytest.mark.parametrize( + "authorization_header_allowed", [None, "YES", "NO", "IF_SAME_HOST"] +) +def test_vsicurl_test_redirect(server, authorization_header_allowed): + + gdal.VSICurlClearCache() + + expected_headers = None + unexpected_headers = [] + if authorization_header_allowed != "NO": + expected_headers = {"Authorization": "Bearer xxx"} + else: + unexpected_headers = ["Authorization"] + + handler = webserver.SequentialHandler() + handler.add("GET", "/test_redirect/", 404) + handler.add( + "HEAD", + "/test_redirect/test.bin", + 301, + {"Location": "http://localhost:%d/redirected/test.bin" % server.port}, + expected_headers={"Authorization": "Bearer xxx"}, + ) + + # Curl always forwards Authorization if same server when handling itself + # the redirect, so this means that CPL_VSIL_CURL_AUTHORIZATION_HEADER_ALLOWED_IF_REDIRECT=NO + # is not honored for that particular request. 
To honour it, we would have + # to disable CURLOPT_FOLLOWLOCATION and implement it by hand + handler.add( + "HEAD", + "/redirected/test.bin", + 200, + {"Content-Length": "3"}, + expected_headers={"Authorization": "Bearer xxx"}, + ) + + handler.add( + "GET", + "/redirected/test.bin", + 200, + {"Content-Length": "3"}, + b"xyz", + expected_headers=expected_headers, + unexpected_headers=unexpected_headers, + ) + + options = {"GDAL_HTTP_HEADERS": "Authorization: Bearer xxx"} + if authorization_header_allowed: + options[ + "CPL_VSIL_CURL_AUTHORIZATION_HEADER_ALLOWED_IF_REDIRECT" + ] = authorization_header_allowed + with webserver.install_http_handler(handler), gdal.config_options(options): + f = gdal.VSIFOpenL( + "/vsicurl/http://localhost:%d/test_redirect/test.bin" % server.port, + "rb", + ) + assert f is not None + try: + assert gdal.VSIFReadL(1, 3, f) == b"xyz" + finally: + gdal.VSIFCloseL(f) + + +############################################################################### +# Test redirection to a different server + + +@pytest.mark.parametrize( + "authorization_header_allowed", [None, "YES", "NO", "IF_SAME_HOST"] +) +def test_vsicurl_test_redirect_different_server(server, authorization_header_allowed): + + gdal.VSICurlClearCache() + + expected_headers = None + unexpected_headers = [] + if authorization_header_allowed == "YES": + expected_headers = {"Authorization": "Bearer xxx"} + else: + unexpected_headers = ["Authorization"] + + handler = webserver.SequentialHandler() + handler.add("GET", "/test_redirect/", 404) + handler.add( + "HEAD", + "/test_redirect/test.bin", + 301, + {"Location": "http://127.0.0.1:%d/redirected/test.bin" % server.port}, + expected_headers={"Authorization": "Bearer xxx"}, + ) + handler.add( + "HEAD", + "/redirected/test.bin", + 200, + {"Content-Length": "3"}, + expected_headers=expected_headers, + unexpected_headers=unexpected_headers, + ) + handler.add( + "GET", + "/redirected/test.bin", + 200, + {"Content-Length": "3"}, + b"xyz", + 
expected_headers=expected_headers, + unexpected_headers=unexpected_headers, + ) + + options = {"GDAL_HTTP_HEADERS": "Authorization: Bearer xxx"} + if authorization_header_allowed: + options[ + "CPL_VSIL_CURL_AUTHORIZATION_HEADER_ALLOWED_IF_REDIRECT" + ] = authorization_header_allowed + with webserver.install_http_handler(handler), gdal.config_options(options): + f = gdal.VSIFOpenL( + "/vsicurl/http://localhost:%d/test_redirect/test.bin" % server.port, + "rb", + ) + try: + assert gdal.VSIFReadL(1, 3, f) == b"xyz" + finally: + gdal.VSIFCloseL(f) + + ############################################################################### # Test redirection with Expires= type of signed URLs -def test_vsicurl_test_redirect(server): +def test_vsicurl_test_redirect_with_expires(server): gdal.VSICurlClearCache() diff --git a/doc/source/user/virtual_file_systems.rst b/doc/source/user/virtual_file_systems.rst index 860025e30452..171525af0cbd 100644 --- a/doc/source/user/virtual_file_systems.rst +++ b/doc/source/user/virtual_file_systems.rst @@ -413,6 +413,14 @@ As an alternative, starting with GDAL 3.6, the :config:`GDAL_HTTP_HEADERS` configuration option can also be used to specify headers. :config:`CPL_CURL_VERBOSE=YES` allows one to see them and more, when combined with ``--debug``. +Starting with GDAL 3.10, the ``Authorization`` header is no longer automatically forwarded when a redirection to a different host is followed. +That behavior can be configured by setting the :config:`CPL_VSIL_CURL_AUTHORIZATION_HEADER_ALLOWED_IF_REDIRECT` +configuration option to: + +- ``NO`` to always disable forwarding of the ``Authorization`` header. +- ``YES`` to always enable forwarding of the ``Authorization`` header. +- ``IF_SAME_HOST`` (the default) to enable forwarding of the ``Authorization`` header only if the redirection is to the same host. 
+ Starting with GDAL 2.3, the :config:`GDAL_HTTP_MAX_RETRY` (number of attempts) and :config:`GDAL_HTTP_RETRY_DELAY` (in seconds) configuration option can be set, so that request retries are done in case of HTTP errors 429, 502, 503 or 504. Starting with GDAL 3.6, the following configuration options control the TCP keep-alive functionality (cf https://daniel.haxx.se/blog/2020/02/10/curl-ootw-keepalive-time/ for a detailed explanation): diff --git a/port/cpl_http.cpp b/port/cpl_http.cpp index 471adadbc20b..7d8751036744 100644 --- a/port/cpl_http.cpp +++ b/port/cpl_http.cpp @@ -2237,14 +2237,22 @@ void *CPLHTTPSetOptions(void *pcurl, const char *pszURL, CURLAUTH_ANYSAFE); else if (EQUAL(pszHttpAuth, "BEARER")) { - const char *pszBearer = CSLFetchNameValue(papszOptions, "HTTP_BEARER"); - if (pszBearer == nullptr) - pszBearer = CPLGetConfigOption("GDAL_HTTP_BEARER", nullptr); - if (pszBearer != nullptr) - unchecked_curl_easy_setopt(http_handle, CURLOPT_XOAUTH2_BEARER, - pszBearer); - unchecked_curl_easy_setopt(http_handle, CURLOPT_HTTPAUTH, - CURLAUTH_BEARER); + const char *pszAuthorizationHeaderAllowed = CSLFetchNameValueDef( + papszOptions, "AUTHORIZATION_HEADER_ALLOWED", "YES"); + const bool bAuthorizationHeaderAllowed = + CPLTestBool(pszAuthorizationHeaderAllowed); + if (bAuthorizationHeaderAllowed) + { + const char *pszBearer = + CSLFetchNameValue(papszOptions, "HTTP_BEARER"); + if (pszBearer == nullptr) + pszBearer = CPLGetConfigOption("GDAL_HTTP_BEARER", nullptr); + if (pszBearer != nullptr) + unchecked_curl_easy_setopt(http_handle, CURLOPT_XOAUTH2_BEARER, + pszBearer); + unchecked_curl_easy_setopt(http_handle, CURLOPT_HTTPAUTH, + CURLAUTH_BEARER); + } } else if (EQUAL(pszHttpAuth, "NEGOTIATE")) unchecked_curl_easy_setopt(http_handle, CURLOPT_HTTPAUTH, @@ -2365,6 +2373,15 @@ void *CPLHTTPSetOptions(void *pcurl, const char *pszURL, 1L); unchecked_curl_easy_setopt(http_handle, CURLOPT_FOLLOWLOCATION, 1); + const char *pszUnrestrictedAuth = CPLGetConfigOption( + 
"CPL_VSIL_CURL_AUTHORIZATION_HEADER_ALLOWED_IF_REDIRECT", + "IF_SAME_HOST"); + if (!EQUAL(pszUnrestrictedAuth, "IF_SAME_HOST") && + CPLTestBool(pszUnrestrictedAuth)) + { + unchecked_curl_easy_setopt(http_handle, CURLOPT_UNRESTRICTED_AUTH, 1); + } + unchecked_curl_easy_setopt(http_handle, CURLOPT_MAXREDIRS, 10); unchecked_curl_easy_setopt(http_handle, CURLOPT_POSTREDIR, CURL_REDIR_POST_ALL); @@ -2664,6 +2681,11 @@ void *CPLHTTPSetOptions(void *pcurl, const char *pszURL, } if (!bHeadersDone) { + const char *pszAuthorizationHeaderAllowed = CSLFetchNameValueDef( + papszOptions, "AUTHORIZATION_HEADER_ALLOWED", "YES"); + const bool bAuthorizationHeaderAllowed = + CPLTestBool(pszAuthorizationHeaderAllowed); + // We accept both raw headers with \r\n as a separator, or as // a comma separated list of foo: bar values. const CPLStringList aosTokens( @@ -2672,7 +2694,11 @@ void *CPLHTTPSetOptions(void *pcurl, const char *pszURL, : CSLTokenizeString2(pszHeaders, ",", CSLT_HONOURSTRINGS)); for (int i = 0; i < aosTokens.size(); ++i) { - headers = curl_slist_append(headers, aosTokens[i]); + if (bAuthorizationHeaderAllowed || + !STARTS_WITH_CI(aosTokens[i], "Authorization:")) + { + headers = curl_slist_append(headers, aosTokens[i]); + } } } } diff --git a/port/cpl_vsil_curl.cpp b/port/cpl_vsil_curl.cpp index 548728180154..ccd324333232 100644 --- a/port/cpl_vsil_curl.cpp +++ b/port/cpl_vsil_curl.cpp @@ -625,6 +625,7 @@ void VSICURLInitWriteFuncStruct(cpl::WriteFuncStruct *psStruct, VSILFILE *fp, psStruct->nStartOffset = 0; psStruct->nEndOffset = 0; psStruct->nHTTPCode = 0; + psStruct->nFirstHTTPCode = 0; psStruct->nContentLength = 0; psStruct->bFoundContentRange = false; psStruct->bError = false; @@ -667,7 +668,10 @@ size_t VSICurlHandleWriteFunc(void *buffer, size_t count, size_t nmemb, char *pszSpace = strchr(pszLine, ' '); if (pszSpace) { - psStruct->nHTTPCode = atoi(pszSpace + 1); + const int nHTTPCode = atoi(pszSpace + 1); + if (psStruct->nFirstHTTPCode == 0) + 
psStruct->nFirstHTTPCode = nHTTPCode; + psStruct->nHTTPCode = nHTTPCode; } } else if (STARTS_WITH_CI(pszLine, "Content-Length: ")) @@ -1227,8 +1231,21 @@ vsi_l_offset VSICurlHandle::GetFileSizeOrHeaders(bool bSetError, if (!osEffectiveURL.empty() && strstr(osEffectiveURL.c_str(), osURL.c_str()) == nullptr) { - CPLDebug(poFS->GetDebugKey(), "Effective URL: %s", - osEffectiveURL.c_str()); + // Moved permanently ? + if (sWriteFuncHeaderData.nFirstHTTPCode == 301) + { + CPLDebug(poFS->GetDebugKey(), + "Using effective URL %s permanently", + osEffectiveURL.c_str()); + oFileProp.osRedirectURL = osEffectiveURL; + poFS->SetCachedFileProp(m_pszURL, oFileProp); + } + else + { + CPLDebug(poFS->GetDebugKey(), + "Using effective URL %s temporarily", + osEffectiveURL.c_str()); + } // Is this is a redirect to a S3 URL? if (VSICurlIsS3LikeSignedURL(osEffectiveURL.c_str()) && @@ -1587,7 +1604,9 @@ vsi_l_offset VSICurlHandle::Tell() /* GetRedirectURLIfValid() */ /************************************************************************/ -std::string VSICurlHandle::GetRedirectURLIfValid(bool &bHasExpired) const +std::string +VSICurlHandle::GetRedirectURLIfValid(bool &bHasExpired, + CPLStringList &aosHTTPOptions) const { bHasExpired = false; poFS->GetCachedFileProp(m_pszURL, oFileProp); @@ -1619,6 +1638,39 @@ std::string VSICurlHandle::GetRedirectURLIfValid(bool &bHasExpired) const bHasExpired = false; } + if (m_pszURL != osURL) + { + const char *pszAuthorizationHeaderAllowed = CPLGetConfigOption( + "CPL_VSIL_CURL_AUTHORIZATION_HEADER_ALLOWED_IF_REDIRECT", + "IF_SAME_HOST"); + if (EQUAL(pszAuthorizationHeaderAllowed, "IF_SAME_HOST")) + { + const auto ExtractServer = [](const std::string &s) + { + size_t afterHTTPPos = 0; + if (STARTS_WITH(s.c_str(), "http://")) + afterHTTPPos = strlen("http://"); + else if (STARTS_WITH(s.c_str(), "https://")) + afterHTTPPos = strlen("https://"); + const auto posSlash = s.find('/', afterHTTPPos); + if (posSlash != std::string::npos) + return 
s.substr(afterHTTPPos, posSlash - afterHTTPPos); + else + return s.substr(afterHTTPPos); + }; + + if (ExtractServer(osURL) != ExtractServer(m_pszURL)) + { + aosHTTPOptions.SetNameValue("AUTHORIZATION_HEADER_ALLOWED", + "NO"); + } + } + else if (!CPLTestBool(pszAuthorizationHeaderAllowed)) + { + aosHTTPOptions.SetNameValue("AUTHORIZATION_HEADER_ALLOWED", "NO"); + } + } + return osURL; } @@ -1793,7 +1845,9 @@ std::string VSICurlHandle::DownloadRegion(const vsi_l_offset startOffset, ManagePlanetaryComputerSigning(); bool bHasExpired = false; - std::string osURL(GetRedirectURLIfValid(bHasExpired)); + + CPLStringList aosHTTPOptions(m_aosHTTPOptions); + std::string osURL(GetRedirectURLIfValid(bHasExpired, aosHTTPOptions)); bool bUsedRedirect = osURL != m_pszURL; WriteFuncStruct sWriteFuncData; @@ -1803,7 +1857,7 @@ std::string VSICurlHandle::DownloadRegion(const vsi_l_offset startOffset, retry: CURL *hCurlHandle = curl_easy_init(); struct curl_slist *headers = - VSICurlSetOptions(hCurlHandle, osURL.c_str(), m_aosHTTPOptions.List()); + VSICurlSetOptions(hCurlHandle, osURL.c_str(), aosHTTPOptions.List()); if (!AllowAutomaticRedirection()) unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FOLLOWLOCATION, 0); @@ -2366,7 +2420,9 @@ int VSICurlHandle::ReadMultiRange(int const nRanges, void **const ppData, ManagePlanetaryComputerSigning(); bool bHasExpired = false; - std::string osURL(GetRedirectURLIfValid(bHasExpired)); + + CPLStringList aosHTTPOptions(m_aosHTTPOptions); + std::string osURL(GetRedirectURLIfValid(bHasExpired, aosHTTPOptions)); if (bHasExpired) { return VSIVirtualHandle::ReadMultiRange(nRanges, ppData, panOffsets, @@ -2431,7 +2487,7 @@ int VSICurlHandle::ReadMultiRange(int const nRanges, void **const ppData, // unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_PIPEWAIT, 1); struct curl_slist *headers = VSICurlSetOptions( - hCurlHandle, osURL.c_str(), m_aosHTTPOptions.List()); + hCurlHandle, osURL.c_str(), aosHTTPOptions.List()); 
VSICURLInitWriteFuncStruct(&asWriteFuncData[iRequest], this, pfnReadCbk, pReadCbkUserData); @@ -3023,18 +3079,19 @@ size_t VSICurlHandle::PRead(void *pBuffer, size_t nSize, NetworkStatisticsFile oContextFile(m_osFilename.c_str()); NetworkStatisticsAction oContextAction("PRead"); + CPLStringList aosHTTPOptions(m_aosHTTPOptions); std::string osURL; { std::lock_guard oLock(m_oMutex); ManagePlanetaryComputerSigning(); bool bHasExpired; - osURL = GetRedirectURLIfValid(bHasExpired); + osURL = GetRedirectURLIfValid(bHasExpired, aosHTTPOptions); } CURL *hCurlHandle = curl_easy_init(); struct curl_slist *headers = - VSICurlSetOptions(hCurlHandle, osURL.c_str(), m_aosHTTPOptions.List()); + VSICurlSetOptions(hCurlHandle, osURL.c_str(), aosHTTPOptions.List()); WriteFuncStruct sWriteFuncData; VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr, nullptr); @@ -3194,7 +3251,9 @@ void VSICurlHandle::AdviseRead(int nRanges, const vsi_l_offset *panOffsets, ManagePlanetaryComputerSigning(); bool bHasExpired = false; - const std::string l_osURL(GetRedirectURLIfValid(bHasExpired)); + CPLStringList aosHTTPOptions(m_aosHTTPOptions); + const std::string l_osURL( + GetRedirectURLIfValid(bHasExpired, aosHTTPOptions)); if (bHasExpired) { return; @@ -3261,7 +3320,7 @@ void VSICurlHandle::AdviseRead(int nRanges, const vsi_l_offset *panOffsets, static_cast(m_aoAdviseReadRanges.size())); #endif - const auto task = [this](const std::string &osURL) + const auto task = [this, aosHTTPOptions](const std::string &osURL) { CURLM *hMultiHandle = curl_multi_init(); @@ -3309,7 +3368,7 @@ void VSICurlHandle::AdviseRead(int nRanges, const vsi_l_offset *panOffsets, // unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_PIPEWAIT, 1); struct curl_slist *headers = VSICurlSetOptions( - hCurlHandle, osURL.c_str(), m_aosHTTPOptions.List()); + hCurlHandle, osURL.c_str(), aosHTTPOptions.List()); VSICURLInitWriteFuncStruct(&asWriteFuncData[i], this, pfnReadCbk, pReadCbkUserData); diff --git 
a/port/cpl_vsil_curl_class.h b/port/cpl_vsil_curl_class.h index c4da8b280bc4..d91e15cc20af 100644 --- a/port/cpl_vsil_curl_class.h +++ b/port/cpl_vsil_curl_class.h @@ -116,7 +116,8 @@ struct WriteFuncStruct bool bMultiRange = false; vsi_l_offset nStartOffset = 0; vsi_l_offset nEndOffset = 0; - int nHTTPCode = 0; + int nHTTPCode = 0; // last status code seen (after any redirect) + int nFirstHTTPCode = 0; // first status code seen (the redirect's, if any) vsi_l_offset nContentLength = 0; bool bFoundContentRange = false; bool bError = false; @@ -422,7 +423,8 @@ class VSICurlHandle : public VSIVirtualHandle int ReadMultiRangeSingleGet(int nRanges, void **ppData, const vsi_l_offset *panOffsets, const size_t *panSizes); - std::string GetRedirectURLIfValid(bool &bHasExpired) const; + std::string GetRedirectURLIfValid(bool &bHasExpired, + CPLStringList &aosHTTPOptions) const; void UpdateRedirectInfo(CURL *hCurlHandle, const WriteFuncStruct &sWriteFuncHeaderData);