-
-
Notifications
You must be signed in to change notification settings - Fork 6.8k
Description
I did this
With the following program downloading 128MB of gzipped zeros from my website, libcurl will fail http2 transfers with CURLE_TOO_LARGE
. Part of the problem here is that it is extremely easy to fill up the pause buffer if there is compression at play: a tiny amount of data transferred with gzip compression (of which the sample here is a pathological case, but it happens in less unreasonable cases too). I think that this most commonly happens inside of Lix where someone is running on a slow machine with an extremely fast internet connection (say, github actions) which is downloading especially huge files and is not eating up the data it's downloading very quickly so is giving a lot of pauses for flow control.
It's especially bad with compression due to the buffer in question being post decompression, whereas the window size control is applied to the compressed bytes. This is the part of the problem that feels like a curl bug and is relatively clearly not API misuse.
For curious reasons, you have to do two transfers to the same machine simultaneously to hit this problem, but the problem code below reproduces it 100% of the time on my machine at least.
This is a reduction of the Lix file transfer code with all fancy dependencies removed, so it is just C++ and libcurl; I could have written a C sample but it would likely be harder to read and even longer. This is the bug we are having with curl on our end: https://git.lix.systems/lix-project/lix/issues/662.
Compile the program below with c++ -std=c++20 $(pkg-config --libs --cflags libcurl) bad.cc -o bad
and run with ./bad
.
C++ sample program extracted from Lix
#include <cassert>
#include <condition_variable>
#include <curl/curl.h>
#include <thread>
#include <atomic>
#include <iostream>
#include <map>
#include <mutex>
#include <set>
#include <vector>
#include <memory>
class TransferItem2
{
std::mutex lk;
std::vector<char> buf1;
std::condition_variable cv;
bool done = false;
public:
std::unique_ptr<CURL, decltype([](auto * c) { curl_easy_cleanup(c); })> req;
static size_t writeCallbackWrapper(void * contents, size_t size, size_t nmemb, void * userp)
{
return static_cast<TransferItem2 *>(userp)->writeCallback(contents, size, nmemb);
}
size_t writeCallback(void *contents, size_t size, size_t nmemb)
{
const size_t realSize = size * nmemb;
std::lock_guard<std::mutex> _l(lk);
if (buf1.size() > 1024 * 1024) {
std::cout << "pause " << req.get() << "\n";
return CURL_WRITEFUNC_PAUSE;
}
// std::cout << "recv " << req.get() << " bytes: " << realSize << "\n";
buf1.insert(buf1.end(), realSize, realSize + size);
cv.notify_all();
return realSize;
}
// In a real application this would actually do something with the data.
bool consume()
{
std::unique_lock<std::mutex> g(lk);
if (done) return true;
if (buf1.empty()) {
cv.wait(g);
}
auto _ = std::move(buf1);
buf1 = {};
return done;
}
void finish(CURLcode code)
{
std::cout << "Finish item " << req.get() << " with code " << code << " error: " << curl_easy_strerror(code) << "\n";
done = true;
}
static int debugCallback(CURL * handle, curl_infotype type, char * data, size_t size, void * userptr)
{
if (type == CURLINFO_TEXT)
std::cout << "curl: " << std::string(data, size) << "\n";
return 0;
}
TransferItem2(std::string uri) : req(curl_easy_init()) {
curl_easy_setopt(req.get(), CURLOPT_VERBOSE, 1);
curl_easy_setopt(req.get(), CURLOPT_DEBUGFUNCTION, TransferItem2::debugCallback);
curl_easy_setopt(req.get(), CURLOPT_URL, uri.c_str());
curl_easy_setopt(req.get(), CURLOPT_FOLLOWLOCATION, 1L);
curl_easy_setopt(req.get(), CURLOPT_ACCEPT_ENCODING, ""); // all of them!
curl_easy_setopt(req.get(), CURLOPT_MAXREDIRS, 10);
curl_easy_setopt(req.get(), CURLOPT_NOSIGNAL, 1);
curl_easy_setopt(req.get(), CURLOPT_PIPEWAIT, 1);
curl_easy_setopt(req.get(), CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2TLS);
curl_easy_setopt(req.get(), CURLOPT_WRITEFUNCTION, TransferItem2::writeCallbackWrapper);
curl_easy_setopt(req.get(), CURLOPT_WRITEDATA, this);
curl_easy_setopt(req.get(), CURLOPT_NOPROGRESS, 1);
curl_easy_setopt(req.get(), CURLOPT_PROTOCOLS_STR, "http,https,ftp,ftps");
curl_easy_setopt(req.get(), CURLOPT_CONNECTTIMEOUT, 5);
curl_easy_setopt(req.get(), CURLOPT_LOW_SPEED_LIMIT, 1L);
curl_easy_setopt(req.get(), CURLOPT_LOW_SPEED_TIME, 300);
}
};
class Transferrer
{
std::unique_ptr<CURLM, decltype([](auto * m) { curl_multi_cleanup(m); })> curlm;
std::thread thread;
std::atomic<bool> stop = false;
std::map<CURL *, std::shared_ptr<TransferItem2>> items;
std::vector<std::shared_ptr<TransferItem2>> incoming;
std::set<std::shared_ptr<TransferItem2>> unpause;
std::mutex lk;
public:
void add(std::shared_ptr<TransferItem2> item)
{
auto _l = std::lock_guard(lk);
incoming.push_back(item);
}
void requestUnpause(std::shared_ptr<TransferItem2> item)
{
std::cout << "requestUnpause " << item->req.get() << "\n";
std::lock_guard<std::mutex> _l(lk);
unpause.insert(item);
}
bool consume(std::shared_ptr<TransferItem2> item)
{
requestUnpause(item);
return item->consume();
}
void transferThread()
{
while (!stop) {
int running;
CURLMcode mc = curl_multi_perform(curlm.get(), &running);
if (mc != CURLM_OK)
throw std::runtime_error("curl_multi_perform");
CURLMsg * msg;
int left;
while ((msg = curl_multi_info_read(curlm.get(), &left))) {
if (msg->msg == CURLMSG_DONE) {
auto i = items.find(msg->easy_handle);
assert(i != items.end());
i->second->finish(msg->data.result);
curl_multi_remove_handle(curlm.get(), i->second->req.get());
items.erase(i);
}
}
/* Wait for activity, including wakeup events. */
mc = curl_multi_poll(curlm.get(), nullptr, 0, 64, nullptr);
if (mc != CURLM_OK)
throw std::runtime_error("unexpected error from curl_multi_poll()");
{
auto unpause_ = ([&]() {
std::lock_guard _l(lk);
auto ret = std::move(this->unpause);
this->unpause.clear();
return ret;
})();
for (auto & item : unpause_) {
std::cout << "unpause " << item->req.get() << "\n";
curl_easy_pause(item->req.get(), CURLPAUSE_CONT);
}
}
{
auto _l = std::lock_guard(lk);
auto incoming_ = std::move(incoming);
incoming.clear();
for (auto && tf : incoming_) {
items.insert(std::make_pair(tf->req.get(), tf));
curl_multi_add_handle(curlm.get(), tf->req.get());
}
}
}
}
Transferrer() : curlm(curl_multi_init()) {
static std::once_flag globalInit;
std::call_once(globalInit, curl_global_init, CURL_GLOBAL_ALL);
curl_multi_setopt(curlm.get(), CURLMOPT_PIPELINING, CURLPIPE_MULTIPLEX);
curl_multi_setopt(curlm.get(), CURLMOPT_MAX_TOTAL_CONNECTIONS, 10);
thread = std::thread([&]() {
transferThread();
});
}
~Transferrer()
{
stop = true;
thread.join();
}
};
int main()
{
auto t1 = std::make_shared<TransferItem2>("https://jade.fyi/zeros.txt");
auto t2 = std::make_shared<TransferItem2>("https://jade.fyi/zeros.txt");
auto mgr = Transferrer{};
std::cout << "t1 is " << t1->req.get() << ", t2 is " << t2->req.get() << "\n";
// take a little byte out of t1 and t2 so that they are definitely in a data transfer phase
mgr.add(t1);
mgr.consume(t1);
mgr.add(t2);
mgr.consume(t2);
// hungrily take more bytes out of t1
while (!mgr.consume(t1)) {
std::cout << "omnomnom\n";
}
}
I expected the following
Curl should manage its window sizes such that it can deal with pauses on transfers with http2 with compression. It may require some redesign so that the buffer getting overly filled up is compressed data rather than decompressed data.
curl/libcurl version
curl 8.11.0 (x86_64-pc-linux-gnu) libcurl/8.11.0 OpenSSL/3.3.2 zlib/1.3.1 brotli/1.1.0 zstd/1.5.6 libidn2/2.3.7 libpsl/0.21.5 libssh2/1.11.
1 nghttp2/1.64.0
Release-Date: 2024-11-06
Protocols: dict file ftp ftps gopher gophers http https imap imaps ipfs ipns mqtt pop3 pop3s rtsp scp sftp smb smbs smtp smtps telnet tftp
Features: alt-svc AsynchDNS brotli GSS-API HSTS HTTP2 HTTPS-proxy IDN IPv6 Kerberos Largefile libz NTLM PSL SPNEGO SSL threadsafe TLS-SRP U
nixSockets zstd
operating system
This is happening on various kernels; the affected userspace code is from nixpkgs 9ecb50d2fae8680be74c08bb0a995c5383747f89 but it also happens on newer nixpkgs with Curl 8.11.1.
Linux tail-bot 6.12.12-1-lts #1 SMP PREEMPT_DYNAMIC Sat, 01 Feb 2025 18:47:29 +0000 x86_64 GNU/Linux