blob: b200ea3cf6ac3e51ace56d857b10241586fa66ad [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "net/dns/dns_session.h"
6
avi65168052015-12-01 19:27:077#include <stdint.h>
8
9#include <limits>
10
[email protected]7556ea22011-12-08 19:29:1511#include "base/bind.h"
[email protected]a6c84f42013-06-07 20:39:3812#include "base/lazy_instance.h"
Avi Drissman13fc8932015-12-20 04:40:4613#include "base/macros.h"
asvitkinec3c93722015-06-17 14:48:3714#include "base/metrics/histogram_macros.h"
[email protected]ae1b30b2013-05-23 23:06:0315#include "base/metrics/sample_vector.h"
[email protected]120d38d2012-12-14 01:42:3216#include "base/rand_util.h"
17#include "base/stl_util.h"
[email protected]66e96c42013-06-28 15:20:3118#include "base/time/time.h"
[email protected]7556ea22011-12-08 19:29:1519#include "net/base/ip_endpoint.h"
[email protected]120d38d2012-12-14 01:42:3220#include "net/base/net_errors.h"
[email protected]7556ea22011-12-08 19:29:1521#include "net/dns/dns_config_service.h"
[email protected]120d38d2012-12-14 01:42:3222#include "net/dns/dns_socket_pool.h"
[email protected]bdb65982012-12-20 20:44:5923#include "net/socket/stream_socket.h"
24#include "net/udp/datagram_client_socket.h"
[email protected]7556ea22011-12-08 19:29:1525
26namespace net {
27
namespace {

// Ceiling, in milliseconds, that computed timeouts are never allowed to pass.
const unsigned int kMaxTimeoutMs = 5000u;
// Floor, in milliseconds, for computed timeouts; guards against a very small
// value when we are talking to a local DNS proxy.
const unsigned int kMinTimeoutMs = 10u;

// How many buckets the histogram of observed RTTs has.
const size_t kRTTBucketCount = 100;
// Percentile of the RTT histogram that is used as the retransmission timeout.
const unsigned int kRTOPercentile = 99u;

}  // namespace
39
[email protected]a6c84f42013-06-07 20:39:3840// Runtime statistics of DNS server.
41struct DnsSession::ServerStats {
42 ServerStats(base::TimeDelta rtt_estimate_param, RttBuckets* buckets)
43 : last_failure_count(0), rtt_estimate(rtt_estimate_param) {
44 rtt_histogram.reset(new base::SampleVector(buckets));
[email protected]a144bd22013-07-29 21:53:1045 // Seed histogram with 2 samples at |rtt_estimate| timeout.
pkasting6b68a162014-12-01 22:10:2946 rtt_histogram->Accumulate(
47 static_cast<base::HistogramBase::Sample>(rtt_estimate.InMilliseconds()),
48 2);
[email protected]a6c84f42013-06-07 20:39:3849 }
50
51 // Count of consecutive failures after last success.
52 int last_failure_count;
53
54 // Last time when server returned failure or timeout.
55 base::Time last_failure;
56 // Last time when server returned success.
57 base::Time last_success;
58
59 // Estimated RTT using moving average.
60 base::TimeDelta rtt_estimate;
61 // Estimated error in the above.
62 base::TimeDelta rtt_deviation;
63
64 // A histogram of observed RTT .
65 scoped_ptr<base::SampleVector> rtt_histogram;
66
67 DISALLOW_COPY_AND_ASSIGN(ServerStats);
68};
69
70// static
71base::LazyInstance<DnsSession::RttBuckets>::Leaky DnsSession::rtt_buckets_ =
72 LAZY_INSTANCE_INITIALIZER;
73
74DnsSession::RttBuckets::RttBuckets() : base::BucketRanges(kRTTBucketCount + 1) {
[email protected]15ce3842013-06-27 14:38:4575 base::Histogram::InitializeBucketRanges(1, 5000, this);
[email protected]a6c84f42013-06-07 20:39:3876}
77
[email protected]120d38d2012-12-14 01:42:3278DnsSession::SocketLease::SocketLease(scoped_refptr<DnsSession> session,
79 unsigned server_index,
80 scoped_ptr<DatagramClientSocket> socket)
81 : session_(session), server_index_(server_index), socket_(socket.Pass()) {}
82
83DnsSession::SocketLease::~SocketLease() {
84 session_->FreeSocket(server_index_, socket_.Pass());
85}
86
[email protected]7556ea22011-12-08 19:29:1587DnsSession::DnsSession(const DnsConfig& config,
[email protected]120d38d2012-12-14 01:42:3288 scoped_ptr<DnsSocketPool> socket_pool,
[email protected]7556ea22011-12-08 19:29:1589 const RandIntCallback& rand_int_callback,
90 NetLog* net_log)
91 : config_(config),
[email protected]120d38d2012-12-14 01:42:3292 socket_pool_(socket_pool.Pass()),
avi65168052015-12-01 19:27:0793 rand_callback_(base::Bind(rand_int_callback,
94 0,
95 std::numeric_limits<uint16_t>::max())),
[email protected]7556ea22011-12-08 19:29:1596 net_log_(net_log),
[email protected]a6c84f42013-06-07 20:39:3897 server_index_(0) {
[email protected]120d38d2012-12-14 01:42:3298 socket_pool_->Initialize(&config_.nameservers, net_log);
[email protected]a6c84f42013-06-07 20:39:3899 UMA_HISTOGRAM_CUSTOM_COUNTS(
timvolodine23be97452014-09-26 15:44:01100 "AsyncDNS.ServerCount", config_.nameservers.size(), 0, 10, 11);
[email protected]ae1b30b2013-05-23 23:06:03101 for (size_t i = 0; i < config_.nameservers.size(); ++i) {
olli.raulaf2cfbf312015-11-16 07:40:42102 server_stats_.push_back(make_scoped_ptr(
103 new ServerStats(config_.timeout, rtt_buckets_.Pointer())));
[email protected]ae1b30b2013-05-23 23:06:03104 }
[email protected]120d38d2012-12-14 01:42:32105}
106
[email protected]a6c84f42013-06-07 20:39:38107DnsSession::~DnsSession() {
108 RecordServerStats();
109}
[email protected]7556ea22011-12-08 19:29:15110
avi65168052015-12-01 19:27:07111uint16_t DnsSession::NextQueryId() const {
112 return static_cast<uint16_t>(rand_callback_.Run());
pkasting6b68a162014-12-01 22:10:29113}
[email protected]7556ea22011-12-08 19:29:15114
[email protected]a6c84f42013-06-07 20:39:38115unsigned DnsSession::NextFirstServerIndex() {
116 unsigned index = NextGoodServerIndex(server_index_);
[email protected]7556ea22011-12-08 19:29:15117 if (config_.rotate)
118 server_index_ = (server_index_ + 1) % config_.nameservers.size();
[email protected]d0d49dd82012-01-26 00:03:59119 return index;
[email protected]7556ea22011-12-08 19:29:15120}
121
[email protected]a6c84f42013-06-07 20:39:38122unsigned DnsSession::NextGoodServerIndex(unsigned server_index) {
123 unsigned index = server_index;
124 base::Time oldest_server_failure(base::Time::Now());
125 unsigned oldest_server_failure_index = 0;
126
127 UMA_HISTOGRAM_BOOLEAN("AsyncDNS.ServerIsGood",
128 server_stats_[server_index]->last_failure.is_null());
129
130 do {
131 base::Time cur_server_failure = server_stats_[index]->last_failure;
132 // If number of failures on this server doesn't exceed number of allowed
133 // attempts, return its index.
134 if (server_stats_[server_index]->last_failure_count < config_.attempts) {
135 return index;
136 }
137 // Track oldest failed server.
138 if (cur_server_failure < oldest_server_failure) {
139 oldest_server_failure = cur_server_failure;
140 oldest_server_failure_index = index;
141 }
142 index = (index + 1) % config_.nameservers.size();
143 } while (index != server_index);
144
145 // If we are here it means that there are no successful servers, so we have
146 // to use one that has failed oldest.
147 return oldest_server_failure_index;
148}
149
150void DnsSession::RecordServerFailure(unsigned server_index) {
151 UMA_HISTOGRAM_CUSTOM_COUNTS(
timvolodine23be97452014-09-26 15:44:01152 "AsyncDNS.ServerFailureIndex", server_index, 0, 10, 11);
[email protected]a6c84f42013-06-07 20:39:38153 ++(server_stats_[server_index]->last_failure_count);
154 server_stats_[server_index]->last_failure = base::Time::Now();
155}
156
157void DnsSession::RecordServerSuccess(unsigned server_index) {
158 if (server_stats_[server_index]->last_success.is_null()) {
159 UMA_HISTOGRAM_COUNTS_100("AsyncDNS.ServerFailuresAfterNetworkChange",
160 server_stats_[server_index]->last_failure_count);
161 } else {
162 UMA_HISTOGRAM_COUNTS_100("AsyncDNS.ServerFailuresBeforeSuccess",
163 server_stats_[server_index]->last_failure_count);
164 }
165 server_stats_[server_index]->last_failure_count = 0;
166 server_stats_[server_index]->last_failure = base::Time();
167 server_stats_[server_index]->last_success = base::Time::Now();
168}
169
[email protected]ae1b30b2013-05-23 23:06:03170void DnsSession::RecordRTT(unsigned server_index, base::TimeDelta rtt) {
[email protected]a6c84f42013-06-07 20:39:38171 DCHECK_LT(server_index, server_stats_.size());
[email protected]ae1b30b2013-05-23 23:06:03172
173 // For measurement, assume it is the first attempt (no backoff).
174 base::TimeDelta timeout_jacobson = NextTimeoutFromJacobson(server_index, 0);
175 base::TimeDelta timeout_histogram = NextTimeoutFromHistogram(server_index, 0);
176 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorJacobson", rtt - timeout_jacobson);
177 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorHistogram",
178 rtt - timeout_histogram);
179 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorJacobsonUnder",
180 timeout_jacobson - rtt);
181 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorHistogramUnder",
182 timeout_histogram - rtt);
183
184 // Jacobson/Karels algorithm for TCP.
185 // Using parameters: alpha = 1/8, delta = 1/4, beta = 4
[email protected]a6c84f42013-06-07 20:39:38186 base::TimeDelta& estimate = server_stats_[server_index]->rtt_estimate;
187 base::TimeDelta& deviation = server_stats_[server_index]->rtt_deviation;
[email protected]ae1b30b2013-05-23 23:06:03188 base::TimeDelta current_error = rtt - estimate;
189 estimate += current_error / 8; // * alpha
190 base::TimeDelta abs_error = base::TimeDelta::FromInternalValue(
191 std::abs(current_error.ToInternalValue()));
192 deviation += (abs_error - deviation) / 4; // * delta
193
194 // Histogram-based method.
pkasting6b68a162014-12-01 22:10:29195 server_stats_[server_index]->rtt_histogram->Accumulate(
196 static_cast<base::HistogramBase::Sample>(rtt.InMilliseconds()), 1);
[email protected]ae1b30b2013-05-23 23:06:03197}
198
199void DnsSession::RecordLostPacket(unsigned server_index, int attempt) {
200 base::TimeDelta timeout_jacobson =
201 NextTimeoutFromJacobson(server_index, attempt);
202 base::TimeDelta timeout_histogram =
203 NextTimeoutFromHistogram(server_index, attempt);
204 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutSpentJacobson", timeout_jacobson);
205 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutSpentHistogram", timeout_histogram);
206}
207
[email protected]a6c84f42013-06-07 20:39:38208void DnsSession::RecordServerStats() {
209 for (size_t index = 0; index < server_stats_.size(); ++index) {
210 if (server_stats_[index]->last_failure_count) {
211 if (server_stats_[index]->last_success.is_null()) {
212 UMA_HISTOGRAM_COUNTS("AsyncDNS.ServerFailuresWithoutSuccess",
213 server_stats_[index]->last_failure_count);
214 } else {
215 UMA_HISTOGRAM_COUNTS("AsyncDNS.ServerFailuresAfterSuccess",
216 server_stats_[index]->last_failure_count);
217 }
218 }
219 }
220}
221
222
[email protected]ae1b30b2013-05-23 23:06:03223base::TimeDelta DnsSession::NextTimeout(unsigned server_index, int attempt) {
[email protected]a144bd22013-07-29 21:53:10224 // Respect config timeout if it exceeds |kMaxTimeoutMs|.
225 if (config_.timeout.InMilliseconds() >= kMaxTimeoutMs)
226 return config_.timeout;
227 return NextTimeoutFromHistogram(server_index, attempt);
[email protected]7556ea22011-12-08 19:29:15228}
229
[email protected]120d38d2012-12-14 01:42:32230// Allocate a socket, already connected to the server address.
231scoped_ptr<DnsSession::SocketLease> DnsSession::AllocateSocket(
[email protected]ae1b30b2013-05-23 23:06:03232 unsigned server_index, const NetLog::Source& source) {
[email protected]120d38d2012-12-14 01:42:32233 scoped_ptr<DatagramClientSocket> socket;
234
235 socket = socket_pool_->AllocateSocket(server_index);
[email protected]dd946bb2013-06-12 22:53:01236 if (!socket.get())
237 return scoped_ptr<SocketLease>();
[email protected]120d38d2012-12-14 01:42:32238
[email protected]ae1b30b2013-05-23 23:06:03239 socket->NetLog().BeginEvent(NetLog::TYPE_SOCKET_IN_USE,
240 source.ToEventParametersCallback());
[email protected]120d38d2012-12-14 01:42:32241
242 SocketLease* lease = new SocketLease(this, server_index, socket.Pass());
243 return scoped_ptr<SocketLease>(lease);
244}
245
[email protected]bdb65982012-12-20 20:44:59246scoped_ptr<StreamSocket> DnsSession::CreateTCPSocket(
[email protected]ae1b30b2013-05-23 23:06:03247 unsigned server_index, const NetLog::Source& source) {
[email protected]bdb65982012-12-20 20:44:59248 return socket_pool_->CreateTCPSocket(server_index, source);
249}
250
[email protected]120d38d2012-12-14 01:42:32251// Release a socket.
[email protected]ae1b30b2013-05-23 23:06:03252void DnsSession::FreeSocket(unsigned server_index,
253 scoped_ptr<DatagramClientSocket> socket) {
[email protected]120d38d2012-12-14 01:42:32254 DCHECK(socket.get());
255
256 socket->NetLog().EndEvent(NetLog::TYPE_SOCKET_IN_USE);
257
258 socket_pool_->FreeSocket(server_index, socket.Pass());
259}
[email protected]7556ea22011-12-08 19:29:15260
[email protected]ae1b30b2013-05-23 23:06:03261base::TimeDelta DnsSession::NextTimeoutFromJacobson(unsigned server_index,
262 int attempt) {
[email protected]a6c84f42013-06-07 20:39:38263 DCHECK_LT(server_index, server_stats_.size());
[email protected]ae1b30b2013-05-23 23:06:03264
[email protected]a6c84f42013-06-07 20:39:38265 base::TimeDelta timeout = server_stats_[server_index]->rtt_estimate +
266 4 * server_stats_[server_index]->rtt_deviation;
[email protected]ae1b30b2013-05-23 23:06:03267
268 timeout = std::max(timeout, base::TimeDelta::FromMilliseconds(kMinTimeoutMs));
269
270 // The timeout doubles every full round.
271 unsigned num_backoffs = attempt / config_.nameservers.size();
272
273 return std::min(timeout * (1 << num_backoffs),
274 base::TimeDelta::FromMilliseconds(kMaxTimeoutMs));
275}
276
277base::TimeDelta DnsSession::NextTimeoutFromHistogram(unsigned server_index,
278 int attempt) {
[email protected]a6c84f42013-06-07 20:39:38279 DCHECK_LT(server_index, server_stats_.size());
[email protected]ae1b30b2013-05-23 23:06:03280
mostynb91e0da982015-01-20 19:17:27281 static_assert(std::numeric_limits<base::HistogramBase::Count>::is_signed,
282 "histogram base count assumed to be signed");
[email protected]ae1b30b2013-05-23 23:06:03283
284 // Use fixed percentile of observed samples.
[email protected]a6c84f42013-06-07 20:39:38285 const base::SampleVector& samples =
286 *server_stats_[server_index]->rtt_histogram;
287
[email protected]ae1b30b2013-05-23 23:06:03288 base::HistogramBase::Count total = samples.TotalCount();
289 base::HistogramBase::Count remaining_count = kRTOPercentile * total / 100;
290 size_t index = 0;
[email protected]a6c84f42013-06-07 20:39:38291 while (remaining_count > 0 && index < rtt_buckets_.Get().size()) {
[email protected]ae1b30b2013-05-23 23:06:03292 remaining_count -= samples.GetCountAtIndex(index);
293 ++index;
294 }
295
296 base::TimeDelta timeout =
[email protected]a6c84f42013-06-07 20:39:38297 base::TimeDelta::FromMilliseconds(rtt_buckets_.Get().range(index));
[email protected]ae1b30b2013-05-23 23:06:03298
299 timeout = std::max(timeout, base::TimeDelta::FromMilliseconds(kMinTimeoutMs));
300
301 // The timeout still doubles every full round.
302 unsigned num_backoffs = attempt / config_.nameservers.size();
303
304 return std::min(timeout * (1 << num_backoffs),
305 base::TimeDelta::FromMilliseconds(kMaxTimeoutMs));
306}
307
[email protected]7556ea22011-12-08 19:29:15308} // namespace net