[email protected] | d0d49dd8 | 2012-01-26 00:03:59 | [diff] [blame] | 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
[email protected] | 7556ea2 | 2011-12-08 19:29:15 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "net/dns/dns_session.h" |
| 6 | |
| 7 | #include "base/basictypes.h" |
| 8 | #include "base/bind.h" |
[email protected] | a6c84f4 | 2013-06-07 20:39:38 | [diff] [blame] | 9 | #include "base/lazy_instance.h" |
[email protected] | ae1b30b | 2013-05-23 23:06:03 | [diff] [blame] | 10 | #include "base/metrics/histogram.h" |
| 11 | #include "base/metrics/sample_vector.h" |
[email protected] | 120d38d | 2012-12-14 01:42:32 | [diff] [blame] | 12 | #include "base/rand_util.h" |
| 13 | #include "base/stl_util.h" |
[email protected] | 66e96c4 | 2013-06-28 15:20:31 | [diff] [blame] | 14 | #include "base/time/time.h" |
[email protected] | 7556ea2 | 2011-12-08 19:29:15 | [diff] [blame] | 15 | #include "net/base/ip_endpoint.h" |
[email protected] | 120d38d | 2012-12-14 01:42:32 | [diff] [blame] | 16 | #include "net/base/net_errors.h" |
[email protected] | 7556ea2 | 2011-12-08 19:29:15 | [diff] [blame] | 17 | #include "net/dns/dns_config_service.h" |
[email protected] | 120d38d | 2012-12-14 01:42:32 | [diff] [blame] | 18 | #include "net/dns/dns_socket_pool.h" |
[email protected] | bdb6598 | 2012-12-20 20:44:59 | [diff] [blame] | 19 | #include "net/socket/stream_socket.h" |
| 20 | #include "net/udp/datagram_client_socket.h" |
[email protected] | 7556ea2 | 2011-12-08 19:29:15 | [diff] [blame] | 21 | |
| 22 | namespace net { |
| 23 | |
namespace {

// Upper bound, in milliseconds, applied to every computed timeout.
const unsigned kMaxTimeoutMs = 5000;
// Lower bound, in milliseconds, applied to every computed timeout. Guards
// against near-zero estimates, e.g. when talking to a local DNS proxy.
const unsigned kMinTimeoutMs = 10;

// Number of buckets in each per-server histogram of observed RTTs.
const size_t kRTTBucketCount = 100;
// Percentile of the RTT histogram that is used as the retransmission timeout.
const unsigned kRTOPercentile = 99;

}  // namespace
| 35 | |
[email protected] | a6c84f4 | 2013-06-07 20:39:38 | [diff] [blame] | 36 | // Runtime statistics of DNS server. |
| 37 | struct DnsSession::ServerStats { |
| 38 | ServerStats(base::TimeDelta rtt_estimate_param, RttBuckets* buckets) |
| 39 | : last_failure_count(0), rtt_estimate(rtt_estimate_param) { |
| 40 | rtt_histogram.reset(new base::SampleVector(buckets)); |
[email protected] | a144bd2 | 2013-07-29 21:53:10 | [diff] [blame^] | 41 | // Seed histogram with 2 samples at |rtt_estimate| timeout. |
| 42 | rtt_histogram->Accumulate(rtt_estimate.InMilliseconds(), 2); |
[email protected] | a6c84f4 | 2013-06-07 20:39:38 | [diff] [blame] | 43 | } |
| 44 | |
| 45 | // Count of consecutive failures after last success. |
| 46 | int last_failure_count; |
| 47 | |
| 48 | // Last time when server returned failure or timeout. |
| 49 | base::Time last_failure; |
| 50 | // Last time when server returned success. |
| 51 | base::Time last_success; |
| 52 | |
| 53 | // Estimated RTT using moving average. |
| 54 | base::TimeDelta rtt_estimate; |
| 55 | // Estimated error in the above. |
| 56 | base::TimeDelta rtt_deviation; |
| 57 | |
| 58 | // A histogram of observed RTT . |
| 59 | scoped_ptr<base::SampleVector> rtt_histogram; |
| 60 | |
| 61 | DISALLOW_COPY_AND_ASSIGN(ServerStats); |
| 62 | }; |
| 63 | |
| 64 | // static |
| 65 | base::LazyInstance<DnsSession::RttBuckets>::Leaky DnsSession::rtt_buckets_ = |
| 66 | LAZY_INSTANCE_INITIALIZER; |
| 67 | |
| 68 | DnsSession::RttBuckets::RttBuckets() : base::BucketRanges(kRTTBucketCount + 1) { |
[email protected] | 15ce384 | 2013-06-27 14:38:45 | [diff] [blame] | 69 | base::Histogram::InitializeBucketRanges(1, 5000, this); |
[email protected] | a6c84f4 | 2013-06-07 20:39:38 | [diff] [blame] | 70 | } |
| 71 | |
[email protected] | 120d38d | 2012-12-14 01:42:32 | [diff] [blame] | 72 | DnsSession::SocketLease::SocketLease(scoped_refptr<DnsSession> session, |
| 73 | unsigned server_index, |
| 74 | scoped_ptr<DatagramClientSocket> socket) |
| 75 | : session_(session), server_index_(server_index), socket_(socket.Pass()) {} |
| 76 | |
| 77 | DnsSession::SocketLease::~SocketLease() { |
| 78 | session_->FreeSocket(server_index_, socket_.Pass()); |
| 79 | } |
| 80 | |
[email protected] | 7556ea2 | 2011-12-08 19:29:15 | [diff] [blame] | 81 | DnsSession::DnsSession(const DnsConfig& config, |
[email protected] | 120d38d | 2012-12-14 01:42:32 | [diff] [blame] | 82 | scoped_ptr<DnsSocketPool> socket_pool, |
[email protected] | 7556ea2 | 2011-12-08 19:29:15 | [diff] [blame] | 83 | const RandIntCallback& rand_int_callback, |
| 84 | NetLog* net_log) |
| 85 | : config_(config), |
[email protected] | 120d38d | 2012-12-14 01:42:32 | [diff] [blame] | 86 | socket_pool_(socket_pool.Pass()), |
[email protected] | 7556ea2 | 2011-12-08 19:29:15 | [diff] [blame] | 87 | rand_callback_(base::Bind(rand_int_callback, 0, kuint16max)), |
| 88 | net_log_(net_log), |
[email protected] | a6c84f4 | 2013-06-07 20:39:38 | [diff] [blame] | 89 | server_index_(0) { |
[email protected] | 120d38d | 2012-12-14 01:42:32 | [diff] [blame] | 90 | socket_pool_->Initialize(&config_.nameservers, net_log); |
[email protected] | a6c84f4 | 2013-06-07 20:39:38 | [diff] [blame] | 91 | UMA_HISTOGRAM_CUSTOM_COUNTS( |
| 92 | "AsyncDNS.ServerCount", config_.nameservers.size(), 0, 10, 10); |
[email protected] | ae1b30b | 2013-05-23 23:06:03 | [diff] [blame] | 93 | for (size_t i = 0; i < config_.nameservers.size(); ++i) { |
[email protected] | a6c84f4 | 2013-06-07 20:39:38 | [diff] [blame] | 94 | server_stats_.push_back(new ServerStats(config_.timeout, |
| 95 | rtt_buckets_.Pointer())); |
[email protected] | ae1b30b | 2013-05-23 23:06:03 | [diff] [blame] | 96 | } |
[email protected] | 120d38d | 2012-12-14 01:42:32 | [diff] [blame] | 97 | } |
| 98 | |
[email protected] | a6c84f4 | 2013-06-07 20:39:38 | [diff] [blame] | 99 | DnsSession::~DnsSession() { |
| 100 | RecordServerStats(); |
| 101 | } |
[email protected] | 7556ea2 | 2011-12-08 19:29:15 | [diff] [blame] | 102 | |
[email protected] | ae1b30b | 2013-05-23 23:06:03 | [diff] [blame] | 103 | int DnsSession::NextQueryId() const { return rand_callback_.Run(); } |
[email protected] | 7556ea2 | 2011-12-08 19:29:15 | [diff] [blame] | 104 | |
[email protected] | a6c84f4 | 2013-06-07 20:39:38 | [diff] [blame] | 105 | unsigned DnsSession::NextFirstServerIndex() { |
| 106 | unsigned index = NextGoodServerIndex(server_index_); |
[email protected] | 7556ea2 | 2011-12-08 19:29:15 | [diff] [blame] | 107 | if (config_.rotate) |
| 108 | server_index_ = (server_index_ + 1) % config_.nameservers.size(); |
[email protected] | d0d49dd8 | 2012-01-26 00:03:59 | [diff] [blame] | 109 | return index; |
[email protected] | 7556ea2 | 2011-12-08 19:29:15 | [diff] [blame] | 110 | } |
| 111 | |
[email protected] | a6c84f4 | 2013-06-07 20:39:38 | [diff] [blame] | 112 | unsigned DnsSession::NextGoodServerIndex(unsigned server_index) { |
| 113 | unsigned index = server_index; |
| 114 | base::Time oldest_server_failure(base::Time::Now()); |
| 115 | unsigned oldest_server_failure_index = 0; |
| 116 | |
| 117 | UMA_HISTOGRAM_BOOLEAN("AsyncDNS.ServerIsGood", |
| 118 | server_stats_[server_index]->last_failure.is_null()); |
| 119 | |
| 120 | do { |
| 121 | base::Time cur_server_failure = server_stats_[index]->last_failure; |
| 122 | // If number of failures on this server doesn't exceed number of allowed |
| 123 | // attempts, return its index. |
| 124 | if (server_stats_[server_index]->last_failure_count < config_.attempts) { |
| 125 | return index; |
| 126 | } |
| 127 | // Track oldest failed server. |
| 128 | if (cur_server_failure < oldest_server_failure) { |
| 129 | oldest_server_failure = cur_server_failure; |
| 130 | oldest_server_failure_index = index; |
| 131 | } |
| 132 | index = (index + 1) % config_.nameservers.size(); |
| 133 | } while (index != server_index); |
| 134 | |
| 135 | // If we are here it means that there are no successful servers, so we have |
| 136 | // to use one that has failed oldest. |
| 137 | return oldest_server_failure_index; |
| 138 | } |
| 139 | |
| 140 | void DnsSession::RecordServerFailure(unsigned server_index) { |
| 141 | UMA_HISTOGRAM_CUSTOM_COUNTS( |
| 142 | "AsyncDNS.ServerFailureIndex", server_index, 0, 10, 10); |
| 143 | ++(server_stats_[server_index]->last_failure_count); |
| 144 | server_stats_[server_index]->last_failure = base::Time::Now(); |
| 145 | } |
| 146 | |
| 147 | void DnsSession::RecordServerSuccess(unsigned server_index) { |
| 148 | if (server_stats_[server_index]->last_success.is_null()) { |
| 149 | UMA_HISTOGRAM_COUNTS_100("AsyncDNS.ServerFailuresAfterNetworkChange", |
| 150 | server_stats_[server_index]->last_failure_count); |
| 151 | } else { |
| 152 | UMA_HISTOGRAM_COUNTS_100("AsyncDNS.ServerFailuresBeforeSuccess", |
| 153 | server_stats_[server_index]->last_failure_count); |
| 154 | } |
| 155 | server_stats_[server_index]->last_failure_count = 0; |
| 156 | server_stats_[server_index]->last_failure = base::Time(); |
| 157 | server_stats_[server_index]->last_success = base::Time::Now(); |
| 158 | } |
| 159 | |
[email protected] | ae1b30b | 2013-05-23 23:06:03 | [diff] [blame] | 160 | void DnsSession::RecordRTT(unsigned server_index, base::TimeDelta rtt) { |
[email protected] | a6c84f4 | 2013-06-07 20:39:38 | [diff] [blame] | 161 | DCHECK_LT(server_index, server_stats_.size()); |
[email protected] | ae1b30b | 2013-05-23 23:06:03 | [diff] [blame] | 162 | |
| 163 | // For measurement, assume it is the first attempt (no backoff). |
| 164 | base::TimeDelta timeout_jacobson = NextTimeoutFromJacobson(server_index, 0); |
| 165 | base::TimeDelta timeout_histogram = NextTimeoutFromHistogram(server_index, 0); |
| 166 | UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorJacobson", rtt - timeout_jacobson); |
| 167 | UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorHistogram", |
| 168 | rtt - timeout_histogram); |
| 169 | UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorJacobsonUnder", |
| 170 | timeout_jacobson - rtt); |
| 171 | UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorHistogramUnder", |
| 172 | timeout_histogram - rtt); |
| 173 | |
| 174 | // Jacobson/Karels algorithm for TCP. |
| 175 | // Using parameters: alpha = 1/8, delta = 1/4, beta = 4 |
[email protected] | a6c84f4 | 2013-06-07 20:39:38 | [diff] [blame] | 176 | base::TimeDelta& estimate = server_stats_[server_index]->rtt_estimate; |
| 177 | base::TimeDelta& deviation = server_stats_[server_index]->rtt_deviation; |
[email protected] | ae1b30b | 2013-05-23 23:06:03 | [diff] [blame] | 178 | base::TimeDelta current_error = rtt - estimate; |
| 179 | estimate += current_error / 8; // * alpha |
| 180 | base::TimeDelta abs_error = base::TimeDelta::FromInternalValue( |
| 181 | std::abs(current_error.ToInternalValue())); |
| 182 | deviation += (abs_error - deviation) / 4; // * delta |
| 183 | |
| 184 | // Histogram-based method. |
[email protected] | a6c84f4 | 2013-06-07 20:39:38 | [diff] [blame] | 185 | server_stats_[server_index]->rtt_histogram |
| 186 | ->Accumulate(rtt.InMilliseconds(), 1); |
[email protected] | ae1b30b | 2013-05-23 23:06:03 | [diff] [blame] | 187 | } |
| 188 | |
| 189 | void DnsSession::RecordLostPacket(unsigned server_index, int attempt) { |
| 190 | base::TimeDelta timeout_jacobson = |
| 191 | NextTimeoutFromJacobson(server_index, attempt); |
| 192 | base::TimeDelta timeout_histogram = |
| 193 | NextTimeoutFromHistogram(server_index, attempt); |
| 194 | UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutSpentJacobson", timeout_jacobson); |
| 195 | UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutSpentHistogram", timeout_histogram); |
| 196 | } |
| 197 | |
[email protected] | a6c84f4 | 2013-06-07 20:39:38 | [diff] [blame] | 198 | void DnsSession::RecordServerStats() { |
| 199 | for (size_t index = 0; index < server_stats_.size(); ++index) { |
| 200 | if (server_stats_[index]->last_failure_count) { |
| 201 | if (server_stats_[index]->last_success.is_null()) { |
| 202 | UMA_HISTOGRAM_COUNTS("AsyncDNS.ServerFailuresWithoutSuccess", |
| 203 | server_stats_[index]->last_failure_count); |
| 204 | } else { |
| 205 | UMA_HISTOGRAM_COUNTS("AsyncDNS.ServerFailuresAfterSuccess", |
| 206 | server_stats_[index]->last_failure_count); |
| 207 | } |
| 208 | } |
| 209 | } |
| 210 | } |
| 211 | |
| 212 | |
[email protected] | ae1b30b | 2013-05-23 23:06:03 | [diff] [blame] | 213 | base::TimeDelta DnsSession::NextTimeout(unsigned server_index, int attempt) { |
[email protected] | a144bd2 | 2013-07-29 21:53:10 | [diff] [blame^] | 214 | // Respect config timeout if it exceeds |kMaxTimeoutMs|. |
| 215 | if (config_.timeout.InMilliseconds() >= kMaxTimeoutMs) |
| 216 | return config_.timeout; |
| 217 | return NextTimeoutFromHistogram(server_index, attempt); |
[email protected] | 7556ea2 | 2011-12-08 19:29:15 | [diff] [blame] | 218 | } |
| 219 | |
[email protected] | 120d38d | 2012-12-14 01:42:32 | [diff] [blame] | 220 | // Allocate a socket, already connected to the server address. |
| 221 | scoped_ptr<DnsSession::SocketLease> DnsSession::AllocateSocket( |
[email protected] | ae1b30b | 2013-05-23 23:06:03 | [diff] [blame] | 222 | unsigned server_index, const NetLog::Source& source) { |
[email protected] | 120d38d | 2012-12-14 01:42:32 | [diff] [blame] | 223 | scoped_ptr<DatagramClientSocket> socket; |
| 224 | |
| 225 | socket = socket_pool_->AllocateSocket(server_index); |
[email protected] | dd946bb | 2013-06-12 22:53:01 | [diff] [blame] | 226 | if (!socket.get()) |
| 227 | return scoped_ptr<SocketLease>(); |
[email protected] | 120d38d | 2012-12-14 01:42:32 | [diff] [blame] | 228 | |
[email protected] | ae1b30b | 2013-05-23 23:06:03 | [diff] [blame] | 229 | socket->NetLog().BeginEvent(NetLog::TYPE_SOCKET_IN_USE, |
| 230 | source.ToEventParametersCallback()); |
[email protected] | 120d38d | 2012-12-14 01:42:32 | [diff] [blame] | 231 | |
| 232 | SocketLease* lease = new SocketLease(this, server_index, socket.Pass()); |
| 233 | return scoped_ptr<SocketLease>(lease); |
| 234 | } |
| 235 | |
[email protected] | bdb6598 | 2012-12-20 20:44:59 | [diff] [blame] | 236 | scoped_ptr<StreamSocket> DnsSession::CreateTCPSocket( |
[email protected] | ae1b30b | 2013-05-23 23:06:03 | [diff] [blame] | 237 | unsigned server_index, const NetLog::Source& source) { |
[email protected] | bdb6598 | 2012-12-20 20:44:59 | [diff] [blame] | 238 | return socket_pool_->CreateTCPSocket(server_index, source); |
| 239 | } |
| 240 | |
[email protected] | 120d38d | 2012-12-14 01:42:32 | [diff] [blame] | 241 | // Release a socket. |
[email protected] | ae1b30b | 2013-05-23 23:06:03 | [diff] [blame] | 242 | void DnsSession::FreeSocket(unsigned server_index, |
| 243 | scoped_ptr<DatagramClientSocket> socket) { |
[email protected] | 120d38d | 2012-12-14 01:42:32 | [diff] [blame] | 244 | DCHECK(socket.get()); |
| 245 | |
| 246 | socket->NetLog().EndEvent(NetLog::TYPE_SOCKET_IN_USE); |
| 247 | |
| 248 | socket_pool_->FreeSocket(server_index, socket.Pass()); |
| 249 | } |
[email protected] | 7556ea2 | 2011-12-08 19:29:15 | [diff] [blame] | 250 | |
[email protected] | ae1b30b | 2013-05-23 23:06:03 | [diff] [blame] | 251 | base::TimeDelta DnsSession::NextTimeoutFromJacobson(unsigned server_index, |
| 252 | int attempt) { |
[email protected] | a6c84f4 | 2013-06-07 20:39:38 | [diff] [blame] | 253 | DCHECK_LT(server_index, server_stats_.size()); |
[email protected] | ae1b30b | 2013-05-23 23:06:03 | [diff] [blame] | 254 | |
[email protected] | a6c84f4 | 2013-06-07 20:39:38 | [diff] [blame] | 255 | base::TimeDelta timeout = server_stats_[server_index]->rtt_estimate + |
| 256 | 4 * server_stats_[server_index]->rtt_deviation; |
[email protected] | ae1b30b | 2013-05-23 23:06:03 | [diff] [blame] | 257 | |
| 258 | timeout = std::max(timeout, base::TimeDelta::FromMilliseconds(kMinTimeoutMs)); |
| 259 | |
| 260 | // The timeout doubles every full round. |
| 261 | unsigned num_backoffs = attempt / config_.nameservers.size(); |
| 262 | |
| 263 | return std::min(timeout * (1 << num_backoffs), |
| 264 | base::TimeDelta::FromMilliseconds(kMaxTimeoutMs)); |
| 265 | } |
| 266 | |
| 267 | base::TimeDelta DnsSession::NextTimeoutFromHistogram(unsigned server_index, |
| 268 | int attempt) { |
[email protected] | a6c84f4 | 2013-06-07 20:39:38 | [diff] [blame] | 269 | DCHECK_LT(server_index, server_stats_.size()); |
[email protected] | ae1b30b | 2013-05-23 23:06:03 | [diff] [blame] | 270 | |
| 271 | COMPILE_ASSERT(std::numeric_limits<base::HistogramBase::Count>::is_signed, |
| 272 | histogram_base_count_assumed_to_be_signed); |
| 273 | |
| 274 | // Use fixed percentile of observed samples. |
[email protected] | a6c84f4 | 2013-06-07 20:39:38 | [diff] [blame] | 275 | const base::SampleVector& samples = |
| 276 | *server_stats_[server_index]->rtt_histogram; |
| 277 | |
[email protected] | ae1b30b | 2013-05-23 23:06:03 | [diff] [blame] | 278 | base::HistogramBase::Count total = samples.TotalCount(); |
| 279 | base::HistogramBase::Count remaining_count = kRTOPercentile * total / 100; |
| 280 | size_t index = 0; |
[email protected] | a6c84f4 | 2013-06-07 20:39:38 | [diff] [blame] | 281 | while (remaining_count > 0 && index < rtt_buckets_.Get().size()) { |
[email protected] | ae1b30b | 2013-05-23 23:06:03 | [diff] [blame] | 282 | remaining_count -= samples.GetCountAtIndex(index); |
| 283 | ++index; |
| 284 | } |
| 285 | |
| 286 | base::TimeDelta timeout = |
[email protected] | a6c84f4 | 2013-06-07 20:39:38 | [diff] [blame] | 287 | base::TimeDelta::FromMilliseconds(rtt_buckets_.Get().range(index)); |
[email protected] | ae1b30b | 2013-05-23 23:06:03 | [diff] [blame] | 288 | |
| 289 | timeout = std::max(timeout, base::TimeDelta::FromMilliseconds(kMinTimeoutMs)); |
| 290 | |
| 291 | // The timeout still doubles every full round. |
| 292 | unsigned num_backoffs = attempt / config_.nameservers.size(); |
| 293 | |
| 294 | return std::min(timeout * (1 << num_backoffs), |
| 295 | base::TimeDelta::FromMilliseconds(kMaxTimeoutMs)); |
| 296 | } |
| 297 | |
[email protected] | 7556ea2 | 2011-12-08 19:29:15 | [diff] [blame] | 298 | } // namespace net |