Skip to content

Commit f3b7c05

Browse files
francoisfreitagtimgraham
authored andcommitted
Refs #16614 -- Made QuerySet.iterator() use server-side cursors on PostgreSQL.
Thanks to Josh Smeaton for the idea of implementing server-side cursors in PostgreSQL from the iterator method, and Anssi Kääriäinen and Kevin Turner for their previous work. Also Simon Charette and Tim Graham for review.
1 parent 53bffe8 commit f3b7c05

File tree

13 files changed

+165
-31
lines changed

13 files changed

+165
-31
lines changed

django/db/backends/base/base.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ def init_connection_state(self):
165165
"""Initializes the database connection settings."""
166166
raise NotImplementedError('subclasses of BaseDatabaseWrapper may require an init_connection_state() method')
167167

168-
def create_cursor(self):
168+
def create_cursor(self, name=None):
169169
"""Creates a cursor. Assumes that a connection is established."""
170170
raise NotImplementedError('subclasses of BaseDatabaseWrapper may require a create_cursor() method')
171171

@@ -214,10 +214,21 @@ def ensure_connection(self):
214214

215215
# ##### Backend-specific wrappers for PEP-249 connection methods #####
216216

217-
def _cursor(self):
217+
def _prepare_cursor(self, cursor):
218+
"""
219+
Validate the connection is usable and perform database cursor wrapping.
220+
"""
221+
self.validate_thread_sharing()
222+
if self.queries_logged:
223+
wrapped_cursor = self.make_debug_cursor(cursor)
224+
else:
225+
wrapped_cursor = self.make_cursor(cursor)
226+
return wrapped_cursor
227+
228+
def _cursor(self, name=None):
218229
self.ensure_connection()
219230
with self.wrap_database_errors:
220-
return self.create_cursor()
231+
return self.create_cursor(name)
221232

222233
def _commit(self):
223234
if self.connection is not None:
@@ -240,12 +251,7 @@ def cursor(self):
240251
"""
241252
Creates a cursor, opening a connection if necessary.
242253
"""
243-
self.validate_thread_sharing()
244-
if self.queries_logged:
245-
cursor = self.make_debug_cursor(self._cursor())
246-
else:
247-
cursor = self.make_cursor(self._cursor())
248-
return cursor
254+
return self._prepare_cursor(self._cursor())
249255

250256
def commit(self):
251257
"""
@@ -553,6 +559,13 @@ def wrap_database_errors(self):
553559
"""
554560
return DatabaseErrorWrapper(self)
555561

562+
def chunked_cursor(self):
563+
"""
564+
Return a cursor that tries to avoid caching in the database (if
565+
supported by the database), otherwise return a regular cursor.
566+
"""
567+
return self.cursor()
568+
556569
def make_debug_cursor(self, cursor):
557570
"""
558571
Creates a cursor that logs all queries in self.queries_log.

django/db/backends/mysql/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ def init_connection_state(self):
271271
# with SQL standards.
272272
cursor.execute('SET SQL_AUTO_IS_NULL = 0')
273273

274-
def create_cursor(self):
274+
def create_cursor(self, name=None):
275275
cursor = self.connection.cursor()
276276
return CursorWrapper(cursor)
277277

django/db/backends/oracle/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ def init_connection_state(self):
249249
if not self.get_autocommit():
250250
self.commit()
251251

252-
def create_cursor(self):
252+
def create_cursor(self, name=None):
253253
return FormatStylePlaceholderCursor(self.connection)
254254

255255
def _commit(self):

django/db/backends/postgresql/base.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
Requires psycopg 2: http://initd.org/projects/psycopg2
55
"""
66

7+
import threading
78
import warnings
89

910
from django.conf import settings
@@ -145,6 +146,10 @@ class DatabaseWrapper(BaseDatabaseWrapper):
145146
introspection_class = DatabaseIntrospection
146147
ops_class = DatabaseOperations
147148

149+
def __init__(self, *args, **kwargs):
150+
super(DatabaseWrapper, self).__init__(*args, **kwargs)
151+
self._named_cursor_idx = 0
152+
148153
def get_connection_params(self):
149154
settings_dict = self.settings_dict
150155
# None may be used to connect to the default 'postgres' db
@@ -206,11 +211,27 @@ def init_connection_state(self):
206211
if not self.get_autocommit():
207212
self.connection.commit()
208213

209-
def create_cursor(self):
210-
cursor = self.connection.cursor()
214+
def create_cursor(self, name=None):
215+
if name:
216+
# In autocommit mode, the cursor will be used outside of a
217+
# transaction, hence use a holdable cursor.
218+
cursor = self.connection.cursor(name, scrollable=False, withhold=self.connection.autocommit)
219+
else:
220+
cursor = self.connection.cursor()
211221
cursor.tzinfo_factory = utc_tzinfo_factory if settings.USE_TZ else None
212222
return cursor
213223

224+
def chunked_cursor(self):
225+
self._named_cursor_idx += 1
226+
db_cursor = self._cursor(
227+
name='_django_curs_%d_%d' % (
228+
# Avoid reusing name in other threads
229+
threading.current_thread().ident,
230+
self._named_cursor_idx,
231+
)
232+
)
233+
return self._prepare_cursor(db_cursor)
234+
214235
def _set_autocommit(self, autocommit):
215236
with self.wrap_database_errors:
216237
self.connection.autocommit = autocommit

django/db/backends/sqlite3/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ def get_new_connection(self, conn_params):
215215
def init_connection_state(self):
216216
pass
217217

218-
def create_cursor(self):
218+
def create_cursor(self, name=None):
219219
return self.connection.cursor(factory=SQLiteCursorWrapper)
220220

221221
def close(self):

django/db/models/query.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,9 @@
3434

3535

3636
class BaseIterable(object):
37-
def __init__(self, queryset):
37+
def __init__(self, queryset, chunked_fetch=False):
3838
self.queryset = queryset
39+
self.chunked_fetch = chunked_fetch
3940

4041

4142
class ModelIterable(BaseIterable):
@@ -49,7 +50,7 @@ def __iter__(self):
4950
compiler = queryset.query.get_compiler(using=db)
5051
# Execute the query. This will also fill compiler.select, klass_info,
5152
# and annotations.
52-
results = compiler.execute_sql()
53+
results = compiler.execute_sql(chunked_fetch=self.chunked_fetch)
5354
select, klass_info, annotation_col_map = (compiler.select, compiler.klass_info,
5455
compiler.annotation_col_map)
5556
model_cls = klass_info['model']
@@ -318,7 +319,7 @@ def iterator(self):
318319
An iterator over the results from applying this QuerySet to the
319320
database.
320321
"""
321-
return iter(self._iterable_class(self))
322+
return iter(self._iterable_class(self, chunked_fetch=True))
322323

323324
def aggregate(self, *args, **kwargs):
324325
"""
@@ -1071,7 +1072,7 @@ def _clone(self, **kwargs):
10711072

10721073
def _fetch_all(self):
10731074
if self._result_cache is None:
1074-
self._result_cache = list(self.iterator())
1075+
self._result_cache = list(self._iterable_class(self))
10751076
if self._prefetch_related_lookups and not self._prefetch_done:
10761077
self._prefetch_related_objects()
10771078

django/db/models/sql/compiler.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -799,7 +799,7 @@ def has_results(self):
799799
self.query.set_extra_mask(['a'])
800800
return bool(self.execute_sql(SINGLE))
801801

802-
def execute_sql(self, result_type=MULTI):
802+
def execute_sql(self, result_type=MULTI, chunked_fetch=False):
803803
"""
804804
Run the query against the database and returns the result(s). The
805805
return value is a single data item if result_type is SINGLE, or an
@@ -823,12 +823,16 @@ def execute_sql(self, result_type=MULTI):
823823
return iter([])
824824
else:
825825
return
826-
827-
cursor = self.connection.cursor()
826+
if chunked_fetch:
827+
cursor = self.connection.chunked_cursor()
828+
else:
829+
cursor = self.connection.cursor()
828830
try:
829831
cursor.execute(sql, params)
830832
except Exception:
831-
cursor.close()
833+
with self.connection.wrap_database_errors:
834+
# Closing a server-side cursor could yield an error
835+
cursor.close()
832836
raise
833837

834838
if result_type == CURSOR:
@@ -852,11 +856,11 @@ def execute_sql(self, result_type=MULTI):
852856
cursor, self.connection.features.empty_fetchmany_value,
853857
self.col_count
854858
)
855-
if not self.connection.features.can_use_chunked_reads:
859+
if not chunked_fetch and not self.connection.features.can_use_chunked_reads:
856860
try:
857861
# If we are using non-chunked reads, we return the same data
858862
# structure as normally, but ensure it is all read into memory
859-
# before going any further.
863+
# before going any further. Use chunked_fetch if requested.
860864
return list(result)
861865
finally:
862866
# done with the cursor

django/test/testcases.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,13 +175,15 @@ def setUpClass(cls):
175175
for alias in connections:
176176
connection = connections[alias]
177177
connection.cursor = _CursorFailure(cls.__name__, connection.cursor)
178+
connection.chunked_cursor = _CursorFailure(cls.__name__, connection.chunked_cursor)
178179

179180
@classmethod
180181
def tearDownClass(cls):
181182
if not cls.allow_database_queries:
182183
for alias in connections:
183184
connection = connections[alias]
184185
connection.cursor = connection.cursor.wrapped
186+
connection.chunked_cursor = connection.chunked_cursor.wrapped
185187
if hasattr(cls, '_cls_modified_context'):
186188
cls._cls_modified_context.disable()
187189
delattr(cls, '_cls_modified_context')

docs/ref/databases.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,24 @@ If you need to add a PostgreSQL extension (like ``hstore``, ``postgis``, etc.)
171171
using a migration, use the
172172
:class:`~django.contrib.postgres.operations.CreateExtension` operation.
173173

174+
.. _postgresql-server-side-cursors:
175+
176+
Server-side cursors
177+
-------------------
178+
179+
.. versionadded:: 1.11
180+
181+
When using :meth:`QuerySet.iterator()
182+
<django.db.models.query.QuerySet.iterator>`, Django opens a :ref:`server-side
183+
cursor <psycopg2:server-side-cursors>`. By default, PostgreSQL assumes that
184+
only the first 10% of the results of cursor queries will be fetched. The query
185+
planner spends less time planning the query and starts returning results
186+
faster, but this could diminish performance if more than 10% of the results are
187+
retrieved. PostgreSQL's assumptions on the number of rows retrieved for a
188+
cursor query is controlled with the `cursor_tuple_fraction`_ option.
189+
190+
.. _cursor_tuple_fraction: https://www.postgresql.org/docs/current/static/runtime-config-query.html#GUC-CURSOR-TUPLE-FRACTION
191+
174192
Test database templates
175193
-----------------------
176194

docs/ref/models/querysets.txt

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1981,15 +1981,15 @@ evaluated will force it to evaluate again, repeating the query.
19811981
Also, use of ``iterator()`` causes previous ``prefetch_related()`` calls to be
19821982
ignored since these two optimizations do not make sense together.
19831983

1984-
.. warning::
1984+
Some Python database drivers still load the entire result set into memory, but
1985+
won't cache results after iterating over them. Oracle and :ref:`PostgreSQL
1986+
<postgresql-server-side-cursors>` use server-side cursors to stream results
1987+
from the database without loading the entire result set into memory.
1988+
1989+
.. versionchanged:: 1.11
19851990

1986-
Some Python database drivers like ``psycopg2`` perform caching if using
1987-
client side cursors (instantiated with ``connection.cursor()`` and what
1988-
Django's ORM uses). Using ``iterator()`` does not affect caching at the
1989-
database driver level. To disable this caching, look at `server side
1990-
cursors`_.
1991+
PostgreSQL support for server-side cursors was added.
19911992

1992-
.. _server side cursors: http://initd.org/psycopg/docs/usage.html#server-side-cursors
19931993

19941994
``latest()``
19951995
~~~~~~~~~~~~

0 commit comments

Comments
 (0)