From 74cb20467e3d025a80cb2e5294bd1bba69303d0b Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Thu, 20 May 2021 21:13:38 +0200
Subject: [PATCH v40 3/6] amskip implementation for Btree

Btree implementation of index am method amskip for Index Skip Scan. To
make it more robust and suitable for both situations:

* small number of distinct values (e.g. because of the planner
underestimation)

* significant amounts of distinct values

a mixed approach is implemented. Instead of restarting the search for
every value, first we check if there is a next distinct value on the
current page. Only then if no such value was found restart and search
from the tree root.

No support for backward scan is implemented in case of a scroll cursor,
instead a Material node will be put on top.

Author: Jesper Pedersen, Dmitry Dolgov
Reviewed-by: Thomas Munro, David Rowley, Floris Van Nee, Kyotaro Horiguchi, Tomas Vondra, Peter Geoghegan
---
 src/backend/access/nbtree/nbtree.c            |  12 +
 src/backend/access/nbtree/nbtsearch.c         | 215 +++++-
 src/include/access/nbtree.h                   |   5 +
 src/test/regress/expected/join.out            |   3 +
 src/test/regress/expected/select_distinct.out | 642 ++++++++++++++++++
 src/test/regress/sql/join.sql                 |   5 +
 src/test/regress/sql/select_distinct.sql      | 282 ++++++++
 7 files changed, 1163 insertions(+), 1 deletion(-)

diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 13024af2fa..393ceb3054 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -124,6 +124,7 @@ bthandler(PG_FUNCTION_ARGS)
 	amroutine->ambulkdelete = btbulkdelete;
 	amroutine->amvacuumcleanup = btvacuumcleanup;
 	amroutine->amcanreturn = btcanreturn;
+	amroutine->amskip = btskip;
 	amroutine->amcostestimate = btcostestimate;
 	amroutine->amoptions = btoptions;
 	amroutine->amproperty = btproperty;
@@ -375,6 +376,8 @@ btbeginscan(Relation rel, int nkeys, int norderbys)
 	 */
 	so->currTuples = so->markTuples = NULL;
 
+	so->skipScanKey = NULL;
+
 	scan->xs_itupdesc = RelationGetDescr(rel);
 
 	scan->opaque = so;
@@ -441,6 +444,15 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
 	_bt_preprocess_array_keys(scan);
 }
 
+/*
+ * btskip() -- skip to the beginning of the next key prefix
+ */
+bool
+btskip(IndexScanDesc scan, ScanDirection direction, int prefix)
+{
+	return _bt_skip(scan, direction, prefix);
+}
+
 /*
  *	btendscan() -- close down a scan
  */
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 9d82d4904d..0ad761916a 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -45,7 +45,11 @@ static bool _bt_parallel_readpage(IndexScanDesc scan, BlockNumber blkno,
 static Buffer _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot);
 static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir);
 static inline void _bt_initialize_more_data(BTScanOpaque so, ScanDirection dir);
-
+static inline void _bt_update_skip_scankeys(IndexScanDesc scan,
+											Relation indexRel);
+static inline bool _bt_scankey_within_page(IndexScanDesc scan,
+										   BTScanInsert key,
+										   Buffer buf);
 
 /*
  *	_bt_drop_lock_and_maybe_pin()
@@ -1498,6 +1502,161 @@ _bt_next(IndexScanDesc scan, ScanDirection dir)
 	return true;
 }
 
+/*
+ *  _bt_skip() -- Skip items that have the same prefix as the most recently
+ * 				  fetched index tuple.
+ *
+ * 		The current position is set so that a subsequent call to _bt_next will
+ * 		fetch the first tuple that differs in the leading 'prefix' keys.
+ *
+ * 		The current page is searched for the next unique value. If none is found
+ * 		we will do a scan from the root in order to find the next page with
+ * 		a unique value.
+ */
+bool
+_bt_skip(IndexScanDesc scan, ScanDirection dir, int prefix)
+{
+	BTScanOpaque so = (BTScanOpaque) scan->opaque;
+	BTStack stack;
+	Buffer buf;
+	OffsetNumber offnum;
+	BTScanPosItem *currItem;
+	Relation 	 indexRel = scan->indexRelation;
+	bool scanstart = !BTScanPosIsValid(so->currPos);
+
+	/* We want to return tuples, and we need a starting point */
+	Assert(scan->xs_want_itup);
+	Assert(scan->xs_itup);
+
+	if (so->numKilled > 0)
+		_bt_killitems(scan);
+
+	/* If skipScanKey is NULL then we initialize it with _bt_mkscankey */
+	if (so->skipScanKey == NULL)
+	{
+		so->skipScanKey = _bt_mkscankey(indexRel, scan->xs_itup);
+		so->skipScanKey->keysz = prefix;
+		so->skipScanKey->scantid = NULL;
+	}
+	so->skipScanKey->nextkey = ScanDirectionIsForward(dir);
+	_bt_update_skip_scankeys(scan, indexRel);
+
+	/* Check if the next unique key can be found within the current page.
+	 * Since we do not lock the current page between jumps, it's possible
+	 * that it was splitted since the last time we saw it. This is fine in
+	 * case of scanning forward, since page split to the right and we are
+	 * still on the left most page. In case of scanning backwards it's
+	 * possible to loose some pages and we need to remember the previous
+	 * page, and then follow the right link from the current page until we
+	 * find the original one.
+	 *
+	 * Since the whole idea of checking the current page is to protect
+	 * ourselves and make more performant statistic mismatch case when
+	 * there are too many distinct values for jumping, it's not clear if
+	 * the complexity of this solution in case of backward scan is
+	 * justified, so for now just avoid it.
+	 */
+	if (BufferIsValid(so->currPos.buf) && ScanDirectionIsForward(dir))
+	{
+		_bt_lockbuf(indexRel, so->currPos.buf, BT_READ);
+
+		if (_bt_scankey_within_page(scan, so->skipScanKey, so->currPos.buf))
+		{
+			bool keyFound = false;
+
+			offnum = _bt_binsrch(scan->indexRelation, so->skipScanKey, so->currPos.buf);
+
+			/* Lock the page for SERIALIZABLE transactions */
+			PredicateLockPage(scan->indexRelation, BufferGetBlockNumber(so->currPos.buf),
+							  scan->xs_snapshot);
+
+			/* We know in which direction to look */
+			_bt_initialize_more_data(so, dir);
+
+			/* Now read the data */
+			keyFound = _bt_readpage(scan, dir, offnum);
+
+			_bt_relbuf(indexRel, so->currPos.buf);
+			so->currPos.buf = InvalidBuffer;
+
+			if (keyFound)
+			{
+				/* set IndexTuple */
+				currItem = &so->currPos.items[so->currPos.itemIndex];
+				scan->xs_heaptid = currItem->heapTid;
+				scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
+				return true;
+			}
+		}
+		else
+			_bt_unlockbuf(indexRel, so->currPos.buf);
+	}
+
+	if (BufferIsValid(so->currPos.buf))
+	{
+		ReleaseBuffer(so->currPos.buf);
+		so->currPos.buf = InvalidBuffer;
+	}
+
+	/*
+	 * We haven't found scan key within the current page, so let's scan from
+	 * the root. Use _bt_search and _bt_binsrch to get the buffer and offset
+	 * number
+	 */
+	stack = _bt_search(scan->indexRelation, so->skipScanKey,
+					   &buf, BT_READ, scan->xs_snapshot);
+	_bt_freestack(stack);
+	so->currPos.buf = buf;
+	offnum = _bt_binsrch(scan->indexRelation, so->skipScanKey, buf);
+
+	/* Lock the page for SERIALIZABLE transactions */
+	PredicateLockPage(scan->indexRelation, BufferGetBlockNumber(buf),
+					  scan->xs_snapshot);
+
+	/* We know in which direction to look */
+	_bt_initialize_more_data(so, dir);
+
+	/*
+	 * Simplest case is when both directions are forward, when we are already
+	 * at the next distinct key at the beginning of the series (so everything
+	 * else would be done in _bt_readpage)
+	 *
+	 * The case when both directions are backwards is also simple, but we need
+	 * to go one step back, since we need a last element from the previous
+	 * series.
+	 */
+	if (ScanDirectionIsBackward(dir) || (ScanDirectionIsForward(dir) & scanstart))
+		 offnum = OffsetNumberPrev(offnum);
+
+	/* Now read the data */
+	if (!_bt_readpage(scan, dir, offnum))
+	{
+		/*
+		 * There's no actually-matching data on this page.  Try to advance to
+		 * the next page.  Return false if there's no matching data at all.
+		 */
+		_bt_unlockbuf(indexRel, so->currPos.buf);
+		if (!_bt_steppage(scan, dir))
+		{
+			pfree(so->skipScanKey);
+			so->skipScanKey = NULL;
+			return false;
+		}
+	}
+	else
+		_bt_drop_lock_and_maybe_pin(scan, &so->currPos);
+
+	/* And set IndexTuple */
+	currItem = &so->currPos.items[so->currPos.itemIndex];
+	scan->xs_heaptid = currItem->heapTid;
+	scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
+
+	so->currPos.moreLeft = true;
+	so->currPos.moreRight = true;
+
+	return true;
+}
+
 /*
  *	_bt_readpage() -- Load data from current index page into so->currPos
  *
@@ -2494,3 +2653,57 @@ _bt_initialize_more_data(BTScanOpaque so, ScanDirection dir)
 	so->numKilled = 0;			/* just paranoia */
 	so->markItemIndex = -1;		/* ditto */
 }
+
+/*
+ * _bt_update_skip_scankeys() -- set up new values for the existing scankeys
+ * based on the current index tuple
+ */
+static inline void
+_bt_update_skip_scankeys(IndexScanDesc scan, Relation indexRel)
+{
+	TupleDesc		itupdesc;
+	int			indnkeyatts,
+				i;
+	BTScanOpaque 	so = (BTScanOpaque) scan->opaque;
+	ScanKey			scankeys = so->skipScanKey->scankeys;
+
+	itupdesc = RelationGetDescr(indexRel);
+	indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRel);
+	for (i = 0; i < indnkeyatts; i++)
+	{
+		Datum datum;
+		bool null;
+		int flags;
+
+		datum = index_getattr(scan->xs_itup, i + 1, itupdesc, &null);
+		flags = (null ? SK_ISNULL : 0) |
+				(indexRel->rd_indoption[i] << SK_BT_INDOPTION_SHIFT);
+		scankeys[i].sk_flags = flags;
+		scankeys[i].sk_argument = datum;
+	}
+}
+
+/*
+ * _bt_scankey_within_page() -- check if the provided scankey could be found
+ * within a page, specified by the buffer.
+ *
+ * Scankey nextkey will tell us if we need to find a current key or the next
+ * one, which affects whether or not it's ok to be equal to the page highkey.
+ */
+static inline bool
+_bt_scankey_within_page(IndexScanDesc scan, BTScanInsert key, Buffer buf)
+{
+	OffsetNumber low, high;
+	Page page = BufferGetPage(buf);
+	BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+	int high_compare = key->nextkey ? 0 : 1;
+
+	low = P_FIRSTDATAKEY(opaque);
+	high = PageGetMaxOffsetNumber(page);
+
+	if (unlikely(high < low))
+		return false;
+
+	return (_bt_compare(scan->indexRelation, key, page, low) > 0 &&
+			_bt_compare(scan->indexRelation, key, page, high) < high_compare);
+}
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 9fec6fb1a8..2c516654c2 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -1064,6 +1064,9 @@ typedef struct BTScanOpaqueData
 	 */
 	int			markItemIndex;	/* itemIndex, or -1 if not valid */
 
+	/* Work space for _bt_skip */
+	BTScanInsert	skipScanKey;	/* used to control skipping */
+
 	/* keep these last in struct for efficiency */
 	BTScanPosData currPos;		/* current position data */
 	BTScanPosData markPos;		/* marked position, if any */
@@ -1229,6 +1232,7 @@ extern OffsetNumber _bt_binsrch_insert(Relation rel, BTInsertState insertstate);
 extern int32 _bt_compare(Relation rel, BTScanInsert key, Page page, OffsetNumber offnum);
 extern bool _bt_first(IndexScanDesc scan, ScanDirection dir);
 extern bool _bt_next(IndexScanDesc scan, ScanDirection dir);
+extern bool _bt_skip(IndexScanDesc scan, ScanDirection dir, int prefix);
 extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost,
 							   Snapshot snapshot);
 
@@ -1253,6 +1257,7 @@ extern void _bt_end_vacuum_callback(int code, Datum arg);
 extern Size BTreeShmemSize(void);
 extern void BTreeShmemInit(void);
 extern bytea *btoptions(Datum reloptions, bool validate);
+extern bool btskip(IndexScanDesc scan, ScanDirection dir, int prefix);
 extern bool btproperty(Oid index_oid, int attno,
 					   IndexAMProperty prop, const char *propname,
 					   bool *res, bool *isnull);
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index d5b5b775fd..5d3fcc0d21 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -4614,6 +4614,8 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
          ->  Seq Scan on d
 (8 rows)
 
+-- disable index skip scan to prevent it interfering with the plan
+set enable_indexskipscan to off;
 -- similarly, but keying off a DISTINCT clause
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
@@ -4631,6 +4633,7 @@ select d.* from d left join (select distinct * from b) s
          ->  Seq Scan on d
 (9 rows)
 
+set enable_indexskipscan to on;
 -- check join removal works when uniqueness of the join condition is enforced
 -- by a UNION
 explain (costs off)
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out
index 58122c6f88..dfc90c797a 100644
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -375,3 +375,645 @@ SELECT null IS NOT DISTINCT FROM null as "yes";
  t
 (1 row)
 
+-- index only skip scan
+CREATE TABLE distinct_a (a int, b int, c int);
+INSERT INTO distinct_a (
+    SELECT five, tenthous, 10 FROM
+    generate_series(1, 5) five,
+    generate_series(1, 10000) tenthous
+);
+CREATE INDEX ON distinct_a (a, b);
+CREATE INDEX ON distinct_a ((a + 1));
+ANALYZE distinct_a;
+SELECT DISTINCT a FROM distinct_a;
+ a 
+---
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+SELECT DISTINCT a FROM distinct_a WHERE a = 1;
+ a 
+---
+ 1
+(1 row)
+
+SELECT DISTINCT a FROM distinct_a ORDER BY a DESC;
+ a 
+---
+ 5
+ 4
+ 3
+ 2
+ 1
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT a FROM distinct_a;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Index Only Scan using distinct_a_a_b_idx on distinct_a
+   Skip scan: true
+   Distinct Prefix: 1
+(3 rows)
+
+-- test index skip scan with a condition on a non unique field
+SELECT DISTINCT ON (a) a, b FROM distinct_a WHERE b = 2;
+ a | b 
+---+---
+ 1 | 2
+ 2 | 2
+ 3 | 2
+ 4 | 2
+ 5 | 2
+(5 rows)
+
+-- test index skip scan backwards
+SELECT DISTINCT ON (a) a, b FROM distinct_a ORDER BY a DESC, b DESC;
+ a |   b   
+---+-------
+ 5 | 10000
+ 4 | 10000
+ 3 | 10000
+ 2 | 10000
+ 1 | 10000
+(5 rows)
+
+-- test index skip scan for expressions
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT (a + 1) FROM distinct_a ORDER BY (a + 1);
+             QUERY PLAN             
+------------------------------------
+ Sort
+   Sort Key: ((a + 1))
+   ->  HashAggregate
+         Group Key: (a + 1)
+         ->  Seq Scan on distinct_a
+(5 rows)
+
+SELECT DISTINCT (a + 1) FROM distinct_a ORDER BY (a + 1);
+ ?column? 
+----------
+        2
+        3
+        4
+        5
+        6
+(5 rows)
+
+-- check colums order
+CREATE INDEX distinct_a_b_a on distinct_a (b, a);
+SELECT DISTINCT a FROM distinct_a WHERE b = 2;
+ a 
+---
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+SELECT DISTINCT on (a, b) a, b FROM distinct_a WHERE b = 2;
+ a | b 
+---+---
+ 1 | 2
+ 2 | 2
+ 3 | 2
+ 4 | 2
+ 5 | 2
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT a FROM distinct_a WHERE b = 2;
+                     QUERY PLAN                     
+----------------------------------------------------
+ Index Only Scan using distinct_a_b_a on distinct_a
+   Skip scan: true
+   Distinct Prefix: 2
+   Index Cond: (b = 2)
+(4 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT on (a, b) a, b FROM distinct_a WHERE b = 2;
+                     QUERY PLAN                     
+----------------------------------------------------
+ Index Only Scan using distinct_a_b_a on distinct_a
+   Skip scan: true
+   Distinct Prefix: 2
+   Index Cond: (b = 2)
+(4 rows)
+
+DROP INDEX distinct_a_b_a;
+-- test opposite scan/index directions inside a cursor
+-- forward/backward
+BEGIN;
+DECLARE c SCROLL CURSOR FOR
+SELECT DISTINCT ON (a) a,b FROM distinct_a ORDER BY a, b;
+FETCH FROM c;
+ a | b 
+---+---
+ 1 | 1
+(1 row)
+
+FETCH BACKWARD FROM c;
+ a | b 
+---+---
+(0 rows)
+
+FETCH 6 FROM c;
+ a | b 
+---+---
+ 1 | 1
+ 2 | 1
+ 3 | 1
+ 4 | 1
+ 5 | 1
+(5 rows)
+
+FETCH BACKWARD 6 FROM c;
+ a | b 
+---+---
+ 5 | 1
+ 4 | 1
+ 3 | 1
+ 2 | 1
+ 1 | 1
+(5 rows)
+
+FETCH 6 FROM c;
+ a | b 
+---+---
+ 1 | 1
+ 2 | 1
+ 3 | 1
+ 4 | 1
+ 5 | 1
+(5 rows)
+
+FETCH BACKWARD 6 FROM c;
+ a | b 
+---+---
+ 5 | 1
+ 4 | 1
+ 3 | 1
+ 2 | 1
+ 1 | 1
+(5 rows)
+
+END;
+-- backward/forward
+BEGIN;
+DECLARE c SCROLL CURSOR FOR
+SELECT DISTINCT ON (a) a,b FROM distinct_a ORDER BY a DESC, b DESC;
+FETCH FROM c;
+ a |   b   
+---+-------
+ 5 | 10000
+(1 row)
+
+FETCH BACKWARD FROM c;
+ a | b 
+---+---
+(0 rows)
+
+FETCH 6 FROM c;
+ a |   b   
+---+-------
+ 5 | 10000
+ 4 | 10000
+ 3 | 10000
+ 2 | 10000
+ 1 | 10000
+(5 rows)
+
+FETCH BACKWARD 6 FROM c;
+ a |   b   
+---+-------
+ 1 | 10000
+ 2 | 10000
+ 3 | 10000
+ 4 | 10000
+ 5 | 10000
+(5 rows)
+
+FETCH 6 FROM c;
+ a |   b   
+---+-------
+ 5 | 10000
+ 4 | 10000
+ 3 | 10000
+ 2 | 10000
+ 1 | 10000
+(5 rows)
+
+FETCH BACKWARD 6 FROM c;
+ a |   b   
+---+-------
+ 1 | 10000
+ 2 | 10000
+ 3 | 10000
+ 4 | 10000
+ 5 | 10000
+(5 rows)
+
+END;
+-- test missing values and skipping from the end
+CREATE TABLE distinct_abc(a int, b int, c int);
+CREATE INDEX ON distinct_abc(a, b, c);
+INSERT INTO distinct_abc
+	VALUES (1, 1, 1),
+		   (1, 1, 2),
+		   (1, 2, 2),
+		   (1, 2, 3),
+		   (2, 2, 1),
+		   (2, 2, 3),
+		   (3, 1, 1),
+		   (3, 1, 2),
+		   (3, 2, 2),
+		   (3, 2, 3);
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2;
+                          QUERY PLAN                          
+--------------------------------------------------------------
+ Index Only Scan using distinct_abc_a_b_c_idx on distinct_abc
+   Skip scan: true
+   Distinct Prefix: 1
+   Index Cond: (c = 2)
+(4 rows)
+
+BEGIN;
+DECLARE c SCROLL CURSOR FOR
+SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2;
+FETCH ALL FROM c;
+ a | b | c 
+---+---+---
+ 1 | 1 | 2
+ 3 | 1 | 2
+(2 rows)
+
+FETCH BACKWARD ALL FROM c;
+ a | b | c 
+---+---+---
+ 3 | 1 | 2
+ 1 | 1 | 2
+(2 rows)
+
+END;
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2
+ORDER BY a DESC, b DESC;
+                              QUERY PLAN                               
+-----------------------------------------------------------------------
+ Index Only Scan Backward using distinct_abc_a_b_c_idx on distinct_abc
+   Skip scan: true
+   Distinct Prefix: 1
+   Index Cond: (c = 2)
+(4 rows)
+
+BEGIN;
+DECLARE c SCROLL CURSOR FOR
+SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2
+ORDER BY a DESC, b DESC;
+FETCH ALL FROM c;
+ a | b | c 
+---+---+---
+ 3 | 2 | 2
+ 1 | 2 | 2
+(2 rows)
+
+FETCH BACKWARD ALL FROM c;
+ a | b | c 
+---+---+---
+ 1 | 2 | 2
+ 3 | 2 | 2
+(2 rows)
+
+END;
+DROP TABLE distinct_abc;
+-- check colums order
+SELECT DISTINCT a FROM distinct_a WHERE b = 2 AND c = 10;
+ a 
+---
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT a FROM distinct_a WHERE b = 2 AND c = 10;
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Unique
+   ->  Index Scan using distinct_a_a_b_idx on distinct_a
+         Index Cond: (b = 2)
+         Filter: (c = 10)
+(4 rows)
+
+-- check projection case
+SELECT DISTINCT a, a FROM distinct_a WHERE b = 2;
+ a | a 
+---+---
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+(5 rows)
+
+SELECT DISTINCT a, 1 FROM distinct_a WHERE b = 2;
+ a | ?column? 
+---+----------
+ 1 |        1
+ 2 |        1
+ 3 |        1
+ 4 |        1
+ 5 |        1
+(5 rows)
+
+-- test cursor forward/backward movements
+BEGIN;
+DECLARE c SCROLL CURSOR FOR SELECT DISTINCT a FROM distinct_a;
+FETCH FROM c;
+ a 
+---
+ 1
+(1 row)
+
+FETCH BACKWARD FROM c;
+ a 
+---
+(0 rows)
+
+FETCH 6 FROM c;
+ a 
+---
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+FETCH BACKWARD 6 FROM c;
+ a 
+---
+ 5
+ 4
+ 3
+ 2
+ 1
+(5 rows)
+
+FETCH 6 FROM c;
+ a 
+---
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
+FETCH BACKWARD 6 FROM c;
+ a 
+---
+ 5
+ 4
+ 3
+ 2
+ 1
+(5 rows)
+
+END;
+DROP TABLE distinct_a;
+-- test tuples visibility
+CREATE TABLE distinct_visibility (a int, b int);
+INSERT INTO distinct_visibility (select a, b from generate_series(1,5) a, generate_series(1, 10000) b);
+CREATE INDEX ON distinct_visibility (a, b);
+ANALYZE distinct_visibility;
+SELECT DISTINCT ON (a) a, b FROM distinct_visibility ORDER BY a, b;
+ a | b 
+---+---
+ 1 | 1
+ 2 | 1
+ 3 | 1
+ 4 | 1
+ 5 | 1
+(5 rows)
+
+DELETE FROM distinct_visibility WHERE a = 2 and b = 1;
+SELECT DISTINCT ON (a) a, b FROM distinct_visibility ORDER BY a, b;
+ a | b 
+---+---
+ 1 | 1
+ 2 | 2
+ 3 | 1
+ 4 | 1
+ 5 | 1
+(5 rows)
+
+SELECT DISTINCT ON (a) a, b FROM distinct_visibility ORDER BY a DESC, b DESC;
+ a |   b   
+---+-------
+ 5 | 10000
+ 4 | 10000
+ 3 | 10000
+ 2 | 10000
+ 1 | 10000
+(5 rows)
+
+DELETE FROM distinct_visibility WHERE a = 2 and b = 10000;
+SELECT DISTINCT ON (a) a, b FROM distinct_visibility ORDER BY a DESC, b DESC;
+ a |   b   
+---+-------
+ 5 | 10000
+ 4 | 10000
+ 3 | 10000
+ 2 |  9999
+ 1 | 10000
+(5 rows)
+
+DROP TABLE distinct_visibility;
+-- test page boundaries
+CREATE TABLE distinct_boundaries AS
+    SELECT a, b::int2 b, (b % 2)::int2 c FROM
+        generate_series(1, 5) a,
+        generate_series(1,366) b;
+CREATE INDEX ON distinct_boundaries (a, b, c);
+ANALYZE distinct_boundaries;
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (a) a, b, c from distinct_boundaries
+WHERE b >= 1 and c = 0 ORDER BY a, b;
+                                 QUERY PLAN                                 
+----------------------------------------------------------------------------
+ Index Only Scan using distinct_boundaries_a_b_c_idx on distinct_boundaries
+   Skip scan: true
+   Distinct Prefix: 1
+   Index Cond: ((b >= 1) AND (c = 0))
+(4 rows)
+
+SELECT DISTINCT ON (a) a, b, c from distinct_boundaries
+WHERE b >= 1 and c = 0 ORDER BY a, b;
+ a | b | c 
+---+---+---
+ 1 | 2 | 0
+ 2 | 2 | 0
+ 3 | 2 | 0
+ 4 | 2 | 0
+ 5 | 2 | 0
+(5 rows)
+
+DROP TABLE distinct_boundaries;
+-- test tuple killing
+-- DESC ordering
+CREATE TABLE distinct_killed AS
+    SELECT a, b, b % 2 AS c, 10 AS d
+        FROM generate_series(1, 5) a,
+             generate_series(1,1000) b;
+CREATE INDEX ON distinct_killed (a, b, c, d);
+DELETE FROM distinct_killed where a = 3;
+BEGIN;
+    DECLARE c SCROLL CURSOR FOR
+    SELECT DISTINCT ON (a) a,b,c,d
+    FROM distinct_killed ORDER BY a DESC, b DESC;
+    FETCH FORWARD ALL FROM c;
+ a |  b   | c | d  
+---+------+---+----
+ 5 | 1000 | 0 | 10
+ 4 | 1000 | 0 | 10
+ 2 | 1000 | 0 | 10
+ 1 | 1000 | 0 | 10
+(4 rows)
+
+    FETCH BACKWARD ALL FROM c;
+ a |  b   | c | d  
+---+------+---+----
+ 1 | 1000 | 0 | 10
+ 2 | 1000 | 0 | 10
+ 4 | 1000 | 0 | 10
+ 5 | 1000 | 0 | 10
+(4 rows)
+
+COMMIT;
+DROP TABLE distinct_killed;
+-- regular ordering
+CREATE TABLE distinct_killed AS
+    SELECT a, b, b % 2 AS c, 10 AS d
+        FROM generate_series(1, 5) a,
+             generate_series(1,1000) b;
+CREATE INDEX ON distinct_killed (a, b, c, d);
+DELETE FROM distinct_killed where a = 3;
+BEGIN;
+    DECLARE c SCROLL CURSOR FOR
+    SELECT DISTINCT ON (a) a,b,c,d
+    FROM distinct_killed ORDER BY a, b;
+    FETCH FORWARD ALL FROM c;
+ a | b | c | d  
+---+---+---+----
+ 1 | 1 | 1 | 10
+ 2 | 1 | 1 | 10
+ 4 | 1 | 1 | 10
+ 5 | 1 | 1 | 10
+(4 rows)
+
+    FETCH BACKWARD ALL FROM c;
+ a | b | c | d  
+---+---+---+----
+ 5 | 1 | 1 | 10
+ 4 | 1 | 1 | 10
+ 2 | 1 | 1 | 10
+ 1 | 1 | 1 | 10
+(4 rows)
+
+COMMIT;
+DROP TABLE distinct_killed;
+-- partial delete
+CREATE TABLE distinct_killed AS
+    SELECT a, b, b % 2 AS c, 10 AS d
+        FROM generate_series(1, 5) a,
+             generate_series(1,1000) b;
+CREATE INDEX ON distinct_killed (a, b, c, d);
+DELETE FROM distinct_killed WHERE a = 3 AND b <= 999;
+BEGIN;
+    DECLARE c SCROLL CURSOR FOR
+    SELECT DISTINCT ON (a) a,b,c,d
+    FROM distinct_killed ORDER BY a DESC, b DESC;
+    FETCH FORWARD ALL FROM c;
+ a |  b   | c | d  
+---+------+---+----
+ 5 | 1000 | 0 | 10
+ 4 | 1000 | 0 | 10
+ 3 | 1000 | 0 | 10
+ 2 | 1000 | 0 | 10
+ 1 | 1000 | 0 | 10
+(5 rows)
+
+    FETCH BACKWARD ALL FROM c;
+ a |  b   | c | d  
+---+------+---+----
+ 1 | 1000 | 0 | 10
+ 2 | 1000 | 0 | 10
+ 3 | 1000 | 0 | 10
+ 4 | 1000 | 0 | 10
+ 5 | 1000 | 0 | 10
+(5 rows)
+
+COMMIT;
+DROP TABLE distinct_killed;
+-- test posting lists
+CREATE TABLE distinct_posting (a int, b int, c int);
+CREATE INDEX ON distinct_posting (a, b, c);
+INSERT INTO distinct_posting
+	VALUES (1, 1, 1),
+		   (1, 1, 2),
+		   (1, 2, 2),
+		   (1, 2, 3),
+		   (2, 2, 1),
+		   (2, 2, 3),
+		   (3, 1, 1),
+		   (3, 1, 2),
+		   (3, 2, 2),
+		   (3, 2, 3);
+INSERT INTO distinct_posting (
+    SELECT 1 as a, 1 as b, 1 AS c
+        FROM generate_series(1,1000) i
+);
+BEGIN;
+    DECLARE c SCROLL CURSOR FOR
+    SELECT DISTINCT ON (a) a,b,c FROM distinct_posting WHERE c = 2
+    ORDER BY a DESC, b DESC;
+    FETCH ALL FROM c;
+ a | b | c 
+---+---+---
+ 3 | 2 | 2
+ 1 | 2 | 2
+(2 rows)
+
+    FETCH BACKWARD ALL FROM c;
+ a | b | c 
+---+---+---
+ 1 | 2 | 2
+ 3 | 2 | 2
+(2 rows)
+
+COMMIT;
+-- test that quals are check for indexability before applied
+CREATE TABLE Indexable_quals (a text, b text, c text);
+CREATE INDEX ON indexable_quals (a, b, c);
+INSERT INTO indexable_quals VALUES ('a1', 'b', 'xxx');
+INSERT INTO indexable_quals VALUES ('a1', 'b', 'yyy');
+INSERT INTO indexable_quals VALUES ('a2', 'b', 'xxx');
+INSERT INTO indexable_quals VALUES ('a2', 'b', 'yyy');
+SELECT DISTINCT ON (a, b)  a, b
+FROM indexable_quals WHERE c LIKE '%y%' AND a LIKE 'a%' AND b = 'b';
+ a  | b 
+----+---
+ a1 | b
+ a2 | b
+(2 rows)
+
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 52240fea7e..de85bcc308 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1585,11 +1585,16 @@ explain (costs off)
 select d.* from d left join (select * from b group by b.id, b.c_id) s
   on d.a = s.id;
 
+-- disable index skip scan to prevent it interfering with the plan
+set enable_indexskipscan to off;
+
 -- similarly, but keying off a DISTINCT clause
 explain (costs off)
 select d.* from d left join (select distinct * from b) s
   on d.a = s.id;
 
+set enable_indexskipscan to on;
+
 -- check join removal works when uniqueness of the join condition is enforced
 -- by a UNION
 explain (costs off)
diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql
index 1bfe59c26f..3d1ebf515c 100644
--- a/src/test/regress/sql/select_distinct.sql
+++ b/src/test/regress/sql/select_distinct.sql
@@ -174,3 +174,285 @@ SELECT 1 IS NOT DISTINCT FROM 2 as "no";
 SELECT 2 IS NOT DISTINCT FROM 2 as "yes";
 SELECT 2 IS NOT DISTINCT FROM null as "no";
 SELECT null IS NOT DISTINCT FROM null as "yes";
+
+-- index only skip scan
+CREATE TABLE distinct_a (a int, b int, c int);
+INSERT INTO distinct_a (
+    SELECT five, tenthous, 10 FROM
+    generate_series(1, 5) five,
+    generate_series(1, 10000) tenthous
+);
+CREATE INDEX ON distinct_a (a, b);
+CREATE INDEX ON distinct_a ((a + 1));
+ANALYZE distinct_a;
+
+SELECT DISTINCT a FROM distinct_a;
+SELECT DISTINCT a FROM distinct_a WHERE a = 1;
+SELECT DISTINCT a FROM distinct_a ORDER BY a DESC;
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT a FROM distinct_a;
+
+-- test index skip scan with a condition on a non unique field
+SELECT DISTINCT ON (a) a, b FROM distinct_a WHERE b = 2;
+
+-- test index skip scan backwards
+SELECT DISTINCT ON (a) a, b FROM distinct_a ORDER BY a DESC, b DESC;
+
+-- test index skip scan for expressions
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT (a + 1) FROM distinct_a ORDER BY (a + 1);
+SELECT DISTINCT (a + 1) FROM distinct_a ORDER BY (a + 1);
+
+-- check colums order
+CREATE INDEX distinct_a_b_a on distinct_a (b, a);
+
+SELECT DISTINCT a FROM distinct_a WHERE b = 2;
+SELECT DISTINCT on (a, b) a, b FROM distinct_a WHERE b = 2;
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT a FROM distinct_a WHERE b = 2;
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT on (a, b) a, b FROM distinct_a WHERE b = 2;
+
+DROP INDEX distinct_a_b_a;
+
+-- test opposite scan/index directions inside a cursor
+-- forward/backward
+BEGIN;
+DECLARE c SCROLL CURSOR FOR
+SELECT DISTINCT ON (a) a,b FROM distinct_a ORDER BY a, b;
+
+FETCH FROM c;
+FETCH BACKWARD FROM c;
+
+FETCH 6 FROM c;
+FETCH BACKWARD 6 FROM c;
+
+FETCH 6 FROM c;
+FETCH BACKWARD 6 FROM c;
+
+END;
+
+-- backward/forward
+BEGIN;
+DECLARE c SCROLL CURSOR FOR
+SELECT DISTINCT ON (a) a,b FROM distinct_a ORDER BY a DESC, b DESC;
+
+FETCH FROM c;
+FETCH BACKWARD FROM c;
+
+FETCH 6 FROM c;
+FETCH BACKWARD 6 FROM c;
+
+FETCH 6 FROM c;
+FETCH BACKWARD 6 FROM c;
+
+END;
+
+-- test missing values and skipping from the end
+CREATE TABLE distinct_abc(a int, b int, c int);
+CREATE INDEX ON distinct_abc(a, b, c);
+INSERT INTO distinct_abc
+	VALUES (1, 1, 1),
+		   (1, 1, 2),
+		   (1, 2, 2),
+		   (1, 2, 3),
+		   (2, 2, 1),
+		   (2, 2, 3),
+		   (3, 1, 1),
+		   (3, 1, 2),
+		   (3, 2, 2),
+		   (3, 2, 3);
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2;
+
+BEGIN;
+DECLARE c SCROLL CURSOR FOR
+SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2;
+
+FETCH ALL FROM c;
+FETCH BACKWARD ALL FROM c;
+
+END;
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2
+ORDER BY a DESC, b DESC;
+
+BEGIN;
+DECLARE c SCROLL CURSOR FOR
+SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2
+ORDER BY a DESC, b DESC;
+
+FETCH ALL FROM c;
+FETCH BACKWARD ALL FROM c;
+
+END;
+
+DROP TABLE distinct_abc;
+
+-- check colums order
+SELECT DISTINCT a FROM distinct_a WHERE b = 2 AND c = 10;
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT a FROM distinct_a WHERE b = 2 AND c = 10;
+
+-- check projection case
+SELECT DISTINCT a, a FROM distinct_a WHERE b = 2;
+SELECT DISTINCT a, 1 FROM distinct_a WHERE b = 2;
+
+-- test cursor forward/backward movements
+BEGIN;
+DECLARE c SCROLL CURSOR FOR SELECT DISTINCT a FROM distinct_a;
+
+FETCH FROM c;
+FETCH BACKWARD FROM c;
+
+FETCH 6 FROM c;
+FETCH BACKWARD 6 FROM c;
+
+FETCH 6 FROM c;
+FETCH BACKWARD 6 FROM c;
+
+END;
+
+DROP TABLE distinct_a;
+
+-- test tuples visibility
+CREATE TABLE distinct_visibility (a int, b int);
+INSERT INTO distinct_visibility (select a, b from generate_series(1,5) a, generate_series(1, 10000) b);
+CREATE INDEX ON distinct_visibility (a, b);
+ANALYZE distinct_visibility;
+
+SELECT DISTINCT ON (a) a, b FROM distinct_visibility ORDER BY a, b;
+DELETE FROM distinct_visibility WHERE a = 2 and b = 1;
+SELECT DISTINCT ON (a) a, b FROM distinct_visibility ORDER BY a, b;
+
+SELECT DISTINCT ON (a) a, b FROM distinct_visibility ORDER BY a DESC, b DESC;
+DELETE FROM distinct_visibility WHERE a = 2 and b = 10000;
+SELECT DISTINCT ON (a) a, b FROM distinct_visibility ORDER BY a DESC, b DESC;
+DROP TABLE distinct_visibility;
+
+-- test page boundaries
+CREATE TABLE distinct_boundaries AS
+    SELECT a, b::int2 b, (b % 2)::int2 c FROM
+        generate_series(1, 5) a,
+        generate_series(1,366) b;
+
+CREATE INDEX ON distinct_boundaries (a, b, c);
+ANALYZE distinct_boundaries;
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (a) a, b, c from distinct_boundaries
+WHERE b >= 1 and c = 0 ORDER BY a, b;
+
+SELECT DISTINCT ON (a) a, b, c from distinct_boundaries
+WHERE b >= 1 and c = 0 ORDER BY a, b;
+
+DROP TABLE distinct_boundaries;
+
+-- test tuple killing
+
+-- DESC ordering
+CREATE TABLE distinct_killed AS
+    SELECT a, b, b % 2 AS c, 10 AS d
+        FROM generate_series(1, 5) a,
+             generate_series(1,1000) b;
+
+CREATE INDEX ON distinct_killed (a, b, c, d);
+
+DELETE FROM distinct_killed where a = 3;
+
+BEGIN;
+    DECLARE c SCROLL CURSOR FOR
+    SELECT DISTINCT ON (a) a,b,c,d
+    FROM distinct_killed ORDER BY a DESC, b DESC;
+    FETCH FORWARD ALL FROM c;
+    FETCH BACKWARD ALL FROM c;
+COMMIT;
+
+DROP TABLE distinct_killed;
+
+-- regular ordering
+CREATE TABLE distinct_killed AS
+    SELECT a, b, b % 2 AS c, 10 AS d
+        FROM generate_series(1, 5) a,
+             generate_series(1,1000) b;
+
+CREATE INDEX ON distinct_killed (a, b, c, d);
+
+DELETE FROM distinct_killed where a = 3;
+
+BEGIN;
+    DECLARE c SCROLL CURSOR FOR
+    SELECT DISTINCT ON (a) a,b,c,d
+    FROM distinct_killed ORDER BY a, b;
+    FETCH FORWARD ALL FROM c;
+    FETCH BACKWARD ALL FROM c;
+COMMIT;
+
+DROP TABLE distinct_killed;
+
+-- partial delete
+CREATE TABLE distinct_killed AS
+    SELECT a, b, b % 2 AS c, 10 AS d
+        FROM generate_series(1, 5) a,
+             generate_series(1,1000) b;
+
+CREATE INDEX ON distinct_killed (a, b, c, d);
+
+DELETE FROM distinct_killed WHERE a = 3 AND b <= 999;
+
+BEGIN;
+    DECLARE c SCROLL CURSOR FOR
+    SELECT DISTINCT ON (a) a,b,c,d
+    FROM distinct_killed ORDER BY a DESC, b DESC;
+    FETCH FORWARD ALL FROM c;
+    FETCH BACKWARD ALL FROM c;
+COMMIT;
+
+DROP TABLE distinct_killed;
+
+-- test posting lists
+CREATE TABLE distinct_posting (a int, b int, c int);
+CREATE INDEX ON distinct_posting (a, b, c);
+INSERT INTO distinct_posting
+	VALUES (1, 1, 1),
+		   (1, 1, 2),
+		   (1, 2, 2),
+		   (1, 2, 3),
+		   (2, 2, 1),
+		   (2, 2, 3),
+		   (3, 1, 1),
+		   (3, 1, 2),
+		   (3, 2, 2),
+		   (3, 2, 3);
+
+INSERT INTO distinct_posting (
+    SELECT 1 as a, 1 as b, 1 AS c
+        FROM generate_series(1,1000) i
+);
+
+BEGIN;
+    DECLARE c SCROLL CURSOR FOR
+    SELECT DISTINCT ON (a) a,b,c FROM distinct_posting WHERE c = 2
+    ORDER BY a DESC, b DESC;
+    FETCH ALL FROM c;
+
+    FETCH BACKWARD ALL FROM c;
+COMMIT;
+
+-- test that quals are check for indexability before applied
+CREATE TABLE Indexable_quals (a text, b text, c text);
+CREATE INDEX ON indexable_quals (a, b, c);
+
+INSERT INTO indexable_quals VALUES ('a1', 'b', 'xxx');
+INSERT INTO indexable_quals VALUES ('a1', 'b', 'yyy');
+INSERT INTO indexable_quals VALUES ('a2', 'b', 'xxx');
+INSERT INTO indexable_quals VALUES ('a2', 'b', 'yyy');
+
+SELECT DISTINCT ON (a, b)  a, b
+FROM indexable_quals WHERE c LIKE '%y%' AND a LIKE 'a%' AND b = 'b';
-- 
2.32.0

