diff --git a/google/cloud/spanner_dbapi/batch_dml_executor.py b/google/cloud/spanner_dbapi/batch_dml_executor.py index 5c4e2495bb..a3ff606295 100644 --- a/google/cloud/spanner_dbapi/batch_dml_executor.py +++ b/google/cloud/spanner_dbapi/batch_dml_executor.py @@ -54,9 +54,12 @@ def execute_statement(self, parsed_statement: ParsedStatement): """ from google.cloud.spanner_dbapi import ProgrammingError + # Note: Let the server handle it if the client-side parser did not + # recognize the type of statement. if ( parsed_statement.statement_type != StatementType.UPDATE and parsed_statement.statement_type != StatementType.INSERT + and parsed_statement.statement_type != StatementType.UNKNOWN ): raise ProgrammingError("Only DML statements are allowed in batch DML mode.") self._statements.append(parsed_statement.statement) diff --git a/google/cloud/spanner_dbapi/client_side_statement_parser.py b/google/cloud/spanner_dbapi/client_side_statement_parser.py index 002779adb4..f978d17f03 100644 --- a/google/cloud/spanner_dbapi/client_side_statement_parser.py +++ b/google/cloud/spanner_dbapi/client_side_statement_parser.py @@ -21,18 +21,18 @@ Statement, ) -RE_BEGIN = re.compile(r"^\s*(BEGIN|START)(TRANSACTION)?", re.IGNORECASE) -RE_COMMIT = re.compile(r"^\s*(COMMIT)(TRANSACTION)?", re.IGNORECASE) -RE_ROLLBACK = re.compile(r"^\s*(ROLLBACK)(TRANSACTION)?", re.IGNORECASE) +RE_BEGIN = re.compile(r"^\s*(BEGIN|START)(\s+TRANSACTION)?\s*$", re.IGNORECASE) +RE_COMMIT = re.compile(r"^\s*(COMMIT)(\s+TRANSACTION)?\s*$", re.IGNORECASE) +RE_ROLLBACK = re.compile(r"^\s*(ROLLBACK)(\s+TRANSACTION)?\s*$", re.IGNORECASE) RE_SHOW_COMMIT_TIMESTAMP = re.compile( - r"^\s*(SHOW)\s+(VARIABLE)\s+(COMMIT_TIMESTAMP)", re.IGNORECASE + r"^\s*(SHOW)\s+(VARIABLE)\s+(COMMIT_TIMESTAMP)\s*$", re.IGNORECASE ) RE_SHOW_READ_TIMESTAMP = re.compile( - r"^\s*(SHOW)\s+(VARIABLE)\s+(READ_TIMESTAMP)", re.IGNORECASE + r"^\s*(SHOW)\s+(VARIABLE)\s+(READ_TIMESTAMP)\s*$", re.IGNORECASE ) -RE_START_BATCH_DML = 
re.compile(r"^\s*(START)\s+(BATCH)\s+(DML)", re.IGNORECASE) -RE_RUN_BATCH = re.compile(r"^\s*(RUN)\s+(BATCH)", re.IGNORECASE) -RE_ABORT_BATCH = re.compile(r"^\s*(ABORT)\s+(BATCH)", re.IGNORECASE) +RE_START_BATCH_DML = re.compile(r"^\s*(START)\s+(BATCH)\s+(DML)\s*$", re.IGNORECASE) +RE_RUN_BATCH = re.compile(r"^\s*(RUN)\s+(BATCH)\s*$", re.IGNORECASE) +RE_ABORT_BATCH = re.compile(r"^\s*(ABORT)\s+(BATCH)\s*$", re.IGNORECASE) RE_PARTITION_QUERY = re.compile(r"^\s*(PARTITION)\s+(.+)", re.IGNORECASE) RE_RUN_PARTITION = re.compile(r"^\s*(RUN)\s+(PARTITION)\s+(.+)", re.IGNORECASE) RE_RUN_PARTITIONED_QUERY = re.compile( diff --git a/google/cloud/spanner_dbapi/connection.py b/google/cloud/spanner_dbapi/connection.py index adcb9e97eb..a615a282b5 100644 --- a/google/cloud/spanner_dbapi/connection.py +++ b/google/cloud/spanner_dbapi/connection.py @@ -20,11 +20,7 @@ from google.cloud import spanner_v1 as spanner from google.cloud.spanner_dbapi import partition_helper from google.cloud.spanner_dbapi.batch_dml_executor import BatchMode, BatchDmlExecutor -from google.cloud.spanner_dbapi.parse_utils import _get_statement_type -from google.cloud.spanner_dbapi.parsed_statement import ( - StatementType, - AutocommitDmlMode, -) +from google.cloud.spanner_dbapi.parsed_statement import AutocommitDmlMode from google.cloud.spanner_dbapi.partition_helper import PartitionId from google.cloud.spanner_dbapi.parsed_statement import ParsedStatement, Statement from google.cloud.spanner_dbapi.transaction_helper import TransactionRetryHelper @@ -702,10 +698,6 @@ def set_autocommit_dml_mode( self._autocommit_dml_mode = autocommit_dml_mode def _partitioned_query_validation(self, partitioned_query, statement): - if _get_statement_type(Statement(partitioned_query)) is not StatementType.QUERY: - raise ProgrammingError( - "Only queries can be partitioned. 
Invalid statement: " + statement.sql - ) if self.read_only is not True and self._client_transaction_started is True: raise ProgrammingError( "Partitioned query is not supported, because the connection is in a read/write transaction." diff --git a/google/cloud/spanner_dbapi/cursor.py b/google/cloud/spanner_dbapi/cursor.py index 5c1539e7fc..75a368c89f 100644 --- a/google/cloud/spanner_dbapi/cursor.py +++ b/google/cloud/spanner_dbapi/cursor.py @@ -404,9 +404,12 @@ def executemany(self, operation, seq_of_params): # For every operation, we've got to ensure that any prior DDL # statements were run. self.connection.run_prior_DDL_statements() + # Treat UNKNOWN statements as if they are DML and let the server + # determine what is wrong with it. if self._parsed_statement.statement_type in ( StatementType.INSERT, StatementType.UPDATE, + StatementType.UNKNOWN, ): statements = [] for params in seq_of_params: diff --git a/google/cloud/spanner_dbapi/parse_utils.py b/google/cloud/spanner_dbapi/parse_utils.py index 245840ca0d..66741eb264 100644 --- a/google/cloud/spanner_dbapi/parse_utils.py +++ b/google/cloud/spanner_dbapi/parse_utils.py @@ -155,6 +155,7 @@ STMT_INSERT = "INSERT" # Heuristic for identifying statements that don't need to be run as updates. +# TODO: This and the other regexes do not match statements that start with a hint. RE_NON_UPDATE = re.compile(r"^\W*(SELECT|GRAPH|FROM)", re.IGNORECASE) RE_WITH = re.compile(r"^\s*(WITH)", re.IGNORECASE) @@ -162,18 +163,22 @@ # DDL statements follow # https://cloud.google.com/spanner/docs/data-definition-language RE_DDL = re.compile( - r"^\s*(CREATE|ALTER|DROP|GRANT|REVOKE|RENAME)", re.IGNORECASE | re.DOTALL + r"^\s*(CREATE|ALTER|DROP|GRANT|REVOKE|RENAME|ANALYZE)", re.IGNORECASE | re.DOTALL ) -RE_IS_INSERT = re.compile(r"^\s*(INSERT)", re.IGNORECASE | re.DOTALL) +# TODO: These do not match statements that start with a hint. 
+RE_IS_INSERT = re.compile(r"^\s*(INSERT\s+)", re.IGNORECASE | re.DOTALL) +RE_IS_UPDATE = re.compile(r"^\s*(UPDATE\s+)", re.IGNORECASE | re.DOTALL) +RE_IS_DELETE = re.compile(r"^\s*(DELETE\s+)", re.IGNORECASE | re.DOTALL) RE_INSERT = re.compile( # Only match the `INSERT INTO <table_name> (columns...) # otherwise the rest of the statement could be a complex # operation. - r"^\s*INSERT INTO (?P<table_name>[^\s\(\)]+)\s*\((?P<columns>[^\(\)]+)\)", + r"^\s*INSERT(?:\s+INTO)?\s+(?P<table_name>[^\s\(\)]+)\s*\((?P<columns>[^\(\)]+)\)", re.IGNORECASE | re.DOTALL, ) +"""Deprecated: Use the RE_IS_INSERT, RE_IS_UPDATE, and RE_IS_DELETE regexes""" RE_VALUES_TILL_END = re.compile(r"VALUES\s*\(.+$", re.IGNORECASE | re.DOTALL) @@ -259,8 +264,13 @@ def _get_statement_type(statement): # statements and doesn't yet support WITH for DML statements. return StatementType.QUERY - statement.sql = ensure_where_clause(query) - return StatementType.UPDATE + if RE_IS_UPDATE.match(query) or RE_IS_DELETE.match(query): + # TODO: Remove this? It makes more sense to have this in SQLAlchemy and + # Django than here. + statement.sql = ensure_where_clause(query) + return StatementType.UPDATE + + return StatementType.UNKNOWN def sql_pyformat_args_to_spanner(sql, params): @@ -355,7 +365,7 @@ def get_param_types(params): def ensure_where_clause(sql): """ Cloud Spanner requires a WHERE clause on UPDATE and DELETE statements. - Add a dummy WHERE clause if non detected. + Add a dummy WHERE clause if not detected. :type sql: str :param sql: SQL code to check. 
diff --git a/google/cloud/spanner_dbapi/parsed_statement.py b/google/cloud/spanner_dbapi/parsed_statement.py index f89d6ea19e..a8d03f6fa4 100644 --- a/google/cloud/spanner_dbapi/parsed_statement.py +++ b/google/cloud/spanner_dbapi/parsed_statement.py @@ -17,6 +17,7 @@ class StatementType(Enum): + UNKNOWN = 0 CLIENT_SIDE = 1 DDL = 2 QUERY = 3 diff --git a/tests/unit/spanner_dbapi/test_parse_utils.py b/tests/unit/spanner_dbapi/test_parse_utils.py index f0721bdbe3..031fbc443f 100644 --- a/tests/unit/spanner_dbapi/test_parse_utils.py +++ b/tests/unit/spanner_dbapi/test_parse_utils.py @@ -74,11 +74,31 @@ def test_classify_stmt(self): ("REVOKE SELECT ON TABLE Singers TO ROLE parent", StatementType.DDL), ("GRANT ROLE parent TO ROLE child", StatementType.DDL), ("INSERT INTO table (col1) VALUES (1)", StatementType.INSERT), + ("INSERT table (col1) VALUES (1)", StatementType.INSERT), + ("INSERT OR UPDATE table (col1) VALUES (1)", StatementType.INSERT), + ("INSERT OR IGNORE table (col1) VALUES (1)", StatementType.INSERT), ("UPDATE table SET col1 = 1 WHERE col1 = NULL", StatementType.UPDATE), + ("delete from table WHERE col1 = 2", StatementType.UPDATE), + ("delete from table WHERE col1 in (select 1)", StatementType.UPDATE), + ("dlete from table where col1 = 2", StatementType.UNKNOWN), + ("udpate table set col2=1 where col1 = 2", StatementType.UNKNOWN), + ("begin foo", StatementType.UNKNOWN), + ("begin transaction foo", StatementType.UNKNOWN), + ("commit foo", StatementType.UNKNOWN), + ("commit transaction foo", StatementType.UNKNOWN), + ("rollback foo", StatementType.UNKNOWN), + ("rollback transaction foo", StatementType.UNKNOWN), + ("show variable", StatementType.UNKNOWN), + ("show variable read_timestamp foo", StatementType.UNKNOWN), + ("INSERTs INTO table (col1) VALUES (1)", StatementType.UNKNOWN), + ("UPDATEs table SET col1 = 1 WHERE col1 = NULL", StatementType.UNKNOWN), + ("DELETEs from table WHERE col1 = 2", StatementType.UNKNOWN), ) for query, want_class in cases: - 
self.assertEqual(classify_statement(query).statement_type, want_class) + self.assertEqual( + classify_statement(query).statement_type, want_class, query + ) def test_partition_query_classify_stmt(self): parsed_statement = classify_statement(