From afa7ff7dc369b6343ace86ddb553911e85c4916b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 26 Sep 2023 13:47:15 -0700 Subject: [PATCH 01/13] chore(deps): bump cryptography from 41.0.3 to 41.0.4 in /.kokoro (#1123) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): bump cryptography from 41.0.3 to 41.0.4 in /.kokoro Bumps [cryptography](https://github.com/pyca/cryptography) from 41.0.3 to 41.0.4. - [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pyca/cryptography/compare/41.0.3...41.0.4) --- updated-dependencies: - dependency-name: cryptography dependency-type: indirect ... Signed-off-by: dependabot[bot] * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Owl Bot From 69bd4a935a995f8f261a589ee2978f58b90224ab Mon Sep 17 00:00:00 2001 From: Andrew Gorcester Date: Wed, 27 Sep 2023 09:15:35 -0700 Subject: [PATCH 02/13] fix: mark _deprecate_threads_param as a wrapper to unblock introspection and docs (#1122) --- google/cloud/storage/transfer_manager.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/google/cloud/storage/transfer_manager.py b/google/cloud/storage/transfer_manager.py index 3060528c9..da6973c71 100644 --- a/google/cloud/storage/transfer_manager.py +++ b/google/cloud/storage/transfer_manager.py @@ -22,6 +22,7 @@ import warnings import pickle import copyreg +import functools from google.api_core import exceptions from google.cloud.storage import Client @@ -61,6 +62,7 @@ def _deprecate_threads_param(func): + @functools.wraps(func) def convert_threads_or_raise(*args, **kwargs): binding = inspect.signature(func).bind(*args, **kwargs) threads = binding.arguments.get("threads") From a3a1159c924ce15aa8cdaf5f42fc44891f4506ef Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 6 Oct 2023 21:35:35 -0400 Subject: [PATCH 03/13] chore: [autoapprove] bump cryptography from 41.0.3 to 41.0.4 (#1136) Source-Link: https://github.com/googleapis/synthtool/commit/dede53ff326079b457cfb1aae5bbdc82cbb51dc3 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:fac304457974bb530cc5396abd4ab25d26a469cd3bc97cbfb18c8d4324c584eb Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 ++-- .gitignore | 1 + .kokoro/requirements.txt | 49 ++++++++++++++++++++------------------- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index a3da1b0d4..a9bdb1b7a 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. 
docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:3e3800bb100af5d7f9e810d48212b37812c1856d20ffeafb99ebe66461b61fc7 -# created: 2023-08-02T10:53:29.114535628Z + digest: sha256:fac304457974bb530cc5396abd4ab25d26a469cd3bc97cbfb18c8d4324c584eb +# created: 2023-10-02T21:31:03.517640371Z diff --git a/.gitignore b/.gitignore index b4243ced7..d083ea1dd 100644 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,7 @@ docs.metadata # Virtual environment env/ +venv/ # Test logs coverage.xml diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 029bd342d..96d593c8c 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -113,30 +113,30 @@ commonmark==0.9.1 \ --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 # via rich -cryptography==41.0.3 \ - --hash=sha256:0d09fb5356f975974dbcb595ad2d178305e5050656affb7890a1583f5e02a306 \ - --hash=sha256:23c2d778cf829f7d0ae180600b17e9fceea3c2ef8b31a99e3c694cbbf3a24b84 \ - --hash=sha256:3fb248989b6363906827284cd20cca63bb1a757e0a2864d4c1682a985e3dca47 \ - --hash=sha256:41d7aa7cdfded09b3d73a47f429c298e80796c8e825ddfadc84c8a7f12df212d \ - --hash=sha256:42cb413e01a5d36da9929baa9d70ca90d90b969269e5a12d39c1e0d475010116 \ - --hash=sha256:4c2f0d35703d61002a2bbdcf15548ebb701cfdd83cdc12471d2bae80878a4207 \ - --hash=sha256:4fd871184321100fb400d759ad0cddddf284c4b696568204d281c902fc7b0d81 \ - --hash=sha256:5259cb659aa43005eb55a0e4ff2c825ca111a0da1814202c64d28a985d33b087 \ - --hash=sha256:57a51b89f954f216a81c9d057bf1a24e2f36e764a1ca9a501a6964eb4a6800dd \ - --hash=sha256:652627a055cb52a84f8c448185922241dd5217443ca194d5739b44612c5e6507 \ - --hash=sha256:67e120e9a577c64fe1f611e53b30b3e69744e5910ff3b6e97e935aeb96005858 \ - --hash=sha256:6af1c6387c531cd364b72c28daa29232162010d952ceb7e5ca8e2827526aceae \ - --hash=sha256:6d192741113ef5e30d89dcb5b956ef4e1578f304708701b8b73d38e3e1461f34 \ - --hash=sha256:7efe8041897fe7a50863e51b77789b657a133c75c3b094e51b5e4b5cec7bf906 \ - --hash=sha256:84537453d57f55a50a5b6835622ee405816999a7113267739a1b4581f83535bd \ - --hash=sha256:8f09daa483aedea50d249ef98ed500569841d6498aa9c9f4b0531b9964658922 \ - --hash=sha256:95dd7f261bb76948b52a5330ba5202b91a26fbac13ad0e9fc8a3ac04752058c7 \ - --hash=sha256:a74fbcdb2a0d46fe00504f571a2a540532f4c188e6ccf26f1f178480117b33c4 \ - --hash=sha256:a983e441a00a9d57a4d7c91b3116a37ae602907a7618b882c8013b5762e80574 \ - --hash=sha256:ab8de0d091acbf778f74286f4989cf3d1528336af1b59f3e5d2ebca8b5fe49e1 \ - --hash=sha256:aeb57c421b34af8f9fe830e1955bf493a86a7996cc1338fe41b30047d16e962c \ - --hash=sha256:ce785cf81a7bdade534297ef9e490ddff800d956625020ab2ec2780a556c313e \ - --hash=sha256:d0d651aa754ef58d75cec6edfbd21259d93810b73f6ec246436a21b7841908de +cryptography==41.0.4 \ + --hash=sha256:004b6ccc95943f6a9ad3142cfabcc769d7ee38a3f60fb0dddbfb431f818c3a67 \ + --hash=sha256:047c4603aeb4bbd8db2756e38f5b8bd7e94318c047cfe4efeb5d715e08b49311 \ + --hash=sha256:0d9409894f495d465fe6fda92cb70e8323e9648af912d5b9141d616df40a87b8 \ + --hash=sha256:23a25c09dfd0d9f28da2352503b23e086f8e78096b9fd585d1d14eca01613e13 \ + --hash=sha256:2ed09183922d66c4ec5fdaa59b4d14e105c084dd0febd27452de8f6f74704143 \ + --hash=sha256:35c00f637cd0b9d5b6c6bd11b6c3359194a8eba9c46d4e875a3660e3b400005f \ + --hash=sha256:37480760ae08065437e6573d14be973112c9e6dcaf5f11d00147ee74f37a3829 \ + --hash=sha256:3b224890962a2d7b57cf5eeb16ccaafba6083f7b811829f00476309bce2fe0fd \ + 
--hash=sha256:5a0f09cefded00e648a127048119f77bc2b2ec61e736660b5789e638f43cc397 \ + --hash=sha256:5b72205a360f3b6176485a333256b9bcd48700fc755fef51c8e7e67c4b63e3ac \ + --hash=sha256:7e53db173370dea832190870e975a1e09c86a879b613948f09eb49324218c14d \ + --hash=sha256:7febc3094125fc126a7f6fb1f420d0da639f3f32cb15c8ff0dc3997c4549f51a \ + --hash=sha256:80907d3faa55dc5434a16579952ac6da800935cd98d14dbd62f6f042c7f5e839 \ + --hash=sha256:86defa8d248c3fa029da68ce61fe735432b047e32179883bdb1e79ed9bb8195e \ + --hash=sha256:8ac4f9ead4bbd0bc8ab2d318f97d85147167a488be0e08814a37eb2f439d5cf6 \ + --hash=sha256:93530900d14c37a46ce3d6c9e6fd35dbe5f5601bf6b3a5c325c7bffc030344d9 \ + --hash=sha256:9eeb77214afae972a00dee47382d2591abe77bdae166bda672fb1e24702a3860 \ + --hash=sha256:b5f4dfe950ff0479f1f00eda09c18798d4f49b98f4e2006d644b3301682ebdca \ + --hash=sha256:c3391bd8e6de35f6f1140e50aaeb3e2b3d6a9012536ca23ab0d9c35ec18c8a91 \ + --hash=sha256:c880eba5175f4307129784eca96f4e70b88e57aa3f680aeba3bab0e980b0f37d \ + --hash=sha256:cecfefa17042941f94ab54f769c8ce0fe14beff2694e9ac684176a2535bf9714 \ + --hash=sha256:e40211b4923ba5a6dc9769eab704bdb3fbb58d56c5b336d30996c24fcf12aadb \ + --hash=sha256:efc8ad4e6fc4f1752ebfb58aefece8b4e3c4cae940b0994d43649bdfce8d0d4f # via # gcp-releasetool # secretstorage @@ -382,6 +382,7 @@ protobuf==3.20.3 \ # gcp-docuploader # gcp-releasetool # google-api-core + # googleapis-common-protos pyasn1==0.4.8 \ --hash=sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d \ --hash=sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba From 3a0f551436b659afb2208fd558ddb846f4d62d98 Mon Sep 17 00:00:00 2001 From: Andrew Gorcester Date: Mon, 9 Oct 2023 15:18:48 -0700 Subject: [PATCH 04/13] docs: Add snippets for upload_chunks_concurrently and add chunk_size (#1135) * docs: Add snippets for upload_chunks_concurrently and add chunk_size * switch from 'processes' to 'workers' in sample nomenclature * copyright * tests --- samples/snippets/snippets_test.py | 61 ++++++++++++++----- ...torage_transfer_manager_download_bucket.py | 9 +-- ...er_manager_download_chunks_concurrently.py | 20 ++++-- .../storage_transfer_manager_download_many.py | 9 +-- ...sfer_manager_upload_chunks_concurrently.py | 57 +++++++++++++++++ ...orage_transfer_manager_upload_directory.py | 9 +-- .../storage_transfer_manager_upload_many.py | 9 +-- 7 files changed, 140 insertions(+), 34 deletions(-) create mode 100644 samples/snippets/storage_transfer_manager_upload_chunks_concurrently.py diff --git a/samples/snippets/snippets_test.py b/samples/snippets/snippets_test.py index 2da7bb94c..8014411e8 100644 --- a/samples/snippets/snippets_test.py +++ b/samples/snippets/snippets_test.py @@ -75,6 +75,7 @@ import storage_transfer_manager_download_bucket import storage_transfer_manager_download_chunks_concurrently import storage_transfer_manager_download_many +import storage_transfer_manager_upload_chunks_concurrently import storage_transfer_manager_upload_directory import storage_transfer_manager_upload_many import storage_upload_file @@ -243,7 +244,10 @@ def test_upload_blob_with_kms(test_bucket): with tempfile.NamedTemporaryFile() as source_file: source_file.write(b"test") storage_upload_with_kms_key.upload_blob_with_kms( - test_bucket.name, source_file.name, blob_name, KMS_KEY, + test_bucket.name, + source_file.name, + blob_name, + KMS_KEY, ) bucket = storage.Client().bucket(test_bucket.name) kms_blob = bucket.get_blob(blob_name) @@ -396,7 +400,10 @@ def test_move_blob(test_bucket_create, test_blob): 
print(f"test_move_blob not found in bucket {test_bucket_create.name}") storage_move_file.move_blob( - bucket.name, test_blob.name, test_bucket_create.name, "test_move_blob", + bucket.name, + test_blob.name, + test_bucket_create.name, + "test_move_blob", ) assert test_bucket_create.get_blob("test_move_blob") is not None @@ -412,7 +419,10 @@ def test_copy_blob(test_blob): pass storage_copy_file.copy_blob( - bucket.name, test_blob.name, bucket.name, "test_copy_blob", + bucket.name, + test_blob.name, + bucket.name, + "test_copy_blob", ) assert bucket.get_blob("test_copy_blob") is not None @@ -551,7 +561,10 @@ def test_define_bucket_website_configuration(test_bucket): def test_object_get_kms_key(test_bucket): with tempfile.NamedTemporaryFile() as source_file: storage_upload_with_kms_key.upload_blob_with_kms( - test_bucket.name, source_file.name, "test_upload_blob_encrypted", KMS_KEY, + test_bucket.name, + source_file.name, + "test_upload_blob_encrypted", + KMS_KEY, ) kms_key = storage_object_get_kms_key.object_get_kms_key( test_bucket.name, "test_upload_blob_encrypted" @@ -568,7 +581,10 @@ def test_storage_compose_file(test_bucket): with tempfile.NamedTemporaryFile() as dest_file: destination = storage_compose_file.compose_file( - test_bucket.name, source_files[0], source_files[1], dest_file.name, + test_bucket.name, + source_files[0], + source_files[1], + dest_file.name, ) composed = destination.download_as_string() @@ -608,7 +624,8 @@ def test_change_default_storage_class(test_bucket, capsys): def test_change_file_storage_class(test_blob, capsys): blob = storage_change_file_storage_class.change_file_storage_class( - test_blob.bucket.name, test_blob.name, + test_blob.bucket.name, + test_blob.name, ) out, _ = capsys.readouterr() assert f"Blob {blob.name} in bucket {blob.bucket.name}" in out @@ -694,7 +711,7 @@ def test_transfer_manager_snippets(test_bucket, capsys): test_bucket.name, BLOB_NAMES, source_directory="{}/".format(uploads), - processes=8, + workers=8, ) out, _ = capsys.readouterr() @@ -706,7 +723,7 @@ def test_transfer_manager_snippets(test_bucket, capsys): storage_transfer_manager_download_bucket.download_bucket_with_transfer_manager( test_bucket.name, destination_directory=os.path.join(downloads, ""), - processes=8, + workers=8, max_results=10000, ) out, _ = capsys.readouterr() @@ -720,7 +737,7 @@ def test_transfer_manager_snippets(test_bucket, capsys): test_bucket.name, blob_names=BLOB_NAMES, destination_directory=os.path.join(downloads, ""), - processes=8, + workers=8, ) out, _ = capsys.readouterr() @@ -763,9 +780,7 @@ def test_transfer_manager_download_chunks_concurrently(test_bucket, capsys): with tempfile.NamedTemporaryFile() as file: file.write(b"test") - storage_upload_file.upload_blob( - test_bucket.name, file.name, BLOB_NAME - ) + storage_upload_file.upload_blob(test_bucket.name, file.name, BLOB_NAME) with tempfile.TemporaryDirectory() as downloads: # Download the file. 
@@ -773,8 +788,26 @@ def test_transfer_manager_download_chunks_concurrently(test_bucket, capsys): test_bucket.name, BLOB_NAME, os.path.join(downloads, BLOB_NAME), - processes=8, + workers=8, ) out, _ = capsys.readouterr() - assert "Downloaded {} to {}".format(BLOB_NAME, os.path.join(downloads, BLOB_NAME)) in out + assert ( + "Downloaded {} to {}".format(BLOB_NAME, os.path.join(downloads, BLOB_NAME)) + in out + ) + + +def test_transfer_manager_upload_chunks_concurrently(test_bucket, capsys): + BLOB_NAME = "test_file.txt" + + with tempfile.NamedTemporaryFile() as file: + file.write(b"test") + file.flush() + + storage_transfer_manager_upload_chunks_concurrently.upload_chunks_concurrently( + test_bucket.name, file.name, BLOB_NAME + ) + + out, _ = capsys.readouterr() + assert "File {} uploaded to {}".format(file.name, BLOB_NAME) in out diff --git a/samples/snippets/storage_transfer_manager_download_bucket.py b/samples/snippets/storage_transfer_manager_download_bucket.py index 4f21ee6e9..5d94a67ae 100644 --- a/samples/snippets/storage_transfer_manager_download_bucket.py +++ b/samples/snippets/storage_transfer_manager_download_bucket.py @@ -14,7 +14,7 @@ # [START storage_transfer_manager_download_bucket] def download_bucket_with_transfer_manager( - bucket_name, destination_directory="", processes=8, max_results=1000 + bucket_name, destination_directory="", workers=8, max_results=1000 ): """Download all of the blobs in a bucket, concurrently in a process pool. @@ -40,8 +40,9 @@ def download_bucket_with_transfer_manager( # The maximum number of processes to use for the operation. The performance # impact of this value depends on the use case, but smaller files usually # benefit from a higher number of processes. Each additional process occupies - # some CPU and memory resources until finished. - # processes=8 + # some CPU and memory resources until finished. Threads can be used instead + # of processes by passing `worker_type=transfer_manager.THREAD`. + # workers=8 # The maximum number of results to fetch from bucket.list_blobs(). This # sample code fetches all of the blobs up to max_results and queues them all @@ -60,7 +61,7 @@ def download_bucket_with_transfer_manager( blob_names = [blob.name for blob in bucket.list_blobs(max_results=max_results)] results = transfer_manager.download_many_to_path( - bucket, blob_names, destination_directory=destination_directory, max_workers=processes + bucket, blob_names, destination_directory=destination_directory, max_workers=workers ) for name, result in zip(blob_names, results): diff --git a/samples/snippets/storage_transfer_manager_download_chunks_concurrently.py b/samples/snippets/storage_transfer_manager_download_chunks_concurrently.py index 9ddec094e..b6ac9982d 100644 --- a/samples/snippets/storage_transfer_manager_download_chunks_concurrently.py +++ b/samples/snippets/storage_transfer_manager_download_chunks_concurrently.py @@ -13,7 +13,9 @@ # limitations under the License. # [START storage_transfer_manager_download_chunks_concurrently] -def download_chunks_concurrently(bucket_name, blob_name, filename, processes=8): +def download_chunks_concurrently( + bucket_name, blob_name, filename, chunk_size=32 * 1024 * 1024, workers=8 +): """Download a single file in chunks, concurrently in a process pool.""" # The ID of your GCS bucket @@ -25,11 +27,17 @@ def download_chunks_concurrently(bucket_name, blob_name, filename, processes=8): # The destination filename or path # filename = "" + # The size of each chunk. 
The performance impact of this value depends on + # the use case. The remote service has a minimum of 5 MiB and a maximum of + # 5 GiB. + # chunk_size = 32 * 1024 * 1024 (32 MiB) + # The maximum number of processes to use for the operation. The performance # impact of this value depends on the use case, but smaller files usually # benefit from a higher number of processes. Each additional process occupies - # some CPU and memory resources until finished. - # processes=8 + # some CPU and memory resources until finished. Threads can be used instead + # of processes by passing `worker_type=transfer_manager.THREAD`. + # workers=8 from google.cloud.storage import Client, transfer_manager @@ -37,7 +45,11 @@ def download_chunks_concurrently(bucket_name, blob_name, filename, processes=8): bucket = storage_client.bucket(bucket_name) blob = bucket.blob(blob_name) - transfer_manager.download_chunks_concurrently(blob, filename, max_workers=processes) + transfer_manager.download_chunks_concurrently( + blob, filename, chunk_size=chunk_size, max_workers=workers + ) print("Downloaded {} to {}.".format(blob_name, filename)) + + # [END storage_transfer_manager_download_chunks_concurrently] diff --git a/samples/snippets/storage_transfer_manager_download_many.py b/samples/snippets/storage_transfer_manager_download_many.py index 500eea1ce..02cb9b887 100644 --- a/samples/snippets/storage_transfer_manager_download_many.py +++ b/samples/snippets/storage_transfer_manager_download_many.py @@ -14,7 +14,7 @@ # [START storage_transfer_manager_download_many] def download_many_blobs_with_transfer_manager( - bucket_name, blob_names, destination_directory="", processes=8 + bucket_name, blob_names, destination_directory="", workers=8 ): """Download blobs in a list by name, concurrently in a process pool. @@ -46,8 +46,9 @@ def download_many_blobs_with_transfer_manager( # The maximum number of processes to use for the operation. The performance # impact of this value depends on the use case, but smaller files usually # benefit from a higher number of processes. Each additional process occupies - # some CPU and memory resources until finished. - # processes=8 + # some CPU and memory resources until finished. Threads can be used instead + # of processes by passing `worker_type=transfer_manager.THREAD`. + # workers=8 from google.cloud.storage import Client, transfer_manager @@ -55,7 +56,7 @@ def download_many_blobs_with_transfer_manager( bucket = storage_client.bucket(bucket_name) results = transfer_manager.download_many_to_path( - bucket, blob_names, destination_directory=destination_directory, max_workers=processes + bucket, blob_names, destination_directory=destination_directory, max_workers=workers ) for name, result in zip(blob_names, results): diff --git a/samples/snippets/storage_transfer_manager_upload_chunks_concurrently.py b/samples/snippets/storage_transfer_manager_upload_chunks_concurrently.py new file mode 100644 index 000000000..009f09648 --- /dev/null +++ b/samples/snippets/storage_transfer_manager_upload_chunks_concurrently.py @@ -0,0 +1,57 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START storage_transfer_manager_upload_chunks_concurrently] +def upload_chunks_concurrently( + bucket_name, + source_filename, + destination_blob_name, + chunk_size=32 * 1024 * 1024, + workers=8, +): + """Upload a single file, in chunks, concurrently in a process pool.""" + # The ID of your GCS bucket + # bucket_name = "your-bucket-name" + + # The path to your file to upload + # source_filename = "local/path/to/file" + + # The ID of your GCS object + # destination_blob_name = "storage-object-name" + + # The size of each chunk. The performance impact of this value depends on + # the use case. The remote service has a minimum of 5 MiB and a maximum of + # 5 GiB. + # chunk_size = 32 * 1024 * 1024 (32 MiB) + + # The maximum number of processes to use for the operation. The performance + # impact of this value depends on the use case. Each additional process + # occupies some CPU and memory resources until finished. Threads can be used + # instead of processes by passing `worker_type=transfer_manager.THREAD`. + # workers=8 + + from google.cloud.storage import Client, transfer_manager + + storage_client = Client() + bucket = storage_client.bucket(bucket_name) + blob = bucket.blob(destination_blob_name) + + transfer_manager.upload_chunks_concurrently( + source_filename, blob, chunk_size=chunk_size, max_workers=workers + ) + + print(f"File {source_filename} uploaded to {destination_blob_name}.") + + +# [END storage_transfer_manager_upload_chunks_concurrently] diff --git a/samples/snippets/storage_transfer_manager_upload_directory.py b/samples/snippets/storage_transfer_manager_upload_directory.py index c0dbb9c9c..329ca1081 100644 --- a/samples/snippets/storage_transfer_manager_upload_directory.py +++ b/samples/snippets/storage_transfer_manager_upload_directory.py @@ -13,7 +13,7 @@ # limitations under the License. # [START storage_transfer_manager_upload_directory] -def upload_directory_with_transfer_manager(bucket_name, source_directory, processes=8): +def upload_directory_with_transfer_manager(bucket_name, source_directory, workers=8): """Upload every file in a directory, including all files in subdirectories. Each blob name is derived from the filename, not including the `directory` @@ -33,8 +33,9 @@ def upload_directory_with_transfer_manager(bucket_name, source_directory, proces # The maximum number of processes to use for the operation. The performance # impact of this value depends on the use case, but smaller files usually # benefit from a higher number of processes. Each additional process occupies - # some CPU and memory resources until finished. - # processes=8 + # some CPU and memory resources until finished. Threads can be used instead + # of processes by passing `worker_type=transfer_manager.THREAD`. + # workers=8 from pathlib import Path @@ -65,7 +66,7 @@ def upload_directory_with_transfer_manager(bucket_name, source_directory, proces # Start the upload. 
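    # (The sample's `workers` argument is forwarded unchanged as `max_workers`
    # on the call below, so the tuning guidance in the parameter notes above
    # applies to the underlying transfer_manager call as well.)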
results = transfer_manager.upload_many_from_filenames( - bucket, string_paths, source_directory=source_directory, max_workers=processes + bucket, string_paths, source_directory=source_directory, max_workers=workers ) for name, result in zip(string_paths, results): diff --git a/samples/snippets/storage_transfer_manager_upload_many.py b/samples/snippets/storage_transfer_manager_upload_many.py index 2ed647650..1b9b9fc89 100644 --- a/samples/snippets/storage_transfer_manager_upload_many.py +++ b/samples/snippets/storage_transfer_manager_upload_many.py @@ -14,7 +14,7 @@ # [START storage_transfer_manager_upload_many] def upload_many_blobs_with_transfer_manager( - bucket_name, filenames, source_directory="", processes=8 + bucket_name, filenames, source_directory="", workers=8 ): """Upload every file in a list to a bucket, concurrently in a process pool. @@ -43,8 +43,9 @@ def upload_many_blobs_with_transfer_manager( # The maximum number of processes to use for the operation. The performance # impact of this value depends on the use case, but smaller files usually # benefit from a higher number of processes. Each additional process occupies - # some CPU and memory resources until finished. - # processes=8 + # some CPU and memory resources until finished. Threads can be used instead + # of processes by passing `worker_type=transfer_manager.THREAD`. + # workers=8 from google.cloud.storage import Client, transfer_manager @@ -52,7 +53,7 @@ def upload_many_blobs_with_transfer_manager( bucket = storage_client.bucket(bucket_name) results = transfer_manager.upload_many_from_filenames( - bucket, filenames, source_directory=source_directory, max_workers=processes + bucket, filenames, source_directory=source_directory, max_workers=workers ) for name, result in zip(filenames, results): From aefcdd4623bacf26987843364b16ec865cf16175 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 10 Oct 2023 10:34:32 -0400 Subject: [PATCH 05/13] chore(deps): bump urllib3 from 1.26.12 to 1.26.17 in /.kokoro (#1140) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: [autoapprove] Update `black` and `isort` to latest versions Source-Link: https://github.com/googleapis/synthtool/commit/0c7b0333f44b2b7075447f43a121a12d15a7b76a Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:08e34975760f002746b1d8c86fdc90660be45945ee6d9db914d1508acdf9a547 * update black in noxfile.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .github/.OwlBot.lock.yaml | 4 ++-- .kokoro/requirements.txt | 6 +++--- .pre-commit-config.yaml | 2 +- google/cloud/storage/_signing.py | 2 -- google/cloud/storage/blob.py | 1 - google/cloud/storage/bucket.py | 2 -- google/cloud/storage/transfer_manager.py | 1 - noxfile.py | 2 +- tests/system/test_blob.py | 3 --- tests/system/test_client.py | 1 - tests/unit/test__helpers.py | 1 - tests/unit/test_acl.py | 2 -- tests/unit/test_batch.py | 1 - tests/unit/test_blob.py | 2 -- tests/unit/test_client.py | 1 - tests/unit/test_notification.py | 1 - tests/unit/test_transfer_manager.py | 8 ++++---- 17 files changed, 11 insertions(+), 29 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index a9bdb1b7a..dd98abbde 
100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:fac304457974bb530cc5396abd4ab25d26a469cd3bc97cbfb18c8d4324c584eb -# created: 2023-10-02T21:31:03.517640371Z + digest: sha256:08e34975760f002746b1d8c86fdc90660be45945ee6d9db914d1508acdf9a547 +# created: 2023-10-09T14:06:13.397766266Z diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 96d593c8c..0332d3267 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -467,9 +467,9 @@ typing-extensions==4.4.0 \ --hash=sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa \ --hash=sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e # via -r requirements.in -urllib3==1.26.12 \ - --hash=sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e \ - --hash=sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997 +urllib3==1.26.17 \ + --hash=sha256:24d6a242c28d29af46c3fae832c36db3bbebcc533dd1bb549172cd739c82df21 \ + --hash=sha256:94a757d178c9be92ef5539b8840d48dc9cf1b2709c9d6b588232a055c524458b # via # requests # twine diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 19409cbd3..6a8e16950 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,7 +22,7 @@ repos: - id: end-of-file-fixer - id: check-yaml - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 23.7.0 hooks: - id: black - repo: https://github.com/pycqa/flake8 diff --git a/google/cloud/storage/_signing.py b/google/cloud/storage/_signing.py index fb50a2acc..1ec61142d 100644 --- a/google/cloud/storage/_signing.py +++ b/google/cloud/storage/_signing.py @@ -147,7 +147,6 @@ def get_expiration_seconds_v4(expiration): seconds = expiration if isinstance(expiration, datetime.datetime): - if expiration.tzinfo is None: expiration = expiration.replace(tzinfo=_helpers.UTC) @@ -646,7 +645,6 @@ def get_v4_now_dtstamps(): def _sign_message(message, access_token, service_account_email): - """Signs a message. 
:type message: str diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index ece758dbc..a95e08911 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -984,7 +984,6 @@ def _do_download( response = download.consume(transport, timeout=timeout) self._extract_headers_from_download(response) else: - if checksum: msg = _CHUNKED_DOWNLOAD_CHECKSUM_MESSAGE.format(checksum) _logger.info(msg) diff --git a/google/cloud/storage/bucket.py b/google/cloud/storage/bucket.py index c3a1a0523..f6d5e5aa2 100644 --- a/google/cloud/storage/bucket.py +++ b/google/cloud/storage/bucket.py @@ -474,7 +474,6 @@ def __init__( bucket_policy_only_locked_time=_default, ): if bucket_policy_only_enabled is not _default: - if uniform_bucket_level_access_enabled is not _default: raise ValueError(_UBLA_BPO_ENABLED_MESSAGE) @@ -482,7 +481,6 @@ def __init__( uniform_bucket_level_access_enabled = bucket_policy_only_enabled if bucket_policy_only_locked_time is not _default: - if uniform_bucket_level_access_locked_time is not _default: raise ValueError(_UBLA_BPO_LOCK_TIME_MESSAGE) diff --git a/google/cloud/storage/transfer_manager.py b/google/cloud/storage/transfer_manager.py index da6973c71..5cd2bc0d8 100644 --- a/google/cloud/storage/transfer_manager.py +++ b/google/cloud/storage/transfer_manager.py @@ -1029,7 +1029,6 @@ def upload_chunks_concurrently( futures = [] with pool_class(max_workers=max_workers) as executor: - for part_number in range(1, num_of_parts + 1): start = (part_number - 1) * chunk_size end = min(part_number * chunk_size, size) diff --git a/noxfile.py b/noxfile.py index 1a72c9144..895f5ee32 100644 --- a/noxfile.py +++ b/noxfile.py @@ -24,7 +24,7 @@ import nox -BLACK_VERSION = "black==22.3.0" +BLACK_VERSION = "black==23.7.0" BLACK_PATHS = ["docs", "google", "tests", "noxfile.py", "setup.py"] DEFAULT_PYTHON_VERSION = "3.8" diff --git a/tests/system/test_blob.py b/tests/system/test_blob.py index 2d6a76b80..4c2078f6a 100644 --- a/tests/system/test_blob.py +++ b/tests/system/test_blob.py @@ -86,7 +86,6 @@ def test_large_file_write_from_stream_w_failed_checksum( # The # remote API is still exercised. info = file_data["big"] with open(info["path"], "rb") as file_obj: - with mock.patch( "google.resumable_media._helpers.prepare_checksum_digest", return_value="FFFFFF==", @@ -527,7 +526,6 @@ def test_blob_direct_write_and_read_into_file( same_blob.reload() # Initialize properties. with tempfile.NamedTemporaryFile() as temp_f: - with open(temp_f.name, "wb") as file_obj: same_blob.download_to_file(file_obj) @@ -553,7 +551,6 @@ def test_blob_download_w_generation_match( same_blob.reload() # Initialize properties. 
with tempfile.NamedTemporaryFile() as temp_f: - with open(temp_f.name, "wb") as file_obj: with pytest.raises(exceptions.PreconditionFailed): same_blob.download_to_file( diff --git a/tests/system/test_client.py b/tests/system/test_client.py index bb09e6075..70f341851 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -130,7 +130,6 @@ def test_download_blob_to_file_w_uri( blobs_to_delete.append(blob) with tempfile.NamedTemporaryFile() as temp_f: - with open(temp_f.name, "wb") as file_obj: storage_client.download_blob_to_file( "gs://" + shared_bucket.name + "/MyBuffer", file_obj diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 174b96152..324705e79 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -94,7 +94,6 @@ def _make_one(self, *args, **kw): def _derivedClass(self, path=None, user_project=None): class Derived(self._get_target_class()): - client = None _actual_encryption_headers = None diff --git a/tests/unit/test_acl.py b/tests/unit/test_acl.py index 3c5e6515a..8d2fa39f5 100644 --- a/tests/unit/test_acl.py +++ b/tests/unit/test_acl.py @@ -1072,7 +1072,6 @@ def test_user_project(self): class _Blob(object): - user_project = None def __init__(self, bucket, blob): @@ -1085,7 +1084,6 @@ def path(self): class _Bucket(object): - user_project = None def __init__(self, name): diff --git a/tests/unit/test_batch.py b/tests/unit/test_batch.py index 37f8b8190..c1f6bad9a 100644 --- a/tests/unit/test_batch.py +++ b/tests/unit/test_batch.py @@ -761,7 +761,6 @@ def test___setitem__(self): class _Connection(object): - project = "TESTING" def __init__(self, **kw): diff --git a/tests/unit/test_blob.py b/tests/unit/test_blob.py index a8d024176..1e84704b1 100644 --- a/tests/unit/test_blob.py +++ b/tests/unit/test_blob.py @@ -3017,7 +3017,6 @@ def _do_resumable_helper( with patch.object( _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST ): - response = blob._do_resumable_upload( client, stream, @@ -5952,7 +5951,6 @@ def test_w_existing_qs(self): class _Connection(object): - API_BASE_URL = "http://example.com" USER_AGENT = "testing 1.2.3" user_agent = "testing 1.2.3" diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 277610696..0c1c5efee 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -2818,7 +2818,6 @@ def test_conformance_post_policy(test_data): return_value=in_data["expiration"], ): with mock.patch("google.cloud.storage.client._NOW", return_value=timestamp): - policy = client.generate_signed_post_policy_v4( bucket_name=in_data["bucket"], blob_name=in_data["object"], diff --git a/tests/unit/test_notification.py b/tests/unit/test_notification.py index e5f07d5c7..d59444915 100644 --- a/tests/unit/test_notification.py +++ b/tests/unit/test_notification.py @@ -20,7 +20,6 @@ class TestBucketNotification(unittest.TestCase): - BUCKET_NAME = "test-bucket" BUCKET_PROJECT = "bucket-project-123" TOPIC_NAME = "test-topic" diff --git a/tests/unit/test_transfer_manager.py b/tests/unit/test_transfer_manager.py index 1f6d5b0dc..9c371d2ca 100644 --- a/tests/unit/test_transfer_manager.py +++ b/tests/unit/test_transfer_manager.py @@ -75,7 +75,7 @@ def test_upload_many_with_filenames(): upload_kwargs=UPLOAD_KWARGS, worker_type=transfer_manager.THREAD, ) - for (filename, mock_blob) in FILE_BLOB_PAIRS: + for filename, mock_blob in FILE_BLOB_PAIRS: mock_blob._handle_filename_and_upload.assert_any_call( filename, 
**expected_upload_kwargs ) @@ -100,7 +100,7 @@ def test_upload_many_with_file_objs(): upload_kwargs=UPLOAD_KWARGS, worker_type=transfer_manager.THREAD, ) - for (file, mock_blob) in FILE_BLOB_PAIRS: + for file, mock_blob in FILE_BLOB_PAIRS: mock_blob._prep_and_do_upload.assert_any_call(file, **expected_upload_kwargs) for result in results: assert result == FAKE_RESULT @@ -263,7 +263,7 @@ def test_download_many_with_filenames(): download_kwargs=DOWNLOAD_KWARGS, worker_type=transfer_manager.THREAD, ) - for (mock_blob, file) in BLOB_FILE_PAIRS: + for mock_blob, file in BLOB_FILE_PAIRS: mock_blob._handle_filename_and_download.assert_any_call( file, **EXPECTED_DOWNLOAD_KWARGS ) @@ -285,7 +285,7 @@ def test_download_many_with_file_objs(): download_kwargs=DOWNLOAD_KWARGS, worker_type=transfer_manager.THREAD, ) - for (mock_blob, file) in BLOB_FILE_PAIRS: + for mock_blob, file in BLOB_FILE_PAIRS: mock_blob._prep_and_do_download.assert_any_call(file, **DOWNLOAD_KWARGS) for result in results: assert result == FAKE_RESULT From 28c02dd41010e6d818a77f51c539457b2dbfa233 Mon Sep 17 00:00:00 2001 From: Andrew Gorcester Date: Wed, 11 Oct 2023 11:00:43 -0700 Subject: [PATCH 06/13] fix: bump python-auth version to fix issue and remove workaround (#1158) --- google/cloud/storage/client.py | 1 - google/cloud/storage/transfer_manager.py | 7 +++++-- setup.py | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/google/cloud/storage/client.py b/google/cloud/storage/client.py index e6391f5fb..10f2e5904 100644 --- a/google/cloud/storage/client.py +++ b/google/cloud/storage/client.py @@ -127,7 +127,6 @@ def __init__( # are passed along, for use in __reduce__ defined elsewhere. self._initial_client_info = client_info self._initial_client_options = client_options - self._initial_credentials = credentials kw_args = {"client_info": client_info} diff --git a/google/cloud/storage/transfer_manager.py b/google/cloud/storage/transfer_manager.py index 5cd2bc0d8..38f327895 100644 --- a/google/cloud/storage/transfer_manager.py +++ b/google/cloud/storage/transfer_manager.py @@ -1154,11 +1154,14 @@ def _call_method_on_maybe_pickled_blob( def _reduce_client(cl): - """Replicate a Client by constructing a new one with the same params.""" + """Replicate a Client by constructing a new one with the same params. + + LazyClient performs transparent caching for when the same client is needed + on the same process multiple times.""" client_object_id = id(cl) project = cl.project - credentials = cl._initial_credentials + credentials = cl._credentials _http = None # Can't carry this over client_info = cl._initial_client_info client_options = cl._initial_client_options diff --git a/setup.py b/setup.py index a57f972ff..11ee0a190 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ - "google-auth >= 1.25.0, < 3.0dev", + "google-auth >= 2.23.3, < 3.0dev", "google-api-core >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", "google-cloud-core >= 2.3.0, < 3.0dev", "google-resumable-media >= 2.6.0", From a455195fb12c37950d7d3a0d4a5b2e4d7a43df90 Mon Sep 17 00:00:00 2001 From: cojenco Date: Wed, 11 Oct 2023 12:54:16 -0700 Subject: [PATCH 07/13] test: dedup kms_bucket fixture (#1129) After adding back KMS permissions to the kokoro project, KMS integration tests now pass. However, upon investigation, I noticed that we have a duplicate set of kms pytest fixtures. 
This removes the duplicates and changes fixture scope to per-function. Fixes #1128 --- tests/system/conftest.py | 60 +++++++++++++++++-- tests/system/test_kms_integration.py | 88 ---------------------------- 2 files changed, 56 insertions(+), 92 deletions(-) diff --git a/tests/system/conftest.py b/tests/system/conftest.py index fe90ceb80..329be584f 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -17,6 +17,8 @@ import pytest +from google.api_core import exceptions +from google.cloud import kms from google.cloud.storage._helpers import _base64_md5hash from . import _helpers @@ -235,12 +237,12 @@ def file_data(): return _file_data -@pytest.fixture(scope="session") +@pytest.fixture(scope="function") def kms_bucket_name(): return _helpers.unique_name("gcp-systest-kms") -@pytest.fixture(scope="session") +@pytest.fixture(scope="function") def kms_bucket(storage_client, kms_bucket_name, no_mtls): bucket = _helpers.retry_429_503(storage_client.create_bucket)(kms_bucket_name) @@ -249,11 +251,61 @@ def kms_bucket(storage_client, kms_bucket_name, no_mtls): _helpers.delete_bucket(bucket) -@pytest.fixture(scope="session") +@pytest.fixture(scope="function") def kms_key_name(storage_client, kms_bucket): return _kms_key_name(storage_client, kms_bucket, default_key_name) -@pytest.fixture(scope="session") +@pytest.fixture(scope="function") def alt_kms_key_name(storage_client, kms_bucket): return _kms_key_name(storage_client, kms_bucket, alt_key_name) + + +@pytest.fixture(scope="session") +def kms_client(): + return kms.KeyManagementServiceClient() + + +@pytest.fixture(scope="function") +def keyring(storage_client, kms_bucket, kms_client): + project = storage_client.project + location = kms_bucket.location.lower() + purpose = kms.enums.CryptoKey.CryptoKeyPurpose.ENCRYPT_DECRYPT + + # If the keyring doesn't exist create it. + keyring_path = kms_client.key_ring_path(project, location, keyring_name) + + try: + kms_client.get_key_ring(keyring_path) + except exceptions.NotFound: + parent = kms_client.location_path(project, location) + kms_client.create_key_ring(parent, keyring_name, {}) + + # Mark this service account as an owner of the new keyring + service_account_email = storage_client.get_service_account_email() + policy = { + "bindings": [ + { + "role": "roles/cloudkms.cryptoKeyEncrypterDecrypter", + "members": ["serviceAccount:" + service_account_email], + } + ] + } + kms_client.set_iam_policy(keyring_path, policy) + + # Populate the keyring with the keys we use in the tests + key_names = [ + "gcs-test", + "gcs-test-alternate", + "explicit-kms-key-name", + "default-kms-key-name", + "override-default-kms-key-name", + "alt-default-kms-key-name", + ] + for key_name in key_names: + key_path = kms_client.crypto_key_path(project, location, keyring_name, key_name) + try: + kms_client.get_crypto_key(key_path) + except exceptions.NotFound: + key = {"purpose": purpose} + kms_client.create_crypto_key(keyring_path, key_name, key) diff --git a/tests/system/test_kms_integration.py b/tests/system/test_kms_integration.py index f047baced..619ffe110 100644 --- a/tests/system/test_kms_integration.py +++ b/tests/system/test_kms_integration.py @@ -14,99 +14,11 @@ import os -import pytest - -from google.api_core import exceptions -from google.cloud import kms from . 
import _helpers keyring_name = "gcs-test" default_key_name = "gcs-test" alt_key_name = "gcs-test-alternate" -_key_name_format = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}" - - -def _kms_key_name(client, bucket, key_name): - return _key_name_format.format( - client.project, - bucket.location.lower(), - keyring_name, - key_name, - ) - - -@pytest.fixture(scope="session") -def kms_bucket_name(): - return _helpers.unique_name("gcp-systest-kms") - - -@pytest.fixture(scope="session") -def kms_bucket(storage_client, kms_bucket_name, no_mtls): - bucket = _helpers.retry_429_503(storage_client.create_bucket)(kms_bucket_name) - - yield bucket - - _helpers.delete_bucket(bucket) - - -@pytest.fixture(scope="session") -def kms_client(): - return kms.KeyManagementServiceClient() - - -@pytest.fixture(scope="function") -def keyring(storage_client, kms_bucket, kms_client): - project = storage_client.project - location = kms_bucket.location.lower() - purpose = kms.enums.CryptoKey.CryptoKeyPurpose.ENCRYPT_DECRYPT - - # If the keyring doesn't exist create it. - keyring_path = kms_client.key_ring_path(project, location, keyring_name) - - try: - kms_client.get_key_ring(keyring_path) - except exceptions.NotFound: - parent = kms_client.location_path(project, location) - kms_client.create_key_ring(parent, keyring_name, {}) - - # Mark this service account as an owner of the new keyring - service_account_email = storage_client.get_service_account_email() - policy = { - "bindings": [ - { - "role": "roles/cloudkms.cryptoKeyEncrypterDecrypter", - "members": ["serviceAccount:" + service_account_email], - } - ] - } - kms_client.set_iam_policy(keyring_path, policy) - - # Populate the keyring with the keys we use in the tests - key_names = [ - "gcs-test", - "gcs-test-alternate", - "explicit-kms-key-name", - "default-kms-key-name", - "override-default-kms-key-name", - "alt-default-kms-key-name", - ] - for key_name in key_names: - key_path = kms_client.crypto_key_path(project, location, keyring_name, key_name) - try: - kms_client.get_crypto_key(key_path) - except exceptions.NotFound: - key = {"purpose": purpose} - kms_client.create_crypto_key(keyring_path, key_name, key) - - -@pytest.fixture(scope="session") -def kms_key_name(storage_client, kms_bucket): - return _kms_key_name(storage_client, kms_bucket, default_key_name) - - -@pytest.fixture(scope="session") -def alt_kms_key_name(storage_client, kms_bucket): - return _kms_key_name(storage_client, kms_bucket, alt_key_name) def test_blob_w_explicit_kms_key_name( From fc92ad19ff0f9704456452e8c7c47a5f90c29eab Mon Sep 17 00:00:00 2001 From: Andrew Gorcester Date: Wed, 11 Oct 2023 13:46:14 -0700 Subject: [PATCH 08/13] feat: add crc32c_checksum argument to download_chunks_concurrently (#1138) --- google/cloud/storage/transfer_manager.py | 158 ++++++++++++++++++++--- samples/snippets/snippets_test.py | 3 + setup.py | 1 + tests/system/test_transfer_manager.py | 13 +- tests/unit/test_transfer_manager.py | 109 +++++++++++++++- 5 files changed, 262 insertions(+), 22 deletions(-) diff --git a/google/cloud/storage/transfer_manager.py b/google/cloud/storage/transfer_manager.py index 38f327895..fec5965cf 100644 --- a/google/cloud/storage/transfer_manager.py +++ b/google/cloud/storage/transfer_manager.py @@ -22,6 +22,8 @@ import warnings import pickle import copyreg +import struct +import base64 import functools from google.api_core import exceptions @@ -32,9 +34,11 @@ from google.cloud.storage._helpers import _api_core_retry_to_resumable_media_retry from google.cloud.storage.retry 
import DEFAULT_RETRY +import google_crc32c + from google.resumable_media.requests.upload import XMLMPUContainer from google.resumable_media.requests.upload import XMLMPUPart - +from google.resumable_media.common import DataCorruption warnings.warn( "The module `transfer_manager` is a preview feature. Functionality and API " @@ -44,6 +48,7 @@ TM_DEFAULT_CHUNK_SIZE = 32 * 1024 * 1024 DEFAULT_MAX_WORKERS = 8 +MAX_CRC32C_ZERO_ARRAY_SIZE = 4 * 1024 * 1024 METADATA_HEADER_TRANSLATION = { "cacheControl": "Cache-Control", "contentDisposition": "Content-Disposition", @@ -57,6 +62,20 @@ PROCESS = "process" THREAD = "thread" +DOWNLOAD_CRC32C_MISMATCH_TEMPLATE = """\ +Checksum mismatch while downloading: + + {} + +The object metadata indicated a crc32c checksum of: + + {} + +but the actual crc32c checksum of the downloaded contents was: + + {} +""" + _cached_clients = {} @@ -732,6 +751,8 @@ def download_chunks_concurrently( deadline=None, worker_type=PROCESS, max_workers=DEFAULT_MAX_WORKERS, + *, + crc32c_checksum=True, ): """Download a single file in chunks, concurrently. @@ -744,9 +765,6 @@ def download_chunks_concurrently( performance under normal circumstances due to Python interpreter threading behavior. The default is therefore to use processes instead of threads. - Checksumming (md5 or crc32c) is not supported for chunked operations. Any - `checksum` parameter passed in to download_kwargs will be ignored. - :param bucket: The bucket which contains the blobs to be downloaded @@ -768,10 +786,13 @@ def download_chunks_concurrently( :param download_kwargs: A dictionary of keyword arguments to pass to the download method. Refer to the documentation for blob.download_to_file() or - blob.download_to_filename() for more information. The dict is directly passed into the download methods and is not validated by this function. + blob.download_to_filename() for more information. The dict is directly + passed into the download methods and is not validated by this function. Keyword arguments "start" and "end" which are not supported and will - cause a ValueError if present. + cause a ValueError if present. The key "checksum" is also not supported + in download_kwargs, but see the argument "crc32c_checksum" (which does + not go in download_kwargs) below. :type deadline: int :param deadline: @@ -811,8 +832,22 @@ def download_chunks_concurrently( and the default is a conservative number that should work okay in most cases without consuming excessive resources. - :raises: :exc:`concurrent.futures.TimeoutError` if deadline is exceeded. + :type crc32c_checksum: bool + :param crc32c_checksum: + Whether to compute a checksum for the resulting object, using the crc32c + algorithm. As the checksums for each chunk must be combined using a + feature of crc32c that is not available for md5, md5 is not supported. + + :raises: + :exc:`concurrent.futures.TimeoutError` + if deadline is exceeded. + :exc:`google.resumable_media.common.DataCorruption` if the download's + checksum doesn't agree with server-computed checksum. The + `google.resumable_media` exception is used here for consistency + with other download methods despite the exception originating + elsewhere. """ + client = blob.client if download_kwargs is None: download_kwargs = {} @@ -820,6 +855,10 @@ def download_chunks_concurrently( raise ValueError( "Download arguments 'start' and 'end' are not supported by download_chunks_concurrently." 
) + if "checksum" in download_kwargs: + raise ValueError( + "'checksum' is in download_kwargs, but is not supported because sliced downloads have a different checksum mechanism from regular downloads. Use the 'crc32c_checksum' argument on download_chunks_concurrently instead." + ) download_kwargs["command"] = "tm.download_sharded" @@ -851,6 +890,7 @@ def download_chunks_concurrently( start=start, end=cursor - 1, download_kwargs=download_kwargs, + crc32c_checksum=crc32c_checksum, ) ) @@ -858,9 +898,34 @@ def download_chunks_concurrently( futures, timeout=deadline, return_when=concurrent.futures.ALL_COMPLETED ) - # Raise any exceptions. Successful results can be ignored. + # Raise any exceptions; combine checksums. + results = [] for future in futures: - future.result() + results.append(future.result()) + + if crc32c_checksum and results: + crc_digest = _digest_ordered_checksum_and_size_pairs(results) + actual_checksum = base64.b64encode(crc_digest).decode("utf-8") + expected_checksum = blob.crc32c + if actual_checksum != expected_checksum: + # For consistency with other download methods we will use + # "google.resumable_media.common.DataCorruption" despite the error + # not originating inside google.resumable_media. + download_url = blob._get_download_url( + client, + if_generation_match=download_kwargs.get("if_generation_match"), + if_generation_not_match=download_kwargs.get("if_generation_not_match"), + if_metageneration_match=download_kwargs.get("if_metageneration_match"), + if_metageneration_not_match=download_kwargs.get( + "if_metageneration_not_match" + ), + ) + raise DataCorruption( + None, + DOWNLOAD_CRC32C_MISMATCH_TEMPLATE.format( + download_url, expected_checksum, actual_checksum + ), + ) return None @@ -1118,23 +1183,58 @@ def _headers_from_metadata(metadata): def _download_and_write_chunk_in_place( - maybe_pickled_blob, filename, start, end, download_kwargs + maybe_pickled_blob, filename, start, end, download_kwargs, crc32c_checksum ): """Helper function that runs inside a thread or subprocess. `maybe_pickled_blob` is either a Blob (for threads) or a specially pickled Blob (for processes) because the default pickling mangles Client objects - which are attached to Blobs.""" + which are attached to Blobs. + + Returns a crc if configured (or None) and the size written. + """ if isinstance(maybe_pickled_blob, Blob): blob = maybe_pickled_blob else: blob = pickle.loads(maybe_pickled_blob) - with open( - filename, "rb+" - ) as f: # Open in mixed read/write mode to avoid truncating or appending - f.seek(start) - return blob._prep_and_do_download(f, start=start, end=end, **download_kwargs) + + with _ChecksummingSparseFileWrapper(filename, start, crc32c_checksum) as f: + blob._prep_and_do_download(f, start=start, end=end, **download_kwargs) + return (f.crc, (end - start) + 1) + + +class _ChecksummingSparseFileWrapper: + """A file wrapper that writes to a sparse file and optionally checksums. + + This wrapper only implements write() and does not inherit from `io` module + base classes. 
+ """ + + def __init__(self, filename, start_position, crc32c_enabled): + # Open in mixed read/write mode to avoid truncating or appending + self.f = open(filename, "rb+") + self.f.seek(start_position) + self._crc = None + self._crc32c_enabled = crc32c_enabled + + def write(self, chunk): + if self._crc32c_enabled: + if self._crc is None: + self._crc = google_crc32c.value(chunk) + else: + self._crc = google_crc32c.extend(self._crc, chunk) + self.f.write(chunk) + + @property + def crc(self): + return self._crc + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, tb): + self.f.close() def _call_method_on_maybe_pickled_blob( @@ -1208,6 +1308,32 @@ def _get_pool_class_and_requirements(worker_type): ) +def _digest_ordered_checksum_and_size_pairs(checksum_and_size_pairs): + base_crc = None + zeroes = bytes(MAX_CRC32C_ZERO_ARRAY_SIZE) + for part_crc, size in checksum_and_size_pairs: + if not base_crc: + base_crc = part_crc + else: + base_crc ^= 0xFFFFFFFF # precondition + + # Zero pad base_crc32c. To conserve memory, do so with only + # MAX_CRC32C_ZERO_ARRAY_SIZE at a time. Reuse the zeroes array where + # possible. + padded = 0 + while padded < size: + desired_zeroes_size = min((size - padded), MAX_CRC32C_ZERO_ARRAY_SIZE) + base_crc = google_crc32c.extend(base_crc, zeroes[:desired_zeroes_size]) + padded += desired_zeroes_size + + base_crc ^= 0xFFFFFFFF # postcondition + base_crc ^= part_crc + crc_digest = struct.pack( + ">L", base_crc + ) # https://cloud.google.com/storage/docs/json_api/v1/objects#crc32c + return crc_digest + + class _LazyClient: """An object that will transform into either a cached or a new Client""" diff --git a/samples/snippets/snippets_test.py b/samples/snippets/snippets_test.py index 8014411e8..7a5f8c960 100644 --- a/samples/snippets/snippets_test.py +++ b/samples/snippets/snippets_test.py @@ -213,6 +213,7 @@ def test_list_blobs_with_prefix(test_blob, capsys): def test_upload_blob(test_bucket): with tempfile.NamedTemporaryFile() as source_file: source_file.write(b"test") + source_file.flush() storage_upload_file.upload_blob( test_bucket.name, source_file.name, "test_upload_blob" @@ -243,6 +244,7 @@ def test_upload_blob_with_kms(test_bucket): blob_name = f"test_upload_with_kms_{uuid.uuid4().hex}" with tempfile.NamedTemporaryFile() as source_file: source_file.write(b"test") + source_file.flush() storage_upload_with_kms_key.upload_blob_with_kms( test_bucket.name, source_file.name, @@ -779,6 +781,7 @@ def test_transfer_manager_download_chunks_concurrently(test_bucket, capsys): with tempfile.NamedTemporaryFile() as file: file.write(b"test") + file.flush() storage_upload_file.upload_blob(test_bucket.name, file.name, BLOB_NAME) diff --git a/setup.py b/setup.py index 11ee0a190..88d2f581b 100644 --- a/setup.py +++ b/setup.py @@ -33,6 +33,7 @@ "google-cloud-core >= 2.3.0, < 3.0dev", "google-resumable-media >= 2.6.0", "requests >= 2.18.0, < 3.0.0dev", + "google-crc32c >= 1.0, < 2.0dev", ] extras = {"protobuf": ["protobuf<5.0.0dev"]} diff --git a/tests/system/test_transfer_manager.py b/tests/system/test_transfer_manager.py index fc7bc2d51..b8f209b63 100644 --- a/tests/system/test_transfer_manager.py +++ b/tests/system/test_transfer_manager.py @@ -172,8 +172,19 @@ def test_download_chunks_concurrently(shared_bucket, file_data): with open(trailing_chunk_filename, "rb") as file_obj: assert _base64_md5hash(file_obj) == source_file["hash"] + # And for a case where there is only one chunk. 
+ trailing_chunk_filename = os.path.join(tempdir, "chunky_file_3") + transfer_manager.download_chunks_concurrently( + download_blob, + trailing_chunk_filename, + chunk_size=size, + deadline=DEADLINE, + ) + with open(trailing_chunk_filename, "rb") as file_obj: + assert _base64_md5hash(file_obj) == source_file["hash"] + # Also test threaded mode. - threaded_filename = os.path.join(tempdir, "chunky_file_3") + threaded_filename = os.path.join(tempdir, "chunky_file_4") transfer_manager.download_chunks_concurrently( download_blob, threaded_filename, diff --git a/tests/unit/test_transfer_manager.py b/tests/unit/test_transfer_manager.py index 9c371d2ca..503b8fd2e 100644 --- a/tests/unit/test_transfer_manager.py +++ b/tests/unit/test_transfer_manager.py @@ -22,6 +22,8 @@ from google.api_core import exceptions +from google.resumable_media.common import DataCorruption + import os import tempfile import mock @@ -546,8 +548,6 @@ def test_download_chunks_concurrently(): expected_download_kwargs = EXPECTED_DOWNLOAD_KWARGS.copy() expected_download_kwargs["command"] = "tm.download_sharded" - blob_mock._handle_filename_and_download.return_value = FAKE_RESULT - with mock.patch("google.cloud.storage.transfer_manager.open", mock.mock_open()): result = transfer_manager.download_chunks_concurrently( blob_mock, @@ -555,6 +555,7 @@ def test_download_chunks_concurrently(): chunk_size=CHUNK_SIZE, download_kwargs=DOWNLOAD_KWARGS, worker_type=transfer_manager.THREAD, + crc32c_checksum=False, ) for x in range(MULTIPLE): blob_mock._prep_and_do_download.assert_any_call( @@ -567,7 +568,64 @@ def test_download_chunks_concurrently(): assert result is None -def test_download_chunks_concurrently_raises_on_start_and_end(): +def test_download_chunks_concurrently_with_crc32c(): + blob_mock = mock.Mock(spec=Blob) + FILENAME = "file_a.txt" + MULTIPLE = 4 + BLOB_CHUNK = b"abcdefgh" + BLOB_CONTENTS = BLOB_CHUNK * MULTIPLE + blob_mock.size = len(BLOB_CONTENTS) + blob_mock.crc32c = "eOVVVw==" + + expected_download_kwargs = EXPECTED_DOWNLOAD_KWARGS.copy() + expected_download_kwargs["command"] = "tm.download_sharded" + + def write_to_file(f, *args, **kwargs): + f.write(BLOB_CHUNK) + + blob_mock._prep_and_do_download.side_effect = write_to_file + + with mock.patch("google.cloud.storage.transfer_manager.open", mock.mock_open()): + transfer_manager.download_chunks_concurrently( + blob_mock, + FILENAME, + chunk_size=CHUNK_SIZE, + download_kwargs=DOWNLOAD_KWARGS, + worker_type=transfer_manager.THREAD, + crc32c_checksum=True, + ) + + +def test_download_chunks_concurrently_with_crc32c_failure(): + blob_mock = mock.Mock(spec=Blob) + FILENAME = "file_a.txt" + MULTIPLE = 4 + BLOB_CHUNK = b"abcdefgh" + BLOB_CONTENTS = BLOB_CHUNK * MULTIPLE + blob_mock.size = len(BLOB_CONTENTS) + blob_mock.crc32c = "invalid" + + expected_download_kwargs = EXPECTED_DOWNLOAD_KWARGS.copy() + expected_download_kwargs["command"] = "tm.download_sharded" + + def write_to_file(f, *args, **kwargs): + f.write(BLOB_CHUNK) + + blob_mock._prep_and_do_download.side_effect = write_to_file + + with mock.patch("google.cloud.storage.transfer_manager.open", mock.mock_open()): + with pytest.raises(DataCorruption): + transfer_manager.download_chunks_concurrently( + blob_mock, + FILENAME, + chunk_size=CHUNK_SIZE, + download_kwargs=DOWNLOAD_KWARGS, + worker_type=transfer_manager.THREAD, + crc32c_checksum=True, + ) + + +def test_download_chunks_concurrently_raises_on_invalid_kwargs(): blob_mock = mock.Mock(spec=Blob) FILENAME = "file_a.txt" MULTIPLE = 4 @@ -594,6 +652,16 @@ def 
test_download_chunks_concurrently_raises_on_start_and_end(): "end": (CHUNK_SIZE * (MULTIPLE - 1)) - 1, }, ) + with pytest.raises(ValueError): + transfer_manager.download_chunks_concurrently( + blob_mock, + FILENAME, + chunk_size=CHUNK_SIZE, + worker_type=transfer_manager.THREAD, + download_kwargs={ + "checksum": "crc32c", + }, + ) def test_download_chunks_concurrently_passes_concurrency_options(): @@ -616,6 +684,7 @@ def test_download_chunks_concurrently_passes_concurrency_options(): deadline=DEADLINE, worker_type=transfer_manager.THREAD, max_workers=MAX_WORKERS, + crc32c_checksum=False, ) pool_patch.assert_called_with(max_workers=MAX_WORKERS) wait_patch.assert_called_with(mock.ANY, timeout=DEADLINE, return_when=mock.ANY) @@ -819,6 +888,7 @@ def __init__( self.generation = generation self._size_after_reload = size_after_reload self._generation_after_reload = generation_after_reload + self.client = _PickleableMockClient() def reload(self): self.size = self._size_after_reload @@ -876,6 +946,7 @@ def test_download_chunks_concurrently_with_processes(): chunk_size=CHUNK_SIZE, download_kwargs=DOWNLOAD_KWARGS, worker_type=transfer_manager.PROCESS, + crc32c_checksum=False, ) assert result is None @@ -907,9 +978,9 @@ def test__download_and_write_chunk_in_place(): FILENAME = "file_a.txt" with mock.patch("google.cloud.storage.transfer_manager.open", mock.mock_open()): result = transfer_manager._download_and_write_chunk_in_place( - pickled_mock, FILENAME, 0, 8, {} + pickled_mock, FILENAME, 0, 8, {}, False ) - assert result == "SUCCESS" + assert result is not None def test__upload_part(): @@ -973,3 +1044,31 @@ def test__call_method_on_maybe_pickled_blob(): pickled_blob, "_prep_and_do_download" ) assert result == "SUCCESS" + + +def test__ChecksummingSparseFileWrapper(): + FILENAME = "file_a.txt" + import google_crc32c + + with mock.patch( + "google.cloud.storage.transfer_manager.open", mock.mock_open() + ) as open_mock: + # test no checksumming + wrapper = transfer_manager._ChecksummingSparseFileWrapper(FILENAME, 0, False) + wrapper.write(b"abcdefgh") + handle = open_mock() + handle.write.assert_called_with(b"abcdefgh") + wrapper.write(b"ijklmnop") + assert wrapper.crc is None + handle.write.assert_called_with(b"ijklmnop") + + with mock.patch( + "google.cloud.storage.transfer_manager.open", mock.mock_open() + ) as open_mock: + wrapper = transfer_manager._ChecksummingSparseFileWrapper(FILENAME, 0, True) + wrapper.write(b"abcdefgh") + handle = open_mock() + handle.write.assert_called_with(b"abcdefgh") + wrapper.write(b"ijklmnop") + assert wrapper.crc == google_crc32c.value(b"abcdefghijklmnop") + handle.write.assert_called_with(b"ijklmnop") From c5a983d5a0b0632811af86fb64664b4382b05512 Mon Sep 17 00:00:00 2001 From: Andrew Gorcester Date: Wed, 11 Oct 2023 15:38:14 -0700 Subject: [PATCH 09/13] feat: add skip_if_exists to download_many (#1161) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add skip_if_exists to download_many * docstring * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- google/cloud/storage/transfer_manager.py | 19 ++++++++++++++++ tests/unit/test_transfer_manager.py | 29 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/google/cloud/storage/transfer_manager.py b/google/cloud/storage/transfer_manager.py index fec5965cf..25abfacae 100644 --- 
a/google/cloud/storage/transfer_manager.py +++ b/google/cloud/storage/transfer_manager.py @@ -273,6 +273,8 @@ def download_many( raise_exception=False, worker_type=PROCESS, max_workers=DEFAULT_MAX_WORKERS, + *, + skip_if_exists=False, ): """Download many blobs concurrently via a worker pool. @@ -348,6 +350,11 @@ def download_many( and the default is a conservative number that should work okay in most cases without consuming excessive resources. + :type skip_if_exists: bool + :param skip_if_exists: + Before downloading each blob, check if the file for the filename exists; + if it does, skip that blob. + :raises: :exc:`concurrent.futures.TimeoutError` if deadline is exceeded. :rtype: list @@ -374,6 +381,10 @@ def download_many( "Passing in a file object is only supported by the THREAD worker type. Please either select THREAD workers, or pass in filenames only." ) + if skip_if_exists and isinstance(path_or_file, str): + if os.path.isfile(path_or_file): + continue + futures.append( executor.submit( _call_method_on_maybe_pickled_blob, @@ -589,6 +600,8 @@ def download_many_to_path( raise_exception=False, worker_type=PROCESS, max_workers=DEFAULT_MAX_WORKERS, + *, + skip_if_exists=False, ): """Download many files concurrently by their blob names. @@ -715,6 +728,11 @@ def download_many_to_path( and the default is a conservative number that should work okay in most cases without consuming excessive resources. + :type skip_if_exists: bool + :param skip_if_exists: + Before downloading each blob, check if the file for the filename exists; + if it does, skip that blob. This only works for filenames. + :raises: :exc:`concurrent.futures.TimeoutError` if deadline is exceeded. :rtype: list @@ -740,6 +758,7 @@ def download_many_to_path( raise_exception=raise_exception, worker_type=worker_type, max_workers=max_workers, + skip_if_exists=skip_if_exists, ) diff --git a/tests/unit/test_transfer_manager.py b/tests/unit/test_transfer_manager.py index 503b8fd2e..732f09a75 100644 --- a/tests/unit/test_transfer_manager.py +++ b/tests/unit/test_transfer_manager.py @@ -273,6 +273,32 @@ def test_download_many_with_filenames(): assert result == FAKE_RESULT +def test_download_many_with_skip_if_exists(): + with tempfile.NamedTemporaryFile() as tf: + BLOB_FILE_PAIRS = [ + (mock.Mock(spec=Blob), "file_a.txt"), + (mock.Mock(spec=Blob), tf.name), + ] + + for blob_mock, _ in BLOB_FILE_PAIRS: + blob_mock._handle_filename_and_download.return_value = FAKE_RESULT + + results = transfer_manager.download_many( + BLOB_FILE_PAIRS, + download_kwargs=DOWNLOAD_KWARGS, + worker_type=transfer_manager.THREAD, + skip_if_exists=True, + ) + mock_blob, file = BLOB_FILE_PAIRS[0] + mock_blob._handle_filename_and_download.assert_any_call( + file, **EXPECTED_DOWNLOAD_KWARGS + ) + mock_blob, _ = BLOB_FILE_PAIRS[1] + mock_blob._handle_filename_and_download.assert_not_called() + for result in results: + assert result == FAKE_RESULT + + def test_download_many_with_file_objs(): BLOB_FILE_PAIRS = [ (mock.Mock(spec=Blob), tempfile.TemporaryFile()), @@ -485,6 +511,7 @@ def test_download_many_to_path(): raise_exception=True, max_workers=MAX_WORKERS, worker_type=WORKER_TYPE, + skip_if_exists=True, ) mock_download_many.assert_called_once_with( @@ -494,6 +521,7 @@ def test_download_many_to_path(): raise_exception=True, max_workers=MAX_WORKERS, worker_type=WORKER_TYPE, + skip_if_exists=True, ) for blobname in BLOBNAMES: bucket.blob.assert_any_call(BLOB_NAME_PREFIX + blobname) @@ -532,6 +560,7 @@ def test_download_many_to_path_creates_directories(): 
raise_exception=True, worker_type=transfer_manager.PROCESS, max_workers=8, + skip_if_exists=False, ) for blobname in BLOBNAMES: bucket.blob.assert_any_call(blobname) From c7229f2e53151fc2f2eb1268afc67dad87ebbb0a Mon Sep 17 00:00:00 2001 From: Andrew Gorcester Date: Thu, 12 Oct 2023 11:46:16 -0700 Subject: [PATCH 10/13] feat: add additional_blob_attributes to upload_many_from_filenames (#1162) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #996 🦕 --- google/cloud/storage/transfer_manager.py | 17 +++++++++++++ tests/system/test_transfer_manager.py | 19 ++++++++++++++ tests/unit/test_transfer_manager.py | 32 ++++++++++++++++++++++++ 3 files changed, 68 insertions(+) diff --git a/google/cloud/storage/transfer_manager.py b/google/cloud/storage/transfer_manager.py index 25abfacae..1a9497505 100644 --- a/google/cloud/storage/transfer_manager.py +++ b/google/cloud/storage/transfer_manager.py @@ -427,6 +427,8 @@ def upload_many_from_filenames( raise_exception=False, worker_type=PROCESS, max_workers=DEFAULT_MAX_WORKERS, + *, + additional_blob_attributes=None, ): """Upload many files concurrently by their filenames. @@ -557,6 +559,17 @@ def upload_many_from_filenames( and the default is a conservative number that should work okay in most cases without consuming excessive resources. + :type additional_blob_attributes: dict + :param additional_blob_attributes: + A dictionary of blob attribute names and values. This allows the + configuration of blobs beyond what is possible with + blob_constructor_kwargs. For instance, {"cache_control": "no-cache"} + would set the cache_control attribute of each blob to "no-cache". + + As with blob_constructor_kwargs, this affects the creation of every + blob identically. To fine-tune each blob individually, use `upload_many` + and create the blobs as desired before passing them in. + :raises: :exc:`concurrent.futures.TimeoutError` if deadline is exceeded. 
:rtype: list @@ -567,6 +580,8 @@ def upload_many_from_filenames( """ if blob_constructor_kwargs is None: blob_constructor_kwargs = {} + if additional_blob_attributes is None: + additional_blob_attributes = {} file_blob_pairs = [] @@ -574,6 +589,8 @@ def upload_many_from_filenames( path = os.path.join(source_directory, filename) blob_name = blob_name_prefix + filename blob = bucket.blob(blob_name, **blob_constructor_kwargs) + for prop, value in additional_blob_attributes.items(): + setattr(blob, prop, value) file_blob_pairs.append((path, blob)) return upload_many( diff --git a/tests/system/test_transfer_manager.py b/tests/system/test_transfer_manager.py index b8f209b63..c29bbe718 100644 --- a/tests/system/test_transfer_manager.py +++ b/tests/system/test_transfer_manager.py @@ -102,6 +102,25 @@ def test_upload_many_skip_if_exists( assert len(blobs_to_delete) == 1 +def test_upload_many_from_filenames_with_attributes( + listable_bucket, listable_filenames, file_data, blobs_to_delete +): + SOURCE_DIRECTORY, FILENAME = os.path.split(file_data["logo"]["path"]) + + transfer_manager.upload_many_from_filenames( + listable_bucket, + [FILENAME], + source_directory=SOURCE_DIRECTORY, + additional_blob_attributes={"cache_control": "no-cache"}, + raise_exception=True, + ) + + blob = listable_bucket.blob(FILENAME) + blob.reload() + blobs_to_delete.append(blob) + assert blob.cache_control == "no-cache" + + def test_download_many(listable_bucket): blobs = list(listable_bucket.list_blobs()) with tempfile.TemporaryDirectory() as tempdir: diff --git a/tests/unit/test_transfer_manager.py b/tests/unit/test_transfer_manager.py index 732f09a75..c8f6e560e 100644 --- a/tests/unit/test_transfer_manager.py +++ b/tests/unit/test_transfer_manager.py @@ -482,6 +482,38 @@ def test_upload_many_from_filenames_minimal_args(): bucket.blob.assert_any_call(FILENAMES[1]) +def test_upload_many_from_filenames_additional_properties(): + bucket = mock.Mock() + blob = mock.Mock() + bucket_blob = mock.Mock(return_value=blob) + blob.cache_control = None + bucket.blob = bucket_blob + + FILENAME = "file_a.txt" + ADDITIONAL_BLOB_ATTRIBUTES = {"cache_control": "no-cache"} + EXPECTED_FILE_BLOB_PAIRS = [(FILENAME, mock.ANY)] + + with mock.patch( + "google.cloud.storage.transfer_manager.upload_many" + ) as mock_upload_many: + transfer_manager.upload_many_from_filenames( + bucket, [FILENAME], additional_blob_attributes=ADDITIONAL_BLOB_ATTRIBUTES + ) + + mock_upload_many.assert_called_once_with( + EXPECTED_FILE_BLOB_PAIRS, + skip_if_exists=False, + upload_kwargs=None, + deadline=None, + raise_exception=False, + worker_type=transfer_manager.PROCESS, + max_workers=8, + ) + + for attrib, value in ADDITIONAL_BLOB_ATTRIBUTES.items(): + assert getattr(blob, attrib) == value + + def test_download_many_to_path(): bucket = mock.Mock() From 5c905637947c45e39ed8ee84911a12e254bde571 Mon Sep 17 00:00:00 2001 From: Andrew Gorcester Date: Thu, 12 Oct 2023 12:10:11 -0700 Subject: [PATCH 11/13] feat: launch transfer manager to GA (#1159) * feat: launch transfer manager to GA * re-add import... 
---
 google/cloud/storage/transfer_manager.py | 18 +-----------------
 tests/unit/test_transfer_manager.py | 4 +---
 2 files changed, 2 insertions(+), 20 deletions(-)

diff --git a/google/cloud/storage/transfer_manager.py b/google/cloud/storage/transfer_manager.py
index 1a9497505..8001e40b0 100644
--- a/google/cloud/storage/transfer_manager.py
+++ b/google/cloud/storage/transfer_manager.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Concurrent media operations. This is a PREVIEW FEATURE: API may change."""
+"""Concurrent media operations."""
 
 import concurrent.futures
 
@@ -40,12 +40,6 @@
 from google.resumable_media.requests.upload import XMLMPUPart
 from google.resumable_media.common import DataCorruption
 
-warnings.warn(
-    "The module `transfer_manager` is a preview feature. Functionality and API "
-    "may change. This warning will be removed in a future release."
-)
-
-
 TM_DEFAULT_CHUNK_SIZE = 32 * 1024 * 1024
 DEFAULT_MAX_WORKERS = 8
 MAX_CRC32C_ZERO_ARRAY_SIZE = 4 * 1024 * 1024
@@ -120,8 +114,6 @@ def upload_many(
 ):
     """Upload many files concurrently via a worker pool.
 
-    This function is a PREVIEW FEATURE: the API may change in a future version.
-
     :type file_blob_pairs: List(Tuple(IOBase or str, 'google.cloud.storage.blob.Blob'))
     :param file_blob_pairs:
         A list of tuples of a file or filename and a blob. Each file will be
@@ -278,8 +270,6 @@ def download_many(
 ):
     """Download many blobs concurrently via a worker pool.
 
-    This function is a PREVIEW FEATURE: the API may change in a future version.
-
     :type blob_file_pairs: List(Tuple('google.cloud.storage.blob.Blob', IOBase or str))
     :param blob_file_pairs:
         A list of tuples of blob and a file or filename. Each blob will be downloaded to the corresponding blob by using APIs identical to blob.download_to_file() or blob.download_to_filename() as appropriate.
@@ -432,8 +422,6 @@ def upload_many_from_filenames(
 ):
     """Upload many files concurrently by their filenames.
 
-    This function is a PREVIEW FEATURE: the API may change in a future version.
-
     The destination blobs are automatically created, with blob names based on
     the source filenames and the blob_name_prefix.
 
@@ -622,8 +610,6 @@ def download_many_to_path(
 ):
     """Download many files concurrently by their blob names.
 
-    This function is a PREVIEW FEATURE: the API may change in a future version.
-
     The destination files are automatically created, with paths based on the
     source blob_names and the destination_directory.
 
@@ -792,8 +778,6 @@ def download_chunks_concurrently(
 ):
     """Download a single file in chunks, concurrently.
 
-    This function is a PREVIEW FEATURE: the API may change in a future version.
-
     In some environments, using this feature with multiple processes will result
     in faster downloads of large files.
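With the preview warnings above removed, a plain import of transfer_manager no longer emits a UserWarning. A minimal usage sketch of the GA surface, assuming a configured Client; the bucket and object names here are hypothetical:

    from google.cloud.storage import Client, transfer_manager

    client = Client()
    bucket = client.bucket("my-bucket")  # hypothetical bucket name
    blob = bucket.blob("large-object.bin")  # hypothetical object name

    # Download one large object in 32 MiB chunks with a pool of 8 workers;
    # no warnings filter is required now that the module is GA.
    transfer_manager.download_chunks_concurrently(
        blob,
        "large-object.bin",
        chunk_size=32 * 1024 * 1024,
        max_workers=8,
    )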
diff --git a/tests/unit/test_transfer_manager.py b/tests/unit/test_transfer_manager.py index c8f6e560e..54284becd 100644 --- a/tests/unit/test_transfer_manager.py +++ b/tests/unit/test_transfer_manager.py @@ -14,11 +14,9 @@ import pytest -with pytest.warns(UserWarning): - from google.cloud.storage import transfer_manager - from google.cloud.storage import Blob from google.cloud.storage import Client +from google.cloud.storage import transfer_manager from google.api_core import exceptions From 9e460d8106cbfb76caf35df4f6beed159fa2c22d Mon Sep 17 00:00:00 2001 From: Andrew Gorcester Date: Thu, 12 Oct 2023 12:46:36 -0700 Subject: [PATCH 12/13] docs: update formatting and wording in transfer_manager docstrings (#1163) * docs: update formatting and wording in transfer_manager docstrings * remove extraneous bucket param doc mention --- google/cloud/storage/transfer_manager.py | 122 ++++++++++++----------- 1 file changed, 63 insertions(+), 59 deletions(-) diff --git a/google/cloud/storage/transfer_manager.py b/google/cloud/storage/transfer_manager.py index 8001e40b0..41a67b5a4 100644 --- a/google/cloud/storage/transfer_manager.py +++ b/google/cloud/storage/transfer_manager.py @@ -117,7 +117,9 @@ def upload_many( :type file_blob_pairs: List(Tuple(IOBase or str, 'google.cloud.storage.blob.Blob')) :param file_blob_pairs: A list of tuples of a file or filename and a blob. Each file will be - uploaded to the corresponding blob by using APIs identical to blob.upload_from_file() or blob.upload_from_filename() as appropriate. + uploaded to the corresponding blob by using APIs identical to + `blob.upload_from_file()` or `blob.upload_from_filename()` as + appropriate. File handlers are only supported if worker_type is set to THREAD. If worker_type is set to PROCESS, please use filenames only. @@ -125,7 +127,7 @@ def upload_many( :type skip_if_exists: bool :param skip_if_exists: If True, blobs that already have a live version will not be overwritten. - This is accomplished by setting "if_generation_match = 0" on uploads. + This is accomplished by setting `if_generation_match = 0` on uploads. Uploads so skipped will result in a 412 Precondition Failed response code, which will be included in the return value but not raised as an exception regardless of the value of raise_exception. @@ -133,8 +135,9 @@ def upload_many( :type upload_kwargs: dict :param upload_kwargs: A dictionary of keyword arguments to pass to the upload method. Refer - to the documentation for blob.upload_from_file() or - blob.upload_from_filename() for more information. The dict is directly passed into the upload methods and is not validated by this function. + to the documentation for `blob.upload_from_file()` or + `blob.upload_from_filename()` for more information. The dict is directly + passed into the upload methods and is not validated by this function. :type threads: int :param threads: @@ -147,8 +150,8 @@ def upload_many( :param deadline: The number of seconds to wait for all threads to resolve. If the deadline is reached, all threads will be terminated regardless of their - progress and concurrent.futures.TimeoutError will be raised. This can be - left as the default of None (no deadline) for most use cases. + progress and `concurrent.futures.TimeoutError` will be raised. This can + be left as the default of `None` (no deadline) for most use cases. 
:type raise_exception: bool :param raise_exception: @@ -163,8 +166,8 @@ def upload_many( :type worker_type: str :param worker_type: - The worker type to use; one of google.cloud.storage.transfer_manager.PROCESS - or google.cloud.storage.transfer_manager.THREAD. + The worker type to use; one of `google.cloud.storage.transfer_manager.PROCESS` + or `google.cloud.storage.transfer_manager.THREAD`. Although the exact performance impact depends on the use case, in most situations the PROCESS worker type will use more system resources (both @@ -201,7 +204,7 @@ def upload_many( :returns: A list of results corresponding to, in order, each item in the input list. If an exception was received, it will be the result for that operation. Otherwise, the return value from the successful - upload method is used (typically, None). + upload method is used (which will be None). """ if upload_kwargs is None: upload_kwargs = {} @@ -282,7 +285,9 @@ def download_many( :type download_kwargs: dict :param download_kwargs: A dictionary of keyword arguments to pass to the download method. Refer - to the documentation for blob.download_to_file() or blob.download_to_filename() for more information. The dict is directly passed into the download methods and is not validated by this function. + to the documentation for `blob.download_to_file()` or + `blob.download_to_filename()` for more information. The dict is directly + passed into the download methods and is not validated by this function. :type threads: int :param threads: @@ -295,8 +300,8 @@ def download_many( :param deadline: The number of seconds to wait for all threads to resolve. If the deadline is reached, all threads will be terminated regardless of their - progress and concurrent.futures.TimeoutError will be raised. This can be - left as the default of None (no deadline) for most use cases. + progress and `concurrent.futures.TimeoutError` will be raised. This can + be left as the default of `None` (no deadline) for most use cases. :type raise_exception: bool :param raise_exception: @@ -308,8 +313,8 @@ def download_many( :type worker_type: str :param worker_type: - The worker type to use; one of google.cloud.storage.transfer_manager.PROCESS - or google.cloud.storage.transfer_manager.THREAD. + The worker type to use; one of `google.cloud.storage.transfer_manager.PROCESS` + or `google.cloud.storage.transfer_manager.THREAD`. Although the exact performance impact depends on the use case, in most situations the PROCESS worker type will use more system resources (both @@ -351,7 +356,7 @@ def download_many( :returns: A list of results corresponding to, in order, each item in the input list. If an exception was received, it will be the result for that operation. Otherwise, the return value from the successful - download method is used (typically, None). + download method is used (which will be None). """ if download_kwargs is None: @@ -430,18 +435,19 @@ def upload_many_from_filenames( then the file at "/home/myuser/images/icon.jpg" will be uploaded to a blob named "myfiles/images/icon.jpg". - :type bucket: 'google.cloud.storage.bucket.Bucket' + :type bucket: :class:`google.cloud.storage.bucket.Bucket` :param bucket: The bucket which will contain the uploaded blobs. :type filenames: list(str) :param filenames: A list of filenames to be uploaded. This may include part of the path. - The full path to the file must be source_directory + filename. + The file will be accessed at the full path of `source_directory` + + `filename`. 
:type source_directory: str :param source_directory: - A string that will be prepended (with os.path.join()) to each filename + A string that will be prepended (with `os.path.join()`) to each filename in the input list, in order to find the source file for each blob. Unlike the filename itself, the source_directory does not affect the name of the uploaded blob. @@ -472,7 +478,7 @@ def upload_many_from_filenames( :type skip_if_exists: bool :param skip_if_exists: If True, blobs that already have a live version will not be overwritten. - This is accomplished by setting "if_generation_match = 0" on uploads. + This is accomplished by setting `if_generation_match = 0` on uploads. Uploads so skipped will result in a 412 Precondition Failed response code, which will be included in the return value, but not raised as an exception regardless of the value of raise_exception. @@ -480,7 +486,7 @@ def upload_many_from_filenames( :type blob_constructor_kwargs: dict :param blob_constructor_kwargs: A dictionary of keyword arguments to pass to the blob constructor. Refer - to the documentation for blob.Blob() for more information. The dict is + to the documentation for `blob.Blob()` for more information. The dict is directly passed into the constructor and is not validated by this function. `name` and `bucket` keyword arguments are reserved by this function and will result in an error if passed in here. @@ -488,8 +494,9 @@ def upload_many_from_filenames( :type upload_kwargs: dict :param upload_kwargs: A dictionary of keyword arguments to pass to the upload method. Refer - to the documentation for blob.upload_from_file() or - blob.upload_from_filename() for more information. The dict is directly passed into the upload methods and is not validated by this function. + to the documentation for `blob.upload_from_file()` or + `blob.upload_from_filename()` for more information. The dict is directly + passed into the upload methods and is not validated by this function. :type threads: int :param threads: @@ -502,8 +509,8 @@ def upload_many_from_filenames( :param deadline: The number of seconds to wait for all threads to resolve. If the deadline is reached, all threads will be terminated regardless of their - progress and concurrent.futures.TimeoutError will be raised. This can be - left as the default of None (no deadline) for most use cases. + progress and `concurrent.futures.TimeoutError` will be raised. This can + be left as the default of `None` (no deadline) for most use cases. :type raise_exception: bool :param raise_exception: @@ -518,8 +525,8 @@ def upload_many_from_filenames( :type worker_type: str :param worker_type: - The worker type to use; one of google.cloud.storage.transfer_manager.PROCESS - or google.cloud.storage.transfer_manager.THREAD. + The worker type to use; one of `google.cloud.storage.transfer_manager.PROCESS` + or `google.cloud.storage.transfer_manager.THREAD`. Although the exact performance impact depends on the use case, in most situations the PROCESS worker type will use more system resources (both @@ -564,7 +571,7 @@ def upload_many_from_filenames( :returns: A list of results corresponding to, in order, each item in the input list. If an exception was received, it will be the result for that operation. Otherwise, the return value from the successful - upload method is used (typically, None). + upload method is used (which will be None). 
""" if blob_constructor_kwargs is None: blob_constructor_kwargs = {} @@ -622,7 +629,7 @@ def download_many_to_path( "images/icon.jpg" will be downloaded to a file named "/home/myuser/icon.jpg". - :type bucket: 'google.cloud.storage.bucket.Bucket' + :type bucket: :class:`google.cloud.storage.bucket.Bucket` :param bucket: The bucket which contains the blobs to be downloaded @@ -666,8 +673,8 @@ def download_many_to_path( :type download_kwargs: dict :param download_kwargs: A dictionary of keyword arguments to pass to the download method. Refer - to the documentation for blob.download_to_file() or - blob.download_to_filename() for more information. The dict is directly + to the documentation for `blob.download_to_file()` or + `blob.download_to_filename()` for more information. The dict is directly passed into the download methods and is not validated by this function. :type threads: int @@ -681,8 +688,8 @@ def download_many_to_path( :param deadline: The number of seconds to wait for all threads to resolve. If the deadline is reached, all threads will be terminated regardless of their - progress and concurrent.futures.TimeoutError will be raised. This can be - left as the default of None (no deadline) for most use cases. + progress and `concurrent.futures.TimeoutError` will be raised. This can + be left as the default of `None` (no deadline) for most use cases. :type create_directories: bool :param create_directories: @@ -702,8 +709,8 @@ def download_many_to_path( :type worker_type: str :param worker_type: - The worker type to use; one of google.cloud.storage.transfer_manager.PROCESS - or google.cloud.storage.transfer_manager.THREAD. + The worker type to use; one of `google.cloud.storage.transfer_manager.PROCESS` + or `google.cloud.storage.transfer_manager.THREAD`. Although the exact performance impact depends on the use case, in most situations the PROCESS worker type will use more system resources (both @@ -742,7 +749,7 @@ def download_many_to_path( :returns: A list of results corresponding to, in order, each item in the input list. If an exception was received, it will be the result for that operation. Otherwise, the return value from the successful - download method is used (typically, None). + download method is used (which will be None). """ blob_file_pairs = [] @@ -785,10 +792,7 @@ def download_chunks_concurrently( performance under normal circumstances due to Python interpreter threading behavior. The default is therefore to use processes instead of threads. - :param bucket: - The bucket which contains the blobs to be downloaded - - :type blob: `google.cloud.storage.Blob` + :type blob: :class:`google.cloud.storage.blob.Blob` :param blob: The blob to be downloaded. @@ -805,26 +809,26 @@ def download_chunks_concurrently( :type download_kwargs: dict :param download_kwargs: A dictionary of keyword arguments to pass to the download method. Refer - to the documentation for blob.download_to_file() or - blob.download_to_filename() for more information. The dict is directly + to the documentation for `blob.download_to_file()` or + `blob.download_to_filename()` for more information. The dict is directly passed into the download methods and is not validated by this function. Keyword arguments "start" and "end" which are not supported and will cause a ValueError if present. The key "checksum" is also not supported - in download_kwargs, but see the argument "crc32c_checksum" (which does - not go in download_kwargs) below. 
+ in `download_kwargs`, but see the argument `crc32c_checksum` (which does + not go in `download_kwargs`) below. :type deadline: int :param deadline: The number of seconds to wait for all threads to resolve. If the deadline is reached, all threads will be terminated regardless of their - progress and concurrent.futures.TimeoutError will be raised. This can be - left as the default of None (no deadline) for most use cases. + progress and `concurrent.futures.TimeoutError` will be raised. This can + be left as the default of `None` (no deadline) for most use cases. :type worker_type: str :param worker_type: - The worker type to use; one of google.cloud.storage.transfer_manager.PROCESS - or google.cloud.storage.transfer_manager.THREAD. + The worker type to use; one of `google.cloud.storage.transfer_manager.PROCESS` + or `google.cloud.storage.transfer_manager.THREAD`. Although the exact performance impact depends on the use case, in most situations the PROCESS worker type will use more system resources (both @@ -968,7 +972,7 @@ def upload_chunks_concurrently( file in chunks, concurrently with a worker pool. The XML MPU API is significantly different from other uploads; please review - the documentation at https://cloud.google.com/storage/docs/multipart-uploads + the documentation at `https://cloud.google.com/storage/docs/multipart-uploads` before using this feature. The library will attempt to cancel uploads that fail due to an exception. @@ -990,7 +994,7 @@ def upload_chunks_concurrently( :param filename: The path to the file to upload. File-like objects are not supported. - :type blob: `google.cloud.storage.Blob` + :type blob: :class:`google.cloud.storage.blob.Blob` :param blob: The blob to which to upload. @@ -1002,19 +1006,19 @@ def upload_chunks_concurrently( The size in bytes of each chunk to send. The optimal chunk size for maximum throughput may vary depending on the exact network environment and size of the blob. The remote API has restrictions on the minimum - and maximum size allowable, see: https://cloud.google.com/storage/quotas#requests + and maximum size allowable, see: `https://cloud.google.com/storage/quotas#requests` :type deadline: int :param deadline: The number of seconds to wait for all threads to resolve. If the deadline is reached, all threads will be terminated regardless of their - progress and concurrent.futures.TimeoutError will be raised. This can be - left as the default of None (no deadline) for most use cases. + progress and `concurrent.futures.TimeoutError` will be raised. This can + be left as the default of `None` (no deadline) for most use cases. :type worker_type: str :param worker_type: - The worker type to use; one of google.cloud.storage.transfer_manager.PROCESS - or google.cloud.storage.transfer_manager.THREAD. + The worker type to use; one of `google.cloud.storage.transfer_manager.PROCESS` + or `google.cloud.storage.transfer_manager.THREAD`. Although the exact performance impact depends on the use case, in most situations the PROCESS worker type will use more system resources (both @@ -1044,12 +1048,12 @@ def upload_chunks_concurrently( :type checksum: str :param checksum: - (Optional) The checksum scheme to use: either 'md5', 'crc32c' or None. + (Optional) The checksum scheme to use: either "md5", "crc32c" or None. Each individual part is checksummed. 
At present, the selected checksum rule is only applied to parts and a separate checksum of the entire resulting blob is not computed. Please compute and compare the checksum of the file to the resulting blob separately if needed, using the - 'crc32c' algorithm as per the XML MPU documentation. + "crc32c" algorithm as per the XML MPU documentation. :type timeout: float or tuple :param timeout: @@ -1058,16 +1062,16 @@ def upload_chunks_concurrently( :type retry: google.api_core.retry.Retry :param retry: (Optional) How to retry the RPC. A None value will disable - retries. A google.api_core.retry.Retry value will enable retries, + retries. A `google.api_core.retry.Retry` value will enable retries, and the object will configure backoff and timeout options. Custom predicates (customizable error codes) are not supported for media operations such as this one. - This function does not accept ConditionalRetryPolicy values because + This function does not accept `ConditionalRetryPolicy` values because preconditions are not supported by the underlying API call. See the retry.py source code and docstrings in this package - (google.cloud.storage.retry) for information on retry types and how + (`google.cloud.storage.retry`) for information on retry types and how to configure them. :raises: :exc:`concurrent.futures.TimeoutError` if deadline is exceeded. From 1ef0e1a94976780f1e847ec662344fe261757aec Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 12 Oct 2023 13:04:28 -0700 Subject: [PATCH 13/13] chore(main): release 2.12.0 (#1132) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 22 ++++++++++++++++++++++ google/cloud/storage/version.py | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15c4c1f38..57d91347a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,28 @@ [1]: https://pypi.org/project/google-cloud-storage/#history +## [2.12.0](https://github.com/googleapis/python-storage/compare/v2.11.0...v2.12.0) (2023-10-12) + + +### Features + +* Add additional_blob_attributes to upload_many_from_filenames ([#1162](https://github.com/googleapis/python-storage/issues/1162)) ([c7229f2](https://github.com/googleapis/python-storage/commit/c7229f2e53151fc2f2eb1268afc67dad87ebbb0a)) +* Add crc32c_checksum argument to download_chunks_concurrently ([#1138](https://github.com/googleapis/python-storage/issues/1138)) ([fc92ad1](https://github.com/googleapis/python-storage/commit/fc92ad19ff0f9704456452e8c7c47a5f90c29eab)) +* Add skip_if_exists to download_many ([#1161](https://github.com/googleapis/python-storage/issues/1161)) ([c5a983d](https://github.com/googleapis/python-storage/commit/c5a983d5a0b0632811af86fb64664b4382b05512)) +* Launch transfer manager to GA ([#1159](https://github.com/googleapis/python-storage/issues/1159)) ([5c90563](https://github.com/googleapis/python-storage/commit/5c905637947c45e39ed8ee84911a12e254bde571)) + + +### Bug Fixes + 
+* Bump python-auth version to fix issue and remove workaround ([#1158](https://github.com/googleapis/python-storage/issues/1158)) ([28c02dd](https://github.com/googleapis/python-storage/commit/28c02dd41010e6d818a77f51c539457b2dbfa233)) +* Mark _deprecate_threads_param as a wrapper to unblock introspection and docs ([#1122](https://github.com/googleapis/python-storage/issues/1122)) ([69bd4a9](https://github.com/googleapis/python-storage/commit/69bd4a935a995f8f261a589ee2978f58b90224ab)) + + +### Documentation + +* Add snippets for upload_chunks_concurrently and add chunk_size ([#1135](https://github.com/googleapis/python-storage/issues/1135)) ([3a0f551](https://github.com/googleapis/python-storage/commit/3a0f551436b659afb2208fd558ddb846f4d62d98)) +* Update formatting and wording in transfer_manager docstrings ([#1163](https://github.com/googleapis/python-storage/issues/1163)) ([9e460d8](https://github.com/googleapis/python-storage/commit/9e460d8106cbfb76caf35df4f6beed159fa2c22d)) + ## [2.11.0](https://github.com/googleapis/python-storage/compare/v2.10.0...v2.11.0) (2023-09-19) diff --git a/google/cloud/storage/version.py b/google/cloud/storage/version.py index e6e357434..67e043bde 100644 --- a/google/cloud/storage/version.py +++ b/google/cloud/storage/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.11.0" +__version__ = "2.12.0"
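Taken together, the 2.12.0 features above add three opt-in knobs to the transfer manager: skip_if_exists on batch downloads (#1161), additional_blob_attributes on batch uploads (#1162), and crc32c_checksum on chunked downloads (#1138). A short sketch of how they compose, assuming a configured Client; all bucket, object, and path names are hypothetical:

    from google.cloud.storage import Client, transfer_manager

    client = Client()
    bucket = client.bucket("my-bucket")  # hypothetical bucket name

    # additional_blob_attributes (#1162): apply the same attribute values
    # to every blob created from the filename list.
    transfer_manager.upload_many_from_filenames(
        bucket,
        ["a.txt", "b.txt"],
        source_directory="uploads",
        additional_blob_attributes={"cache_control": "no-cache"},
    )

    # skip_if_exists (#1161): skip any blob whose destination file is
    # already present on disk.
    transfer_manager.download_many_to_path(
        bucket,
        ["a.txt", "b.txt"],
        destination_directory="downloads",
        skip_if_exists=True,
    )

    # crc32c_checksum (#1138): verify the assembled file against the
    # blob's composite CRC32C checksum (enabled by default; requires the
    # google-crc32c dependency added to setup.py above).
    blob = bucket.blob("large-object.bin")  # hypothetical object name
    transfer_manager.download_chunks_concurrently(
        blob,
        "large-object.bin",
        crc32c_checksum=True,
    )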