Skip to content

Commit 9f7bbeb

Browse files
authored
feat(bigquery): add dataset/table collation (#7235)
1 parent 1d165ff commit 9f7bbeb

File tree

6 files changed

+176
-0
lines changed

6 files changed

+176
-0
lines changed

bigquery/dataset.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,15 @@ type DatasetMetadata struct {
4949
// all newly created partitioned tables in the dataset.
5050
DefaultPartitionExpiration time.Duration
5151

52+
// Defines the default collation specification of future tables
53+
// created in the dataset. If a table is created in this dataset without
54+
// table-level default collation, then the table inherits the dataset default
55+
// collation, which is applied to the string fields that do not have explicit
56+
// collation specified. A change to this field affects only tables created
57+
// afterwards, and does not alter the existing tables.
58+
// More information: https://cloud.google.com/bigquery/docs/reference/standard-sql/collation-concepts
59+
DefaultCollation string
60+
5261
// These fields are read-only.
5362
CreationTime time.Time
5463
LastModifiedTime time.Time // When the dataset or any of its tables were modified.
@@ -104,6 +113,10 @@ type DatasetMetadataToUpdate struct {
104113
// in the dataset.
105114
DefaultEncryptionConfig *EncryptionConfig
106115

116+
// Defines the default collation specification of future tables
117+
// created in the dataset.
118+
DefaultCollation optional.String
119+
107120
// The entire access list. It is not possible to replace individual entries.
108121
Access []*AccessEntry
109122

@@ -174,6 +187,7 @@ func (dm *DatasetMetadata) toBQ() (*bq.Dataset, error) {
174187
ds.Location = dm.Location
175188
ds.DefaultTableExpirationMs = int64(dm.DefaultTableExpiration / time.Millisecond)
176189
ds.DefaultPartitionExpirationMs = int64(dm.DefaultPartitionExpiration / time.Millisecond)
190+
ds.DefaultCollation = string(dm.DefaultCollation)
177191
ds.Labels = dm.Labels
178192
var err error
179193
ds.Access, err = accessListToBQ(dm.Access)
@@ -259,6 +273,7 @@ func bqToDatasetMetadata(d *bq.Dataset, c *Client) (*DatasetMetadata, error) {
259273
LastModifiedTime: unixMillisToTime(d.LastModifiedTime),
260274
DefaultTableExpiration: time.Duration(d.DefaultTableExpirationMs) * time.Millisecond,
261275
DefaultPartitionExpiration: time.Duration(d.DefaultPartitionExpirationMs) * time.Millisecond,
276+
DefaultCollation: d.DefaultCollation,
262277
DefaultEncryptionConfig: bqToEncryptionConfig(d.DefaultEncryptionConfiguration),
263278
Description: d.Description,
264279
Name: d.FriendlyName,
@@ -344,6 +359,10 @@ func (dm *DatasetMetadataToUpdate) toBQ() (*bq.Dataset, error) {
344359
ds.DefaultPartitionExpirationMs = int64(dur / time.Millisecond)
345360
}
346361
}
362+
if dm.DefaultCollation != nil {
363+
ds.DefaultCollation = optional.ToString(dm.DefaultCollation)
364+
forceSend("DefaultCollation")
365+
}
347366
if dm.DefaultEncryptionConfig != nil {
348367
ds.DefaultEncryptionConfiguration = dm.DefaultEncryptionConfig.toBQ()
349368
ds.DefaultEncryptionConfiguration.ForceSendFields = []string{"KmsKeyName"}

bigquery/dataset_integration_test.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,36 @@ func TestIntegration_DatasetUpdateDefaultPartitionExpiration(t *testing.T) {
229229
}
230230
}
231231

232+
func TestIntegration_DatasetUpdateDefaultCollation(t *testing.T) {
233+
if client == nil {
234+
t.Skip("Integration tests skipped")
235+
}
236+
ctx := context.Background()
237+
_, err := dataset.Metadata(ctx)
238+
if err != nil {
239+
t.Fatal(err)
240+
}
241+
caseInsensitiveCollation := "und:ci"
242+
// Set the default collation
243+
md, err := dataset.Update(ctx, DatasetMetadataToUpdate{
244+
DefaultCollation: caseInsensitiveCollation,
245+
}, "")
246+
if err != nil {
247+
t.Fatal(err)
248+
}
249+
if md.DefaultCollation != caseInsensitiveCollation {
250+
t.Fatalf("got %q, want und:ci", md.DefaultCollation)
251+
}
252+
// Omitting DefaultCollation doesn't change it.
253+
md, err = dataset.Update(ctx, DatasetMetadataToUpdate{Name: "xyz"}, "")
254+
if err != nil {
255+
t.Fatal(err)
256+
}
257+
if md.DefaultCollation != caseInsensitiveCollation {
258+
t.Fatalf("got %q, want und:ci", md.DefaultCollation)
259+
}
260+
}
261+
232262
func TestIntegration_DatasetUpdateAccess(t *testing.T) {
233263
if client == nil {
234264
t.Skip("Integration tests skipped")

bigquery/schema.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,13 @@ type FieldSchema struct {
141141
// - Struct or array composed with the above allowed functions, for example:
142142
// [CURRENT_DATE(), DATE '2020-01-01']"
143143
DefaultValueExpression string
144+
145+
// Collation can be set only when the type of field is STRING.
146+
// The following values are supported:
147+
// - 'und:ci': undetermined locale, case insensitive.
148+
// - '': empty string. Default to case-sensitive behavior.
149+
// More information: https://cloud.google.com/bigquery/docs/reference/standard-sql/collation-concepts
150+
Collation string
144151
}
145152

146153
func (fs *FieldSchema) toBQ() *bq.TableFieldSchema {
@@ -153,6 +160,7 @@ func (fs *FieldSchema) toBQ() *bq.TableFieldSchema {
153160
Precision: fs.Precision,
154161
Scale: fs.Scale,
155162
DefaultValueExpression: fs.DefaultValueExpression,
163+
Collation: string(fs.Collation),
156164
}
157165

158166
if fs.Repeated {
@@ -212,6 +220,7 @@ func bqToFieldSchema(tfs *bq.TableFieldSchema) *FieldSchema {
212220
Precision: tfs.Precision,
213221
Scale: tfs.Scale,
214222
DefaultValueExpression: tfs.DefaultValueExpression,
223+
Collation: tfs.Collation,
215224
}
216225

217226
for _, f := range tfs.Fields {

bigquery/schema_test.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,34 @@ func TestSchemaConversion(t *testing.T) {
348348
},
349349
},
350350
},
351+
{
352+
// collation values
353+
bqSchema: &bq.TableSchema{
354+
Fields: []*bq.TableFieldSchema{
355+
{
356+
Name: "name",
357+
Type: "STRING",
358+
Collation: "und:ci",
359+
},
360+
{
361+
Name: "another_name",
362+
Type: "STRING",
363+
Collation: "",
364+
},
365+
}},
366+
schema: Schema{
367+
{
368+
Name: "name",
369+
Type: StringFieldType,
370+
Collation: "und:ci",
371+
},
372+
{
373+
Name: "another_name",
374+
Type: StringFieldType,
375+
Collation: "",
376+
},
377+
},
378+
},
351379
{
352380
// policy tags
353381
bqSchema: &bq.TableSchema{

bigquery/table.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,17 @@ type TableMetadata struct {
136136
// ETag is the ETag obtained when reading metadata. Pass it to Table.Update to
137137
// ensure that the metadata hasn't changed since it was read.
138138
ETag string
139+
140+
// Defines the default collation specification of new STRING fields
141+
// in the table. During table creation or update, if a STRING field is added
142+
// to this table without explicit collation specified, then the field inherits
143+
// the table default collation. A change to this field affects only fields
144+
// added afterwards, and does not alter the existing fields.
145+
// The following values are supported:
146+
// - 'und:ci': undetermined locale, case insensitive.
147+
// - '': empty string. Default to case-sensitive behavior.
148+
// More information: https://cloud.google.com/bigquery/docs/reference/standard-sql/collation-concepts
149+
DefaultCollation string
139150
}
140151

141152
// TableCreateDisposition specifies the circumstances under which destination table will be created.
@@ -663,6 +674,7 @@ func (tm *TableMetadata) toBQ() (*bq.Table, error) {
663674
if tm.ETag != "" {
664675
return nil, errors.New("cannot set ETag on create")
665676
}
677+
t.DefaultCollation = string(tm.DefaultCollation)
666678
return t, nil
667679
}
668680

@@ -743,6 +755,7 @@ func bqToTableMetadata(t *bq.Table, c *Client) (*TableMetadata, error) {
743755
CreationTime: unixMillisToTime(t.CreationTime),
744756
LastModifiedTime: unixMillisToTime(int64(t.LastModifiedTime)),
745757
ETag: t.Etag,
758+
DefaultCollation: t.DefaultCollation,
746759
EncryptionConfig: bqToEncryptionConfig(t.EncryptionConfiguration),
747760
RequirePartitionFilter: t.RequirePartitionFilter,
748761
SnapshotDefinition: bqToSnapshotDefinition(t.SnapshotDefinition, c),
@@ -924,6 +937,10 @@ func (tm *TableMetadataToUpdate) toBQ() (*bq.Table, error) {
924937
t.View.UseLegacySql = optional.ToBool(tm.UseLegacySQL)
925938
t.View.ForceSendFields = append(t.View.ForceSendFields, "UseLegacySql")
926939
}
940+
if tm.DefaultCollation != nil {
941+
t.DefaultCollation = optional.ToString(tm.DefaultCollation)
942+
forceSend("DefaultCollation")
943+
}
927944
labels, forces, nulls := tm.update()
928945
t.Labels = labels
929946
t.ForceSendFields = append(t.ForceSendFields, forces...)
@@ -997,6 +1014,10 @@ type TableMetadataToUpdate struct {
9971014
// elimination when referenced in a query.
9981015
RequirePartitionFilter optional.Bool
9991016

1017+
// Defines the default collation specification of new STRING fields
1018+
// in the table.
1019+
DefaultCollation optional.String
1020+
10001021
labelUpdater
10011022
}
10021023

bigquery/table_integration_test.go

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -592,3 +592,72 @@ func TestIntegration_TableUseLegacySQL(t *testing.T) {
592592
_ = view.Delete(ctx)
593593
}
594594
}
595+
596+
func TestIntegration_TableDefaultCollation(t *testing.T) {
597+
// Test DefaultCollation for Table.Create and Table.Update
598+
if client == nil {
599+
t.Skip("Integration tests skipped")
600+
}
601+
ctx := context.Background()
602+
table := dataset.Table(tableIDs.New())
603+
caseInsensitiveCollation := "und:ci"
604+
caseSensitiveCollation := ""
605+
err := table.Create(context.Background(), &TableMetadata{
606+
Schema: schema,
607+
DefaultCollation: caseInsensitiveCollation,
608+
ExpirationTime: testTableExpiration,
609+
})
610+
if err != nil {
611+
t.Fatal(err)
612+
}
613+
defer table.Delete(ctx)
614+
md, err := table.Metadata(ctx)
615+
if err != nil {
616+
t.Fatal(err)
617+
}
618+
if md.DefaultCollation != caseInsensitiveCollation {
619+
t.Fatalf("expected default collation to be %q, but found %q", caseInsensitiveCollation, md.DefaultCollation)
620+
}
621+
for _, field := range md.Schema {
622+
if field.Type == StringFieldType {
623+
if field.Collation != caseInsensitiveCollation {
624+
t.Fatalf("expected all columns to have collation %q, but found %q on field :%v", caseInsensitiveCollation, field.Collation, field.Name)
625+
}
626+
}
627+
}
628+
629+
// Update table DefaultCollation to case-sensitive
630+
md, err = table.Update(ctx, TableMetadataToUpdate{
631+
DefaultCollation: caseSensitiveCollation,
632+
}, "")
633+
if err != nil {
634+
t.Fatal(err)
635+
}
636+
if md.DefaultCollation != caseSensitiveCollation {
637+
t.Fatalf("expected default collation to be %q, but found %q", caseSensitiveCollation, md.DefaultCollation)
638+
}
639+
640+
// Add a field with different case-insensitive collation
641+
updatedSchema := md.Schema
642+
updatedSchema = append(updatedSchema, &FieldSchema{
643+
Name: "another_name",
644+
Type: StringFieldType,
645+
Collation: caseInsensitiveCollation,
646+
})
647+
md, err = table.Update(ctx, TableMetadataToUpdate{
648+
Schema: updatedSchema,
649+
}, "")
650+
if err != nil {
651+
t.Fatal(err)
652+
}
653+
if md.DefaultCollation != caseSensitiveCollation {
654+
t.Fatalf("expected default collation to be %q, but found %q", caseSensitiveCollation, md.DefaultCollation)
655+
}
656+
for _, field := range md.Schema {
657+
if field.Type == StringFieldType {
658+
if field.Collation != caseInsensitiveCollation {
659+
t.Fatalf("expected all columns to have collation %q, but found %q on field :%v", caseInsensitiveCollation, field.Collation, field.Name)
660+
}
661+
}
662+
}
663+
}

0 commit comments

Comments
 (0)