Skip to content

Commit 219f7a9

Browse files
author
Praful Makani
authored
docs(samples): add load table from gcs using avro file (#564)
1 parent 0c092e0 commit 219f7a9

File tree

2 files changed

+130
-0
lines changed

2 files changed

+130
-0
lines changed
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
/*
 * Copyright 2020 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17+
package com.example.bigquery;
18+
19+
// [START bigquery_load_table_gcs_avro]
20+
import com.google.cloud.bigquery.BigQuery;
21+
import com.google.cloud.bigquery.BigQueryException;
22+
import com.google.cloud.bigquery.BigQueryOptions;
23+
import com.google.cloud.bigquery.FormatOptions;
24+
import com.google.cloud.bigquery.Job;
25+
import com.google.cloud.bigquery.JobInfo;
26+
import com.google.cloud.bigquery.LoadJobConfiguration;
27+
import com.google.cloud.bigquery.TableId;
28+
29+
// Sample to load Avro data from Cloud Storage into a new BigQuery table
30+
public class LoadAvroFromGCS {
31+
32+
public static void runLoadAvroFromGCS() {
33+
// TODO(developer): Replace these variables before running the sample.
34+
String datasetName = "MY_DATASET_NAME";
35+
String tableName = "MY_TABLE_NAME";
36+
String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro";
37+
loadAvroFromGCS(datasetName, tableName, sourceUri);
38+
}
39+
40+
public static void loadAvroFromGCS(String datasetName, String tableName, String sourceUri) {
41+
try {
42+
// Initialize client that will be used to send requests. This client only needs to be created
43+
// once, and can be reused for multiple requests.
44+
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
45+
46+
TableId tableId = TableId.of(datasetName, tableName);
47+
LoadJobConfiguration loadConfig =
48+
LoadJobConfiguration.of(tableId, sourceUri, FormatOptions.avro());
49+
50+
// Load data from a GCS Avro file into the table
51+
Job job = bigquery.create(JobInfo.of(loadConfig));
52+
// Blocks until this load table job completes its execution, either failing or succeeding.
53+
job = job.waitFor();
54+
if (job.isDone()) {
55+
System.out.println("Avro from GCS successfully loaded in a table");
56+
} else {
57+
System.out.println(
58+
"BigQuery was unable to load into the table due to an error:"
59+
+ job.getStatus().getError());
60+
}
61+
} catch (BigQueryException | InterruptedException e) {
62+
System.out.println("Column not added during load append \n" + e.toString());
63+
}
64+
}
65+
}
66+
// [END bigquery_load_table_gcs_avro]
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
package com.example.bigquery;
2+
3+
import static com.google.common.truth.Truth.assertThat;
4+
import static junit.framework.TestCase.assertNotNull;
5+
6+
import com.google.cloud.bigquery.Schema;
7+
import java.io.ByteArrayOutputStream;
8+
import java.io.PrintStream;
9+
import java.util.UUID;
10+
import org.junit.After;
11+
import org.junit.Before;
12+
import org.junit.BeforeClass;
13+
import org.junit.Test;
14+
15+
public class LoadAvroFromGCSIT {
16+
17+
private String tableName;
18+
private ByteArrayOutputStream bout;
19+
private PrintStream out;
20+
21+
private static final String BIGQUERY_DATASET_NAME = requireEnvVar("BIGQUERY_DATASET_NAME");
22+
23+
private static String requireEnvVar(String varName) {
24+
String value = System.getenv(varName);
25+
assertNotNull(
26+
"Environment variable " + varName + " is required to perform these tests.",
27+
System.getenv(varName));
28+
return value;
29+
}
30+
31+
@BeforeClass
32+
public static void checkRequirements() {
33+
requireEnvVar("BIGQUERY_DATASET_NAME");
34+
}
35+
36+
@Before
37+
public void setUp() {
38+
bout = new ByteArrayOutputStream();
39+
out = new PrintStream(bout);
40+
System.setOut(out);
41+
42+
// Create a test table
43+
tableName = "MY_LOAD_AVRO_TABLE_FROM_GCS_TEST_" + UUID.randomUUID().toString().substring(0, 8);
44+
CreateTable.createTable(BIGQUERY_DATASET_NAME, tableName, Schema.of());
45+
46+
bout = new ByteArrayOutputStream();
47+
out = new PrintStream(bout);
48+
System.setOut(out);
49+
}
50+
51+
@After
52+
public void tearDown() {
53+
// Clean up
54+
DeleteTable.deleteTable(BIGQUERY_DATASET_NAME, tableName);
55+
System.setOut(null);
56+
}
57+
58+
@Test
59+
public void loadLoadAvroFromGCS() {
60+
String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro";
61+
LoadAvroFromGCS.loadAvroFromGCS(BIGQUERY_DATASET_NAME, tableName, sourceUri);
62+
assertThat(bout.toString()).contains("Avro from GCS successfully loaded in a table");
63+
}
64+
}

0 commit comments

Comments
 (0)