Skip to content

Commit cbb1902

Browse files
author
Praful Makani
authored
docs(samples): load orc file into a table from gcs (#583)
1 parent 8d254d8 commit cbb1902

File tree

2 files changed

+156
-0
lines changed

2 files changed

+156
-0
lines changed
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/*
2+
* Copyright 2020 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.bigquery;
18+
19+
// [START bigquery_load_table_gcs_orc]
20+
import com.google.cloud.bigquery.BigQuery;
21+
import com.google.cloud.bigquery.BigQueryException;
22+
import com.google.cloud.bigquery.BigQueryOptions;
23+
import com.google.cloud.bigquery.Field;
24+
import com.google.cloud.bigquery.FormatOptions;
25+
import com.google.cloud.bigquery.Job;
26+
import com.google.cloud.bigquery.JobInfo;
27+
import com.google.cloud.bigquery.LoadJobConfiguration;
28+
import com.google.cloud.bigquery.Schema;
29+
import com.google.cloud.bigquery.StandardSQLTypeName;
30+
import com.google.cloud.bigquery.TableId;
31+
32+
// Sample to load ORC data from Cloud Storage into a new BigQuery table
33+
public class LoadOrcFromGCS {
34+
35+
public static void runLoadOrcFromGCS() {
36+
// TODO(developer): Replace these variables before running the sample.
37+
String datasetName = "MY_DATASET_NAME";
38+
String tableName = "MY_TABLE_NAME";
39+
String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc";
40+
Schema schema =
41+
Schema.of(
42+
Field.of("name", StandardSQLTypeName.STRING),
43+
Field.of("post_abbr", StandardSQLTypeName.STRING));
44+
loadOrcFromGCS(datasetName, tableName, sourceUri, schema);
45+
}
46+
47+
public static void loadOrcFromGCS(
48+
String datasetName, String tableName, String sourceUri, Schema schema) {
49+
try {
50+
// Initialize client that will be used to send requests. This client only needs to be created
51+
// once, and can be reused for multiple requests.
52+
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
53+
54+
TableId tableId = TableId.of(datasetName, tableName);
55+
LoadJobConfiguration loadConfig =
56+
LoadJobConfiguration.newBuilder(tableId, sourceUri, FormatOptions.orc())
57+
.setSchema(schema)
58+
.build();
59+
60+
// Load data from a GCS ORC file into the table
61+
Job job = bigquery.create(JobInfo.of(loadConfig));
62+
// Blocks until this load table job completes its execution, either failing or succeeding.
63+
job = job.waitFor();
64+
if (job.isDone() && job.getStatus().getError() == null) {
65+
System.out.println("ORC from GCS successfully added during load append job");
66+
} else {
67+
System.out.println(
68+
"BigQuery was unable to load into the table due to an error:"
69+
+ job.getStatus().getError());
70+
}
71+
} catch (BigQueryException | InterruptedException e) {
72+
System.out.println("Column not added during load append \n" + e.toString());
73+
}
74+
}
75+
}
76+
// [END bigquery_load_table_gcs_orc]
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/*
2+
* Copyright 2020 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.bigquery;
18+
19+
import static com.google.common.truth.Truth.assertThat;
20+
import static junit.framework.TestCase.assertNotNull;
21+
22+
import com.google.cloud.bigquery.Field;
23+
import com.google.cloud.bigquery.Schema;
24+
import com.google.cloud.bigquery.StandardSQLTypeName;
25+
import java.io.ByteArrayOutputStream;
26+
import java.io.PrintStream;
27+
import java.util.UUID;
28+
import org.junit.After;
29+
import org.junit.Before;
30+
import org.junit.BeforeClass;
31+
import org.junit.Test;
32+
33+
public class LoadOrcFromGCSIT {
34+
35+
private String tableName;
36+
private ByteArrayOutputStream bout;
37+
private PrintStream out;
38+
39+
private static final String BIGQUERY_DATASET_NAME = requireEnvVar("BIGQUERY_DATASET_NAME");
40+
41+
private static String requireEnvVar(String varName) {
42+
String value = System.getenv(varName);
43+
assertNotNull(
44+
"Environment variable " + varName + " is required to perform these tests.",
45+
System.getenv(varName));
46+
return value;
47+
}
48+
49+
@BeforeClass
50+
public static void checkRequirements() {
51+
requireEnvVar("BIGQUERY_DATASET_NAME");
52+
}
53+
54+
@Before
55+
public void setUp() {
56+
// Create a test table
57+
tableName = "LOAD_ORC_TABLE_FROM_GCS_TEST_" + UUID.randomUUID().toString().substring(0, 8);
58+
bout = new ByteArrayOutputStream();
59+
out = new PrintStream(bout);
60+
System.setOut(out);
61+
}
62+
63+
@After
64+
public void tearDown() {
65+
// Clean up
66+
DeleteTable.deleteTable(BIGQUERY_DATASET_NAME, tableName);
67+
System.setOut(null);
68+
}
69+
70+
@Test
71+
public void testLoadOrcFromGCS() {
72+
String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc";
73+
Schema schema =
74+
Schema.of(
75+
Field.of("name", StandardSQLTypeName.STRING),
76+
Field.of("post_abbr", StandardSQLTypeName.STRING));
77+
LoadOrcFromGCS.loadOrcFromGCS(BIGQUERY_DATASET_NAME, tableName, sourceUri, schema);
78+
assertThat(bout.toString()).contains("ORC from GCS successfully added during load append job");
79+
}
80+
}

0 commit comments

Comments
 (0)