Skip to content

Commit c24c14f

Browse files
feat: add cache for location in stream writer, and trigger that when location is not provided (#1804)
* feat: Split writer into connection worker and wrapper, this is a prerequisite for multiplexing client * feat: add connection worker pool skeleton, used for multiplexing client * feat: add Load api for connection worker for multiplexing client * feat: add multiplexing support to connection worker. We will treat every new stream name as a switch of destination * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * feat: port the multiplexing client core algorithm and basic tests; also fixed a tiny bug inside fake bigquery write impl for getting the response from offset * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * feat: wire multiplexing connection pool to stream writer * feat: some fixes for multiplexing client * feat: fix some todos, and reject the mixed behavior of passed in client or not Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 4bf67bc commit c24c14f

File tree

3 files changed

+108
-32
lines changed

3 files changed

+108
-32
lines changed

google-cloud-bigquerystorage/src/main/java/com/google/cloud/bigquery/storage/v1/StreamWriter.java

Lines changed: 72 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,10 @@
3333
import java.util.Objects;
3434
import java.util.UUID;
3535
import java.util.concurrent.ConcurrentHashMap;
36+
import java.util.concurrent.TimeUnit;
3637
import java.util.logging.Logger;
38+
import java.util.regex.Matcher;
39+
import java.util.regex.Pattern;
3740

3841
/**
3942
* A BigQuery Stream Writer that can be used to write data into BigQuery Table.
@@ -43,6 +46,12 @@
4346
public class StreamWriter implements AutoCloseable {
4447
private static final Logger log = Logger.getLogger(StreamWriter.class.getName());
4548

49+
private static String datasetsMatching = "projects/[^/]+/datasets/[^/]+/";
50+
private static Pattern streamPattern = Pattern.compile(datasetsMatching);
51+
52+
// Cache of location info for a given dataset.
53+
private static Map<String, String> projectAndDatasetToLocation = new ConcurrentHashMap<>();
54+
4655
/*
4756
* The identifier of stream to write to.
4857
*/
@@ -167,12 +176,11 @@ public static SingleConnectionOrConnectionPool ofConnectionPool(
167176
}
168177

169178
private StreamWriter(Builder builder) throws IOException {
170-
BigQueryWriteClient client;
171179
this.streamName = builder.streamName;
172180
this.writerSchema = builder.writerSchema;
173-
this.location = builder.location;
174181
boolean ownsBigQueryWriteClient = builder.client == null;
175182
if (!builder.enableConnectionPool) {
183+
this.location = builder.location;
176184
this.singleConnectionOrConnectionPool =
177185
SingleConnectionOrConnectionPool.ofSingleConnection(
178186
new ConnectionWorker(
@@ -185,31 +193,79 @@ private StreamWriter(Builder builder) throws IOException {
185193
getBigQueryWriteClient(builder),
186194
ownsBigQueryWriteClient));
187195
} else {
188-
if (builder.location == null || builder.location.isEmpty()) {
189-
throw new IllegalArgumentException("Location must be specified for multiplexing client!");
196+
BigQueryWriteClient client = getBigQueryWriteClient(builder);
197+
String location = builder.location;
198+
if (location == null || location.isEmpty()) {
199+
// Location is not passed in, try to fetch from RPC
200+
String datasetAndProjectName = extractDatasetAndProjectName(builder.streamName);
201+
location =
202+
projectAndDatasetToLocation.computeIfAbsent(
203+
datasetAndProjectName,
204+
(key) -> {
205+
GetWriteStreamRequest writeStreamRequest =
206+
GetWriteStreamRequest.newBuilder()
207+
.setName(this.getStreamName())
208+
.setView(WriteStreamView.BASIC)
209+
.build();
210+
211+
WriteStream writeStream = client.getWriteStream(writeStreamRequest);
212+
TableSchema writeStreamTableSchema = writeStream.getTableSchema();
213+
String fetchedLocation = writeStream.getLocation();
214+
log.info(
215+
String.format(
216+
"Fethed location %s for stream name %s", fetchedLocation, streamName));
217+
return fetchedLocation;
218+
});
219+
if (location.isEmpty()) {
220+
throw new IllegalStateException(
221+
String.format(
222+
"The location is empty for both user passed in value and looked up value for "
223+
+ "stream: %s",
224+
streamName));
225+
}
190226
}
227+
this.location = location;
191228
// Assume the connection in the same pool share the same client and trace id.
192229
// The first StreamWriter for a new stub will create the pool for the other
193230
// streams in the same region, meaning the per StreamWriter settings are no
194231
// longer working unless all streams share the same set of settings
195232
this.singleConnectionOrConnectionPool =
196233
SingleConnectionOrConnectionPool.ofConnectionPool(
197234
connectionPoolMap.computeIfAbsent(
198-
ConnectionPoolKey.create(builder.location),
235+
ConnectionPoolKey.create(location),
199236
(key) -> {
200-
try {
201-
return new ConnectionWorkerPool(
202-
builder.maxInflightRequest,
203-
builder.maxInflightBytes,
204-
builder.limitExceededBehavior,
205-
builder.traceId,
206-
getBigQueryWriteClient(builder),
207-
ownsBigQueryWriteClient);
208-
} catch (IOException e) {
209-
throw new RuntimeException(e);
210-
}
237+
return new ConnectionWorkerPool(
238+
builder.maxInflightRequest,
239+
builder.maxInflightBytes,
240+
builder.limitExceededBehavior,
241+
builder.traceId,
242+
client,
243+
ownsBigQueryWriteClient);
211244
}));
212245
validateFetchedConnectonPool(builder);
246+
// Shut down the passed in client. Internally we will create another client inside connection
247+
// pool for every new connection worker.
248+
if (client != singleConnectionOrConnectionPool.connectionWorkerPool().bigQueryWriteClient()
249+
&& ownsBigQueryWriteClient) {
250+
client.shutdown();
251+
try {
252+
client.awaitTermination(150, TimeUnit.SECONDS);
253+
} catch (InterruptedException unused) {
254+
// Ignore interruption as this client is not used.
255+
}
256+
client.close();
257+
}
258+
}
259+
}
260+
261+
@VisibleForTesting
262+
static String extractDatasetAndProjectName(String streamName) {
263+
Matcher streamMatcher = streamPattern.matcher(streamName);
264+
if (streamMatcher.find()) {
265+
return streamMatcher.group();
266+
} else {
267+
throw new IllegalStateException(
268+
String.format("The passed in stream name does not match standard format %s", streamName));
213269
}
214270
}
215271

google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1/JsonStreamWriterTest.java

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,7 @@ public void testAppendOutOfRangeException() throws Exception {
391391
}
392392

393393
@Test
394-
public void testCreateDefaultStream() throws Exception {
394+
public void testCreateDefaultStream_withNoSchemaPassedIn() throws Exception {
395395
TableSchema tableSchema =
396396
TableSchema.newBuilder().addFields(0, TEST_INT).addFields(1, TEST_STRING).build();
397397
testBigQueryWrite.addResponse(
@@ -411,6 +411,28 @@ public void testCreateDefaultStream() throws Exception {
411411
}
412412
}
413413

414+
@Test
415+
public void testCreateDefaultStream_withNoClientPassedIn() throws Exception {
416+
TableSchema tableSchema =
417+
TableSchema.newBuilder().addFields(0, TEST_INT).addFields(1, TEST_STRING).build();
418+
testBigQueryWrite.addResponse(
419+
WriteStream.newBuilder()
420+
.setName(TEST_STREAM)
421+
.setLocation("aa")
422+
.setTableSchema(tableSchema)
423+
.build());
424+
try (JsonStreamWriter writer =
425+
JsonStreamWriter.newBuilder(TEST_TABLE, tableSchema)
426+
.setChannelProvider(channelProvider)
427+
.setCredentialsProvider(NoCredentialsProvider.create())
428+
.setExecutorProvider(InstantiatingExecutorProvider.newBuilder().build())
429+
.setEnableConnectionPool(true)
430+
.build()) {
431+
assertEquals("projects/p/datasets/d/tables/t/_default", writer.getStreamName());
432+
assertEquals("aa", writer.getLocation());
433+
}
434+
}
435+
414436
@Test
415437
public void testCreateDefaultStreamWrongLocation() throws Exception {
416438
TableSchema tableSchema =

google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1/StreamWriterTest.java

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -725,22 +725,20 @@ public void testInitialization_operationKind() throws Exception {
725725
}
726726

727727
@Test
728-
public void createStreamWithDifferentWhetherOwnsClient() throws Exception {
729-
StreamWriter streamWriter1 = getMultiplexingTestStreamWriter();
728+
public void testExtractDatasetName() throws Exception {
729+
Assert.assertEquals(
730+
StreamWriter.extractDatasetAndProjectName(
731+
"projects/project1/datasets/dataset2/tables/something"),
732+
"projects/project1/datasets/dataset2/");
730733

731-
assertThrows(
732-
IllegalArgumentException.class,
733-
new ThrowingRunnable() {
734-
@Override
735-
public void run() throws Throwable {
736-
StreamWriter.newBuilder(TEST_STREAM)
737-
.setWriterSchema(createProtoSchema())
738-
.setTraceId(TEST_TRACE_ID)
739-
.setLocation("US")
740-
.setEnableConnectionPool(true)
741-
.build();
742-
}
743-
});
734+
IllegalStateException ex =
735+
assertThrows(
736+
IllegalStateException.class,
737+
() -> {
738+
StreamWriter.extractDatasetAndProjectName(
739+
"wrong/projects/project1/wrong/datasets/dataset2/tables/something");
740+
});
741+
Assert.assertTrue(ex.getMessage().contains("The passed in stream name does not match"));
744742
}
745743

746744
// Timeout to ensure close() doesn't wait for done callback timeout.

0 commit comments

Comments
 (0)