Skip to content

Commit 307874f

Browse files
authored
fix: fix CloudBigtableIO scan to take version and filters (#3901)
* WIP: TODO: fix row adaptor and test * WIP: todo: continue fixing read test, refactor CloudBigtableScanConfiguration * fixed tests * clean up debugging logs * update * fix tests and update * fix class name * remove debug logging * add some doc, reformat * clean up code * refactor * add some docs * fix test update * refactor * refactor 2 * fix some typos * remove some classes * remove helper methods
1 parent f4c7833 commit 307874f

File tree

14 files changed

+506
-221
lines changed

14 files changed

+506
-221
lines changed
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
* Copyright 2022 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.google.cloud.bigtable.hbase;
17+
18+
import com.google.api.core.InternalApi;
19+
import com.google.bigtable.v2.ReadRowsRequest;
20+
import org.apache.hadoop.hbase.client.Scan;
21+
22+
/** A wrapper class that wraps a Bigtable {@link ReadRowsRequest}. */
23+
@InternalApi
24+
public class BigtableFixedProtoScan extends Scan {
25+
26+
private ReadRowsRequest request;
27+
28+
public BigtableFixedProtoScan(ReadRowsRequest request) {
29+
this.request = request;
30+
}
31+
32+
public ReadRowsRequest getRequest() {
33+
return request;
34+
}
35+
}

bigtable-client-core-parent/bigtable-hbase/src/main/java/com/google/cloud/bigtable/hbase/adapters/HBaseRequestAdapter.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ public RowMutationEntry adaptEntry(Delete delete) {
152152
public Query adapt(Get get) {
153153
ReadHooks readHooks = new DefaultReadHooks();
154154
Query query = Query.create(getTableId());
155-
Adapters.GET_ADAPTER.adapt(get, readHooks, query);
155+
query = Adapters.GET_ADAPTER.adapt(get, readHooks, query);
156156
readHooks.applyPreSendHook(query);
157157
return query;
158158
}
@@ -166,7 +166,7 @@ public Query adapt(Get get) {
166166
public Query adapt(Scan scan) {
167167
ReadHooks readHooks = new DefaultReadHooks();
168168
Query query = Query.create(getTableId());
169-
Adapters.SCAN_ADAPTER.adapt(scan, readHooks, query);
169+
query = Adapters.SCAN_ADAPTER.adapt(scan, readHooks, query);
170170
readHooks.applyPreSendHook(query);
171171
return query;
172172
}

bigtable-client-core-parent/bigtable-hbase/src/main/java/com/google/cloud/bigtable/hbase/adapters/read/GetAdapter.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,11 @@ public GetAdapter(ScanAdapter scanAdapter) {
5555

5656
/** {@inheritDoc} */
5757
@Override
58-
public void adapt(Get operation, ReadHooks readHooks, Query query) {
58+
public Query adapt(Get operation, ReadHooks readHooks, Query query) {
5959
Scan operationAsScan = new Scan(addKeyOnlyFilter(operation));
6060
scanAdapter.throwIfUnsupportedScan(operationAsScan);
6161

62-
query
62+
return query
6363
.filter(scanAdapter.buildFilter(operationAsScan, readHooks))
6464
.rowKey(ByteString.copyFrom(operation.getRow()));
6565
}

bigtable-client-core-parent/bigtable-hbase/src/main/java/com/google/cloud/bigtable/hbase/adapters/read/ReadOperationAdapter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,5 +33,5 @@ public interface ReadOperationAdapter<T extends Operation> {
3333
* @param readHooks a {@link ReadHooks} object.
3434
* @param query a {@link Query} object.
3535
*/
36-
void adapt(T request, ReadHooks readHooks, Query query);
36+
Query adapt(T request, ReadHooks readHooks, Query query);
3737
}

bigtable-client-core-parent/bigtable-hbase/src/main/java/com/google/cloud/bigtable/hbase/adapters/read/ScanAdapter.java

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import com.google.cloud.bigtable.data.v2.models.Filters.TimestampRangeFilter;
2626
import com.google.cloud.bigtable.data.v2.models.Query;
2727
import com.google.cloud.bigtable.hbase.BigtableExtendedScan;
28+
import com.google.cloud.bigtable.hbase.BigtableFixedProtoScan;
2829
import com.google.cloud.bigtable.hbase.adapters.filters.FilterAdapter;
2930
import com.google.cloud.bigtable.hbase.adapters.filters.FilterAdapterContext;
3031
import com.google.cloud.bigtable.hbase.util.RowKeyWrapper;
@@ -51,7 +52,6 @@
5152
*/
5253
@InternalApi("For internal usage only")
5354
public class ScanAdapter implements ReadOperationAdapter<Scan> {
54-
5555
private static final int UNSET_MAX_RESULTS_PER_COLUMN_FAMILY = -1;
5656
private static final boolean OPEN_CLOSED_AVAILABLE = isOpenClosedAvailable();
5757
private static final boolean LIMIT_AVAILABLE = isLimitAvailable();
@@ -156,14 +156,18 @@ private List<Filters.Filter> buildStartFilter(Scan scan) {
156156

157157
/** {@inheritDoc} */
158158
@Override
159-
public void adapt(Scan scan, ReadHooks readHooks, Query query) {
160-
throwIfUnsupportedScan(scan);
161-
162-
toByteStringRange(scan, query);
163-
query.filter(buildFilter(scan, readHooks));
159+
public Query adapt(Scan scan, ReadHooks readHooks, Query query) {
160+
if (scan instanceof BigtableFixedProtoScan) {
161+
return Query.fromProto(((BigtableFixedProtoScan) scan).getRequest());
162+
} else {
163+
throwIfUnsupportedScan(scan);
164+
toByteStringRange(scan, query);
165+
query.filter(buildFilter(scan, readHooks));
164166

165-
if (LIMIT_AVAILABLE && scan.getLimit() > 0) {
166-
query.limit(scan.getLimit());
167+
if (LIMIT_AVAILABLE && scan.getLimit() > 0) {
168+
query.limit(scan.getLimit());
169+
}
170+
return query;
167171
}
168172
}
169173

bigtable-client-core-parent/bigtable-hbase/src/test/java/com/google/cloud/bigtable/hbase/adapters/read/TestScanAdapter.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import com.google.cloud.bigtable.data.v2.models.Filters;
2626
import com.google.cloud.bigtable.data.v2.models.Query;
2727
import com.google.cloud.bigtable.hbase.BigtableExtendedScan;
28+
import com.google.cloud.bigtable.hbase.BigtableFixedProtoScan;
2829
import com.google.cloud.bigtable.hbase.adapters.filters.FilterAdapter;
2930
import com.google.cloud.bigtable.hbase.adapters.filters.FilterAdapterContext;
3031
import com.google.cloud.bigtable.hbase.util.ByteStringer;
@@ -448,4 +449,17 @@ public void testMaxVersionsWithTimeRanges() throws IOException {
448449
.filter(FILTERS.key().regex("blah\\C*"));
449450
Assert.assertEquals(expected.toProto(), query.toProto(requestContext).getFilter());
450451
}
452+
453+
@Test
454+
public void testFixedRequest() {
455+
BigtableFixedProtoScan fixedProto =
456+
new BigtableFixedProtoScan(query.limit(10).toProto(requestContext));
457+
458+
Query placeholder = Query.create("PLACEHOLDER");
459+
Query newQuery = scanAdapter.adapt(fixedProto, throwingReadHooks, placeholder);
460+
461+
Query expected = Query.create("tableId").limit(10);
462+
463+
Assert.assertEquals(expected.toProto(requestContext), newQuery.toProto(requestContext));
464+
}
451465
}

bigtable-dataflow-parent/bigtable-beam-import/src/main/java/com/google/cloud/bigtable/beam/TemplateUtils.java

Lines changed: 6 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -15,27 +15,12 @@
1515
*/
1616
package com.google.cloud.bigtable.beam;
1717

18-
import static com.google.cloud.bigtable.beam.CloudBigtableScanConfiguration.PLACEHOLDER_APP_PROFILE_ID;
19-
import static com.google.cloud.bigtable.beam.CloudBigtableScanConfiguration.PLACEHOLDER_INSTANCE_ID;
20-
import static com.google.cloud.bigtable.beam.CloudBigtableScanConfiguration.PLACEHOLDER_PROJECT_ID;
21-
import static com.google.cloud.bigtable.beam.CloudBigtableScanConfiguration.PLACEHOLDER_TABLE_ID;
22-
2318
import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
24-
import com.google.bigtable.repackaged.com.google.bigtable.v2.ReadRowsRequest;
25-
import com.google.bigtable.repackaged.com.google.cloud.bigtable.data.v2.internal.RequestContext;
26-
import com.google.bigtable.repackaged.com.google.cloud.bigtable.data.v2.models.Query;
2719
import com.google.cloud.bigtable.beam.sequencefiles.ExportJob.ExportOptions;
2820
import com.google.cloud.bigtable.beam.sequencefiles.ImportJob.ImportOptions;
2921
import com.google.cloud.bigtable.beam.validation.SyncTableJob.SyncTableOptions;
3022
import com.google.cloud.bigtable.hbase.BigtableOptionsFactory;
31-
import com.google.cloud.bigtable.hbase.adapters.Adapters;
32-
import com.google.cloud.bigtable.hbase.adapters.read.DefaultReadHooks;
33-
import com.google.cloud.bigtable.hbase.adapters.read.ReadHooks;
34-
import java.io.Serializable;
35-
import java.nio.charset.CharacterCodingException;
3623
import org.apache.beam.sdk.options.ValueProvider;
37-
import org.apache.hadoop.hbase.client.Scan;
38-
import org.apache.hadoop.hbase.filter.ParseFilter;
3924

4025
/**
4126
* !!! DO NOT TOUCH THIS CLASS !!!
@@ -92,77 +77,8 @@ public static CloudBigtableTableConfiguration buildSyncTableConfig(SyncTableOpti
9277
return builder.build();
9378
}
9479

95-
/** Provides a request that is constructed with some attributes. */
96-
private static class RequestValueProvider
97-
implements ValueProvider<ReadRowsRequest>, Serializable {
98-
private final ValueProvider<String> start;
99-
private final ValueProvider<String> stop;
100-
private final ValueProvider<Integer> maxVersion;
101-
private final ValueProvider<String> filter;
102-
private ReadRowsRequest cachedRequest;
103-
104-
RequestValueProvider(ExportOptions options) {
105-
this.start = options.getBigtableStartRow();
106-
this.stop = options.getBigtableStopRow();
107-
this.maxVersion = options.getBigtableMaxVersions();
108-
this.filter = options.getBigtableFilter();
109-
}
110-
111-
@Override
112-
public ReadRowsRequest get() {
113-
if (cachedRequest == null) {
114-
Scan scan = new Scan();
115-
if (start.get() != null && !start.get().isEmpty()) {
116-
scan.setStartRow(start.get().getBytes());
117-
}
118-
if (stop.get() != null && !stop.get().isEmpty()) {
119-
scan.setStopRow(stop.get().getBytes());
120-
}
121-
if (maxVersion.get() != null) {
122-
scan.setMaxVersions(maxVersion.get());
123-
}
124-
if (filter.get() != null && !filter.get().isEmpty()) {
125-
try {
126-
scan.setFilter(new ParseFilter().parseFilterString(filter.get()));
127-
} catch (CharacterCodingException e) {
128-
throw new RuntimeException(e);
129-
}
130-
}
131-
132-
ReadHooks readHooks = new DefaultReadHooks();
133-
Query query = Query.create(PLACEHOLDER_TABLE_ID);
134-
Adapters.SCAN_ADAPTER.adapt(scan, readHooks, query);
135-
readHooks.applyPreSendHook(query);
136-
RequestContext requestContext =
137-
RequestContext.create(
138-
PLACEHOLDER_PROJECT_ID, PLACEHOLDER_INSTANCE_ID, PLACEHOLDER_APP_PROFILE_ID);
139-
140-
cachedRequest =
141-
query.toProto(requestContext).toBuilder().setTableName("").setAppProfileId("").build();
142-
}
143-
return cachedRequest;
144-
}
145-
146-
@Override
147-
public boolean isAccessible() {
148-
return start.isAccessible()
149-
&& stop.isAccessible()
150-
&& maxVersion.isAccessible()
151-
&& filter.isAccessible();
152-
}
153-
154-
@Override
155-
public String toString() {
156-
if (isAccessible()) {
157-
return String.valueOf(get());
158-
}
159-
return CloudBigtableConfiguration.VALUE_UNAVAILABLE;
160-
}
161-
}
162-
16380
/** Builds CloudBigtableScanConfiguration from input runtime parameters for export job. */
16481
public static CloudBigtableScanConfiguration buildExportConfig(ExportOptions options) {
165-
ValueProvider<ReadRowsRequest> request = new RequestValueProvider(options);
16682
CloudBigtableScanConfiguration.Builder configBuilder =
16783
new CloudBigtableScanConfiguration.Builder()
16884
.withProjectId(options.getBigtableProject())
@@ -171,7 +87,12 @@ public static CloudBigtableScanConfiguration buildExportConfig(ExportOptions opt
17187
.withAppProfileId(options.getBigtableAppProfileId())
17288
.withConfiguration(
17389
BigtableOptionsFactory.CUSTOM_USER_AGENT_KEY, "SequenceFileExportJob")
174-
.withRequest(request);
90+
.withScan(
91+
new ScanValueProvider(
92+
options.getBigtableStartRow(),
93+
options.getBigtableStopRow(),
94+
options.getBigtableMaxVersions(),
95+
options.getBigtableFilter()));
17596

17697
return configBuilder.build();
17798
}

bigtable-dataflow-parent/bigtable-hbase-beam/src/main/java/com/google/cloud/bigtable/beam/CloudBigtableConfiguration.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,30 @@ public String getInstanceId() {
189189
return configuration.get(BigtableOptionsFactory.INSTANCE_ID_KEY).get();
190190
}
191191

192+
/**
193+
* Gets the value provider for project id.
194+
*
195+
* @return The value provider for project id.
196+
*/
197+
ValueProvider<String> getProjectIdValueProvider() {
198+
return configuration.get(BigtableOptionsFactory.PROJECT_ID_KEY);
199+
}
200+
201+
/**
202+
* Gets the value provider for instance id.
203+
*
204+
* @return The value provider for instance id.
205+
*/
206+
ValueProvider<String> getInstanceIdValueProvider() {
207+
return configuration.get(BigtableOptionsFactory.INSTANCE_ID_KEY);
208+
}
209+
192210
/** Get the Cloud Bigtable App Profile id. */
193211
public String getAppProfileId() {
212+
if (configuration.get(BigtableOptionsFactory.APP_PROFILE_ID_KEY) == null
213+
|| configuration.get(BigtableOptionsFactory.APP_PROFILE_ID_KEY).get() == null) {
214+
return "default";
215+
}
194216
return configuration.get(BigtableOptionsFactory.APP_PROFILE_ID_KEY).get();
195217
}
196218

0 commit comments

Comments
 (0)