8

I want to use TensorFlow's Dataset API to read a TFRecord file that contains lists of variable length. Here is my code.

def _int64_feature(value):
    """Wrap ``value`` in a ``tf.train.Feature`` holding an int64 list.

    value must be a numpy array.
    """
    int64_list = tf.train.Int64List(value=value)
    return tf.train.Feature(int64_list=int64_list)


def main1():
    """Write lists of differing length to a TFRecord file and read them back.

    The records are first written with ``TFRecordWriter``, then printed via
    ``tf_record_iterator``, and finally read through the Dataset API.
    """
    # Write an array to TFrecord.
    # a is an array which contains lists of variant length.
    a = np.array([[0, 54, 91, 153, 177],
                  [0, 50, 89, 147, 196],
                  [0, 38, 79, 157],
                  [0, 49, 89, 147, 177],
                  [0, 32, 73, 145]])

    writer = tf.python_io.TFRecordWriter('file')
    for i, x_train in enumerate(a):  # i = 0 ~ 4
        feature = {'i': _int64_feature(np.array([i])),
                   'data': _int64_feature(x_train)}

        # Create an example protocol buffer
        features = tf.train.Features(feature=feature)
        example = tf.train.Example(features=features)

        # Serialize to string and write on the file
        serialized = example.SerializeToString()
        writer.write(serialized)
    writer.close()

    # Check TFRecord file.
    for string_record in tf.python_io.tf_record_iterator(path='file'):
        example = tf.train.Example()
        example.ParseFromString(string_record)
        stored = example.features.feature
        i = (stored['i'].int64_list.value)
        data = (stored['data'].int64_list.value)
        print(i, data)

    # Use Dataset API to read the TFRecord file.
    def _parse_function(example_proto):
        """Parse one serialized Example into its 'i' and 'data' tensors."""
        keys_to_features = {'i' :tf.FixedLenFeature([], tf.int64),
                            'data':tf.FixedLenFeature([], tf.int64)}
        parsed_features = tf.parse_single_example(example_proto,
                                                  keys_to_features)
        return parsed_features['i'], parsed_features['data']

    ds = tf.data.TFRecordDataset('file')
    iterator = ds.map(_parse_function).make_one_shot_iterator()
    i, data = iterator.get_next()
    with tf.Session() as sess:
        print(i.eval())
        print(data.eval())

Check TFRecord file

[0] [0, 54, 91, 153, 177] [1] [0, 50, 89, 147, 196] [2] [0, 38, 79, 157] [3] [0, 49, 89, 147, 177] [4] [0, 32, 73, 145] 

But I got the following error when I tried to read the TFRecord file with the Dataset API.

tensorflow.python.framework.errors_impl.InvalidArgumentError: Name: , Key: data, Index: 0. Number of int64 values != expected. Values size: 5 but output shape: []

Thank you.
UPDATE: I tried reading the TFRecord file with the Dataset API using each of the following two snippets, but both of them failed.

def _parse_function(example_proto):
    """Parse one serialized Example.

    'i' is declared as a fixed-length scalar and 'data' as a
    variable-length feature; both parsed values are returned unchanged.
    """
    feature_spec = {'i' :tf.FixedLenFeature([], tf.int64),
                    'data':tf.VarLenFeature(tf.int64)}
    parsed = tf.parse_single_example(example_proto, feature_spec)
    return parsed['i'], parsed['data']


# Read the records back through the Dataset API.
ds = tf.data.TFRecordDataset('file')
iterator = ds.map(_parse_function).make_one_shot_iterator()
i, data = iterator.get_next()
with tf.Session() as sess:
    # Fetch both tensors in a single run call.
    print(sess.run([i, data]))

or

def _parse_function(example_proto):
    """Parse one serialized Example.

    Both 'i' and 'data' are declared as variable-length features; the
    parsed values are returned unchanged.
    """
    feature_spec = {'i' :tf.VarLenFeature(tf.int64),
                    'data':tf.VarLenFeature(tf.int64)}
    parsed = tf.parse_single_example(example_proto, feature_spec)
    return parsed['i'], parsed['data']


# Read the records back through the Dataset API.
ds = tf.data.TFRecordDataset('file')
iterator = ds.map(_parse_function).make_one_shot_iterator()
i, data = iterator.get_next()
with tf.Session() as sess:
    # Fetch both tensors in a single run call.
    print(sess.run([i, data]))

And the error:

Traceback (most recent call last): File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/tensor_util.py", line 468, in make_tensor_proto str_values = [compat.as_bytes(x) for x in proto_values] File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/tensor_util.py", line 468, in <listcomp> str_values = [compat.as_bytes(x) for x in proto_values] File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/util/compat.py", line 65, in as_bytes (bytes_or_text,)) TypeError: Expected binary or unicode string, got <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x...>

During handling of the above exception, another exception occurred:

Traceback (most recent call last): File "2tfrecord.py", line 126, in main1() File "2tfrecord.py", line 72, in main1 iterator = ds.map(_parse_function).make_one_shot_iterator() File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 712, in map return MapDataset(self, map_func) File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 1385, in init self._map_func.add_to_graph(ops.get_default_graph()) File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/function.py", line 486, in add_to_graph self._create_definition_if_needed() File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/function.py", line 321, in _create_definition_if_needed self._create_definition_if_needed_impl() File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/function.py", line 338, in _create_definition_if_needed_impl outputs = self._func(*inputs) File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 1376, in tf_map_func flattened_ret = [ops.convert_to_tensor(t) for t in nest.flatten(ret)] File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 1376, in flattened_ret = [ops.convert_to_tensor(t) for t in nest.flatten(ret)] File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 836, in convert_to_tensor as_ref=False) File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 926, in internal_convert_to_tensor ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref) File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/constant_op.py", line 229, in _constant_tensor_conversion_function return constant(v, dtype=dtype, name=name) File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/constant_op.py", line 208, in constant value, dtype=dtype, shape=shape, verify_shape=verify_shape)) File 
"/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/tensor_util.py", line 472, in make_tensor_proto "supported type." % (type(values), values)) TypeError: Failed to convert object of type <class 'tensorflow.python.framework.sparse_tensor.SparseTensor'> to Tensor. Contents: SparseTensor(indices=Tensor("ParseSingleExample/Slice_Indices_i:0", shape=(?, 1), dtype=int64), values=Tensor("ParseSingleExample/ParseExample/ParseExample:3", shape=(?,), dtype=int64), dense_shape=Tensor("ParseSingleExample/Squeeze_Shape_i:0", shape=(1,), dtype=int64)). Consider casting elements to a supported type.

Python version: 3.5.2
Tensorflow version: 1.4.1

2 Answers 2

12

After hours of searching and experimenting, I believe I have found the answer. Below is my code.

def _int64_feature(value):
    """Wrap ``value`` in a ``tf.train.Feature`` holding an int64 list.

    value must be a numpy array. It is flattened before being stored.
    """
    return tf.train.Feature(
        int64_list=tf.train.Int64List(value=value.flatten()))


# Write an array to TFRecord.
# a is an array which contains lists of variant length. dtype=object is
# given explicitly because recent NumPy releases refuse to build an array
# from ragged nested lists without it; the result is a 1-D array of lists.
a = np.array([[0, 54, 91, 153, 177],
              [0, 50, 89, 147, 196],
              [0, 38, 79, 157],
              [0, 49, 89, 147, 177],
              [0, 32, 73, 145]], dtype=object)

# The with-block closes the writer even if serializing a record fails.
with tf.python_io.TFRecordWriter('file') as writer:
    for i, row in enumerate(a):  # i = 0 ~ 4
        x_train = np.array(row)
        feature = {'i': _int64_feature(np.array([i])),
                   'data': _int64_feature(x_train)}

        # Create an example protocol buffer
        example = tf.train.Example(
            features=tf.train.Features(feature=feature))

        # Serialize to string and write on the file
        writer.write(example.SerializeToString())

# Check TFRecord file.
record_iterator = tf.python_io.tf_record_iterator(path='file')
for string_record in record_iterator:
    example = tf.train.Example()
    example.ParseFromString(string_record)
    i = (example.features.feature['i'].int64_list.value)
    data = (example.features.feature['data'].int64_list.value)
    print(i, data)

# Use Dataset API to read the TFRecord file.
filenames = ["file"]
dataset = tf.data.TFRecordDataset(filenames)


def _parse_function(example_proto):
    """Parse one serialized Example into dense 'i' and 'data' tensors.

    Both features are declared variable-length, so parsing yields sparse
    tensors; each one is converted to a dense tensor before it is returned.
    """
    keys_to_features = {'i':tf.VarLenFeature(tf.int64),
                        'data':tf.VarLenFeature(tf.int64)}
    parsed_features = tf.parse_single_example(example_proto, keys_to_features)
    return (tf.sparse_tensor_to_dense(parsed_features['i']),
            tf.sparse_tensor_to_dense(parsed_features['data']))


# Parse the record into tensors.
dataset = dataset.map(_parse_function)
# Shuffle the dataset
dataset = dataset.shuffle(buffer_size=1)
# Repeat the input indefinitely
dataset = dataset.repeat()
# Generate batches
dataset = dataset.batch(1)
# Create a one-shot iterator
iterator = dataset.make_one_shot_iterator()
i, data = iterator.get_next()
with tf.Session() as sess:
    # Each run call pulls the next batch, so this prints three records.
    for _ in range(3):
        print(sess.run([i, data]))

There are a few things to note.
1. This SO question helps a lot.
2. tf.VarLenFeature returns a SparseTensor, so it is necessary to convert it to a dense tensor with tf.sparse_tensor_to_dense.
3. In my code, parse_single_example() can't be replaced with parse_example(), and this bugged me for a day. I don't know why parse_example() doesn't work. If anyone knows the reason, please enlighten me.

Sign up to request clarification or add additional context in comments.

3 Comments

Ad3 @Lion Lai. Because the dataset.map function accepts a function that operates on a single TFRecord example, not on a collection of them (a collection of examples forms a whole TFRecord file). Even if you used parse_example with the number of examples set to one, it would still expect a list as its parameter, but it would be getting a single example.
@Pietrko I don't understand what you mean. Can you show me how to use parse_example() in my case?
I mean you're doing it right by using parse_single_example with the map function. A TFRecord file contains a collection of examples. What Dataset.map does is apply the function you pass to it to every element of the TFRecord file.
2

The error is very simple. Your data is not a FixedLenFeature; it is a VarLenFeature. Replace your line:

 'data':tf.FixedLenFeature([], tf.int64)} 

with

 'data':tf.VarLenFeature(tf.int64)} 

Also, when you call print(i.eval()) and print(data.eval()) you are calling the iterator twice. The first print will print 0, but the second one will print the value of the second row [ 0, 50, 89, 147, 196]. You can do print(sess.run([i, data])) to get i and data from the same row.

1 Comment

Hi, did you run the code successfully? I encountered another error when using 'data':tf.VarLenFeature(tf.int64)}. Pls see my updated question. thx

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.