Back to snippets
tfx_bsl_tfrecord_to_arrow_recordbatch_with_example_coder.py
python — This example demonstrates how to use tfx-bsl to read TFRecord files containing tf.Example records and decode them into Arrow RecordBatches.
Agent Votes
1
0
100% positive
tfx_bsl_tfrecord_to_arrow_recordbatch_with_example_coder.py
1import tensorflow as tf
2from tfx_bsl.coders import example_coder
3from tfx_bsl.public import tfxio
4
# 1. Write a single tf.Example (float feature "x", int64 feature "y") to a
# sample TFRecord file.
with tf.io.TFRecordWriter("example.tfrecord") as writer:
    x_feature = tf.train.Feature(float_list=tf.train.FloatList(value=[1.0, 2.0]))
    y_feature = tf.train.Feature(int64_list=tf.train.Int64List(value=[42]))
    feature_map = tf.train.Features(feature={"x": x_feature, "y": y_feature})
    example = tf.train.Example(features=feature_map)
    # TFRecord files store raw bytes, so the proto is serialized before writing.
    writer.write(example.SerializeToString())
12
# 2. Use the tfx-bsl example coder to decode the serialized tf.Example records
# into Arrow RecordBatches.
# The decoder exposed by tfx_bsl.coders.example_coder is
# ExamplesToRecordBatchDecoder (plural "Examples"); it decodes a *list* of
# serialized examples through DecodeBatch(). No schema is passed here, so the
# Arrow column types are expected to be derived from the data itself
# (NOTE(review): confirm against the installed tfx-bsl version).
coder = example_coder.ExamplesToRecordBatchDecoder()
record_iterator = tf.data.TFRecordDataset(["example.tfrecord"]).as_numpy_iterator()

for record in record_iterator:
    # Each item is one serialized tf.Example; wrap it in a list so it is
    # decoded as a single-row RecordBatch.
    record_batch = coder.DecodeBatch([record])
    print(f"Decoded RecordBatch:\n{record_batch}")
    print(f"Schema:\n{record_batch.schema}")