I am training a segmentation model with TensorFlow, and after approximately 6 seconds the training stopped with the following error message:
```
Epoch 1/100
2023-07-17 08:14:20.618828: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_3/dropout_15/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
8278/8278 [==============================] - 6s 198us/step - loss: 2.1831 - accuracy: 0.8421 - val_loss: 2.2880 - val_accuracy: 0.8349
```

I am using a custom data generator (DataGen) to load and preprocess the input images and masks. The error seems to come from the layout optimization of the model, particularly around a dropout layer, and I do not understand why the size of the values does not match the size of the permutation. I suspect the data generator is the cause.
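The failing node name points at Grappler's layout optimizer (the NHWC-to-NCHW transpose pass). As a possible workaround I considered turning that pass off, though I would rather understand the root cause; a minimal sketch, using the standard `tf.config.optimizer` API (I have not verified that it resolves the issue):

```python
import tensorflow as tf

# Disable Grappler's layout optimizer, the pass that emits the
# INVALID_ARGUMENT message above. This silences that optimization path,
# but it may not address whatever makes the shapes inconsistent.
tf.config.optimizer.set_experimental_options({'layout_optimizer': False})
```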
I have included the relevant code snippets below:
```python
import os

import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input


# Data generator
class DataGen(tf.keras.utils.Sequence):
    def __init__(self, path_input, path_mask, class_name='person',
                 batch_size=8, image_size=128):
        self.ids = os.listdir(path_mask)
        self.path_input = path_input
        self.path_mask = path_mask
        self.class_name = class_name
        self.batch_size = batch_size
        self.image_size = image_size
        self.on_epoch_end()

    def __load__(self, id_name):
        image_path = os.path.join(self.path_input, id_name)
        mask_path = os.path.join(self.path_mask, id_name)

        image = cv2.imread(image_path, 1)  # flag 1 = cv2.IMREAD_COLOR (3-channel BGR)
        image = cv2.resize(image, (self.image_size, self.image_size))  # resize before feeding the network

        mask = cv2.imread(mask_path, -1)  # flag -1 = cv2.IMREAD_UNCHANGED
        mask = cv2.resize(mask, (self.image_size, self.image_size))
        mask = mask.reshape((self.image_size, self.image_size, 1))

        # normalize to [0, 1]
        image = image / 255.0
        mask = mask / 255.0

        return image, mask

    def __getitem__(self, index):
        id_name = self.ids[index]
        image, mask = self.__load__(id_name)

        if image is not None and mask is not None:
            images = np.expand_dims(image, axis=0)
            masks = np.expand_dims(mask, axis=0)
        else:
            # note: these fallback arrays lack the leading batch dimension
            images = np.empty((self.image_size, self.image_size, 3))
            masks = np.empty((self.image_size, self.image_size, 1))

        return images, masks

    def on_epoch_end(self):
        pass

    def __len__(self):
        return len(self.ids)


# Configure model
image_size = 128
epochs = 100
batch_size = 10

# Create data generators
train_gen = DataGen(
    path_input="/kaggle/input/coco-2014-dataset-for-yolov3/coco2014/images/train2014",
    path_mask="/kaggle/working/mask_train_2014",
    batch_size=batch_size, image_size=image_size)
val_gen = DataGen(
    path_input="/kaggle/input/coco-2014-dataset-for-yolov3/coco2014/images/val2014",
    path_mask="/kaggle/working/mask_val_2014",
    batch_size=batch_size, image_size=image_size)

# Define model architecture
inputs = Input(shape=(128, 128, 3))
# ...

# Compile and train the model
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)  # the `lr` alias is deprecated
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
model.fit(train_gen, validation_data=val_gen,
          steps_per_epoch=train_steps, epochs=epochs)
```

Any insights or suggestions on how to resolve this issue would be greatly appreciated.
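To test my suspicion about the data generator, this is the sanity check I would run on what it yields per step (my understanding of `tf.keras.utils.Sequence` is that each `__getitem__` call should return one full batch of `batch_size` samples):

```python
# Inspect one item from the generator. With the __getitem__ above this is a
# "batch" containing a single image, since only self.ids[index] is loaded.
images, masks = train_gen[0]
print(images.shape, images.dtype)  # (1, 128, 128, 3) float64, not (batch_size, 128, 128, 3)
print(masks.shape, masks.dtype)    # (1, 128, 128, 1) float64
print(len(train_gen))              # one step per mask file, not len(self.ids) // batch_size
```

If that is the problem, it would also explain the timing in the log: each of the 8278 steps processes one image instead of a batch of 10.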
I am using the COCO 2014 dataset and TensorFlow version 2.12.0.