Python TensorFlow, infinite loop in the first epoch

Question

I have a Python program that uses TensorFlow. When I run it, the first epoch trains to completion, but the second epoch never starts, and I'm not sure why.

# -*- coding: utf-8 -*- !pwd """Doit être dans '/content' Importation des datasets """ !pip install tqdm import os import requests from tqdm import tqdm # Importation de tqdm pour la barre de progression # Liste des URLs des fichiers à télécharger urls = [ "https://service.scedc.caltech.edu/ftp/Ross_FinalTrainedModels/scsn_p_2000_2017_6sec_0.5r_pick_test.hdf5", "https://service.scedc.caltech.edu/ftp/Ross_FinalTrainedModels/scsn_p_2000_2017_6sec_0.5r_pick_train.hdf5" ] # Fonction pour télécharger un fichier avec suivi de l'avancement def download_file(url): # Récupérer le nom du fichier à partir de l'URL file_name = url.split("/")[-1] # Vérifier si le fichier existe déjà dans le répertoire courant if os.path.exists(file_name): print(f"Le fichier {file_name} existe déjà.") return # Télécharger le fichier print(f"Téléchargement du fichier {file_name}...") try: # Effectuer la requête GET pour récupérer le fichier response = requests.get(url, stream=True) response.raise_for_status() # Vérifie si la requête a réussi (code 200) # Obtenir la taille du fichier à télécharger total_size = int(response.headers.get('Content-Length', 0)) # Utiliser tqdm pour afficher une barre de progression with open(file_name, 'wb') as f: # Télécharger par morceaux de 1024 octets for data in tqdm(response.iter_content(chunk_size=1024), total=total_size // 1024, # Calculer le nombre d'itérations unit='KB', # Unité de la barre de progression desc=file_name): # Le nom du fichier comme description de la barre f.write(data) print(f"Fichier {file_name} téléchargé avec succès.") except requests.exceptions.RequestException as e: print(f"Erreur lors du téléchargement de {file_name}: {e}") # Télécharger chaque fichier for url in urls: download_file(url) !ls -l """Les fichiers suivants doivent apparaitre : scsn_p_2000_2017_6sec_0.5r_pick_test.hdf5 scsn_p_2000_2017_6sec_0.5r_pick_train.hdf5 Analyse des données contenues dans les datasets """ !pip install h5py import h5py def 
afficher_contenu_hdf5(fichier_hdf5): try: # Ouvrir le fichier HDF5 en mode lecture with h5py.File(fichier_hdf5, 'r') as f: print(f"Contenu du fichier HDF5: {fichier_hdf5}\n") # Fonction récursive pour afficher le contenu de chaque groupe def explorer_groupe(groupe, indent=0): # Afficher le nom du groupe print(' ' * indent + f"Group: {groupe}") # Explorer les datasets dans ce groupe for nom, dataset in groupe.items(): if isinstance(dataset, h5py.Dataset): print(' ' * (indent + 1) + f"Dataset: {nom}, Shape: {dataset.shape}, Type: {dataset.dtype}") # Optionnel : afficher les premiers éléments du dataset print(' ' * (indent + 2) + f"Premiers éléments: {dataset[:5]}") # Affiche les 5 premiers éléments # Explorer les sous-groupes for nom, sous_groupe in groupe.items(): if isinstance(sous_groupe, h5py.Group): explorer_groupe(sous_groupe, indent + 1) # Lancer l'exploration du fichier principal explorer_groupe(f) except Exception as e: print(f"Erreur lors de l'ouverture du fichier HDF5: {e}") # Exemple d'utilisation avec un fichier HDF5 (remplacer par ton propre fichier) # fichier = 'ton_fichier.hdf5' # Remplace par le chemin vers ton fichier HDF5 # afficher_contenu_hdf5(fichier) fichier = 'scsn_p_2000_2017_6sec_0.5r_pick_train.hdf5' afficher_contenu_hdf5(fichier) fichier = 'scsn_p_2000_2017_6sec_0.5r_pick_test.hdf5' afficher_contenu_hdf5(fichier) """Importation des bibliothèques""" import tensorflow as tf from tensorflow.keras import layers, models from sklearn.preprocessing import StandardScaler import numpy as np import h5py import datetime """Importation des données pour utilisation dans le modèle""" def data_generator(file_path, batch_size=10000, scaler_X=None): with h5py.File(file_path, 'r') as file: total_samples = file['X'].shape[0] X = file['X'] Y = file['fm'] print(f"Total samples: {total_samples}") # Affichez la taille totale while True: for start in range(0, total_samples, batch_size): end = min(start + batch_size, total_samples) X_batch = X[start:end] Y_batch = 
Y[start:end] if scaler_X is None: scaler_X = StandardScaler() X_batch_scaled = scaler_X.fit_transform(X_batch) else: X_batch_scaled = scaler_X.transform(X_batch) print(f"Batch: {start}-{end} (X_batch shape: {X_batch_scaled.shape})") # Affichez la taille du lot yield X_batch_scaled, Y_batch # Calcul dynamique de `steps_per_epoch` def get_steps_per_epoch(file_path, batch_size=10000): with h5py.File(file_path, 'r') as file: total_samples = file['X'].shape[0] # Nombre total d'échantillons steps_per_epoch = total_samples // batch_size if total_samples % batch_size != 0: # Si la division n'est pas exacte steps_per_epoch += 1 # Ajouter une étape supplémentaire pour les restes return steps_per_epoch """Création de la fonction qui génère le modèle""" def create_model(): model = tf.keras.models.Sequential([ tf.keras.layers.Input(shape=(600,), name='layers_input'), tf.keras.layers.Dense(128, activation='relu', name='layers_dense'), tf.keras.layers.Dropout(0.2, name='layers_dropout'), tf.keras.layers.Dense(64, activation='relu', name='layers_dense_2'), tf.keras.layers.Dropout(0.2, name='layers_dropout_2'), tf.keras.layers.Dense(3, activation='softmax', name='layers_output') ]) return model model = create_model() model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) model.summary() # Vérifiez si le modèle est compilé correctement if model.optimizer: print(f"Optimiseur : {model.optimizer}") else: print("Le modèle n'est pas compilé correctement") """Initialisation de la sauvegarde des logs""" # Commented out IPython magic to ensure Python compatibility. # Load the TensorBoard notebook extension # %load_ext tensorboard # Commented out IPython magic to ensure Python compatibility. 
# Reload TensorBoard extension # %reload_ext tensorboard # Clear any logs from previous runs !rm -rf ./logs/ log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=0) train_file_path = 'scsn_p_2000_2017_6sec_0.5r_pick_train.hdf5' steps_per_epoch = get_steps_per_epoch(train_file_path, batch_size=10000) print(f"Steps per epoch: {steps_per_epoch}") # Affichez les étapes par époque test_file_path = 'scsn_p_2000_2017_6sec_0.5r_pick_test.hdf5' scaler_X = StandardScaler() model.fit( data_generator(train_file_path, batch_size=1000), steps_per_epoch=steps_per_epoch//2, epochs=2, validation_data=data_generator(test_file_path, batch_size=1000), verbose=1, callbacks=[tensorboard_callback] ) # Commented out IPython magic to ensure Python compatibility. # %tensorboard --logdir logs/fit

Here's an example of what I get:

Total samples: 3635436 Batch: 0-1000 (X_batch shape: (1000, 600)) Batch: 1000-2000 (X_batch shape: (1000, 600)) Epoch 1/2 1/182 ━━━━━━━━━━━━━━━━━━━━ 22s 126ms/step - accuracy: 0.7280 - loss: 0.6024Batch: 2000-3000 (X_batch shape: (1000, 600)) ... ... 179/182 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step - accuracy: 0.7474 - loss: 0.6028Batch: 182000-183000 (X_batch shape: (1000, 600)) Batch: 183000-184000 (X_batch shape: (1000, 600)) Batch: 184000-185000 (X_batch shape: (1000, 600)) 181/182 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step - accuracy: 0.7475 - loss: 0.6028Batch: 185000-186000 (X_batch shape: (1000, 600)) 182/182 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step - accuracy: 0.7475 - loss: 0.6027Total samples: 1211812 Batch: 0-1000 (X_batch shape: (1000, 600)) Batch: 1000-2000 (X_batch shape: (1000, 600)) Batch: 2000-3000 (X_batch shape: (1000, 600)) Batch: 3000-4000 (X_batch shape: (1000, 600)) Batch: 4000-5000 (X_batch shape: (1000, 600)) Batch: 5000-6000 (X_batch shape: (1000, 600)) Batch: 6000-7000 (X_batch shape: (1000, 600)) Batch: 7000-8000 (X_batch shape: (1000, 600)) Batch: 8000-9000 (X_batch shape: (1000, 600)) Batch: 9000-10000 (X_batch shape: (1000, 600)) Batch: 10000-11000 (X_batch shape: (1000, 600)) Batch: 11000-12000 (X_batch shape: (1000, 600)) ... (infinit)

I think the problem comes from the data loading, but I can't tell where exactly, because the epoch should normally stop on its own.

I'd like my program to be able to train for any number of epochs. I tried using the `steps_per_epoch` parameter to end each epoch, but my attempts didn't work.

Collectives™ on Stack Overflow

Python TensorFlow, infinite loop in the first epoch

0

Hot Network Questions

Collectives™ on Stack Overflow

0

Know someone who can answer? Share a link to this question via email, Twitter, or Facebook.