I trained a model for sequence classification using transformers (BertForSequenceClassification) and I get the error:
Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument index in method wrapper__index_select)
I don't really get where is the problem, if it's on my model, on how I tokenize the data, or what.
Here is my code:
LOADING THE PRETRAINED MODEL
model_state_dict = torch.load("../MODELOS/TRANSFORMERS/TransformersNormal", map_location='cpu') #Doesnt work with map_location='cuda:0' neither model = BertForSequenceClassification.from_pretrained(pretrained_model_name_or_path="bert-base-uncased", state_dict=model_state_dict, cache_dir='./data') CREATING DATALOAD
def crearDataLoad(dfv,tokenizer): dft=dfv # usamos el del validacion para que nos salga los resultados y no tener que cambiar mucho codigo #validation=dfv['text'] validation=dfv['text'].str.lower() # para modelos uncased # el fichero que hemos llamado test es usado en la red neuronal validation_labels=dfv['label'] validation_inputs = crearinputs (validation,tokenizer) validation_masks= crearmask (validation_inputs) validation_inputs = torch.tensor(validation_inputs) validation_labels = torch.tensor(validation_labels.values) validation_masks = torch.tensor(validation_masks) from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler# The DataLoader needs to know our batch size for training, so we specify it #Colab batch_size = 32 #local #batch_size = 15 validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels) validation_sampler = SequentialSampler(validation_data) validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=batch_size) return validation_dataloader SHOWING RESULTS
def resultados(validation_dataloader, model, tokenizer): model.eval() # Tracking variables predictions , true_labels = [], [] pred = [] t_label =[] # Predict for batch in validation_dataloader: # Add batch to GPU , como no tengo lo dejo aquí batch = tuple(t.to(device) for t in batch) # Unpack the inputs from our dataloader b_input_ids, b_input_mask, b_labels = batch # Telling the model not to compute or store gradients, saving memory and # speeding up prediction with torch.no_grad(): # Forward pass, calculate logit predictions outputs = model(b_input_ids, #toktype_ids=None, # attention_mask=b_input_mask) #I GET THE ERROR HERE logits = outputs[0] # Move logits and labels to CPU logits = logits.detach().cpu().numpy() label_ids = b_labels.to('cpu').numpy() # Store predictions and true labels # Store predictions and true labels predictions.append(logits) true_labels.append(label_ids) for l in logits: # para cada tupla del logits, se selecciona 0 o 1 dependiendo del valor # que sea el mayor (argmax) pred_labels_i = np.argmax(l).item() pred.append(pred_labels_i) #Si no me equivoco, en pred guardamos las predicciones hechas por el modelo pred=np.asarray(pred).tolist() t_label = [val for sublist in true_labels for val in sublist] # para aplanar la lista de etiquetas #print('predicciones',pred) #print('t_labels',t_label) #print('validation_labels',validation_labels ) print("RESULTADOS KFOLD validacion cruzada") from sklearn.metrics import confusion_matrix from sklearn.metrics import classification_report print(classification_report(t_label, pred)) print ("Distribution test {}".format(Counter(t_label))) from sklearn.metrics import confusion_matrix print(confusion_matrix(t_label, pred)) from sklearn.metrics import roc_auc_score print('AUC ROC:') print(roc_auc_score(t_label, pred)) from sklearn.metrics import f1_score result=f1_score(t_label, pred, average='binary',labels=[0,1],pos_label=1,zero_division=0) print('f1-score macro:') print(result) print("****************************************************************") return result I get the error at this line in function resultados:
with torch.no_grad(): # Forward pass, calculate logit predictions outputs = model(b_input_ids, #toktype_ids=None, # attention_mask=b_input_mask) #Esto falla MAIN PROGRAM
trial_data = pd.DataFrame(trial_dataset) device_name = tf.test.gpu_device_name() if device_name != '/device:GPU:0': print('no hay gpu') print('Found GPU at: {}'.format(device_name)) #import torch# If there's a GPU available... if torch.cuda.is_available(): # Tell PyTorch to use the GPU. device = torch.device("cuda") print('There are %d GPU(s) available.' % torch.cuda.device_count()) print('We will use the GPU:', torch.cuda.get_device_name(0)) # If not... else: print('No GPU available, using the CPU instead.') device = torch.device("cpu") tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') validation_dataloader = crearDataLoad(trial_data,tokenizer) # obteniendo metricas del modelo generado en el paso anterior model.eval() result= resultados(validation_dataloader, model,tokenizer)