Add extra debugging

commit f8eb91fddb (parent 54db72fd89)
Author: Timothy Allen
Date:   2023-12-31 10:32:32 +02:00
1 changed file with 34 additions and 24 deletions


@@ -169,6 +169,7 @@ class TextCategoriesDataset(Dataset):
         return (
             self.textTransform()(text),
             cats.fillna(0).values.tolist(),
+            text,
         )
 
     def textTransform(self):
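For orientation, here is a minimal sketch of a Dataset item that also carries the raw string, which is the hook this hunk adds; the class name, vocabulary handling, and field names are illustrative assumptions, not the file's actual implementation.

    import torch
    from torch.utils.data import Dataset

    class ToyTextCategoriesDataset(Dataset):
        """Stand-in dataset: each item is (token_ids, category_vector, raw_text)."""

        def __init__(self, texts, category_vectors, vocab):
            self.texts = texts                        # list of raw strings
            self.category_vectors = category_vectors  # list of 0/1 lists, one per text
            self.vocab = vocab                        # dict: token -> integer id

        def __len__(self):
            return len(self.texts)

        def __getitem__(self, idx):
            text = self.texts[idx]
            ids = torch.tensor([self.vocab.get(tok, 0) for tok in text.split()],
                               dtype=torch.long)
            cats = torch.tensor(self.category_vectors[idx], dtype=torch.float)
            # Returning the untouched string alongside the tensors lets the
            # training loop print the original text next to its predictions.
            return ids, cats, text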
@@ -214,7 +215,7 @@ class CollateBatch:
         batch: a list of tuples with (text, cats), each of which
                is a list of tokens
         '''
-        batch_text, batch_cats = zip(*batch)
+        batch_text, batch_cats, batch_orig = zip(*batch)
 
         # Pad text to the longest
         text_tensor = nn.utils.rnn.pad_sequence(
@@ -236,6 +237,7 @@ class CollateBatch:
         return (
             text_tensor,
             cats_tensor,
+            batch_orig,
         )
 
 def tensor2cat(dataset, tensor):
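A sketch of the collate step after this change, assuming the dataset yields 1-D LongTensors of token ids and equal-length float category vectors; the padding index and batch_first layout are assumptions.

    import torch
    from torch import nn

    def collate_batch(batch, pad_idx=0):
        """Pad variable-length token sequences, stack targets, pass raw text through."""
        batch_text, batch_cats, batch_orig = zip(*batch)
        # Pad every sequence to the length of the longest one in the batch.
        text_tensor = nn.utils.rnn.pad_sequence(
            list(batch_text), batch_first=True, padding_value=pad_idx)
        cats_tensor = torch.stack(list(batch_cats))
        # The original strings are returned untouched; they never reach the
        # model and only feed the debug prints in train()/evaluate().
        return text_tensor, cats_tensor, batch_orig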
@@ -267,28 +269,26 @@ def train(dataloader, dataset, model, optimizer, criterion, epoch=0):
     total_acc, total_count = 0, 1 # XXX
     log_interval = 500
-    torch.set_printoptions(precision=2)
 
     model.train()
     batch = tqdm.tqdm(dataloader, unit="batch")
     for idx, data in enumerate(batch):
         batch.set_description(f"Train {epoch}.{idx}")
-        text, cats = data
+        text, cats, orig_text = data
         optimizer.zero_grad()
         output = model(text)
         #print("output", output)
         #print("output shape", output.shape)
 
+        optimizer.zero_grad()
         loss = criterion(input=output, target=cats)
-        optimizer.zero_grad()
         loss.backward()
-        #nn.utils.clip_grad_norm_(model.parameters(), 0.1)
+        nn.utils.clip_grad_norm_(model.parameters(), 0.1)
         optimizer.step()
 
-        print("train loss", loss)
+        #print("train loss", loss)
 
         ##predicted = np.round(output)
         ##total_acc += (predicted == cats).sum().item()
@@ -299,10 +299,12 @@ def train(dataloader, dataset, model, optimizer, criterion, epoch=0):
             predictions[output < 0.5] = False ## assign 0 label to those with less than 0.5
 
             batch.clear()
-            for target, out, pred in list(zip(cats, output, predictions)):
+            for target, out, pred, orig in list(zip(cats, output, predictions, orig_text)):
                 expect = tensor2cat(dataset, target)
                 raw = tensor2cat(dataset, out)
                 predict = tensor2cat(dataset, pred)
+                print("Text:", orig)
+                print("Loss:", loss.item())
                 print("Expected: ", expect)
                 print("Predicted: ", predict)
                 print("Raw output:", raw)
@@ -333,7 +335,7 @@ def evaluate(dataloader, dataset, model, criterion, epoch=0):
     batch = tqdm.tqdm(dataloader, unit="batch")
     for idx, data in enumerate(batch):
         batch.set_description(f"Evaluate {epoch}.{idx}")
-        text, cats = data
+        text, cats, orig_text = data
         output = model(text)
         #print("eval predicted", output)
@@ -346,10 +348,12 @@ def evaluate(dataloader, dataset, model, criterion, epoch=0):
             predictions[output < 0.5] = False ## assign 0 label to those with less than 0.5
 
             batch.clear()
-            for target, out, pred in list(zip(cats, output, predictions)):
+            for target, out, pred, orig in list(zip(cats, output, predictions, orig_text)):
                 expect = tensor2cat(dataset, target)
                 raw = tensor2cat(dataset, out)
                 predict = tensor2cat(dataset, pred)
+                print("Evaluate Text:", orig)
+                print("Evaluate Loss:", loss.item())
                 print("Evaluate expected: ", expect)
                 print("Evaluate predicted: ", predict)
                 print("Evaluate raw output:", raw)
@@ -465,17 +469,20 @@ def main():
     )
     print(f"Using {device} device")
+    torch.set_printoptions(precision=2)
 
     # Hyperparameters
-    #epochs = 10 # epoch
-    epochs = 6 # epoch
+    epochs = 10 # epoch
+    #epochs = 6 # epoch
     #epochs = 4 # epoch
     #lr = 5 # learning rate
     #lr = 0.5
     #lr = 0.05
     #lr = 0.005 # initial learning rate; too small may result in a long training process that could get stuck, whereas a value too large may result in learning a sub-optimal set of weights too fast or an unstable training process -- perhaps the most important hyperparameter. If you have time to tune only one hyperparameter, tune the learning rate
-    lr = 0.0001
+    lr = 0.00005
     #batch_size = 64 # batch size for training
-    batch_size = 16 # batch size for training
+    batch_size = 32 # batch size for training
+    #batch_size = 16 # batch size for training
     #batch_size = 8 # batch size for training
     #batch_size = 4 # batch size for training
@@ -485,8 +492,9 @@ def main():
     #hidden_size = 8 # hidden size of rnn module, should be tweaked manually
     mean_seq = True # use mean of rnn output
     #mean_seq = False # use mean of rnn output
-    weight_decay = 1e-4 # helps the neural networks to learn smoother / simpler functions which most of the time generalizes better compared to spiky, noisy ones ; try 1e-3, 1e-4
     #weight_decay = 1e-3 # helps the neural networks to learn smoother / simpler functions which most of the time generalizes better compared to spiky, noisy ones ; try 1e-3, 1e-4
+    #weight_decay = 1e-4 # helps the neural networks to learn smoother / simpler functions which most of the time generalizes better compared to spiky, noisy ones ; try 1e-3, 1e-4
+    weight_decay = 1e-5 # helps the neural networks to learn smoother / simpler functions which most of the time generalizes better compared to spiky, noisy ones ; try 1e-3, 1e-4
 
     '''
     dataloader = DataLoader(dataset,
@@ -545,7 +553,9 @@ def main():
     # optimizer and loss
     criterion = nn.BCEWithLogitsLoss()
     #optimizer = torch.optim.SGD(model.parameters(), lr=lr)
-    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr, weight_decay=weight_decay)
+    #optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr, weight_decay=weight_decay)
+    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
     if args.verbose:
         print(criterion)
         print(optimizer)
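One consequence of this switch worth flagging: torch.optim.AdamW uses decoupled weight decay and defaults to weight_decay=0.01 when none is passed, so the weight_decay hyperparameter defined earlier is no longer used. A minimal sketch of the new loss/optimizer pairing, with a throwaway linear model standing in for the real classifier:

    import torch
    from torch import nn

    model = nn.Linear(16, 5)                   # stand-in for the real RNN classifier
    criterion = nn.BCEWithLogitsLoss()         # takes raw logits; applies sigmoid internally
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.00005)  # weight_decay left at its 0.01 default

    logits = model(torch.randn(4, 16))
    target = torch.randint(0, 2, (4, 5)).float()
    loss = criterion(input=logits, target=target)
    loss.backward()
    nn.utils.clip_grad_norm_(model.parameters(), 0.1)   # the clipping enabled earlier in this commit
    optimizer.step()
    optimizer.zero_grad()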
@@ -560,17 +570,17 @@ def main():
         accu_val = evaluate(valid_dataloader, valid_dataset, model, criterion, epoch)
-        if total_accu is not None and total_accu > accu_val:
-            optimizer.step()
-        else:
-            total_accu = accu_val
+        #if total_accu is not None and total_accu > accu_val:
+        #    optimizer.step()
+        #else:
+        #    total_accu = accu_val
         e.set_postfix({
             "accuracy": accu_val,
         })
 
-    # print("Checking the results of test dataset.")
-    # accu_test = evaluate(test_dataloader, test_dataset)
-    # print("test accuracy {:8.3f}".format(accu_test))
+    print("Checking the results of test dataset.")
+    accu_test = evaluate(test_dataloader, test_dataset)
+    print("test accuracy {:8.3f}".format(accu_test))
 
     if model_out is not None:
         torch.save(model.state_dict(), model_out)
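Putting the last hunk together: the accuracy-gated optimizer.step() is commented out, the test-set evaluation is re-enabled, and the weights are saved when an output path is given. Below is a hedged sketch of that flow using the file's own train()/evaluate() signatures; note that the re-enabled evaluate(test_dataloader, test_dataset) call passes only two arguments while evaluate() now also requires a model and criterion, so as written it would raise a TypeError, and the sketch passes the full argument list instead.

    import torch
    import tqdm

    def run_epochs(model, optimizer, criterion, epochs,
                   train_dataloader, train_dataset,
                   valid_dataloader, valid_dataset,
                   test_dataloader, test_dataset,
                   model_out=None):
        # train() and evaluate() are the file's own functions, with the
        # signatures visible in the hunks above.
        e = tqdm.tqdm(range(1, epochs + 1), unit="epoch")
        for epoch in e:
            train(train_dataloader, train_dataset, model, optimizer, criterion, epoch)
            accu_val = evaluate(valid_dataloader, valid_dataset, model, criterion, epoch)
            e.set_postfix({"accuracy": accu_val})

        print("Checking the results of test dataset.")
        accu_test = evaluate(test_dataloader, test_dataset, model, criterion)
        print("test accuracy {:8.3f}".format(accu_test))

        if model_out is not None:
            torch.save(model.state_dict(), model_out)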