training code done
This commit is contained in:
38
main.py
38
main.py
@@ -1,8 +1,9 @@
|
||||
from typing import cast
|
||||
|
||||
import torch
|
||||
import torch.optim as optim
|
||||
import wandb
|
||||
from datasets import load_dataset
|
||||
from torch.utils.data import DataLoader
|
||||
from datasets import DatasetDict, load_dataset
|
||||
from tqdm import tqdm
|
||||
|
||||
from src.dataset.preprocess import make_transform
|
||||
@@ -10,29 +11,23 @@ from src.model.utransformer import UTransformer
|
||||
from src.rf import RF
|
||||
|
||||
transform = make_transform()
|
||||
|
||||
model = UTransformer.from_pretrained_backbone(
|
||||
"facebook/dinov3-vits16-pretrain-lvd1689m"
|
||||
).to("cuda:3")
|
||||
|
||||
rf = RF(model)
|
||||
optimizer = optim.AdamW(model.parameters(), lr=5e-4)
|
||||
|
||||
dataset = load_dataset("your-dataset-name")
|
||||
dataset = cast(DatasetDict, load_dataset("your-dataset-name"))
|
||||
train_dataset = dataset["train"]
|
||||
|
||||
|
||||
def preprocess_function(examples):
|
||||
x0_list = []
|
||||
x1_list = []
|
||||
|
||||
for x0_img, x1_img in zip(examples["cloudy_image"], examples["clear_image"]):
|
||||
for x0_img, x1_img in zip(examples["cloudy"], examples["clear"]):
|
||||
x0_transformed = transform(x0_img)
|
||||
x1_transformed = transform(x1_img)
|
||||
|
||||
x0_list.append(x0_transformed)
|
||||
x1_list.append(x1_transformed)
|
||||
|
||||
return {"x0": x0_list, "x1": x1_list}
|
||||
|
||||
|
||||
@@ -42,33 +37,33 @@ train_dataset = train_dataset.map(
|
||||
batch_size=32,
|
||||
remove_columns=train_dataset.column_names,
|
||||
)
|
||||
|
||||
train_dataset.set_format(type="torch", columns=["x0", "x1"])
|
||||
|
||||
dataloader = DataLoader(
|
||||
train_dataset, batch_size=16, shuffle=True, num_workers=4, pin_memory=True
|
||||
)
|
||||
|
||||
wandb.init(project="cloud-removal-kmu")
|
||||
|
||||
batch_size = 16
|
||||
for epoch in range(100):
|
||||
lossbin = {i: 0 for i in range(10)}
|
||||
losscnt = {i: 1e-6 for i in range(10)}
|
||||
|
||||
for batch in tqdm(dataloader, desc=f"Epoch {epoch + 1}/100"):
|
||||
x0 = batch["x0"].to("cuda:3")
|
||||
x1 = batch["x1"].to("cuda:3")
|
||||
train_dataset = train_dataset.shuffle(seed=epoch)
|
||||
|
||||
for i in tqdm(
|
||||
range(0, len(train_dataset), batch_size), desc=f"Epoch {epoch + 1}/100"
|
||||
):
|
||||
batch = train_dataset[i : i + batch_size]
|
||||
x0 = torch.stack(batch["x0"]).to("cuda:3")
|
||||
x1 = torch.stack(batch["x1"]).to("cuda:3")
|
||||
|
||||
optimizer.zero_grad()
|
||||
loss, blsct = rf.forward(x0, x1)
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
|
||||
wandb.log({"loss": loss.item()})
|
||||
|
||||
for t, l in blsct:
|
||||
for t, lss in blsct:
|
||||
bin_idx = min(int(t * 10), 9)
|
||||
lossbin[bin_idx] += l
|
||||
lossbin[bin_idx] += lss
|
||||
losscnt[bin_idx] += 1
|
||||
|
||||
epoch_metrics = {f"lossbin_{i}": lossbin[i] / losscnt[i] for i in range(10)}
|
||||
@@ -84,5 +79,4 @@ for epoch in range(100):
|
||||
},
|
||||
f"checkpoint_epoch_{epoch + 1}.pt",
|
||||
)
|
||||
|
||||
wandb.finish()
|
||||
|
||||
Reference in New Issue
Block a user