Custom Training Model different from Engine.fit Model #2597

yxteo2 · 2025-03-12T00:59:12Z

yxteo2
Mar 12, 2025

I tried to customize the training method with PyTorch. However, the model output during testing is very different from the output of a model trained with Engine.fit(). The accuracy of the model is high during training but very bad during inference. I don't know why the max and min image prediction scores differ so much when running inference with the Torch inferencer.

`from myPackage import utils
from torch.cuda.amp import GradScaler, autocast
from anomalib.models.image import fastflow
from anomalib.models.image.fastflow import FastflowModel
import torch
from anomalib.data import MVTec
from anomalib import TaskType
from anomalib.data.utils import (
    TestSplitMode,
    ValSplitMode,
)

# --- Experiment constants ---------------------------------------------------
image_shape = [512, 512]  # spatial size shared by the datamodule and the model
device = "cuda"
LR = 0.001
WEIGHT_DECAY = 0.0001

# --- Data ---------------------------------------------------------------------
# MVTec(...) builds the anomalib data object for the "led" category; the name
# `dataloader` is kept because other parts of the script may refer to it.
# NOTE(review): with transform=None, presumably only resizing to `image_size`
# (no ImageNet mean/std normalization) is applied when this object is used
# outside of Engine — confirm against the anomalib version in use.
dataloader = MVTec(
    root="./datasets/MVTec",
    category="led",
    train_batch_size=32,
    eval_batch_size=32,
    num_workers=0,
    task=TaskType.CLASSIFICATION,
    image_size=image_shape,
    transform=None,
    test_split_mode=TestSplitMode.FROM_DIR,
    test_split_ratio=0.2,
    val_split_mode=ValSplitMode.SAME_AS_TEST,
    val_split_ratio=0.5,
    seed=42,
)

# --- Model --------------------------------------------------------------------
# FastFlow torch model on a pre-trained ResNet-18 backbone, moved to `device`
# before the optimizer is created so the optimizer sees the final parameters.
model = FastflowModel(
    input_size=image_shape,
    backbone="resnet18",
    pre_trained=True,
    flow_steps=8,
    conv3x3_only=False,
    hidden_ratio=1.0,
)
model = model.to(device)

# --- Optimisation ---------------------------------------------------------------
loss_fn = fastflow.FastflowLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=LR,
    weight_decay=WEIGHT_DECAY,
)

# Gradient scaler used by the mixed-precision training step.
scaler = GradScaler()

def train_one_epoch(dataloader, model, optimizer, loss_fn, device, epoch, log_interval=10):
    """Run one mixed-precision training pass over ``dataloader``.

    For every batch, the ``'image'`` entry is moved to ``device`` as float32,
    fed through ``model`` under ``autocast``, and the loss returned by
    ``loss_fn`` is back-propagated and applied through the module-level
    ``scaler`` (a ``GradScaler``).

    Args:
        dataloader: Sized iterable of batches; each batch is indexed with
            ``'image'``.
        model: Module called as ``model(images)``; its result is unpacked
            into ``(hidden_variables, jacobians)``.
        optimizer: Optimizer stepped once per batch via the gradient scaler.
        loss_fn: Callable invoked as ``loss_fn(hidden_variables, jacobians)``.
        device: Device the input images are moved to.
        epoch: Zero-based epoch index; only used in the printed log line.
        log_interval: Print the running loss every ``log_interval`` steps and
            on the last step of the epoch.

    Returns:
        None. Progress is reported through ``print``.
    """
    model.train()
    running_loss = utils.AverageMeter()

    for step_no, batch in enumerate(dataloader, start=1):
        # Inputs are forced to float32; autocast decides per-op precision.
        images = batch['image'].to(device, dtype=torch.float32)

        # Drop gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass and loss under mixed precision.
        with autocast():
            hidden_variables, jacobians = model(images)
            loss = loss_fn(hidden_variables, jacobians)

        # Scaled backward pass, optimizer step, then refresh the scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss.update(loss.item())

        # Only report on every `log_interval`-th step and on the final step.
        if step_no % log_interval != 0 and step_no != len(dataloader):
            continue
        print(f"Epoch {epoch+1}, Step {step_no}: Loss = {running_loss.val:.3f} (Avg: {running_loss.avg:.3f})")

# Make sure the target directory exists before the first torch.save call;
# torch.save does not create missing parent directories.
os.makedirs(checkpoint_dir, exist_ok=True)

# Latest evaluation results. They start as None so a checkpoint written before
# the first evaluation still has well-defined entries.
adaptive_threshold = min_score = max_score = None

for epoch in range(NUM_EPOCHS):
    # Train for one epoch (train_one_epoch puts the model in train() mode).
    train_one_epoch(
        dataloader=train_loader,
        model=model.to(device),
        optimizer=optimizer,
        loss_fn=loss_fn,
        device=device,
        epoch=epoch,
    )

    # Evaluate periodically and keep the most recent results.
    if (epoch + 1) % EVAL_INTERVAL == 0:
        adaptive_threshold, min_score, max_score = eval_once(val_loader, model.to(device), device)

    # Save model checkpoint periodically.
    if (epoch + 1) % CHECKPOINT_INTERVAL == 0:
        checkpoint_path = os.path.join(checkpoint_dir, f"epoch_{epoch + 1}.pt")
        torch.save(
            {
                "epoch": epoch + 1,
                "model_state_dict": model.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                # Needed to resume mixed-precision training with the same
                # loss-scale state.
                "scaler_state_dict": scaler.state_dict(),
                # Latest values returned by eval_once (None if no evaluation
                # has run yet).
                # NOTE(review): presumably these are the threshold and score
                # range that inference must reuse to reproduce validation-time
                # scores — confirm against eval_once.
                "adaptive_threshold": adaptive_threshold,
                "min_score": min_score,
                "max_score": max_score,
            },
            checkpoint_path,
        )
        print(f"Checkpoint saved: {checkpoint_path}")

# train_one_epoch() leaves the model in train() mode. Switch to eval() explicitly
# so layers such as batch norm use their inference behaviour while the graph
# is traced, instead of relying on the exporter to change the mode.
# NOTE(review): the exported graph contains only the model's forward pass;
# presumably any input normalization, score normalization or thresholding
# applied around the model by Engine is NOT part of it and must be reproduced
# by the inference code — confirm against the anomalib version in use.
model.eval()

# Create a dummy input tensor matching the expected input size
# (batch_size=1, channels=3, height=512, width=512).
dummy_input = torch.randn(1, 3, 512, 512, device=device)
torch.onnx.export(
    model,                      # The model to be exported.
    dummy_input,                # A dummy input for tracing.
    r"anomaly3\self_created_onnx\model.onnx",            # Where to save the ONNX model.
    export_params=True,         # Store the trained parameter weights inside the model file.
    opset_version=11,           # ONNX opset version (adjust as needed).
    do_constant_folding=True,   # Whether to execute constant folding for optimization.
    input_names=['input'],      # The model's input names.
    output_names=['output'],    # The model's output names.
    dynamic_axes={
        'input': {0: 'batch_size'},  # Variable batch size.
        'output': {0: 'batch_size'}
    }
)


`

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Custom Training Model different from Engine.fit Model #2597

{{title}}

{{editor}}'s edit

{{editor}}'s edit

Replies: 0 comments

Select a reply

Custom Training Model different from Engine.fit Model #2597

yxteo2 Mar 12, 2025

Replies: 0 comments

yxteo2
Mar 12, 2025