File "D:\Code\Paper-code\metaformer\train.py", line 572, in main
    dataset_train = create_dataset(
File "F:\anaconda\anaconda3\envs\DeltaZero\lib\site-packages\timm\data\dataset_factory.py", line 142, in create_dataset
    ds = ImageDataset(root, parser=name, class_map=class_map, load_bytes=load_bytes, **kwargs)
File "F:\anaconda\anaconda3\envs\DeltaZero\lib\site-packages\timm\data\dataset.py", line 32, in __init__
    parser = create_parser(parser or '', root=root, class_map=class_map)
File "F:\anaconda\anaconda3\envs\DeltaZero\lib\site-packages\timm\data\parsers\parser_factory.py", line 27, in create_parser
    parser = ParserImageFolder(root, **kwargs)
File "F:\anaconda\anaconda3\envs\DeltaZero\lib\site-packages\timm\data\parsers\parser_image_folder.py", line 73, in __init__
    raise RuntimeError(
RuntimeError: Found 0 images in subfolders of D:\Code\Paper-code\metaformer\cifar-100\train. Supported image extensions are .png, .jpg, .jpeg
File "D:\Code\Paper-code\metaformer\train.py", line 792, in train_one_epoch
    output = model(input)
File "F:\anaconda\anaconda3\envs\DeltaZero\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
File "D:\Code\Paper-code\metaformer\metaformer_baselines.py", line 666, in forward
    x = self.forward_features(x)
File "D:\Code\Paper-code\metaformer\metaformer_baselines.py", line 662, in forward_features
    x = self.stages[i](x)
File "F:\anaconda\anaconda3\envs\DeltaZero\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
File "F:\anaconda\anaconda3\envs\DeltaZero\lib\site-packages\torch\nn\modules\container.py", line 217, in forward
    input = module(input)
File "F:\anaconda\anaconda3\envs\DeltaZero\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
File "D:\Code\Paper-code\metaformer\metaformer_baselines.py", line 522, in forward
    self.token_mixer(self.norm1(x))
File "F:\anaconda\anaconda3\envs\DeltaZero\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
File "D:\Code\Paper-code\metaformer\aft\aft_full.py", line 17, in forward
    batch_size, seq_len, _ = x.shape
ValueError: too many values to unpack (expected 3)
Traceback (most recent call last):
File "D:\Code\Paper-code\metaformer\train.py", line 935, in <module>
    main()
File "D:\Code\Paper-code\metaformer\train.py", line 709, in main
    train_metrics = train_one_epoch(
File "D:\Code\Paper-code\metaformer\train.py", line 790, in train_one_epoch
    output = model(input)
File "F:\anaconda\anaconda3\envs\DeltaZero\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
File "D:\Code\Paper-code\metaformer\metaformer_baselines.py", line 666, in forward
    x = self.forward_features(x)
File "D:\Code\Paper-code\metaformer\metaformer_baselines.py", line 661, in forward_features
    x = self.downsample_layers[i](x)
File "F:\anaconda\anaconda3\envs\DeltaZero\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
File "D:\Code\Paper-code\metaformer\metaformer_baselines.py", line 210, in forward
    x = self.conv(x)
File "F:\anaconda\anaconda3\envs\DeltaZero\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
File "F:\anaconda\anaconda3\envs\DeltaZero\lib\site-packages\torch\nn\modules\conv.py", line 463, in forward
    return self._conv_forward(input, self.weight, self.bias)
File "F:\anaconda\anaconda3\envs\DeltaZero\lib\site-packages\torch\nn\modules\conv.py", line 459, in _conv_forward
    return F.conv2d(input, weight, bias, self.stride,
RuntimeError: FIND was unable to find an engine to execute this computation
RuntimeError: cuDNN error: CUDNN_STATUS_INTERNAL_ERROR
You can try to repro this exception using the following code snippet. If that doesn't trigger the error, please include your original repro script when reporting this issue.

import torch
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.allow_tf32 = True
data = torch.randn([256, 64, 56, 56], dtype=torch.float, device='cuda', requires_grad=True)
net = torch.nn.Conv2d(64, 128, kernel_size=[3, 3], padding=[1, 1], stride=[2, 2], dilation=[1, 1], groups=1)
net = net.cuda().float()
out = net(data)
out.backward(torch.randn_like(out))
torch.cuda.synchronize()
在训练脚本中(import torch 之后、模型前向计算之前)加入下面这行代码,禁用 cuDNN 即可绕过该错误:
torch.backends.cudnn.enabled = False
2.1.9 RuntimeError: CUDA error: out of memory
上面那个错误解决之后,又报了下面这个错误,麻了😂:
File "D:\Code\Paper-code\metaformer\aft\aft_full.py", line 25, in forward
    num = torch.exp(w_bias) @ (torch.exp(k) * v)
RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
这个错误 RuntimeError: CUDA error: out of memory 表明 GPU 显存不足以完成当前的运算。这通常发生在处理大型数据集或复杂模型时,特别是当所有输入、模型参数和中间结果都同时存放在 GPU 上时。