Compare commits

12 Commits

| Author | SHA1 | Date |
|---|---|---|
| | 7486fe1cc6 | |
| | eec0e76fdb | |
| | 008d5afc38 | |
| | 0fccdb0b6f | |
| | 081385b99d | |
| | 110fb0e4e9 | |
| | fa35a5e435 | |
| | bd122d3b42 | |
| | 6cc8ebd200 | |
| | c7e6d6efbd | |
| | 657aeeeefa | |
| | 5e785157ed | |
.gitignore (vendored) · 1

@@ -1,3 +1,4 @@
+.idea
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
README.md · 18

@@ -23,19 +23,19 @@ This repo is the official implementation of ["Swin Transformer: Hierarchical Vision Transformer using Shifted Windows"]
 
 Initial commits:
 
-1. Pretrained models on ImageNet-1K ([Swin-T-IN1K](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth), [Swin-S-IN1K](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth), [Swin-B-IN1K](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224.pth)) and ImageNet-22K ([Swin-B-IN22K](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22k.pth), [Swin-L-IN22K](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window7_224_22k.pth)) are provided.
+1. Pretrained models on ImageNet-1K ([Swin-T-IN1K](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth), [Swin-S-IN1K](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth), [Swin-B-IN1K](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224.pth)) and ImageNet-22K ([Swin-B-IN22K](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22k.pth), [Swin-L-IN22K](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window7_224_22k.pth)) are provided.
 2. The supported code and models for ImageNet-1K image classification, COCO object detection and ADE20K semantic segmentation are provided.
 3. The cuda kernel implementation for the [local relation layer](https://arxiv.org/pdf/1904.11491.pdf) is provided in branch [LR-Net](https://github.com/microsoft/Swin-Transformer/tree/LR-Net).
 
 ## Introduction
 
-**Swin Transformer** is initially described in [arxiv](https://arxiv.org/abs/2103.14030), which capably serves as a
+**Swin Transformer** (the name `Swin` stands for **S**hifted **win**dow) is initially described in [arxiv](https://arxiv.org/abs/2103.14030), which capably serves as a
 general-purpose backbone for computer vision. It is basically a hierarchical Transformer whose representation is
 computed with shifted windows. The shifted windowing scheme brings greater efficiency by limiting self-attention
 computation to non-overlapping local windows while also allowing for cross-window connection.
 
 Swin Transformer achieves strong performance on COCO object detection (`58.7 box AP` and `51.1 mask AP` on test-dev) and
-ADE20K semantic segmentatiion (`53.5 mIoU` on val), surpassing previous models by a large margin.
+ADE20K semantic segmentation (`53.5 mIoU` on val), surpassing previous models by a large margin.
 
-![teaser](figures/teaser.png)
+![teaser](figures/teaser_dark.png)
 

@@ -102,6 +102,18 @@ Note: <sup>*</sup> indicates multi-scale testing.
 - For **Object Detection and Instance Segmentation**, please see [Swin Transformer for Object Detection](https://github.com/SwinTransformer/Swin-Transformer-Object-Detection).
 - For **Semantic Segmentation**, please see [Swin Transformer for Semantic Segmentation](https://github.com/SwinTransformer/Swin-Transformer-Semantic-Segmentation).
 
+## Third-party Usage and Experiments
+
+***In this paragraph, we cross-link third-party repositories which use Swin and report results. You can let us know by raising an issue.***
+
+(`Note: please report accuracy numbers and provide trained models in your new repository so that others can get a sense of correctness and model behavior.`)
+
+[04/14/2021] Swin for RetinaNet in Detectron2: https://github.com/xiaohu2015/SwinT_detectron2.
+
+[04/16/2021] Included in a famous model zoo: https://github.com/rwightman/pytorch-image-models.
+
+[04/20/2021] Swin-Transformer classifier inference using TorchServe: https://github.com/kamalkraj/Swin-Transformer-Serve
+
 ## Contributing
 
 This project welcomes contributions and suggestions. Most contributions require you to agree to a
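The third-party list above notes that Swin has been included in `pytorch-image-models` (timm). As a quick way to try the backbone this diff documents, here is a minimal sketch, assuming timm is installed and that the model name `swin_tiny_patch4_window7_224` (the same variant used in `tools/pytorch2paddlepaddle.py` below) resolves in your timm version:

```python
# Minimal sketch: load Swin-T through timm and run one forward pass.
# Assumes `pip install timm`; set pretrained=True to download ImageNet-1K weights.
import timm
import torch

model = timm.create_model("swin_tiny_patch4_window7_224", pretrained=False)
model.eval()

x = torch.randn(1, 3, 224, 224)   # one 224x224 RGB image
with torch.no_grad():
    logits = model(x)             # shape (1, 1000): ImageNet-1K class logits
print(logits.shape)
```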
main.py · 2

@@ -110,7 +110,9 @@ def main(config):
     if resume_file:
         if config.MODEL.RESUME:
             logger.warning(f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}")
+        config.defrost()
         config.MODEL.RESUME = resume_file
+        config.freeze()
         logger.info(f'auto resuming from {resume_file}')
     else:
         logger.info(f'no checkpoint found in {config.OUTPUT}, ignoring auto resume')
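Both added lines exist because Swin's `config` is a yacs `CfgNode` that is frozen after setup, so any mutation must be bracketed by `defrost()` and `freeze()`. A minimal sketch of that pattern, assuming the `yacs` package; the keys shown are illustrative:

```python
# Sketch of the yacs freeze/defrost pattern used in the diff above.
# Assumes `pip install yacs`; the config keys here are illustrative.
from yacs.config import CfgNode as CN

config = CN()
config.MODEL = CN()
config.MODEL.RESUME = ""
config.freeze()                     # make the config immutable

# config.MODEL.RESUME = "ckpt.pth"  # would raise AttributeError on a frozen node
config.defrost()                    # temporarily allow mutation
config.MODEL.RESUME = "ckpt.pth"
config.freeze()                     # re-freeze so later accidental edits fail fast
```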
tools/pytorch2paddlepaddle.py · 28 (new file)

@@ -0,0 +1,28 @@
+import torch
+import numpy as np
+from models.swin_transformer import SwinTransformer
+# Build a dummy input for tracing
+input_data = np.random.rand(1, 3, 224, 224).astype("float32")
+
+# Constructor arguments per variant; keys match SwinTransformer.__init__
+swin_model_cfg_map = {
+    "swin_tiny_patch4_window7_224": {
+        "embed_dim": 96,
+        "depths": [2, 2, 6, 2],
+        "num_heads": [3, 6, 12, 24],
+        "window_size": 7,
+    }
+}
+
+model_name = "swin_tiny_patch4_window7_224"
+torch_module = SwinTransformer(**swin_model_cfg_map[model_name])
+torch_state_dict = torch.load("/home/andy/data/pretrained_models/{}.pth".format(model_name))["model"]
+torch_module.load_state_dict(torch_state_dict)
+# Switch to eval mode so tracing records inference behavior
+torch_module.eval()
+# Run the conversion with x2paddle
+from x2paddle.convert import pytorch2paddle
+pytorch2paddle(torch_module,
+               save_dir="pd_{}".format(model_name),
+               jit_type="trace",
+               input_examples=[torch.tensor(input_data)])
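Before relying on the converted model, it can help to record what the PyTorch module produces on the same dummy input, so the Paddle output can be compared against it later. A small sketch that reuses `torch_module` and `input_data` from the script above (the expected shape assumes the default 1000-class head):

```python
# Sketch: forward the PyTorch module on the tracer's input and keep the result
# for a later comparison with the converted Paddle model.
with torch.no_grad():
    torch_out = torch_module(torch.tensor(input_data))
print(torch_out.shape)  # expected: torch.Size([1, 1000]) with the default head
```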
utils.py · 2

@@ -29,7 +29,9 @@ def load_checkpoint(config, model, optimizer, lr_scheduler, logger):
     if not config.EVAL_MODE and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint:
         optimizer.load_state_dict(checkpoint['optimizer'])
         lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
+        config.defrost()
         config.TRAIN.START_EPOCH = checkpoint['epoch'] + 1
+        config.freeze()
         if 'amp' in checkpoint and config.AMP_OPT_LEVEL != "O0" and checkpoint['config'].AMP_OPT_LEVEL != "O0":
             amp.load_state_dict(checkpoint['amp'])
         logger.info(f"=> loaded successfully '{config.MODEL.RESUME}' (epoch {checkpoint['epoch']})")
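The keys read here (`optimizer`, `lr_scheduler`, `epoch`, and conditionally `amp` and `config`), together with the `["model"]` lookup in `tools/pytorch2paddlepaddle.py`, imply the checkpoint layout the repo uses. A self-contained sketch of the matching save side, with dummy stand-ins for the real training objects:

```python
import torch
import torch.nn as nn

# Dummy stand-ins; in Swin these are the real model, optimizer, and scheduler.
model = nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1)

checkpoint = {
    'model': model.state_dict(),           # read by tools/pytorch2paddlepaddle.py
    'optimizer': optimizer.state_dict(),   # read only when resuming training
    'lr_scheduler': lr_scheduler.state_dict(),
    'epoch': 0,
    # real Swin checkpoints also store 'config' and, with mixed precision, 'amp'
}
torch.save(checkpoint, 'ckpt.pth')
```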