Add xception model

mattdangerw · mattdangerw · commit 6251835c61c5 · 2025-04-01T15:10:55.000-07:00
diff --git a/keras_hub/api/layers/__init__.py b/keras_hub/api/layers/__init__.py
@@ -81,3 +81,6 @@
 from keras_hub.src.models.whisper.whisper_audio_converter import (
     WhisperAudioConverter,
 )
+from keras_hub.src.models.xception.xception_image_converter import (
+    XceptionImageConverter,
+)
diff --git a/keras_hub/api/models/__init__.py b/keras_hub/api/models/__init__.py
@@ -376,6 +376,13 @@
 from keras_hub.src.models.vit_det.vit_det_backbone import ViTDetBackbone
 from keras_hub.src.models.whisper.whisper_backbone import WhisperBackbone
 from keras_hub.src.models.whisper.whisper_tokenizer import WhisperTokenizer
+from keras_hub.src.models.xception.xception_backbone import XceptionBackbone
+from keras_hub.src.models.xception.xception_image_classifier import (
+    XceptionImageClassifier,
+)
+from keras_hub.src.models.xception.xception_image_classifier_preprocessor import (
+    XceptionImageClassifierPreprocessor,
+)
 from keras_hub.src.models.xlm_roberta.xlm_roberta_backbone import (
     XLMRobertaBackbone,
 )
diff --git a/keras_hub/src/models/xception/__init__.py b/keras_hub/src/models/xception/__init__.py
@@ -0,0 +1,5 @@
+from keras_hub.src.models.xception.xception_backbone import XceptionBackbone
+from keras_hub.src.models.xception.xception_presets import backbone_presets
+from keras_hub.src.utils.preset_utils import register_presets
+
+register_presets(backbone_presets, XceptionBackbone)
diff --git a/keras_hub/src/models/xception/xception_backbone.py b/keras_hub/src/models/xception/xception_backbone.py
@@ -0,0 +1,188 @@
+import functools
+
+from keras import layers
+
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.models.backbone import Backbone
+from keras_hub.src.utils.keras_utils import standardize_data_format
+
+
+@keras_hub_export("keras_hub.models.XceptionBackbone")
+class XceptionBackbone(Backbone):
+    """Xception core network with hyperparameters.
+
+    This class implements a Xception backbone as described in
+    [Xception: Deep Learning with Depthwise Separable Convolutions](https://arxiv.org/abs/1610.02357).
+
+    Most users will want the pretrained presets available with this model. If
+    you are creating a custom backbone, this model provides customizability
+    through the `stackwise_conv_filters` and `stackwise_pooling` arguments. This
+    backbone assumes the same basic structure as the original Xception model:
+    * Residuals and pre-activation everywhere but the first and last block.
+    * Conv layers for the first block only, separable conv layers elsewhere.
+
+    Args:
+        stackwise_conv_filters: list of list of ints. Each outermost list
+            entry represents a block, and each innermost list entry a conv
+            layer. The integer value specifies the number of filters for the
+            conv layer.
+        stackwise_pooling: list of bools. A list of booleans per block, where
+            each entry is true if the block should include a max pooling layer
+            and false if it should not.
+        image_shape: tuple. The input shape without the batch size.
+            Defaults to `(None, None, 3)`.
+        data_format: `None` or str. If specified, either `"channels_last"` or
+            `"channels_first"`. If unspecified, the Keras default will be used.
+        dtype: `None` or str or `keras.mixed_precision.DTypePolicy`. The dtype
+            to use for the model's computations and weights.
+
+    Examples:
+    ```python
+    input_data = np.random.uniform(0, 1, size=(2, 224, 224, 3))
+
+    # Pretrained Xception backbone.
+    model = keras_hub.models.Backbone.from_preset("xception_41_imagenet")
+    model(input_data)
+
+    # Randomly initialized Xception backbone with a custom config.
+    model = keras_hub.models.XceptionBackbone(
+        stackwise_conv_filters=[[32, 64], [64, 128], [256, 256]],
+        stackwise_pooling=[True, True, False],
+    )
+    model(input_data)
+    ```
+    """
+
+    def __init__(
+        self,
+        stackwise_conv_filters,
+        stackwise_pooling,
+        image_shape=(None, None, 3),
+        data_format=None,
+        dtype=None,
+        **kwargs,
+    ):
+        if len(stackwise_conv_filters) != len(stackwise_pooling):
+            raise ValueError("All stackwise args should have the same length.")
+
+        data_format = standardize_data_format(data_format)
+        channel_axis = -1 if data_format == "channels_last" else 1
+        num_blocks = len(stackwise_conv_filters)
+
+        # Layer shortcuts with common args.
+        norm = functools.partial(
+            layers.BatchNormalization,
+            axis=channel_axis,
+            dtype=dtype,
+        )
+        act = functools.partial(
+            layers.Activation,
+            activation="relu",
+            dtype=dtype,
+        )
+        conv = functools.partial(
+            layers.Conv2D,
+            kernel_size=(3, 3),
+            use_bias=False,
+            data_format=data_format,
+            dtype=dtype,
+        )
+        sep_conv = functools.partial(
+            layers.SeparableConv2D,
+            kernel_size=(3, 3),
+            padding="same",
+            use_bias=False,
+            data_format=data_format,
+            dtype=dtype,
+        )
+        point_conv = functools.partial(
+            layers.Conv2D,
+            kernel_size=(1, 1),
+            strides=(2, 2),
+            padding="same",
+            use_bias=False,
+            data_format=data_format,
+            dtype=dtype,
+        )
+        pool = functools.partial(
+            layers.MaxPool2D,
+            pool_size=(3, 3),
+            strides=(2, 2),
+            padding="same",
+            data_format=data_format,
+            dtype=dtype,
+        )
+
+        # === Functional Model ===
+        image_input = layers.Input(shape=image_shape)
+        x = image_input  # Intermediate result.
+
+        # Iterate through the blocks.
+        for block_i in range(num_blocks):
+            first_block, last_block = block_i == 0, block_i == num_blocks - 1
+            block_filters = stackwise_conv_filters[block_i]
+            use_pooling = stackwise_pooling[block_i]
+
+            # Save the block input as a residual.
+            residual = x
+            for conv_i, filters in enumerate(block_filters):
+                # First block has post activation and strides on first conv.
+                if first_block:
+                    prefix = f"block{block_i + 1}_conv{conv_i + 1}"
+                    strides = (2, 2) if conv_i == 0 else (1, 1)
+                    x = conv(filters, strides=strides, name=prefix)(x)
+                    x = norm(name=f"{prefix}_bn")(x)
+                    x = act(name=f"{prefix}_act")(x)
+                # Last block has post activation.
+                elif last_block:
+                    prefix = f"block{block_i + 1}_sepconv{conv_i + 1}"
+                    x = sep_conv(filters, name=prefix)(x)
+                    x = norm(name=f"{prefix}_bn")(x)
+                    x = act(name=f"{prefix}_act")(x)
+                else:
+                    prefix = f"block{block_i + 1}_sepconv{conv_i + 1}"
+                    # The first conv in second block has no activation.
+                    if block_i != 1 or conv_i != 0:
+                        x = act(name=f"{prefix}_act")(x)
+                    x = sep_conv(filters, name=prefix)(x)
+                    x = norm(name=f"{prefix}_bn")(x)
+
+            # Optional block pooling.
+            if use_pooling:
+                x = pool(name=f"block{block_i + 1}_pool")(x)
+
+            # Sum residual, first and last block do not have a residual.
+            if not first_block and not last_block:
+                prefix = f"block{block_i + 1}_residual"
+                filters = x.shape[channel_axis]
+                # Match filters with a pointwise conv if needed.
+                if filters != residual.shape[channel_axis]:
+                    residual = point_conv(filters, name=f"{prefix}_conv")(
+                        residual
+                    )
+                    residual = norm(name=f"{prefix}_bn")(residual)
+                x = layers.Add(name=f"{prefix}_add", dtype=dtype)([x, residual])
+
+        super().__init__(
+            inputs=image_input,
+            outputs=x,
+            dtype=dtype,
+            **kwargs,
+        )
+
+        # === Config ===
+        self.stackwise_conv_filters = stackwise_conv_filters
+        self.stackwise_pooling = stackwise_pooling
+        self.image_shape = image_shape
+        self.data_format = data_format
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "stackwise_conv_filters": self.stackwise_conv_filters,
+                "stackwise_pooling": self.stackwise_pooling,
+                "image_shape": self.image_shape,
+            }
+        )
+        return config
diff --git a/keras_hub/src/models/xception/xception_backbone_test.py b/keras_hub/src/models/xception/xception_backbone_test.py
@@ -0,0 +1,41 @@
+import pytest
+from keras import ops
+
+from keras_hub.src.models.xception.xception_backbone import XceptionBackbone
+from keras_hub.src.tests.test_case import TestCase
+
+
+class XceptionBackboneTest(TestCase):
+    def setUp(self):
+        self.init_kwargs = {
+            "stackwise_conv_filters": [[32, 64], [128, 128], [256, 256]],
+            "stackwise_pooling": [False, True, False],
+            "image_shape": (None, None, 3),
+        }
+        self.input_size = 64
+        self.input_data = ops.ones((2, self.input_size, self.input_size, 3))
+
+    def test_backbone_basics(self):
+        self.run_vision_backbone_test(
+            cls=XceptionBackbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+            expected_output_shape=(2, 15, 15, 256),
+        )
+
+    @pytest.mark.large
+    def test_saved_model(self):
+        self.run_model_saving_test(
+            cls=XceptionBackbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
+    @pytest.mark.extra_large
+    def test_all_presets(self):
+        for preset in XceptionBackbone.presets:
+            self.run_preset_test(
+                cls=XceptionBackbone,
+                preset=preset,
+                input_data=self.input_data,
+            )
diff --git a/keras_hub/src/models/xception/xception_image_classifier.py b/keras_hub/src/models/xception/xception_image_classifier.py
@@ -0,0 +1,12 @@
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.models.image_classifier import ImageClassifier
+from keras_hub.src.models.xception.xception_backbone import XceptionBackbone
+from keras_hub.src.models.xception.xception_image_classifier_preprocessor import (  # noqa: E501
+    XceptionImageClassifierPreprocessor,
+)
+
+
+@keras_hub_export("keras_hub.models.XceptionImageClassifier")
+class XceptionImageClassifier(ImageClassifier):
+    backbone_cls = XceptionBackbone
+    preprocessor_cls = XceptionImageClassifierPreprocessor
diff --git a/keras_hub/src/models/xception/xception_image_classifier_preprocessor.py b/keras_hub/src/models/xception/xception_image_classifier_preprocessor.py
@@ -0,0 +1,14 @@
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.models.image_classifier_preprocessor import (
+    ImageClassifierPreprocessor,
+)
+from keras_hub.src.models.xception.xception_backbone import XceptionBackbone
+from keras_hub.src.models.xception.xception_image_converter import (
+    XceptionImageConverter,
+)
+
+
+@keras_hub_export("keras_hub.models.XceptionImageClassifierPreprocessor")
+class XceptionImageClassifierPreprocessor(ImageClassifierPreprocessor):
+    backbone_cls = XceptionBackbone
+    image_converter_cls = XceptionImageConverter
diff --git a/keras_hub/src/models/xception/xception_image_classifier_test.py b/keras_hub/src/models/xception/xception_image_classifier_test.py
@@ -0,0 +1,86 @@
+import numpy as np
+import pytest
+
+from keras_hub.src.models.xception.xception_backbone import XceptionBackbone
+from keras_hub.src.models.xception.xception_image_classifier import (
+    XceptionImageClassifier,
+)
+from keras_hub.src.models.xception.xception_image_classifier_preprocessor import (  # noqa: E501
+    XceptionImageClassifierPreprocessor,
+)
+from keras_hub.src.models.xception.xception_image_converter import (
+    XceptionImageConverter,
+)
+from keras_hub.src.tests.test_case import TestCase
+
+
+class XceptionImageClassifierTest(TestCase):
+    def setUp(self):
+        self.images = np.ones((2, 299, 299, 3))
+        self.labels = [0, 1]
+        self.backbone = XceptionBackbone(
+            stackwise_conv_filters=[[32, 64], [128, 128], [256, 256]],
+            stackwise_pooling=[False, True, False],
+        )
+        self.image_converter = XceptionImageConverter(
+            image_size=(299, 299),
+            scale=1.0 / 127.5,
+            offset=-1.0,
+        )
+        self.prepocessor = XceptionImageClassifierPreprocessor(
+            image_converter=self.image_converter,
+        )
+        self.init_kwargs = {
+            "backbone": self.backbone,
+            "preprocessor": self.prepocessor,
+            "num_classes": 2,
+            "pooling": "avg",
+            "activation": "softmax",
+        }
+        self.train_data = (self.images, self.labels)
+
+    def test_classifier_basics(self):
+        self.run_task_test(
+            cls=XceptionImageClassifier,
+            init_kwargs=self.init_kwargs,
+            train_data=self.train_data,
+            expected_output_shape=(2, 2),
+        )
+
+    def test_head_dtype(self):
+        model = XceptionImageClassifier(
+            **self.init_kwargs, head_dtype="bfloat16"
+        )
+        self.assertEqual(model.output_dense.compute_dtype, "bfloat16")
+
+    @pytest.mark.large
+    def test_smallest_preset(self):
+        # Test that our forward pass is stable!
+        image_batch = self.load_test_image()[None, ...].astype("float32")
+        image_batch = self.image_converter(image_batch)
+        self.run_preset_test(
+            cls=XceptionImageClassifier,
+            preset="xception_41_imagenet",
+            input_data=image_batch,
+            expected_output_shape=(1, 1000),
+            expected_labels=[85],
+        )
+
+    @pytest.mark.large
+    def test_saved_model(self):
+        self.run_model_saving_test(
+            cls=XceptionImageClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.images,
+        )
+
+    @pytest.mark.extra_large
+    def test_all_presets(self):
+        for preset in XceptionImageClassifier.presets:
+            self.run_preset_test(
+                cls=XceptionImageClassifier,
+                preset=preset,
+                init_kwargs={"num_classes": 2},
+                input_data=self.images,
+                expected_output_shape=(2, 2),
+            )
diff --git a/keras_hub/src/models/xception/xception_image_converter.py b/keras_hub/src/models/xception/xception_image_converter.py
@@ -0,0 +1,8 @@
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.layers.preprocessing.image_converter import ImageConverter
+from keras_hub.src.models.xception.xception_backbone import XceptionBackbone
+
+
+@keras_hub_export("keras_hub.layers.XceptionImageConverter")
+class XceptionImageConverter(ImageConverter):
+    backbone_cls = XceptionBackbone
diff --git a/keras_hub/src/models/xception/xception_presets.py b/keras_hub/src/models/xception/xception_presets.py
@@ -0,0 +1,14 @@
+"""Xception preset configurations."""
+
+backbone_presets = {
+    "xception_41_imagenet": {
+        "metadata": {
+            "description": (
+                "41-layer Xception model pre-trained on ImageNet 1k."
+            ),
+            "params": 20861480,
+            "path": "xception",
+        },
+        "kaggle_handle": "kaggle://keras/xception/keras/xception_41_imagenet/2",
+    },
+}
diff --git a/tools/checkpoint_conversion/convert_xception_checkpoints.py b/tools/checkpoint_conversion/convert_xception_checkpoints.py

Original file line number	Diff line number	Diff line change
`@@ -81,3 +81,6 @@`
`81`	`81`	`from keras_hub.src.models.whisper.whisper_audio_converter import (`
`82`	`82`	`WhisperAudioConverter,`
`83`	`83`	`)`
	`84`	`+from keras_hub.src.models.xception.xception_image_converter import (`
	`85`	`+ XceptionImageConverter,`
	`86`	`+)`