diff --git a/WIP/6-gated_pixelcnn_cropped/cropped_gated_pixelcnn.ipynb b/WIP/6-gated_pixelcnn_cropped/cropped_gated_pixelcnn.ipynb index 2b30544..5deefb7 100644 --- a/WIP/6-gated_pixelcnn_cropped/cropped_gated_pixelcnn.ipynb +++ b/WIP/6-gated_pixelcnn_cropped/cropped_gated_pixelcnn.ipynb @@ -1,624 +1,623 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "accelerator": "GPU", - "colab": { - "name": "cropped gated_pixelcnn.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "k1uZnxh4Xz9Z" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "gpu_devices = tf.config.experimental.list_physical_devices('GPU')\n", + "for device in gpu_devices: tf.config.experimental.set_memory_growth(device, True)\n", + "\n", + "import random as rn\n", + "import time\n", + "\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import tensorflow as tf\n", + "from tensorflow.keras.utils import Progbar\n", + "from tensorflow import keras\n", + "from tensorflow.keras import initializers\n", + "from tensorflow import nn" + ] }, - "cells": [ - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "k1uZnxh4Xz9Z", - "colab": {} - }, - "source": [ - "import tensorflow as tf\n", - "gpu_devices = tf.config.experimental.list_physical_devices('GPU')\n", - "for device in gpu_devices: tf.config.experimental.set_memory_growth(device, True)\n", - "\n", - "import random as rn\n", - "import time\n", - "\n", - "import matplotlib\n", - "import matplotlib.pyplot as 
plt\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "from tensorflow.keras.utils import Progbar\n", - "from tensorflow import keras\n", - "from tensorflow.keras import initializers\n", - "from tensorflow import nn" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "NN6vJl7eVnZ4", - "colab": {} - }, - "source": [ - "# Defining random seeds\n", - "random_seed = 42\n", - "tf.random.set_seed(random_seed)\n", - "np.random.seed(random_seed)\n", - "rn.seed(random_seed)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "8BnkhgCjVpJu", - "colab": {} - }, - "source": [ - "# Loading data\n", - "(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()\n", - "\n", - "height = 28\n", - "width = 28\n", - "n_channel = 1\n", - "\n", - "x_train = x_train.astype('float32') / 255.\n", - "x_test = x_test.astype('float32') / 255.\n", - "\n", - "x_train = x_train.reshape(x_train.shape[0], height, width, n_channel)\n", - "x_test = x_test.reshape(x_test.shape[0], height, width, n_channel)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "3ne-qY7JVZaB", - "colab": {} - }, - "source": [ - "def quantise(images, q_levels):\n", - " \"\"\"Quantise image into q levels\"\"\"\n", - " return (np.digitize(images, np.arange(q_levels) / q_levels) - 1).astype('float32')" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "3QVhnMymVrzc", - "colab": {} - }, - "source": [ - "# Quantise the input data in q levels\n", - "q_levels = 2\n", - "x_train_quantised = quantise(x_train, q_levels)\n", - "x_test_quantised = quantise(x_test, q_levels)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "ZObIXqzNGwmo", - 
"colab": {} - }, - "source": [ - "# Creating input stream using tf.data API\n", - "batch_size = 192\n", - "train_buf = 10000\n", - "\n", - "train_dataset = tf.data.Dataset.from_tensor_slices((x_train_quantised / (q_levels - 1),\n", - " x_train_quantised.astype('int32')))\n", - "train_dataset = train_dataset.shuffle(buffer_size=train_buf)\n", - "train_dataset = train_dataset.batch(batch_size)\n", - "\n", - "test_dataset = tf.data.Dataset.from_tensor_slices((x_test_quantised / (q_levels - 1),\n", - " x_test_quantised.astype('int32')))\n", - "test_dataset = test_dataset.batch(batch_size)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "75VTDkK8VZLA", - "colab": {} - }, - "source": [ - "class VerticalConv2D(keras.layers.Conv2D):\n", - " \"\"\"https://github.com/JesseFarebro/PixelCNNPP/blob/master/layers/VerticalConv2D.py\"\"\"\n", - " def __init__(self,\n", - " filters,\n", - " kernel_size,\n", - " **kwargs):\n", - " if not isinstance(kernel_size, tuple):\n", - " kernel_size = (kernel_size // 2 + 1, kernel_size)\n", - "\n", - " super(VerticalConv2D, self).__init__(filters, kernel_size, **kwargs)\n", - "\n", - " self.pad = tf.keras.layers.ZeroPadding2D(\n", - " (\n", - " (kernel_size[0] - 1, 0), # Top, Bottom\n", - " (kernel_size[1] // 2, kernel_size[1] // 2), # Left, Right\n", - " )\n", - " )\n", - "\n", - " def call(self, inputs):\n", - " inputs = self.pad(inputs)\n", - " output = super(VerticalConv2D, self).call(inputs)\n", - "\n", - " return output\n", - "\n", - "\n", - "class HorizontalConv2D(keras.layers.Conv2D):\n", - " def __init__(self,\n", - " filters,\n", - " kernel_size,\n", - " **kwargs):\n", - "\n", - " if not isinstance(kernel_size, tuple):\n", - " kernel_size = (kernel_size // 2 + 1,) * 2\n", - "\n", - " super(HorizontalConv2D, self).__init__(filters, kernel_size, **kwargs)\n", - " self.pad = tf.keras.layers.ZeroPadding2D(\n", - " (\n", - " (kernel_size[0] - 1, 0), # (Top, 
Bottom)\n", - " (kernel_size[1] - 1, 0), # (Left, Right)\n", - " )\n", - " )\n", - "\n", - " def call(self, inputs):\n", - " inputs = self.pad(inputs)\n", - " outputs = super(HorizontalConv2D, self).call(inputs)\n", - "\n", - " return outputs" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "g625fSNYR9I6", - "colab_type": "code", - "colab": {} - }, - "source": [ - "filters = 1\n", - "kernel_size = 3\n", - "vertical_conv = VerticalConv2D(filters=2 * filters,\n", - " kernel_size=kernel_size)" - ], - "execution_count": 0, - "outputs": [] + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "NN6vJl7eVnZ4" + }, + "outputs": [], + "source": [ + "# Defining random seeds\n", + "random_seed = 42\n", + "tf.random.set_seed(random_seed)\n", + "np.random.seed(random_seed)\n", + "rn.seed(random_seed)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "8BnkhgCjVpJu" + }, + "outputs": [], + "source": [ + "# Loading data\n", + "(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()\n", + "\n", + "height = 28\n", + "width = 28\n", + "n_channel = 1\n", + "\n", + "x_train = x_train.astype('float32') / 255.\n", + "x_test = x_test.astype('float32') / 255.\n", + "\n", + "x_train = x_train.reshape(x_train.shape[0], height, width, n_channel)\n", + "x_test = x_test.reshape(x_test.shape[0], height, width, n_channel)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "3ne-qY7JVZaB" + }, + "outputs": [], + "source": [ + "def quantise(images, q_levels):\n", + " \"\"\"Quantise image into q levels\"\"\"\n", + " return (np.digitize(images, np.arange(q_levels) / q_levels) - 1).astype('float32')" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": 
"3QVhnMymVrzc" + }, + "outputs": [], + "source": [ + "# Quantise the input data in q levels\n", + "q_levels = 2\n", + "x_train_quantised = quantise(x_train, q_levels)\n", + "x_test_quantised = quantise(x_test, q_levels)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "ZObIXqzNGwmo" + }, + "outputs": [], + "source": [ + "# Creating input stream using tf.data API\n", + "batch_size = 192\n", + "train_buf = 10000\n", + "\n", + "train_dataset = tf.data.Dataset.from_tensor_slices((x_train_quantised / (q_levels - 1),\n", + " x_train_quantised.astype('int32')))\n", + "train_dataset = train_dataset.shuffle(buffer_size=train_buf)\n", + "train_dataset = train_dataset.batch(batch_size)\n", + "\n", + "test_dataset = tf.data.Dataset.from_tensor_slices((x_test_quantised / (q_levels - 1),\n", + " x_test_quantised.astype('int32')))\n", + "test_dataset = test_dataset.batch(batch_size)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "75VTDkK8VZLA" + }, + "outputs": [], + "source": [ + "class VerticalConv2D(keras.layers.Conv2D):\n", + " \"\"\"https://github.com/JesseFarebro/PixelCNNPP/blob/master/layers/VerticalConv2D.py\"\"\"\n", + " def __init__(self,\n", + " filters,\n", + " kernel_size,\n", + " **kwargs):\n", + " if not isinstance(kernel_size, tuple):\n", + " kernel_size = (kernel_size // 2 + 1, kernel_size)\n", + "\n", + " super(VerticalConv2D, self).__init__(filters, kernel_size, **kwargs)\n", + "\n", + " self.pad = tf.keras.layers.ZeroPadding2D(\n", + " (\n", + " (kernel_size[0] - 1, 0), # Top, Bottom\n", + " (kernel_size[1] // 2, kernel_size[1] // 2), # Left, Right\n", + " )\n", + " )\n", + "\n", + " def call(self, inputs):\n", + " inputs = self.pad(inputs)\n", + " output = super(VerticalConv2D, self).call(inputs)\n", + "\n", + " return output\n", + "\n", + "\n", + "class HorizontalConv2D(keras.layers.Conv2D):\n", + " def 
__init__(self,\n", + " filters,\n", + " kernel_size,\n", + " **kwargs):\n", + "\n", + " if not isinstance(kernel_size, tuple):\n", + " kernel_size = (kernel_size // 2 + 1,) * 2\n", + "\n", + " super(HorizontalConv2D, self).__init__(filters, kernel_size, **kwargs)\n", + " self.pad = tf.keras.layers.ZeroPadding2D(\n", + " (\n", + " (kernel_size[0] - 1, 0), # (Top, Bottom)\n", + " (kernel_size[1] - 1, 0), # (Left, Right)\n", + " )\n", + " )\n", + "\n", + " def call(self, inputs):\n", + " inputs = self.pad(inputs)\n", + " outputs = super(HorizontalConv2D, self).call(inputs)\n", + "\n", + " return outputs" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "g625fSNYR9I6" + }, + "outputs": [], + "source": [ + "filters = 1\n", + "kernel_size = 3\n", + "vertical_conv = VerticalConv2D(filters=2 * filters,\n", + " kernel_size=kernel_size)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 }, + "colab_type": "code", + "id": "DqsfC9JnS8C8", + "outputId": "ef53f88a-c437-4aa4-b40f-0184e140b1b3" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "DqsfC9JnS8C8", - "colab_type": "code", - "outputId": "ef53f88a-c437-4aa4-b40f-0184e140b1b3", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "vertical_conv.kernel_size\n" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(2, 3)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 9 - } + "data": { + "text/plain": [ + "(2, 3)" ] + }, + "execution_count": 9, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "vertical_conv.kernel_size\n" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "PTUN4s52Nu3w" + }, + "outputs": [], 
+ "source": [ + "class GatedBlock(tf.keras.Model):\n", + " \"\"\" Gated block of the Gated PixelCNN.\"\"\"\n", + "\n", + " def __init__(self,\n", + " mask_type,\n", + " filters,\n", + " kernel_size):\n", + " super(GatedBlock, self).__init__(name='')\n", + "\n", + " self.mask_type = mask_type\n", + " self.vertical_conv = VerticalConv2D(filters=2 * filters,\n", + " kernel_size=kernel_size)\n", + " \n", + "\n", + " if mask_type =='A':\n", + " self.horizontal_conv = keras.layers.Conv2D(filters=2 * filters, \n", + " kernel_size=1)\n", + "\n", + " else: \n", + " self.horizontal_conv = HorizontalConv2D(filters=2 * filters,\n", + " kernel_size=kernel_size)\n", + "\n", + " self.padding_A = keras.layers.ZeroPadding2D(padding=(0, (1,0)))\n", + " self.cropping_A = keras.layers.Cropping2D(cropping=(0, (0, 1)))\n", + "\n", + " self.padding = keras.layers.ZeroPadding2D(padding=((1,0),0))\n", + " self.cropping = keras.layers.Cropping2D(cropping=((0, 1), 0))\n", + "\n", + " self.v_to_h_conv = keras.layers.Conv2D(filters=2 * filters, kernel_size=1)\n", + "\n", + " self.horizontal_output = keras.layers.Conv2D(filters=filters, kernel_size=1)\n", + "\n", + " def _gate(self, x):\n", + " tanh_preactivation, sigmoid_preactivation = tf.split(x, 2, axis=-1)\n", + " return tf.nn.tanh(tanh_preactivation) * tf.nn.sigmoid(sigmoid_preactivation)\n", + "\n", + " def call(self, input_tensor):\n", + " v = input_tensor[0]\n", + " h = input_tensor[1]\n", + "\n", + " vertical_preactivation = self.vertical_conv(v) # NxN\n", + "\n", + " # Shifting feature map down to ensure causality\n", + " v_to_h = self.padding(vertical_preactivation)\n", + " v_to_h = self.cropping(v_to_h)\n", + " v_to_h = self.v_to_h_conv(v_to_h) # 1x1\n", + "\n", + " horizontal_preactivation = self.horizontal_conv(h) # 1xN\n", + " if self.mask_type == 'A':\n", + " horizontal_preactivation = self.padding_A(horizontal_preactivation)\n", + " horizontal_preactivation = self.cropping_A(horizontal_preactivation)\n", + " \n", + " \n", + " 
v_out = self._gate(vertical_preactivation)\n", + "\n", + " horizontal_preactivation = horizontal_preactivation + v_to_h\n", + " h_activated = self._gate(horizontal_preactivation)\n", + " h_activated = self.horizontal_output(h_activated)\n", + "\n", + " if self.mask_type == 'A':\n", + " h_out = h_activated\n", + " elif self.mask_type == 'B':\n", + " h_out = h + h_activated\n", + "\n", + " return v_out, h_out" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "WB57YufrVxn2" + }, + "outputs": [], + "source": [ + "# Create Gated PixelCNN model\n", + "inputs = keras.layers.Input(shape=(height, width, n_channel))\n", + "v, h = GatedBlock(mask_type='A', filters=64, kernel_size=3)([inputs, inputs])\n", + "\n", + "for i in range(7):\n", + " v, h = GatedBlock(mask_type='B', filters=64, kernel_size=3)([v, h])\n", + "\n", + "x = keras.layers.Activation(activation='relu')(h)\n", + "x = keras.layers.Conv2D(filters=128, kernel_size=1, strides=1)(x)\n", + "\n", + "x = keras.layers.Activation(activation='relu')(x)\n", + "x = keras.layers.Conv2D(filters=q_levels, kernel_size=1, strides=1)(x)\n", + "\n", + "gated_pixelcnn = tf.keras.Model(inputs=inputs, outputs=x)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "_LnzHUaqV77d" + }, + "outputs": [], + "source": [ + "# Prepare optimizer and loss function\n", + "lr_decay = 0.999995\n", + "learning_rate = 1e-3\n", + "optimizer = keras.optimizers.Adam(lr=learning_rate)\n", + "\n", + "compute_loss = keras.losses.CategoricalCrossentropy(from_logits=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "CsAgEKVzLCJD" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def train_step(batch_x, batch_y):\n", + " with tf.GradientTape() as ae_tape:\n", + " logits = gated_pixelcnn(batch_x, training=True)\n", + "\n", + " 
loss = compute_loss(tf.squeeze(tf.one_hot(batch_y, q_levels)), logits)\n", + "\n", + " gradients = ae_tape.gradient(loss, gated_pixelcnn.trainable_variables)\n", + " gradients, _ = tf.clip_by_global_norm(gradients, 1.0)\n", + " optimizer.apply_gradients(zip(gradients, gated_pixelcnn.trainable_variables))\n", + "\n", + " return loss" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 476 }, + "colab_type": "code", + "id": "NoEPrfwQNM-s", + "outputId": "4a9422c4-8ce1-4310-8783-882be0c7c924" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "PTUN4s52Nu3w", - "colab": {} - }, - "source": [ - "class GatedBlock(tf.keras.Model):\n", - " \"\"\" Gated block of the Gated PixelCNN.\"\"\"\n", - "\n", - " def __init__(self,\n", - " mask_type,\n", - " filters,\n", - " kernel_size):\n", - " super(GatedBlock, self).__init__(name='')\n", - "\n", - " self.mask_type = mask_type\n", - " self.vertical_conv = VerticalConv2D(filters=2 * filters,\n", - " kernel_size=kernel_size)\n", - " \n", - "\n", - " if mask_type =='A':\n", - " self.horizontal_conv = keras.layers.Conv2D(filters=2 * filters, \n", - " kernel_size=1)\n", - "\n", - " else: \n", - " self.horizontal_conv = HorizontalConv2D(filters=2 * filters,\n", - " kernel_size=kernel_size)\n", - "\n", - " self.padding_A = keras.layers.ZeroPadding2D(padding=(0, (1,0)))\n", - " self.cropping_A = keras.layers.Cropping2D(cropping=(0, (0, 1)))\n", - "\n", - " self.padding = keras.layers.ZeroPadding2D(padding=((1,0),0))\n", - " self.cropping = keras.layers.Cropping2D(cropping=((0, 1), 0))\n", - "\n", - " self.v_to_h_conv = keras.layers.Conv2D(filters=2 * filters, kernel_size=1)\n", - "\n", - " self.horizontal_output = keras.layers.Conv2D(filters=filters, kernel_size=1)\n", - "\n", - " def _gate(self, x):\n", - " tanh_preactivation, sigmoid_preactivation = tf.split(x, 2, axis=-1)\n", - " return 
tf.nn.tanh(tanh_preactivation) * tf.nn.sigmoid(sigmoid_preactivation)\n", - "\n", - " def call(self, input_tensor):\n", - " v = input_tensor[0]\n", - " h = input_tensor[1]\n", - "\n", - " vertical_preactivation = self.vertical_conv(v) # NxN\n", - "\n", - " # Shifting feature map down to ensure causality\n", - " v_to_h = self.padding(vertical_preactivation)\n", - " v_to_h = self.cropping(v_to_h)\n", - " v_to_h = self.v_to_h_conv(v_to_h) # 1x1\n", - "\n", - " horizontal_preactivation = self.horizontal_conv(h) # 1xN\n", - " if self.mask_type == 'A':\n", - " horizontal_preactivation = self.padding_A(horizontal_preactivation)\n", - " horizontal_preactivation = self.cropping_A(horizontal_preactivation)\n", - " \n", - " \n", - " v_out = self._gate(vertical_preactivation)\n", - "\n", - " horizontal_preactivation = horizontal_preactivation + v_to_h\n", - " h_activated = self._gate(horizontal_preactivation)\n", - " h_activated = self.horizontal_output(h_activated)\n", - "\n", - " if self.mask_type == 'A':\n", - " h_out = h_activated\n", - " elif self.mask_type == 'B':\n", - " h_out = h + h_activated\n", - "\n", - " return v_out, h_out" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "WB57YufrVxn2", - "colab": {} - }, - "source": [ - "# Create Gated PixelCNN model\n", - "inputs = keras.layers.Input(shape=(height, width, n_channel))\n", - "v, h = GatedBlock(mask_type='A', filters=64, kernel_size=3)([inputs, inputs])\n", - "\n", - "for i in range(7):\n", - " v, h = GatedBlock(mask_type='B', filters=64, kernel_size=3)([v, h])\n", - "\n", - "x = keras.layers.Activation(activation='relu')(h)\n", - "x = keras.layers.Conv2D(filters=128, kernel_size=1, strides=1)(x)\n", - "\n", - "x = keras.layers.Activation(activation='relu')(x)\n", - "x = keras.layers.Conv2D(filters=q_levels, kernel_size=1, strides=1)(x)\n", - "\n", - "gated_pixelcnn = tf.keras.Model(inputs=inputs, outputs=x)" - ], - "execution_count": 0, 
- "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "_LnzHUaqV77d", - "colab": {} - }, - "source": [ - "# Prepare optimizer and loss function\n", - "lr_decay = 0.999995\n", - "learning_rate = 1e-3\n", - "optimizer = keras.optimizers.Adam(lr=learning_rate)\n", - "\n", - "compute_loss = keras.losses.CategoricalCrossentropy(from_logits=True)" - ], - "execution_count": 0, - "outputs": [] + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/50\n", + "313/313 [==============================] - 167s 533ms/step - loss: 0.1132\n", + "Epoch 2/50\n", + "313/313 [==============================] - 160s 512ms/step - loss: 0.0888\n", + "Epoch 3/50\n", + "188/313 [=================>............]" + ] }, { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "CsAgEKVzLCJD", - "colab": {} - }, - "source": [ - "@tf.function\n", - "def train_step(batch_x, batch_y):\n", - " with tf.GradientTape() as ae_tape:\n", - " logits = gated_pixelcnn(batch_x, training=True)\n", - "\n", - " loss = compute_loss(tf.squeeze(tf.one_hot(batch_y, q_levels)), logits)\n", - "\n", - " gradients = ae_tape.gradient(loss, gated_pixelcnn.trainable_variables)\n", - " gradients, _ = tf.clip_by_global_norm(gradients, 1.0)\n", - " optimizer.apply_gradients(zip(gradients, gated_pixelcnn.trainable_variables))\n", - "\n", - " return loss" - ], - "execution_count": 0, - "outputs": [] + "ename": "KeyboardInterrupt", + "evalue": "ignored", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtrain_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch_x\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mbatch_y\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0mprogbar\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'loss'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/utils/generic_utils.py\u001b[0m in \u001b[0;36madd\u001b[0;34m(self, n, values)\u001b[0m\n\u001b[1;32m 676\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 677\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0madd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 678\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_seen_so_far\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 679\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 680\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/utils/generic_utils.py\u001b[0m in \u001b[0;36mupdate\u001b[0;34m(self, current, values, finalize)\u001b[0m\n\u001b[1;32m 638\u001b[0m \u001b[0minfo\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m' - %s:'\u001b[0m \u001b[0;34m%\u001b[0m 
\u001b[0mk\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 639\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 640\u001b[0;31m \u001b[0mavg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 641\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mabs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mavg\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1e-3\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[0minfo\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m' %.4f'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mavg\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m<__array_function__ internals>\u001b[0m in \u001b[0;36mmean\u001b[0;34m(*args, **kwargs)\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numpy/core/fromnumeric.py\u001b[0m in \u001b[0;36mmean\u001b[0;34m(a, axis, dtype, out, keepdims)\u001b[0m\n\u001b[1;32m 3333\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3334\u001b[0m return _methods._mean(a, axis=axis, dtype=dtype,\n\u001b[0;32m-> 3335\u001b[0;31m out=out, **kwargs)\n\u001b[0m\u001b[1;32m 3336\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3337\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py\u001b[0m in \u001b[0;36m_mean\u001b[0;34m(a, axis, dtype, out, keepdims)\u001b[0m\n\u001b[1;32m 133\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 134\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_mean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkeepdims\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 135\u001b[0;31m \u001b[0marr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0masanyarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 136\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[0mis_float16_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numpy/core/_asarray.py\u001b[0m in \u001b[0;36masanyarray\u001b[0;34m(a, dtype, order)\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 137\u001b[0m \"\"\"\n\u001b[0;32m--> 138\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msubok\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 139\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "# Training loop\n", + "n_epochs = 50\n", + "n_iter = int(np.ceil(x_train_quantised.shape[0] / batch_size))\n", + "for epoch in range(n_epochs):\n", + " progbar = Progbar(n_iter)\n", + " print('Epoch {:}/{:}'.format(epoch + 1, n_epochs))\n", + "\n", + " for i_iter, (batch_x, batch_y) in enumerate(train_dataset):\n", + " optimizer.lr = optimizer.lr * lr_decay\n", + " loss = train_step(batch_x, batch_y)\n", + "\n", + " progbar.add(1, values=[('loss', loss)])" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 }, + "colab_type": "code", + "id": "ue0vZbitSNmz", + "outputId": "42de5250-7404-4fb2-daff-e5283be9f53e" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "NoEPrfwQNM-s", - "outputId": "4a9422c4-8ce1-4310-8783-882be0c7c924", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 476 - } - }, - "source": [ - "# Training loop\n", - "n_epochs = 50\n", - "n_iter = int(np.ceil(x_train_quantised.shape[0] / batch_size))\n", - "for epoch in range(n_epochs):\n", - " progbar = Progbar(n_iter)\n", - " print('Epoch {:}/{:}'.format(epoch + 1, n_epochs))\n", - "\n", - " for i_iter, (batch_x, batch_y) in enumerate(train_dataset):\n", - " optimizer.lr = optimizer.lr * lr_decay\n", - " loss = train_step(batch_x, batch_y)\n", - "\n", - " progbar.add(1, values=[('loss', loss)])" - ], - "execution_count": 0, - 
"outputs": [ - { - "output_type": "stream", - "text": [ - "Epoch 1/50\n", - "313/313 [==============================] - 167s 533ms/step - loss: 0.1132\n", - "Epoch 2/50\n", - "313/313 [==============================] - 160s 512ms/step - loss: 0.0888\n", - "Epoch 3/50\n", - "188/313 [=================>............]" - ], - "name": "stdout" - }, - { - "output_type": "error", - "ename": "KeyboardInterrupt", - "evalue": "ignored", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtrain_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch_x\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_y\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0mprogbar\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'loss'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/utils/generic_utils.py\u001b[0m in \u001b[0;36madd\u001b[0;34m(self, n, values)\u001b[0m\n\u001b[1;32m 676\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 677\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0madd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mvalues\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 678\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_seen_so_far\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 679\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 680\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/utils/generic_utils.py\u001b[0m in \u001b[0;36mupdate\u001b[0;34m(self, current, values, finalize)\u001b[0m\n\u001b[1;32m 638\u001b[0m \u001b[0minfo\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m' - %s:'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 639\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 640\u001b[0;31m \u001b[0mavg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 641\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mabs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mavg\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1e-3\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[0minfo\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m' %.4f'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mavg\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m<__array_function__ internals>\u001b[0m in \u001b[0;36mmean\u001b[0;34m(*args, **kwargs)\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numpy/core/fromnumeric.py\u001b[0m in \u001b[0;36mmean\u001b[0;34m(a, axis, dtype, out, keepdims)\u001b[0m\n\u001b[1;32m 3333\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3334\u001b[0m return _methods._mean(a, axis=axis, dtype=dtype,\n\u001b[0;32m-> 3335\u001b[0;31m out=out, **kwargs)\n\u001b[0m\u001b[1;32m 3336\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3337\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numpy/core/_methods.py\u001b[0m in \u001b[0;36m_mean\u001b[0;34m(a, axis, dtype, out, keepdims)\u001b[0m\n\u001b[1;32m 133\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 134\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_mean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mkeepdims\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 135\u001b[0;31m \u001b[0marr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0masanyarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 136\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[0mis_float16_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/numpy/core/_asarray.py\u001b[0m in \u001b[0;36masanyarray\u001b[0;34m(a, dtype, order)\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 137\u001b[0m \"\"\"\n\u001b[0;32m--> 138\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msubok\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 139\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "nll : 0.08501224964857101 nats\n", + "bits/dim : 0.12264675098280793\n" + ] + } + ], + "source": [ + "# Test set performance\n", + "test_loss = []\n", + "for batch_x, batch_y in test_dataset:\n", + " logits = gated_pixelcnn(batch_x, training=False)\n", + "\n", + " # Calculate cross-entropy (= negative log-likelihood)\n", + " loss = 
compute_loss(tf.squeeze(tf.one_hot(batch_y, q_levels)), logits)\n", + "\n", + " test_loss.append(loss)\n", + "print('nll : {:} nats'.format(np.array(test_loss).mean()))\n", + "print('bits/dim : {:}'.format(np.array(test_loss).mean() / np.log(2)))" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 581 }, + "colab_type": "code", + "id": "-Ia9VXYySkuW", + "outputId": "ad1075b1-70cc-4012-851e-925cae32ac2c" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "ue0vZbitSNmz", - "outputId": "42de5250-7404-4fb2-daff-e5283be9f53e", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 51 - } - }, - "source": [ - "# Test set performance\n", - "test_loss = []\n", - "for batch_x, batch_y in test_dataset:\n", - " logits = gated_pixelcnn(batch_x, training=False)\n", - "\n", - " # Calculate cross-entropy (= negative log-likelihood)\n", - " loss = compute_loss(tf.squeeze(tf.one_hot(batch_y, q_levels)), logits)\n", - "\n", - " test_loss.append(loss)\n", - "print('nll : {:} nats'.format(np.array(test_loss).mean()))\n", - "print('bits/dim : {:}'.format(np.array(test_loss).mean() / np.log(2)))" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "nll : 0.08501224964857101 nats\n", - "bits/dim : 0.12264675098280793\n" - ], - "name": "stdout" - } + "data": { + "image/png": "\n", + "text/plain": [ + "
" ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "# Generating new images\n", + "samples = np.zeros((100, height, width, n_channel), dtype='float32')\n", + "for i in range(height):\n", + " for j in range(width):\n", + " logits = gated_pixelcnn(samples)\n", + " next_sample = tf.random.categorical(logits[:, i, j, :], 1)\n", + " samples[:, i, j, 0] = (next_sample.numpy() / (q_levels - 1))[:, 0]\n", + "\n", + "fig = plt.figure(figsize=(10, 10))\n", + "for i in range(100):\n", + " ax = fig.add_subplot(10, 10, i + 1)\n", + " ax.matshow(samples[i, :, :, 0], cmap=matplotlib.cm.binary)\n", + " plt.xticks(np.array([]))\n", + " plt.yticks(np.array([]))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 153 }, + "colab_type": "code", + "id": "KBuWx-FtSouR", + "outputId": "3498bba6-068c-413a-b1a9-74e98192b543" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "-Ia9VXYySkuW", - "outputId": "ad1075b1-70cc-4012-851e-925cae32ac2c", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 581 - } - }, - "source": [ - "# Generating new images\n", - "samples = np.zeros((100, height, width, n_channel), dtype='float32')\n", - "for i in range(height):\n", - " for j in range(width):\n", - " logits = gated_pixelcnn(samples)\n", - " next_sample = tf.random.categorical(logits[:, i, j, :], 1)\n", - " samples[:, i, j, 0] = (next_sample.numpy() / (q_levels - 1))[:, 0]\n", - "\n", - "fig = plt.figure(figsize=(10, 10))\n", - "for i in range(100):\n", - " ax = fig.add_subplot(10, 10, i + 1)\n", - " ax.matshow(samples[i, :, :, 0], cmap=matplotlib.cm.binary)\n", - " plt.xticks(np.array([]))\n", - " plt.yticks(np.array([]))\n", - "plt.show()" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "display_data", - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "tags": [] - } - } + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAj8AAABECAYAAABu1lQcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAADy0lEQVR4nO3cQVLkOBAF0PTEHIFeTx2C+58A7kCvhztoFkwHBOFy2WW7UCrfW3Y4OpSWbL5Shqm1FgAAVfz10wMAAHgk4QcAKEX4AQBKEX4AgFKEHwCgFOEHAChF+AEAShF+AIBShB8AoJS/t1z89PTULpfLSUM519vbW7y/v09L12SuLyLi9fX1vbX2a+mazDWumcMINfbOs/ghc43W6afRa8xcX8T1Z3FT+LlcLvHy8nLcqB7o+fn55jWZ64uImKbp961rMte4Zg4j1Ng7z+KHzDVap59GrzFzfRHXn0XHXgBAKcIPAFDKpmMvtpum+ePU1tqDRwIAROj8AADF6Pwc5FqH59b1FTpAo9Q6N8fZawLGNE2T99MCnR8AoBSdnwNs7fpAT0bpzO2V8T58f/dkGjvb3XvC8If18Un4OcnSIvu6IDO+cNcaKRSOVMtRvt6TzOv3+9xmeCbPWI9Z5jPLOPfyzjmXYy8AoJRTOz8VPxDdWt/o9yO7W7uvDF2Ca/buLFtrdqc/4Oj3apajEWttbI/OCzo/AEApp3R+lhL6kem9lx3K2nFU3bn0Mk9b+LCwhuzd6UxjvdeanydZ78O9489a7x9r369L1+29B6eEn7Uf++6VfeGPLGvQO2rcPf+NjaOCWtY5zuqMD317/nh4y/rq+Xm75lZ92epZ0uO7wrEXAFDKw3/VfW+a7Xmnck321jrzvs5hjzubORmfnzNkma+IXGP9SVlOAqrM59o65+br+1zO/V97u306PwBAKf7I4cmqpPwR7NmpzO1QsuxEtxp1TY82T3Myd6GXOgRz/9ZbXZnv/dHu+cD76PeOzg8AUEqazk+2bxXO/BW9nmWbp7XW1tJrB+iseRlpjjPZ8ht72Tt1o66xUes6osO19K3PUboPP9kf3K9GXewjM2efRnoWM1nzg+Deuel1ffe2edjLs/Ohp7/z59gLACil+85P1rSfddx7Za575OOgI8bTW017ZKwl45jvtfWYuXdZxnmEpVp7ug86PwBAKcIPAFCK8AMAlCL8AAClCD8AQCnCDwBQivADAJQi/AAApQg/AEApwg8AUIrwAwCUIvwAAKUIPwBAKcIPAFCK8AMAlCL8AAClCD8AQCnCDwBQivADAJQi/AAApQg/AEApwg8AUIrwAwCUMrXW1l88Tf9GxO/zhnOqf1prv5YuSF5fxPg13qwvQo0JjL5OI8av0Tr93+g1Jq8v4kqNm8IPAEB2jr0AgFKEHwCgFOEHAChF+AEAShF+AIBShB8AoBThBwAoRfgBAEoRfgCAUv4Dzge+SeXbn58AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" }, { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "KBuWx-FtSouR", - "outputId": "3498bba6-068c-413a-b1a9-74e98192b543", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 153 - } - }, - "source": [ - "# Filling occluded images\n", - "occlude_start_row = 14\n", - "num_generated_images = 10\n", - "samples = np.copy(x_test_quantised[0:num_generated_images, :, :, :])\n", - "samples = samples / (q_levels - 1)\n", - "samples[:, occlude_start_row:, :, :] = 0\n", - "\n", - "fig = plt.figure(figsize=(10, 10))\n", - "\n", - "for i in range(10):\n", - " ax = fig.add_subplot(1, 10, i + 1)\n", - " ax.matshow(samples[i, :, :, 0], cmap=matplotlib.cm.binary)\n", - " plt.xticks(np.array([]))\n", - " plt.yticks(np.array([]))\n", - "\n", - "for i in range(occlude_start_row, height):\n", - " for j in range(width):\n", - " logits = gated_pixelcnn(samples)\n", - " next_sample = tf.random.categorical(logits[:, i, j, :], 1)\n", - " samples[:, i, j, 0] = (next_sample.numpy() / (q_levels - 1))[:, 0]\n", - "\n", - "fig = plt.figure(figsize=(10, 10))\n", - "\n", - "for i in range(10):\n", - " ax = fig.add_subplot(1, 10, i + 1)\n", - " ax.matshow(samples[i, :, :, 0], cmap=matplotlib.cm.binary)\n", - " plt.xticks(np.array([]))\n", - " plt.yticks(np.array([]))\n", - "plt.show()" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "display_data", - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAj8AAABECAYAAABu1lQcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAADy0lEQVR4nO3cQVLkOBAF0PTEHIFeTx2C+58A7kCvhztoFkwHBOFy2WW7UCrfW3Y4OpSWbL5Shqm1FgAAVfz10wMAAHgk4QcAKEX4AQBKEX4AgFKEHwCgFOEHAChF+AEAShF+AIBShB8AoJS/t1z89PTULpfLSUM519vbW7y/v09L12SuLyLi9fX1vbX2a+mazDWumcMINfbOs/ghc43W6afRa8xcX8T1Z3FT+LlcLvHy8nLcqB7o+fn55jWZ64uImKbp961rMte4Zg4j1Ng7z+KHzDVap59GrzFzfRHXn0XHXgBAKcIPAFDKpmMvtpum+ePU1tqDRwIAROj8AADF6Pwc5FqH59b1FTpAo9Q6N8fZawLGNE2T99MCnR8AoBSdnwNs7fpAT0bpzO2V8T58f/dkGjvb3XvC8If18Un4OcnSIvu6IDO+cNcaKRSOVMtRvt6TzOv3+9xmeCbPWI9Z5jPLOPfyzjmXYy8AoJRTOz8VPxDdWt/o9yO7W7uvDF2Ca/buLFtrdqc/4Oj3apajEWttbI/OCzo/AEApp3R+lhL6kem9lx3K2nFU3bn0Mk9b+LCwhuzd6UxjvdeanydZ78O9489a7x9r369L1+29B6eEn7Uf++6VfeGPLGvQO2rcPf+NjaOCWtY5zuqMD317/nh4y/rq+Xm75lZ92epZ0uO7wrEXAFDKw3/VfW+a7Xmnck321jrzvs5hjzubORmfnzNkma+IXGP9SVlOAqrM59o65+br+1zO/V97u306PwBAKf7I4cmqpPwR7NmpzO1QsuxEtxp1TY82T3Myd6GXOgRz/9ZbXZnv/dHu+cD76PeOzg8AUEqazk+2bxXO/BW9nmWbp7XW1tJrB+iseRlpjjPZ8ht72Tt1o66xUes6osO19K3PUboPP9kf3K9GXewjM2efRnoWM1nzg+Deuel1ffe2edjLs/Ohp7/z59gLACil+85P1rSfddx7Za575OOgI8bTW017ZKwl45jvtfWYuXdZxnmEpVp7ug86PwBAKcIPAFCK8AMAlCL8AAClCD8AQCnCDwBQivADAJQi/AAApQg/AEApwg8AUIrwAwCUIvwAAKUIPwBAKcIPAFCK8AMAlCL8AAClCD8AQCnCDwBQivADAJQi/AAApQg/AEApwg8AUIrwAwCUMrXW1l88Tf9GxO/zhnOqf1prv5YuSF5fxPg13qwvQo0JjL5OI8av0Tr93+g1Jq8v4kqNm8IPAEB2jr0AgFKEHwCgFOEHAChF+AEAShF+AIBShB8AoBThBwAoRfgBAEoRfgCAUv4Dzge+SeXbn58AAAAASUVORK5CYII=\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "tags": [] - } - }, - { - "output_type": "display_data", - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAj8AAABECAYAAABu1lQcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAFtklEQVR4nO3dS5LbNhAAUDCVI9jrzCF8/xPYd7DXmTswC8U18hRFkRQ/3ej3li55Ck0AVKMBUsM4jg0AoIq/rm4AAMCZJD8AQCmSHwCgFMkPAFCK5AcAKEXyAwCUIvkBAEqR/AAApUh+AIBS/l7z4S9fvoxvb28HNeVYP3/+bO/v78PcZzLH11prP378eB/H8evcZzLHuKQPWxNjdObiTeYYjdMPvceYOb7WHs/FVcnP29tb+/79+36tOtG3b9+efiZzfK21NgzDr2efyRzjkj5sTYzRmYs3mWM0Tj/0HmPm+Fp7PBdtewEApUh+AIBSVm17sd4wTG+njuN4cksAgNZUfgCAYlR+dvKowvPs8xUqQL3EOtXH2WMC+jQMg/vTDJUfAKAUlZ8drK36QCS9VOZelfE6fL73ZGo7623dYfjN+Pgg+TnI3CC7H5AZb7hL9ZQU9hTLXu6vSebx+7lvM8zJI8Zjlv7M0s5Xueccy7YXAFDKoZWfigdE18bX+/XI7tnqK0OV4JFXV5bjOFqdXmDv+2qWrRFjrW9n5wsqPwBAKYdUfuYy9D2z9ygrlKXtqLpyidJPazhYWEP26nSmtm615Psk63XY2v6s8f629P4697lXr8Ehyc/Sw76vyj7we5Y10dur3ZHfsbFXopa1j7M64qBv5MPDa8ZX5Pn2yLP4ssUzJ+K9wrYXAFDK6Y+6v5rNRl6pPJK9tM60+z6MuLKZknH+HCFLf7WWq61XyrITUKU/l8Y51V+f+3Lqb71a7VP5AQBK8ZLDg1XJ8nvwykplaoWSZSW6Vq9jurd+mpK5Cj1XIZj6t2hxZb72e9tywHvv+47KDwBQSprKT7azCkc+ohdZtn5aamksUStAR/VLT32cyZon9rJX6nodY73GtUeFa+6sz17CJz/ZJ+69Xgd7z/TZh57mYiZLvgi29k3U8R1t8fAqc+cm0nv+bHsBAKWErfxsXeVkXh30Tr/FUPlt1JlX4EsP/G79W5yj4rWPOO9UfgCAUkJUfvbICiNn05Hbtpe5R06X/v8K1ymbCtW6zHGsrQZliXXq0eb7sz8RKwncHHVYee+xe1ny02u5tuqkXDPgH70ZuYeDjVE9e1Pqlr9FTGc8KXOljHFVnDN7vgH/iOtn2wsAKOXQyk+kx9q4zlzfRV+lrn1nRdQ4ftuyGutl7kXvm71Fn1vPbG1/L+O1J0v65Ow3YKv8AAClhDjw3JPqq461q7WMq9JeVqJR28U+en3b+iMVYmQ/Kj8AQCkqP+xqSVXk0Wd6W7n1Fg85ZKymftZLdZXnrvq1e8kPu5r7Yc8l/y+7XuKAK1Q7hM+fzuxX214AQClhKz8y+9yWvoU1Qj/3+sJNbnrYBnpV9BeIrumjqDHwp2dvGr96Xqr8AAClHFr5WfvYs4y+hqz9nLXdVWWpPO6p54cJeoihgq0PvZzdv6dse0UocXG9aDevil+OW2T6QrXQ6o++unk2tq+8TmvvEWvzgSOSJdteAEApoQ48Rz+Ut6dMq2nq6L1Ca37lU+l74d7W6siZ1+no7asj/77KDwBQymmVnzWHn4dh6CbL35K9R4z9ld8Jih7PlIhtvsLUdYhYHXr2WG3vIvbJGku+HyIckj3aHv0YuVIWqQ9VfgCAUk4/8/M5y3uU6UbOXqeseXLo2WezxT4l60o08zXfw5L478fx1deresVniUzXYe
3rUXr55fqs98tnts7PMypElx94fvYYfNZEYK69WbYSWptu11yfRI1jqc/tzzbuzhC9j/XZTebrcN/2tYnQkrijJE2vJu9Xz8W5ZHVLbGc+CGTbCwAoZViTUQ3D8G9r7ddxzTnUP+M4fp37QPL4Wus/xqfxtSbGBHofp631H6Nx+r/eY0weX2sPYlyV/AAAZGfbCwAoRfIDAJQi+QEASpH8AAClSH4AgFIkPwBAKZIfAKAUyQ8AUIrkBwAo5T903wuZqmcetwAAAABJRU5ErkJggg==\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "tags": [] - } - } + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAj8AAABECAYAAABu1lQcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAFtklEQVR4nO3dS5LbNhAAUDCVI9jrzCF8/xPYd7DXmTswC8U18hRFkRQ/3ej3li55Ck0AVKMBUsM4jg0AoIq/rm4AAMCZJD8AQCmSHwCgFMkPAFCK5AcAKEXyAwCUIvkBAEqR/AAApUh+AIBS/l7z4S9fvoxvb28HNeVYP3/+bO/v78PcZzLH11prP378eB/H8evcZzLHuKQPWxNjdObiTeYYjdMPvceYOb7WHs/FVcnP29tb+/79+36tOtG3b9+efiZzfK21NgzDr2efyRzjkj5sTYzRmYs3mWM0Tj/0HmPm+Fp7PBdtewEApUh+AIBSVm17sd4wTG+njuN4cksAgNZUfgCAYlR+dvKowvPs8xUqQL3EOtXH2WMC+jQMg/vTDJUfAKAUlZ8drK36QCS9VOZelfE6fL73ZGo7623dYfjN+Pgg+TnI3CC7H5AZb7hL9ZQU9hTLXu6vSebx+7lvM8zJI8Zjlv7M0s5Xueccy7YXAFDKoZWfigdE18bX+/XI7tnqK0OV4JFXV5bjOFqdXmDv+2qWrRFjrW9n5wsqPwBAKYdUfuYy9D2z9ygrlKXtqLpyidJPazhYWEP26nSmtm615Psk63XY2v6s8f629P4697lXr8Ehyc/Sw76vyj7we5Y10dur3ZHfsbFXopa1j7M64qBv5MPDa8ZX5Pn2yLP4ssUzJ+K9wrYXAFDK6Y+6v5rNRl6pPJK9tM60+z6MuLKZknH+HCFLf7WWq61XyrITUKU/l8Y51V+f+3Lqb71a7VP5AQBK8ZLDg1XJ8nvwykplaoWSZSW6Vq9jurd+mpK5Cj1XIZj6t2hxZb72e9tywHvv+47KDwBQSprKT7azCkc+ohdZtn5aamksUStAR/VLT32cyZon9rJX6nodY73GtUeFa+6sz17CJz/ZJ+69Xgd7z/TZh57mYiZLvgi29k3U8R1t8fAqc+cm0nv+bHsBAKWErfxsXeVkXh30Tr/FUPlt1JlX4EsP/G79W5yj4rWPOO9UfgCAUkJUfvbICiNn05Hbtpe5R06X/v8K1ymbCtW6zHGsrQZliXXq0eb7sz8RKwncHHVYee+xe1ny02u5tuqkXDPgH70ZuYeDjVE9e1Pqlr9FTGc8KXOljHFVnDN7vgH/iOtn2wsAKOXQyk+kx9q4zlzfRV+lrn1nRdQ4ftuyGutl7kXvm71Fn1vPbG1/L+O1J0v65Ow3YKv8AAClhDjw3JPqq461q7WMq9JeVqJR28U+en3b+iMVYmQ/Kj8AQCkqP+xqSVXk0Wd6W7n1Fg85ZKymftZLdZXnrvq1e8kPu5r7Yc8l/y+7XuKAK1Q7hM+fzuxX214AQClhKz8y+9yWvoU1Qj/3+sJNbnrYBnpV9BeIrumjqDHwp2dvGr96Xqr8AAClHFr5WfvYs4y+hqz9nLXdVWWpPO6p54cJeoihgq0PvZzdv6dse0UocXG9aDevil+OW2T6QrXQ6o++unk2tq+8TmvvEWvzgSOSJdteAEApoQ48Rz+Ut6dMq2nq6L1Ca37lU+l74d7W6siZ1+no7asj/77KDwBQymmVnzWHn4dh6CbL35K9R4z9ld8Jih7PlIhtvsLUdYhYHXr2WG3vIvbJGku+HyIckj3aHv0YuVIWqQ9VfgCAUk4/8/M5y3uU6UbOXqeseXLo2WezxT4l60o08zXfw5L478fx1deresVniUzXYe3rUXr55fqs98tnts7PMypElx94fvYYfNZEYK69
WbYSWptu11yfRI1jqc/tzzbuzhC9j/XZTebrcN/2tYnQkrijJE2vJu9Xz8W5ZHVLbGc+CGTbCwAoZViTUQ3D8G9r7ddxzTnUP+M4fp37QPL4Wus/xqfxtSbGBHofp631H6Nx+r/eY0weX2sPYlyV/AAAZGfbCwAoRfIDAJQi+QEASpH8AAClSH4AgFIkPwBAKZIfAKAUyQ8AUIrkBwAo5T903wuZqmcetwAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" ] - }, - { - "cell_type": "code", - "metadata": { - "id": "W7bzeLMjPNcI", - "colab_type": "code", - "colab": {} - }, - "source": [ - "" - ], - "execution_count": 0, - "outputs": [] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" } - ] -} \ No newline at end of file + ], + "source": [ + "# Filling occluded images\n", + "occlude_start_row = 14\n", + "num_generated_images = 10\n", + "samples = np.copy(x_test_quantised[0:num_generated_images, :, :, :])\n", + "samples = samples / (q_levels - 1)\n", + "samples[:, occlude_start_row:, :, :] = 0\n", + "\n", + "fig = plt.figure(figsize=(10, 10))\n", + "\n", + "for i in range(10):\n", + " ax = fig.add_subplot(1, 10, i + 1)\n", + " ax.matshow(samples[i, :, :, 0], cmap=matplotlib.cm.binary)\n", + " plt.xticks(np.array([]))\n", + " plt.yticks(np.array([]))\n", + "\n", + "for i in range(occlude_start_row, height):\n", + " for j in range(width):\n", + " logits = gated_pixelcnn(samples)\n", + " next_sample = tf.random.categorical(logits[:, i, j, :], 1)\n", + " samples[:, i, j, 0] = (next_sample.numpy() / (q_levels - 1))[:, 0]\n", + "\n", + "fig = plt.figure(figsize=(10, 10))\n", + "\n", + "for i in range(10):\n", + " ax = fig.add_subplot(1, 10, i + 1)\n", + " ax.matshow(samples[i, :, :, 0], cmap=matplotlib.cm.binary)\n", + " plt.xticks(np.array([]))\n", + " plt.yticks(np.array([]))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "W7bzeLMjPNcI" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "cropped gated_pixelcnn.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": 
"python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/WIP/6-gated_pixelcnn_cropped/gated_pixelcnn_vs_cropped.ipynb b/WIP/6-gated_pixelcnn_cropped/gated_pixelcnn_vs_cropped.ipynb index aede3e3..c5d69fa 100644 --- a/WIP/6-gated_pixelcnn_cropped/gated_pixelcnn_vs_cropped.ipynb +++ b/WIP/6-gated_pixelcnn_cropped/gated_pixelcnn_vs_cropped.ipynb @@ -1,1135 +1,1255 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "gated_pixelcnn_vs_cropped.ipynb", - "provenance": [], - "collapsed_sections": [], - "toc_visible": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "accelerator": "GPU" + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "HgFGN07idT26" + }, + "source": [ + "# Masked vs cropped implementation for Gated PixelCNN" + ] }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "HgFGN07idT26", - "colab_type": "text" - }, - "source": [ - "# Converting masked-based implementation to cropping-based implementation" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ObS7YqtCbC33", - "colab_type": "code", - "colab": {} - }, - "source": [ - "import tensorflow as tf\n", - "import tensorflow.keras as keras\n", - "import numpy as np\n", - "import math" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "iCMR2mKLbt_l", - "colab_type": "code", - "colab": {} - }, - "source": [ - "test_ones_2d = np.ones([1, 5, 5, 1], dtype='float32')\n", - "test_ones_3d = np.ones([1, 5, 5, 5, 1], dtype='float32')" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "NycU0IQZb1X1", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def print_3d(matrix_3d):\n", - " for i in range(matrix_3d.shape[0]):\n", - " print(f'Depth {i}')\n", - " print(matrix_3d[i,...])" - ], - "execution_count": 0, - 
"outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "AeH21Zkzcrt5", - "colab_type": "code", - "outputId": "d47da908-4659-45aa-f540-e01d3e510bdc", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 527 - } - }, - "source": [ - "print_3d(test_ones_3d.squeeze())" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Depth 0\n", - "[[1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]]\n", - "Depth 1\n", - "[[1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]]\n", - "Depth 2\n", - "[[1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]]\n", - "Depth 3\n", - "[[1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]]\n", - "Depth 4\n", - "[[1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "xqrqiDnbfoqW", - "colab_type": "code", - "outputId": "12aef41a-4992-464c-8d38-8fd732286c57", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 102 - } - }, - "source": [ - "print(test_ones_2d[0,:,:,0].squeeze())" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[[1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 1.]\n", - " [1. 1. 1. 1. 
1.]]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1UcUkEj0d7wh", - "colab_type": "text" - }, - "source": [ - "## Creating 2D masked solution to check results with cropped solution later" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "83mZFyondaAT", - "colab_type": "code", - "colab": {} - }, - "source": [ - "class MaskedConv2D(tf.keras.layers.Layer):\n", - " def __init__(self,\n", - " mask_type,\n", - " filters,\n", - " kernel_size,\n", - " strides=1,\n", - " padding='same',\n", - " kernel_initializer='glorot_uniform',\n", - " bias_initializer='zeros'):\n", - " super(MaskedConv2D, self).__init__()\n", - "\n", - " assert mask_type in {'A', 'B', 'V'}\n", - " self.mask_type = mask_type\n", - "\n", - " self.filters = filters\n", - "\n", - " if isinstance(kernel_size, int):\n", - " kernel_size = (kernel_size, kernel_size)\n", - " self.kernel_size = kernel_size\n", - "\n", - " self.strides = strides\n", - " self.padding = padding.upper()\n", - " self.kernel_initializer = tf.keras.initializers.get(kernel_initializer)\n", - " self.bias_initializer = tf.keras.initializers.get(bias_initializer)\n", - "\n", - " def build(self, input_shape):\n", - " kernel_h, kernel_w = self.kernel_size\n", - "\n", - " self.kernel = self.add_weight(\"kernel\",\n", - " shape=(kernel_h,\n", - " kernel_w,\n", - " int(input_shape[-1]),\n", - " self.filters),\n", - " initializer=self.kernel_initializer,\n", - " trainable=True)\n", - "\n", - " self.bias = self.add_weight(\"bias\",\n", - " shape=(self.filters,),\n", - " initializer=self.bias_initializer,\n", - " trainable=True)\n", - "\n", - " mask = np.ones(self.kernel.shape, dtype=np.float32)\n", - "\n", - " if kernel_h % 2 != 0: \n", - " center_h = kernel_h // 2\n", - " else:\n", - " center_h = (kernel_h - 1) // 2\n", - "\n", - " if kernel_w % 2 != 0: \n", - " center_w = kernel_w // 2\n", - " else:\n", - " center_w = (kernel_w - 1) // 2\n", - "\n", - " if self.mask_type == 'V':\n", - 
" mask[center_h + 1:, :, :, :] = 0.\n", - " else:\n", - " mask[:center_h, :, :] = 0.\n", - " mask[center_h, center_w + (self.mask_type == 'B'):, :, :] = 0.\n", - " mask[center_h + 1:, :, :] = 0.\n", - "\n", - " self.mask = tf.constant(mask, dtype=tf.float32, name='mask')\n", - "\n", - " def call(self, input):\n", - " masked_kernel = tf.math.multiply(self.mask, self.kernel)\n", - " x = tf.nn.conv2d(input, masked_kernel, strides=[1, self.strides, self.strides, 1], padding=self.padding)\n", - " x = tf.nn.bias_add(x, self.bias)\n", - " return x" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nMXqIeSUkdcV", - "colab_type": "text" - }, - "source": [ - "### Tests with kernel_size 3" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "M62GZQe8ixvy", - "colab_type": "text" - }, - "source": [ - "#### Vertical stack" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "kjUrpEtIg7p9", - "colab_type": "code", - "outputId": "8df4b5ee-1d0e-4f7d-9009-beaf8c8133ee", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 204 - } - }, - "source": [ - "mask_type = 'V'\n", - "kernel_size=(3, 3)\n", - "\n", - "conv = MaskedConv2D(mask_type=mask_type,\n", - " filters=1,\n", - " kernel_size=kernel_size, \n", - " padding='same',\n", - " kernel_initializer='ones', \n", - " bias_initializer='zeros')\n", - "\n", - "result_v = conv(test_ones_2d)\n", - "\n", - "print('MASK')\n", - "print(conv.mask.numpy().squeeze())\n", - "print('')\n", - "print('OUTPUT')\n", - "print(result_v.numpy().squeeze())" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "MASK\n", - "[[1. 1. 1.]\n", - " [1. 1. 1.]\n", - " [0. 0. 0.]]\n", - "\n", - "OUTPUT\n", - "[[2. 3. 3. 3. 2.]\n", - " [4. 6. 6. 6. 4.]\n", - " [4. 6. 6. 6. 4.]\n", - " [4. 6. 6. 6. 4.]\n", - " [4. 6. 6. 6. 
4.]]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PFqvGa439Z2o", - "colab_type": "text" - }, - "source": [ - "#### Feeding horizontal" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "oq_JTwdE9lr6", - "colab_type": "code", - "outputId": "a5eecd56-e068-4d9f-ff4b-01bd048f0bbb", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 119 - } - }, - "source": [ - "padding = keras.layers.ZeroPadding2D(padding=((1,0),0))\n", - "cropping = keras.layers.Cropping2D(cropping=((0, 1), 0))\n", - "\n", - "x = padding(result_v)\n", - "result = cropping(x)\n", - "\n", - "print('OUTPUT')\n", - "print(result.numpy().squeeze())" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "OUTPUT\n", - "[[0. 0. 0. 0. 0.]\n", - " [2. 3. 3. 3. 2.]\n", - " [4. 6. 6. 6. 4.]\n", - " [4. 6. 6. 6. 4.]\n", - " [4. 6. 6. 6. 4.]]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "iHY6UE2_p5oc", - "colab_type": "text" - }, - "source": [ - "#### Horizontal stack A" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "4_q_IunZkFmj", - "colab_type": "code", - "outputId": "1bfa1a88-7ae3-4a7e-d43e-46e7e683cce4", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 204 - } - }, - "source": [ - "mask_type = 'A'\n", - "kernel_size=(3, 3)\n", - "\n", - "conv = MaskedConv2D(mask_type=mask_type,\n", - " filters=1,\n", - " kernel_size=kernel_size, \n", - " padding='same',\n", - " kernel_initializer='ones', \n", - " bias_initializer='zeros')\n", - "\n", - "result = conv(test_ones_2d)\n", - "\n", - "print('MASK')\n", - "print(conv.mask.numpy().squeeze())\n", - "print('')\n", - "print('OUTPUT')\n", - "print(result.numpy().squeeze())" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "MASK\n", - "[[0. 0. 0.]\n", - " [1. 0. 0.]\n", - " [0. 0. 0.]]\n", - "\n", - "OUTPUT\n", - "[[0. 1. 1. 1. 1.]\n", - " [0. 1. 
1. 1. 1.]\n", - " [0. 1. 1. 1. 1.]\n", - " [0. 1. 1. 1. 1.]\n", - " [0. 1. 1. 1. 1.]]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jMuS-vgWqAWK", - "colab_type": "text" - }, - "source": [ - "#### Horizontal stack B" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "5yeB5h2tkSs_", - "colab_type": "code", - "outputId": "9e7346b9-d360-42dd-85b4-a9a22ba2bacb", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 204 - } - }, - "source": [ - "mask_type = 'B'\n", - "kernel_size=(3, 3)\n", - "\n", - "conv = MaskedConv2D(mask_type=mask_type,\n", - " filters=1,\n", - " kernel_size=kernel_size, \n", - " padding='same',\n", - " kernel_initializer='ones', \n", - " bias_initializer='zeros')\n", - "\n", - "result = conv(test_ones_2d)\n", - "\n", - "print('MASK')\n", - "print(conv.mask.numpy().squeeze())\n", - "print('')\n", - "print('OUTPUT')\n", - "print(result.numpy().squeeze())" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "MASK\n", - "[[0. 0. 0.]\n", - " [1. 1. 0.]\n", - " [0. 0. 0.]]\n", - "\n", - "OUTPUT\n", - "[[1. 2. 2. 2. 2.]\n", - " [1. 2. 2. 2. 2.]\n", - " [1. 2. 2. 2. 2.]\n", - " [1. 2. 2. 2. 2.]\n", - " [1. 2. 2. 2. 
2.]]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1NxkQ3U1knbE", - "colab_type": "text" - }, - "source": [ - "### Tests with kernel_size 4" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WNykK-WpqMlu", - "colab_type": "text" - }, - "source": [ - "#### Vertical stack" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "p3DTiFYCk57Y", - "colab_type": "code", - "outputId": "01d4c588-acde-43ba-8b52-7b5b2fddc52a", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 221 - } - }, - "source": [ - "mask_type = 'V'\n", - "kernel_size=(4, 4)\n", - "\n", - "padding = keras.layers.ZeroPadding2D(padding=((1,0),0))\n", - "\n", - "conv = MaskedConv2D(mask_type=mask_type,\n", - " filters=1,\n", - " kernel_size=kernel_size, \n", - " padding='same',\n", - " kernel_initializer='ones', \n", - " bias_initializer='zeros')\n", - "\n", - "cropping = keras.layers.Cropping2D(cropping=((0, 1), 0))\n", - "\n", - "x = padding(test_ones_2d)\n", - "x = conv(x)\n", - "result = cropping(x)\n", - "\n", - "print('MASK')\n", - "print(conv.mask.numpy().squeeze())\n", - "print('')\n", - "print('OUTPUT')\n", - "print(result.numpy().squeeze())" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "MASK\n", - "[[1. 1. 1. 1.]\n", - " [1. 1. 1. 1.]\n", - " [0. 0. 0. 0.]\n", - " [0. 0. 0. 0.]]\n", - "\n", - "OUTPUT\n", - "[[0. 0. 0. 0. 0.]\n", - " [3. 4. 4. 3. 2.]\n", - " [6. 8. 8. 6. 4.]\n", - " [6. 8. 8. 6. 4.]\n", - " [6. 8. 8. 6. 
4.]]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "E5jUGK3_qbT8", - "colab_type": "text" - }, - "source": [ - "#### Horizontal stack A" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "J5HSUo7Rk5xO", - "colab_type": "code", - "outputId": "d2eabc6f-1b49-43f9-f7d8-1cf13021c47b", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 221 - } - }, - "source": [ - "mask_type = 'A'\n", - "kernel_size=(4, 4)\n", - "\n", - "conv = MaskedConv2D(mask_type=mask_type,\n", - " filters=1,\n", - " kernel_size=kernel_size, \n", - " padding='same',\n", - " kernel_initializer='ones', \n", - " bias_initializer='zeros')\n", - "\n", - "result = conv(test_ones_2d)\n", - "\n", - "print('MASK')\n", - "print(conv.mask.numpy().squeeze())\n", - "print('')\n", - "print('OUTPUT')\n", - "print(result.numpy().squeeze())" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "MASK\n", - "[[0. 0. 0. 0.]\n", - " [1. 0. 0. 0.]\n", - " [0. 0. 0. 0.]\n", - " [0. 0. 0. 0.]]\n", - "\n", - "OUTPUT\n", - "[[0. 1. 1. 1. 1.]\n", - " [0. 1. 1. 1. 1.]\n", - " [0. 1. 1. 1. 1.]\n", - " [0. 1. 1. 1. 1.]\n", - " [0. 1. 1. 1. 
1.]]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KqORK7mLqvPP", - "colab_type": "text" - }, - "source": [ - "#### Horizontal stack B" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "_2V51aerk5l1", - "colab_type": "code", - "outputId": "b54f7147-0080-48a2-a84c-d3ca557c31d9", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 221 - } - }, - "source": [ - "mask_type = 'B'\n", - "kernel_size=(4, 4)\n", - "\n", - "conv = MaskedConv2D(mask_type=mask_type,\n", - " filters=1,\n", - " kernel_size=kernel_size, \n", - " padding='same',\n", - " kernel_initializer='ones', \n", - " bias_initializer='zeros')\n", - "\n", - "result = conv(test_ones_2d)\n", - "\n", - "print('MASK')\n", - "print(conv.mask.numpy().squeeze())\n", - "print('')\n", - "print('OUTPUT')\n", - "print(result.numpy().squeeze())" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "MASK\n", - "[[0. 0. 0. 0.]\n", - " [1. 1. 0. 0.]\n", - " [0. 0. 0. 0.]\n", - " [0. 0. 0. 0.]]\n", - "\n", - "OUTPUT\n", - "[[1. 2. 2. 2. 2.]\n", - " [1. 2. 2. 2. 2.]\n", - " [1. 2. 2. 2. 2.]\n", - " [1. 2. 2. 2. 2.]\n", - " [1. 2. 2. 2. 
2.]]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kZmui789Br2B", - "colab_type": "text" - }, - "source": [ - "## Creating 2D cropped solution" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "PxBNsvzhB1ec", - "colab_type": "code", - "colab": {} - }, - "source": [ - "class VerticalCroppedConv2d(tf.keras.Model):\n", - " def __init__(self,\n", - " filters,\n", - " kernel_size,\n", - " kernel_initializer, \n", - " bias_initializer):\n", - " super(VerticalCroppedConv2d, self).__init__(name='')\n", - "\n", - " if isinstance(kernel_size, int):\n", - " kernel_size = (kernel_size, kernel_size)\n", - "\n", - " kernel_h, kernel_w = kernel_size\n", - "\n", - " self.padding = keras.layers.ZeroPadding2D(padding=((kernel_h-1, 0),(int((kernel_w-1)/2),int((kernel_w-1)/2))))\n", - "\n", - " self.conv = keras.layers.Conv2D(filters=filters,\n", - " kernel_size=kernel_size,\n", - " strides=1,\n", - " padding='valid',\n", - " kernel_initializer=kernel_initializer, \n", - " bias_initializer=bias_initializer)\n", - "\n", - " def call(self, input_value):\n", - "\n", - " x = self.padding(input_value)\n", - " x = self.conv(x)\n", - " out = self.cropping(x)\n", - "\n", - " return out\n", - "\n" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RfFuwKlrP2JU", - "colab_type": "text" - }, - "source": [ - "Example step by step" - ] + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hi all, in this notebook we will compare the masked implementation of the convolutions from the Gated PixelCNN versus the alternative suggested in the paper: the use of convolution operations with appropriate cropping and padding to achieve the same result.\n", + "Let's check it out!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we will check if both implementations create the same result. 
For this we will create a 5x5 matrix filled with ones as our input example." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "ObS7YqtCbC33" + }, + "outputs": [], + "source": [ + "import math\n", + "\n", + "import numpy as np\n", + "import tensorflow as tf\n", + "from tensorflow import keras\n", + "from tensorflow import nn\n", + "from tensorflow.keras import initializers" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "iCMR2mKLbt_l" + }, + "outputs": [], + "source": [ + "test_ones_2d = np.ones([1, 5, 5, 1], dtype='float32')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 102 }, + "colab_type": "code", + "id": "xqrqiDnbfoqW", + "outputId": "12aef41a-4992-464c-8d38-8fd732286c57" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "fH3I0lfoPdcH", - "colab_type": "code", - "outputId": "f142569c-f99d-4244-ab5d-1887243d0d29", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 204 - } - }, - "source": [ - "kernel_h = 2\n", - "kernel_w = 3\n", - "\n", - "kernel_size = (kernel_h, kernel_w)\n", - "\n", - "padding = keras.layers.ZeroPadding2D(padding=((kernel_h-1, 0),(int((kernel_w-1)/2),int((kernel_w-1)/2))))\n", - "\n", - "res = padding(test_ones_2d)\n", - "print(res.numpy().squeeze())\n", - "\n", - "conv = keras.layers.Conv2D(filters=1,\n", - " kernel_size=kernel_size,\n", - " strides=1,\n", - " padding='valid',\n", - " kernel_initializer='ones', \n", - " bias_initializer='zeros')\n", - "\n", - "res2 = conv(res)\n", - "print(res2.numpy().squeeze())\n", - "\n", - "\n", - "\n" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[[0. 0. 0. 0. 0. 0. 0.]\n", - " [0. 1. 1. 1. 1. 1. 0.]\n", - " [0. 1. 1. 1. 1. 1. 0.]\n", - " [0. 1. 1. 1. 1. 1. 0.]\n", - " [0. 1. 1. 1. 
1. 1. 0.]\n", - " [0. 1. 1. 1. 1. 1. 0.]]\n", - "[[2. 3. 3. 3. 2.]\n", - " [4. 6. 6. 6. 4.]\n", - " [4. 6. 6. 6. 4.]\n", - " [4. 6. 6. 6. 4.]\n", - " [4. 6. 6. 6. 4.]]\n" - ], - "name": "stdout" - } - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]]\n" + ] + } + ], + "source": [ + "print(test_ones_2d[0,:,:,0].squeeze())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "1UcUkEj0d7wh" + }, + "source": [ + "Now, let's copy the masked implementation that we have been using for our Gated PixelCNN models." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Masked convolutions" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "83mZFyondaAT" + }, + "outputs": [], + "source": [ + "class MaskedConv2D(keras.layers.Layer):\n", + " \"\"\"Convolutional layers with masks extended to work with Gated PixelCNN.\n", + "\n", + " Convolutional layers with simple implementation of masks type A and B for\n", + " autoregressive models. Extended version to work with the verticala and horizontal\n", + " stacks from the Gated PixelCNN model.\n", + "\n", + " Arguments:\n", + " mask_type: one of `\"V\"`, `\"A\"` or `\"B\".`\n", + " filters: Integer, the dimensionality of the output space (i.e. 
the number of output\n", + " filters in the convolution).\n", + " kernel_size: An integer or tuple/list of 2 integers, specifying the height and width\n", + " of the 2D convolution window.\n", + " Can be a single integer to specify the same value for all spatial dimensions.\n", + " strides: An integer or tuple/list of 2 integers, specifying the strides of the\n", + " convolution along the height and width.\n", + " Can be a single integer to specify the same value for all spatial dimensions.\n", + " Specifying any stride value != 1 is incompatible with specifying any\n", + " `dilation_rate` value != 1.\n", + " padding: one of `\"valid\"` or `\"same\"` (case-insensitive).\n", + " kernel_initializer: Initializer for the `kernel` weights matrix.\n", + " bias_initializer: Initializer for the bias vector.\n", + " \"\"\"\n", + "\n", + " def __init__(self,\n", + " mask_type,\n", + " filters,\n", + " kernel_size,\n", + " strides=1,\n", + " padding='same',\n", + " kernel_initializer='glorot_uniform',\n", + " bias_initializer='zeros'):\n", + " super(MaskedConv2D, self).__init__()\n", + "\n", + " assert mask_type in {'A', 'B', 'V'}\n", + " self.mask_type = mask_type\n", + "\n", + " self.filters = filters\n", + "\n", + " if isinstance(kernel_size, int):\n", + " kernel_size = (kernel_size, kernel_size)\n", + " self.kernel_size = kernel_size\n", + "\n", + " self.strides = strides\n", + " self.padding = padding.upper()\n", + " self.kernel_initializer = initializers.get(kernel_initializer)\n", + " self.bias_initializer = initializers.get(bias_initializer)\n", + "\n", + " def build(self, input_shape):\n", + " kernel_h, kernel_w = self.kernel_size\n", + "\n", + " self.kernel = self.add_weight('kernel',\n", + " shape=(kernel_h,\n", + " kernel_w,\n", + " int(input_shape[-1]),\n", + " self.filters),\n", + " initializer=self.kernel_initializer,\n", + " trainable=True)\n", + "\n", + " self.bias = self.add_weight('bias',\n", + " shape=(self.filters,),\n", + " 
initializer=self.bias_initializer,\n", + " trainable=True)\n", + "\n", + " mask = np.ones(self.kernel.shape, dtype=np.float32)\n", + "\n", + " # Get centre of the filter for even or odd dimensions\n", + " if kernel_h % 2 != 0:\n", + " center_h = kernel_h // 2\n", + " else:\n", + " center_h = (kernel_h - 1) // 2\n", + "\n", + " if kernel_w % 2 != 0:\n", + " center_w = kernel_w // 2\n", + " else:\n", + " center_w = (kernel_w - 1) // 2\n", + "\n", + " if self.mask_type == 'V':\n", + " mask[center_h + 1:, :, :, :] = 0.\n", + " else:\n", + " mask[:center_h, :, :] = 0.\n", + " mask[center_h, center_w + (self.mask_type == 'B'):, :, :] = 0.\n", + " mask[center_h + 1:, :, :] = 0.\n", + "\n", + " self.mask = tf.constant(mask, dtype=tf.float32, name='mask')\n", + "\n", + " def call(self, input):\n", + " masked_kernel = tf.math.multiply(self.mask, self.kernel)\n", + " x = nn.conv2d(input,\n", + " masked_kernel,\n", + " strides=[1, self.strides, self.strides, 1],\n", + " padding=self.padding)\n", + " x = nn.bias_add(x, self.bias)\n", + " return x" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With this implementation, we will recreate all the convolutional operations that occur inside of the Gated Block. 
These operations are:\n", + "\n", + "- Vertical stack\n", + "- Vertical to horizontal stack\n", + "- Horizontal stack - convolution layer with mask type \"A\"\n", + "- Horizontal stack - convolution layer with mask type \"B\"\n", + "\n", + "\n", + "\n", + " IMAGE GATED BLOCK\n", + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "M62GZQe8ixvy" + }, + "source": [ + "## Vertical stack" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 }, + "colab_type": "code", + "id": "kjUrpEtIg7p9", + "outputId": "8df4b5ee-1d0e-4f7d-9009-beaf8c8133ee" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "TFexYspQWEpo", - "colab_type": "code", - "outputId": "d0533bb8-38e8-482c-8e65-4f6522519087", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 51 - } - }, - "source": [ - "conv.weights[0].numpy().squeeze()" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "array([[1., 1., 1.],\n", - " [1., 1., 1.]], dtype=float32)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 16 - } - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "MASK\n", + "[[1. 1. 1.]\n", + " [1. 1. 1.]\n", + " [0. 0. 0.]]\n", + "\n", + "OUTPUT\n", + "[[2. 3. 3. 3. 2.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 
4.]]\n" + ] + } + ], + "source": [ + "mask_type = 'V'\n", + "kernel_size = (3, 3)\n", + "\n", + "conv = MaskedConv2D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size,\n", + " padding='same',\n", + " kernel_initializer='ones',\n", + " bias_initializer='zeros')\n", + "\n", + "result_v = conv(test_ones_2d)\n", + "\n", + "print('MASK')\n", + "print(conv.mask.numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print(result_v.numpy().squeeze())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "PFqvGa439Z2o" + }, + "source": [ + "## Vertical to horizontal stack" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 }, + "colab_type": "code", + "id": "oq_JTwdE9lr6", + "outputId": "a5eecd56-e068-4d9f-ff4b-01bd048f0bbb" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "AfyRyUmTNYZ8", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def build_test_croppedv_stack_2d(input_shape=(5, 5, 1), kernel_size=3):\n", - " inputs = tf.keras.layers.Input(shape=input_shape)\n", - " \n", - " x = VerticalCroppedConv2d(\n", - " filters=1,\n", - " kernel_size=kernel_size, \n", - " kernel_initializer='ones', \n", - " bias_initializer='zeros')(inputs)\n", - "\n", - " stack = tf.keras.Model(inputs=inputs, outputs=x)\n", - " stack.compile(optimizer='adam', loss='mse')\n", - " return stack" - ], - "execution_count": 0, - "outputs": [] + "name": "stdout", + "output_type": "stream", + "text": [ + "INPUT\n", + "[[2. 3. 3. 3. 2.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]]\n", + "\n", + "OUTPUT\n", + "[[0. 0. 0. 0. 0.]\n", + " [2. 3. 3. 3. 2.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 
4.]]\n" + ] + } + ], + "source": [ + "padding = keras.layers.ZeroPadding2D(padding=((1, 0), 0))\n", + "cropping = keras.layers.Cropping2D(cropping=((0, 1), 0))\n", + "\n", + "x = padding(result_v)\n", + "result = cropping(x)\n", + "\n", + "print('INPUT')\n", + "print(result_v.numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print(result.numpy().squeeze())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "iHY6UE2_p5oc" + }, + "source": [ + "## Horizontal stack - convolution layer with mask type \"A\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 }, + "colab_type": "code", + "id": "4_q_IunZkFmj", + "outputId": "1bfa1a88-7ae3-4a7e-d43e-46e7e683cce4" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "Nskk-zJwN3Em", - "colab_type": "text" - }, - "source": [ - "###Tests with kernel_size 3" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "MASK\n", + "[1. 0. 0.]\n", + "\n", + "OUTPUT\n", + "[[0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 
1.]]\n" + ] + } + ], + "source": [ + "mask_type = 'A'\n", + "kernel_size = (1, 3)\n", + "\n", + "conv = MaskedConv2D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size,\n", + " padding='same',\n", + " kernel_initializer='ones',\n", + " bias_initializer='zeros')\n", + "\n", + "result = conv(test_ones_2d)\n", + "\n", + "print('MASK')\n", + "print(conv.mask.numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print(result.numpy().squeeze())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "jMuS-vgWqAWK" + }, + "source": [ + "## Horizontal stack - convolution layer with mask type \"B\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 }, + "colab_type": "code", + "id": "5yeB5h2tkSs_", + "outputId": "9e7346b9-d360-42dd-85b4-a9a22ba2bacb" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "DdcDFcMbxwpZ", - "colab_type": "text" - }, - "source": [ - "#### Vertical stack" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "MASK\n", + "[1. 1. 0.]\n", + "\n", + "OUTPUT\n", + "[[1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]]\n" + ] + } + ], + "source": [ + "mask_type = 'B'\n", + "kernel_size = (1, 3)\n", + "\n", + "conv = MaskedConv2D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size,\n", + " padding='same',\n", + " kernel_initializer='ones',\n", + " bias_initializer='zeros')\n", + "\n", + "result = conv(test_ones_2d)\n", + "\n", + "print('MASK')\n", + "print(conv.mask.numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print(result.numpy().squeeze())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using the results of the masked approach as reference, let's check the cropped method." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "kZmui789Br2B" + }, + "source": [ + "# Cropped and padded convolutions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Vertical stack\n", + "\n", + "First, let's check out this operation, which uses some strategic padding and applies the convolution in \"valid\" mode to achieve the same result as the masked version. " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 }, + "colab_type": "code", + "id": "fH3I0lfoPdcH", + "outputId": "f142569c-f99d-4244-ab5d-1887243d0d29" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "jc06sHDoNzx8", - "colab_type": "code", - "outputId": "750bf826-6cf4-400b-f693-51ad6350fd15", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 187 - } - }, - "source": [ - "kernel_size=(2, 3)\n", - "kernel_h, kernel_w = kernel_size\n", - "\n", - "padding2 = keras.layers.ZeroPadding2D(padding=((kernel_h-1, 0),(int((kernel_w-1)/2),int((kernel_w-1)/2))))\n", - "conv = keras.layers.Conv2D(filters=1,\n", - " kernel_size=kernel_size,\n", - " strides=1,\n", - " padding='valid',\n", - " kernel_initializer='ones', \n", - " bias_initializer='zeros')\n", - "\n", - "x = padding2(test_ones_2d)\n", - "result_v = conv(x)\n", - "\n", - "print('KERNEL')\n", - "print(conv.weights[0].numpy().squeeze())\n", - "print('')\n", - "print('OUTPUT')\n", - "print(result_v.numpy().squeeze())" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "KERNEL\n", - "[[1. 1. 1.]\n", - " [1. 1. 1.]]\n", - "\n", - "OUTPUT\n", - "[[2. 3. 3. 3. 2.]\n", - " [4. 6. 6. 6. 4.]\n", - " [4. 6. 6. 6. 4.]\n", - " [4. 6. 6. 6. 4.]\n", - " [4. 6. 6. 6. 4.]]\n" - ], - "name": "stdout" - } - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "INPUT\n", + "[[1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 
1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]]\n", + "\n", + "PADDED INPUT\n", + "[[0. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 1. 1. 1. 1. 0.]\n", + " [0. 1. 1. 1. 1. 1. 0.]\n", + " [0. 1. 1. 1. 1. 1. 0.]\n", + " [0. 1. 1. 1. 1. 1. 0.]\n", + " [0. 1. 1. 1. 1. 1. 0.]]\n", + "\n", + "CONV FILTER\n", + "[[1. 1. 1.]\n", + " [1. 1. 1.]]\n", + "\n", + "OUTPUT\n", + "[[2. 3. 3. 3. 2.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]]\n" + ] + } + ], + "source": [ + "kernel_h = 2\n", + "kernel_w = 3\n", + "\n", + "kernel_size = (kernel_h, kernel_w)\n", + "\n", + "padding = keras.layers.ZeroPadding2D(padding=((kernel_h - 1, 0), (int((kernel_w - 1) / 2), int((kernel_w - 1) / 2))))\n", + "\n", + "res = padding(test_ones_2d)\n", + "\n", + "conv = keras.layers.Conv2D(filters=1,\n", + " kernel_size=kernel_size,\n", + " strides=1,\n", + " padding='valid',\n", + " kernel_initializer='ones',\n", + " bias_initializer='zeros')\n", + "\n", + "result_v = conv(res)\n", + "\n", + "print('INPUT')\n", + "print(test_ones_2d.squeeze())\n", + "print('')\n", + "print('PADDED INPUT')\n", + "print(res.numpy().squeeze())\n", + "print('')\n", + "print('CONV FILTER')\n", + "print(conv.weights[0].numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print(result_v.numpy().squeeze())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "RfFuwKlrP2JU" + }, + "source": [ + "Now, let's implement a layer that will include all the previous operations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "PxBNsvzhB1ec" + }, + "outputs": [], + "source": [ + "class VerticalConv2D(keras.layers.Conv2D):\n", + " \"\"\"https://github.com/JesseFarebro/PixelCNNPP/blob/master/layers/VerticalConv2D.py\"\"\"\n", + "\n", + " def __init__(self,\n", + " filters,\n", + " kernel_size,\n", + " **kwargs):\n", + " if not isinstance(kernel_size, tuple):\n", + " kernel_size = (kernel_size // 2 + 1, kernel_size)\n", + "\n", + " super(VerticalConv2D, self).__init__(filters, kernel_size, **kwargs)\n", + "\n", + " self.pad = tf.keras.layers.ZeroPadding2D(\n", + " (\n", + " (kernel_size[0] - 1, 0), # Top, Bottom\n", + " (kernel_size[1] // 2, kernel_size[1] // 2), # Left, Right\n", + " )\n", + " )\n", + "\n", + " def call(self, inputs):\n", + " inputs = self.pad(inputs)\n", + " output = super(VerticalConv2D, self).call(inputs)\n", + "\n", + " return output" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "AfyRyUmTNYZ8" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "SvWpzQFGEGGm", - "colab_type": "text" - }, - "source": [ - "#### Feeding horizontal" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "INPUT\n", + "[[1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]]\n", + "\n", + "CONV FILTER\n", + "[[1. 1. 1.]\n", + " [1. 1. 1.]]\n", + "\n", + "OUTPUT\n", + "[[2. 3. 3. 3. 2.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 
4.]]\n" + ] + } + ], + "source": [ + "kernel_h = 2\n", + "kernel_w = 3\n", + "\n", + "kernel_size = (kernel_h, kernel_w)\n", + "\n", + "conv = VerticalConv2D(filters=1,\n", + " kernel_size=kernel_size,\n", + " strides=1,\n", + " padding='valid',\n", + " kernel_initializer='ones',\n", + " bias_initializer='zeros')\n", + "\n", + "result_v = conv(test_ones_2d)\n", + "\n", + "print('INPUT')\n", + "print(test_ones_2d.squeeze())\n", + "print('')\n", + "print('CONV FILTER')\n", + "print(conv.weights[0].numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print(result_v.numpy().squeeze())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "SvWpzQFGEGGm" + }, + "source": [ + "## Vertical to horizontal stack\n", + "In this operation, the implementation remains the same." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 }, + "colab_type": "code", + "id": "5jLEZhYtOZgi", + "outputId": "d293ab6f-7fa6-4aec-cc68-9ab3ae7185dd" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "5jLEZhYtOZgi", - "colab_type": "code", - "outputId": "d293ab6f-7fa6-4aec-cc68-9ab3ae7185dd", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 119 - } - }, - "source": [ - "padding = keras.layers.ZeroPadding2D(padding=((1,0),0))\n", - "cropping = keras.layers.Cropping2D(cropping=((0, 1), 0))\n", - "\n", - "x = padding(result_v)\n", - "result = cropping(x)\n", - "\n", - "print('OUTPUT')\n", - "print(result.numpy().squeeze())" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "OUTPUT\n", - "[[0. 0. 0. 0. 0.]\n", - " [2. 3. 3. 3. 2.]\n", - " [4. 6. 6. 6. 4.]\n", - " [4. 6. 6. 6. 4.]\n", - " [4. 6. 6. 6. 4.]]\n" - ], - "name": "stdout" - } - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "INPUT\n", + "[[2. 3. 3. 3. 2.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 
4.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]]\n", + "\n", + "OUTPUT\n", + "[[0. 0. 0. 0. 0.]\n", + " [2. 3. 3. 3. 2.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]]\n" + ] + } + ], + "source": [ + "padding = keras.layers.ZeroPadding2D(padding=((1, 0), 0))\n", + "cropping = keras.layers.Cropping2D(cropping=((0, 1), 0))\n", + "\n", + "x = padding(result_v)\n", + "result = cropping(x)\n", + "\n", + "print('INPUT')\n", + "print(result_v.numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print(result.numpy().squeeze())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "MQLekDEaEUUT" + }, + "source": [ + "## Horizontal stack - convolution layer with mask type \"A\"\n", + "Again, let's check each operation step by step." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 170 }, + "colab_type": "code", + "id": "bHiwKZniEk5A", + "outputId": "ebd659c5-d899-4d6f-9c81-5c821cf4ea61" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "MQLekDEaEUUT", - "colab_type": "text" - }, - "source": [ - "#### Horizontal stack A" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "INPUT\n", + "[[1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]]\n", + "\n", + "CONV FILTER\n", + "1.0\n", + "\n", + "CONVOLUTION RESULT\n", + "[[1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]]\n", + "\n", + "PADDED RESULT\n", + "[[0. 1. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1. 1.]]\n", + "\n", + "CROPPED RESULT\n", + "[[0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 
1.]]\n" + ] + } + ], + "source": [ + "kernel_size = (1, 1)\n", + "conv = keras.layers.Conv2D(filters=1,\n", + " kernel_size=kernel_size,\n", + " strides=1,\n", + " kernel_initializer='ones',\n", + " bias_initializer='zeros')\n", + "\n", + "padding = keras.layers.ZeroPadding2D(padding=(0, (1, 0)))\n", + "cropping = keras.layers.Cropping2D(cropping=(0, (0, 1)))\n", + "\n", + "res = conv(test_ones_2d)\n", + "res_2 = padding(res)\n", + "res_3 = cropping(res_2)\n", + "\n", + "print('INPUT')\n", + "print(test_ones_2d.squeeze())\n", + "print('')\n", + "print('CONV FILTER')\n", + "print(conv.weights[0].numpy().squeeze())\n", + "print('')\n", + "print('CONVOLUTION RESULT')\n", + "print(res.numpy().squeeze())\n", + "print('')\n", + "print('PADDED RESULT')\n", + "print(res_2.numpy().squeeze())\n", + "print('')\n", + "print('CROPPED RESULT')\n", + "print(res_3.numpy().squeeze())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note: Since our test input has just one channel, the 1x1 convolution looks as if it did not change anything." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "IvmGrDziEadf" + }, + "source": [ + "## Horizontal stack - convolution layer with mask type \"B\"\n", + "The step-by-step process for the mask type \"B\" convolution layer is a little different." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 170 }, + "colab_type": "code", + "id": "pRKJFE4TFx4I", + "outputId": "75d34ade-0983-49a5-f157-b98975c22560" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "bHiwKZniEk5A", - "colab_type": "code", - "outputId": "ebd659c5-d899-4d6f-9c81-5c821cf4ea61", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 170 - } - }, - "source": [ - "kernel_size=(1, 1)\n", - "kernel_h, kernel_w = kernel_size\n", - "\n", - "conv = keras.layers.Conv2D(filters=1,\n", - " kernel_size=kernel_size,\n", - " strides=1,\n", - " kernel_initializer='ones', \n", - " bias_initializer='zeros')\n", - "\n", - "padding = keras.layers.ZeroPadding2D(padding=(0,(1,0)))\n", - "cropping = keras.layers.Cropping2D(cropping=(0, (0, 1)))\n", - "\n", - "x = conv(test_ones_2d)\n", - "x = padding(x)\n", - "result = cropping(x)\n", - "\n", - "print('KERNEL')\n", - "print(conv.weights[0].numpy().squeeze())\n", - "print('')\n", - "print('OUTPUT')\n", - "print(result.numpy().squeeze())" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "KERNEL\n", - "1.0\n", - "\n", - "OUTPUT\n", - "[[0. 1. 1. 1. 1.]\n", - " [0. 1. 1. 1. 1.]\n", - " [0. 1. 1. 1. 1.]\n", - " [0. 1. 1. 1. 1.]\n", - " [0. 1. 1. 1. 1.]]\n" - ], - "name": "stdout" - } - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "INPUT\n", + "[[1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]]\n", + "\n", + "PADDED INPUT\n", + "[[0. 1. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1. 1.]]\n", + "\n", + "CONV FILTER\n", + "[1. 1.]\n", + "\n", + "RESULT\n", + "[[1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 
2.]]\n" + ] + } + ], + "source": [ + "kernel_size = (1, 2)\n", + "kernel_h, kernel_w = kernel_size\n", + "\n", + "padding = keras.layers.ZeroPadding2D(padding=((int((kernel_h - 1) / 2), int((kernel_h - 1) / 2)), (kernel_w - 1, 0)))\n", + "conv = keras.layers.Conv2D(filters=1,\n", + " kernel_size=kernel_size,\n", + " strides=1,\n", + " padding='valid',\n", + " kernel_initializer='ones',\n", + " bias_initializer='zeros')\n", + "\n", + "res = padding(test_ones_2d)\n", + "result = conv(res)\n", + "\n", + "print('INPUT')\n", + "print(test_ones_2d.squeeze())\n", + "print('')\n", + "print('PADDED INPUT')\n", + "print(res.numpy().squeeze())\n", + "print('')\n", + "print('CONV FILTER')\n", + "print(conv.weights[0].numpy().squeeze())\n", + "print('')\n", + "print('RESULT')\n", + "print(result.numpy().squeeze())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this case, we also implemented a layer version encapsulating these operations." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "class HorizontalConv2D(keras.layers.Conv2D):\n", + " def __init__(self,\n", + " filters,\n", + " kernel_size,\n", + " **kwargs):\n", + " if not isinstance(kernel_size, tuple):\n", + " kernel_size = (kernel_size // 2 + 1,) * 2\n", + "\n", + " super(HorizontalConv2D, self).__init__(filters, kernel_size, **kwargs)\n", + " self.pad = tf.keras.layers.ZeroPadding2D(\n", + " (\n", + " (kernel_size[0] - 1, 0), # (Top, Bottom)\n", + " (kernel_size[1] - 1, 0), # (Left, Right)\n", + " )\n", + " )\n", + "\n", + " def call(self, inputs):\n", + " inputs = self.pad(inputs)\n", + " outputs = super(HorizontalConv2D, self).call(inputs)\n", + "\n", + " return outputs" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "IvmGrDziEadf", - "colab_type": "text" - }, - "source": [ - "#### Horizontal stack B" - ] - }, + "name": 
"stdout", + "output_type": "stream", + "text": [ + "INPUT\n", + "[[1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]]\n", + "\n", + "CONV FILTER\n", + "[1. 1.]\n", + "\n", + "RESULT\n", + "[[1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]]\n" + ] + } + ], + "source": [ + "kernel_size = (1, 2)\n", + "conv = HorizontalConv2D(filters=1,\n", + " kernel_size=kernel_size,\n", + " strides=1,\n", + " kernel_initializer='ones',\n", + " bias_initializer='zeros')\n", + "\n", + "result = conv(test_ones_2d)\n", + "\n", + "print('INPUT')\n", + "print(test_ones_2d.squeeze())\n", + "print('')\n", + "print('CONV FILTER')\n", + "print(conv.weights[0].numpy().squeeze())\n", + "print('')\n", + "print('RESULT')\n", + "print(result.numpy().squeeze())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Execution time\n", + "Now we will compare the time that takes to perform each convolutional operation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "def measure_time(conv_fn):\n", + " exec_time = []\n", + " n_iter = 100\n", + " for _ in range(n_iter):\n", + " test_input = np.random.rand(128, 256, 256, 1).astype('float32') \n", + " start = time.time()\n", + " conv_fn(test_input)\n", + " exec_time.append(time.time() - start)\n", + " exec_time = np.array(exec_time, dtype='float32')\n", + " return exec_time.mean(), exec_time.std()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Vertical stack" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "pRKJFE4TFx4I", - "colab_type": "code", - "outputId": "75d34ade-0983-49a5-f157-b98975c22560", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 170 - } - }, - "source": [ - "kernel_size=(1, 2)\n", - "kernel_h, kernel_w = kernel_size\n", - "\n", - "padding2 = keras.layers.ZeroPadding2D(padding=((int((kernel_h-1)/2),int((kernel_h-1)/2)), (kernel_w-1, 0)))\n", - "conv = keras.layers.Conv2D(filters=1,\n", - " kernel_size=kernel_size,\n", - " strides=1,\n", - " padding='valid',\n", - " kernel_initializer='ones', \n", - " bias_initializer='zeros')\n", - "\n", - "\n", - "x = padding2(test_ones_2d)\n", - "result = conv(x)\n", - "\n", - "print('KERNEL')\n", - "print(conv.weights[0].numpy().squeeze())\n", - "print('')\n", - "print('OUTPUT')\n", - "print(result.numpy().squeeze())" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "KERNEL\n", - "[1. 1.]\n", - "\n", - "OUTPUT\n", - "[[1. 2. 2. 2. 2.]\n", - " [1. 2. 2. 2. 2.]\n", - " [1. 2. 2. 2. 2.]\n", - " [1. 2. 2. 2. 2.]\n", - " [1. 2. 2. 2. 
2.]]\n" - ], - "name": "stdout" - } - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "Vertical stack\n", + "Masked convolution: 0.01410292 +- 0.00891058 seconds\n", + "Cropped padded convolution: 0.01386628 +- 0.00675169 seconds\n" + ] + } + ], + "source": [ + "mask_type = 'V'\n", + "kernel_size = (3, 3)\n", + "masked_conv = MaskedConv2D(mask_type=mask_type,\n", + " filters=32,\n", + " kernel_size=kernel_size,\n", + " padding='same',\n", + " kernel_initializer='ones',\n", + " bias_initializer='zeros')\n", + "\n", + "@tf.function\n", + "def test_masked_fn(x):\n", + " _ = masked_conv(x)\n", + " \n", + "\n", + "masked_time = measure_time(test_masked_fn)\n", + "# ----------------------------------------------------------------\n", + "\n", + "kernel_size = (2, 3)\n", + "cropped_conv = VerticalConv2D(filters=32,\n", + " kernel_size=kernel_size,\n", + " strides=1,\n", + " padding='valid',\n", + " kernel_initializer='ones',\n", + " bias_initializer='zeros')\n", + "\n", + "@tf.function\n", + "def test_cropped_fn(x):\n", + " _ = cropped_conv(x)\n", + "\n", + "cropped_time = measure_time(test_cropped_fn)\n", + "# ----------------------------------------------------------------\n", + "\n", + "print(\"Vertical stack\")\n", + "print(f\"Masked convolution: {masked_time[0]:.8f} +- {masked_time[1]:.8f} seconds\")\n", + "print(f\"Cropped padded convolution: {cropped_time[0]:.8f} +- {cropped_time[1]:.8f} seconds\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Horizontal stack - convolution layer with mask type \"A\"" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "swYJ4XMofUWv", - "colab_type": "text" - }, - "source": [ - "REFERENCES\n", - "\n", - "https://wiki.math.uwaterloo.ca/statwiki/index.php?title=STAT946F17/Conditional_Image_Generation_with_PixelCNN_Decoders#Gated_PixelCNN\n", - "\n", - 
"https://www.slideshare.net/suga93/conditional-image-generation-with-pixelcnn-decoders\n", - "\n", - "https://www.youtube.com/watch?v=1BURwCCYNEI" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "Horizontal stack - convolution layer with mask type 'A'\n", + "Masked convolution: 0.01360846 +- 0.00381987 seconds\n", + "Cropped padded convolution: 0.01365352 +- 0.00476047 seconds\n" + ] + } + ], + "source": [ + "mask_type = 'A'\n", + "kernel_size = (1, 3)\n", + "masked_conv = MaskedConv2D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size,\n", + " padding='same',\n", + " kernel_initializer='ones',\n", + " bias_initializer='zeros')\n", + "\n", + "@tf.function\n", + "def test_masked_fn(x):\n", + " _ = masked_conv(x)\n", + " \n", + "masked_time = measure_time(test_masked_fn)\n", + "# ----------------------------------------------------------------\n", + "\n", + "kernel_size = (1, 1)\n", + "conv = keras.layers.Conv2D(filters=1,\n", + " kernel_size=kernel_size,\n", + " strides=1,\n", + " kernel_initializer='ones',\n", + " bias_initializer='zeros')\n", + "\n", + "padding = keras.layers.ZeroPadding2D(padding=(0, (1, 0)))\n", + "cropping = keras.layers.Cropping2D(cropping=(0, (0, 1)))\n", + "\n", + "@tf.function\n", + "def test_cropped_fn(x):\n", + " x = conv(x)\n", + " x = padding(x)\n", + " x = cropping(x)\n", + "\n", + "cropped_time = measure_time(test_cropped_fn)\n", + "# ----------------------------------------------------------------\n", + "\n", + "print(\"Horizontal stack - convolution layer with mask type 'A'\")\n", + "print(f\"Masked convolution: {masked_time[0]:.8f} +- {masked_time[1]:.8f} seconds\")\n", + "print(f\"Cropped padded convolution: {cropped_time[0]:.8f} +- {cropped_time[1]:.8f} seconds\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Horizontal stack - convolution layer with mask type \"B\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + 
"outputs": [ { - "cell_type": "code", - "metadata": { - "id": "jTI9ts7i7Wch", - "colab_type": "code", - "colab": {} - }, - "source": [ - "" - ], - "execution_count": 0, - "outputs": [] + "name": "stdout", + "output_type": "stream", + "text": [ + "Horizontal stack - convolution layer with mask type 'B'\n", + "Masked convolution: 0.01353339 +- 0.00374499 seconds\n", + "Cropped padded convolution: 0.01384839 +- 0.00734248 seconds\n" + ] } - ] -} \ No newline at end of file + ], + "source": [ + "mask_type = 'B'\n", + "kernel_size = (1, 3)\n", + "masked_conv = MaskedConv2D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size,\n", + " padding='same',\n", + " kernel_initializer='ones',\n", + " bias_initializer='zeros')\n", + "\n", + "@tf.function\n", + "def test_masked_fn(x):\n", + " _ = masked_conv(x)\n", + " \n", + "masked_time = measure_time(test_masked_fn)\n", + "# ----------------------------------------------------------------\n", + "\n", + "kernel_size = (1, 2)\n", + "cropped_conv = HorizontalConv2D(filters=1,\n", + " kernel_size=kernel_size,\n", + " strides=1,\n", + " kernel_initializer='ones',\n", + " bias_initializer='zeros')\n", + "\n", + "@tf.function\n", + "def test_cropped_fn(x):\n", + " _ = cropped_conv(x)\n", + "\n", + "cropped_time = measure_time(test_cropped_fn)\n", + "# ----------------------------------------------------------------\n", + "\n", + "print(\"Horizontal stack - convolution layer with mask type 'B'\")\n", + "print(f\"Masked convolution: {masked_time[0]:.8f} +- {masked_time[1]:.8f} seconds\")\n", + "print(f\"Cropped padded convolution: {cropped_time[0]:.8f} +- {cropped_time[1]:.8f} seconds\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Altough its looks like cropped is better in the vertical convolution, the difference does not to look very significant." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "swYJ4XMofUWv" + }, + "source": [ + "# REFERENCES\n", + "\n", + "https://wiki.math.uwaterloo.ca/statwiki/index.php?title=STAT946F17/Conditional_Image_Generation_with_PixelCNN_Decoders#Gated_PixelCNN\n", + "\n", + "https://www.slideshare.net/suga93/conditional-image-generation-with-pixelcnn-decoders\n", + "\n", + "https://www.youtube.com/watch?v=1BURwCCYNEI" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "gated_pixelcnn_vs_cropped.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/WIP/6-gated_pixelcnn_cropped/masked_vs_cropped_even_filter_size.ipynb b/WIP/6-gated_pixelcnn_cropped/masked_vs_cropped_even_filter_size.ipynb new file mode 100644 index 0000000..42c3205 --- /dev/null +++ b/WIP/6-gated_pixelcnn_cropped/masked_vs_cropped_even_filter_size.ipynb @@ -0,0 +1,113 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Tests with kernel_size 4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mask_type = 'V'\n", + "kernel_size=(4, 4)\n", + "\n", + "padding = keras.layers.ZeroPadding2D(padding=((1,0),0))\n", + "\n", + "conv = MaskedConv2D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size, \n", + " padding='same',\n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')\n", + "\n", + "cropping = keras.layers.Cropping2D(cropping=((0, 1), 0))\n", + "\n", + "x = padding(test_ones_2d)\n", 
+ "x = conv(x)\n", + "result = cropping(x)\n", + "\n", + "print('MASK')\n", + "print(conv.mask.numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print(result.numpy().squeeze())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mask_type = 'A'\n", + "kernel_size=(1, 4)\n", + "\n", + "conv = MaskedConv2D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size, \n", + " padding='same',\n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')\n", + "\n", + "result = conv(test_ones_2d)\n", + "\n", + "print('MASK')\n", + "print(conv.mask.numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print(result.numpy().squeeze())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mask_type = 'B'\n", + "kernel_size=(1, 4)\n", + "\n", + "conv = MaskedConv2D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size, \n", + " padding='same',\n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')\n", + "\n", + "result = conv(test_ones_2d)\n", + "\n", + "print('MASK')\n", + "print(conv.mask.numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print(result.numpy().squeeze())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}