diff --git a/WIP/6-gated_pixelcnn_cropped/3d_gated_pixelcnn_conv.ipynb b/WIP/6-gated_pixelcnn_cropped/3d_gated_pixelcnn_conv.ipynb new file mode 100644 index 0000000..02e1f98 --- /dev/null +++ b/WIP/6-gated_pixelcnn_cropped/3d_gated_pixelcnn_conv.ipynb @@ -0,0 +1,1345 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "3d_gated_pixelcnn_conv.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "HgFGN07idT26", + "colab_type": "text" + }, + "source": [ + "# Converting masked-based implementation to cropping-based implementation" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ObS7YqtCbC33", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import tensorflow as tf\n", + "import tensorflow.keras as keras\n", + "import numpy as np" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "iCMR2mKLbt_l", + "colab_type": "code", + "colab": {} + }, + "source": [ + "test_ones_2d = np.ones([1, 5, 5, 1], dtype='float32')\n", + "test_ones_3d = np.ones([1, 5, 5, 5, 1], dtype='float32')" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "NycU0IQZb1X1", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def print_test_3d(test_matrix):\n", + " for i in range(test_matrix.shape[1]):\n", + " print(f'Depth {i}')\n", + " print(test_matrix[0,i,:,:,0])" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "AeH21Zkzcrt5", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 538 + }, + "outputId": "e7fd642b-23ac-4d79-a1f0-564853e64a79" + }, + "source": [ + "print_test_3d(test_ones_3d)" + ], + "execution_count": 4, + "outputs": [ + { + "output_type": 
"stream", + "text": [ + "Depth 0\n", + "[[1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]]\n", + "Depth 1\n", + "[[1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]]\n", + "Depth 2\n", + "[[1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]]\n", + "Depth 3\n", + "[[1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]]\n", + "Depth 4\n", + "[[1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "xqrqiDnbfoqW", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 104 + }, + "outputId": "88e2939e-78e1-4692-abbb-6569282d48e8" + }, + "source": [ + "print(test_ones_2d[0,:,:,0].squeeze())" + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[[1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 1.]\n", + " [1. 1. 1. 1. 
1.]]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1UcUkEj0d7wh", + "colab_type": "text" + }, + "source": [ + "## Creating 2D masked solution to check results with cropped solution later" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "83mZFyondaAT", + "colab_type": "code", + "colab": {} + }, + "source": [ + "class MaskedConv2D(tf.keras.layers.Layer):\n", + " def __init__(self,\n", + " mask_type,\n", + " filters,\n", + " kernel_size,\n", + " strides=1,\n", + " padding='same',\n", + " kernel_initializer='glorot_uniform',\n", + " bias_initializer='zeros'):\n", + " super(MaskedConv2D, self).__init__()\n", + "\n", + " assert mask_type in {'A', 'B', 'V'}\n", + " self.mask_type = mask_type\n", + "\n", + " self.filters = filters\n", + "\n", + " if isinstance(kernel_size, int):\n", + " kernel_size = (kernel_size, kernel_size)\n", + " self.kernel_size = kernel_size\n", + "\n", + " self.strides = strides\n", + " self.padding = padding.upper()\n", + " self.kernel_initializer = tf.keras.initializers.get(kernel_initializer)\n", + " self.bias_initializer = tf.keras.initializers.get(bias_initializer)\n", + "\n", + " def build(self, input_shape):\n", + " kernel_h, kernel_w = self.kernel_size\n", + "\n", + " self.kernel = self.add_weight(\"kernel\",\n", + " shape=(kernel_h,\n", + " kernel_w,\n", + " int(input_shape[-1]),\n", + " self.filters),\n", + " initializer=self.kernel_initializer,\n", + " trainable=True)\n", + "\n", + " self.bias = self.add_weight(\"bias\",\n", + " shape=(self.filters,),\n", + " initializer=self.bias_initializer,\n", + " trainable=True)\n", + "\n", + " mask = np.ones(self.kernel.shape, dtype=np.float32)\n", + "\n", + " if kernel_h % 2 != 0: \n", + " center_h = kernel_h // 2\n", + " else:\n", + " center_h = (kernel_h - 1) // 2\n", + "\n", + " if kernel_w % 2 != 0: \n", + " center_w = kernel_w // 2\n", + " else:\n", + " center_w = (kernel_w - 1) // 2\n", + "\n", + "\n", + "\n", + " if 
self.mask_type == 'V':\n", + " mask[center_h + 1:, :, :, :] = 0.\n", + " else:\n", + " mask[center_h, center_w + (self.mask_type == 'B'):, :, :] = 0.\n", + " mask[center_h + 1:, :, :] = 0.\n", + "\n", + " self.mask = tf.constant(mask, dtype=tf.float32, name='mask')\n", + "\n", + " def call(self, input):\n", + " masked_kernel = tf.math.multiply(self.mask, self.kernel)\n", + " x = tf.nn.conv2d(input, masked_kernel, strides=[1, self.strides, self.strides, 1], padding=self.padding)\n", + " x = tf.nn.bias_add(x, self.bias)\n", + " return x" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "PmmR19cRgNyV", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def build_test_stack_2d(mask_type, input_shape=(5, 5, 1), kernel_size=(3, 3)):\n", + " inputs = tf.keras.layers.Input(shape=input_shape)\n", + " \n", + " x = MaskedConv2D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size, \n", + " padding='same',\n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')(inputs)\n", + "\n", + " stack = tf.keras.Model(inputs=inputs, outputs=x)\n", + " stack.compile(optimizer='adam', loss='mse')\n", + " return stack" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nMXqIeSUkdcV", + "colab_type": "text" + }, + "source": [ + "### Tests with kernel_size 3" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "M62GZQe8ixvy", + "colab_type": "text" + }, + "source": [ + "#### Vertical stack" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "kjUrpEtIg7p9", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 104 + }, + "outputId": "78b74814-4929-4e48-e45c-a167b80ae83d" + }, + "source": [ + "mask_type = 'V'\n", + "kernel_size=(3, 3)\n", + "\n", + "padding = keras.layers.ZeroPadding2D(padding=((1,0),0))\n", + "\n", + "conv = MaskedConv2D(mask_type=mask_type,\n", + " filters=1,\n", + " 
kernel_size=kernel_size, \n", + " padding='same',\n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')\n", + "\n", + "cropping = keras.layers.Cropping2D(cropping=((0, 1), 0))\n", + "\n", + "\n", + "x = padding(test_ones_2d)\n", + "x = conv(x)\n", + "result = cropping(x)\n", + "\n", + "print(result.numpy().squeeze())\n" + ], + "execution_count": 93, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[[0. 0. 0. 0. 0.]\n", + " [2. 3. 3. 3. 2.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "4_q_IunZkFmj", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 104 + }, + "outputId": "6bc566e2-2bbd-49f0-a4c6-0574125c6cd8" + }, + "source": [ + "horizontal_A_stack = build_test_stack_2d('A', kernel_size=(1, 3))\n", + "val = horizontal_A_stack.predict(test_ones_2d)\n", + "print(val[0,:,:,0].squeeze())" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[[0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "5yeB5h2tkSs_", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 104 + }, + "outputId": "8e8b83ab-2c0b-4ae9-e5c0-eceb6ab55f66" + }, + "source": [ + "horizontal_B_stack = build_test_stack_2d('B', kernel_size=(1, 3))\n", + "val = horizontal_B_stack.predict(test_ones_2d)\n", + "print(val[0,:,:,0].squeeze())" + ], + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[[1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 
2.]]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1NxkQ3U1knbE", + "colab_type": "text" + }, + "source": [ + "### Tests with kernel_size 4" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "p3DTiFYCk57Y", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 104 + }, + "outputId": "fe952753-ad58-4d40-9b3e-70abd2a96a4c" + }, + "source": [ + "vertical_stack = build_test_stack_2d('V', kernel_size=(4, 4))\n", + "val = vertical_stack.predict(test_ones_2d)\n", + "print(val[0,:,:,0].squeeze())" + ], + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[[0. 0. 0. 0. 0.]\n", + " [3. 4. 4. 3. 2.]\n", + " [3. 4. 4. 3. 2.]\n", + " [3. 4. 4. 3. 2.]\n", + " [3. 4. 4. 3. 2.]]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "J5HSUo7Rk5xO", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 104 + }, + "outputId": "76861f8f-c601-4e97-e6f8-1b3818104cb7" + }, + "source": [ + "horizontal_A_stack = build_test_stack_2d('A', kernel_size=(1, 4))\n", + "val = horizontal_A_stack.predict(test_ones_2d)\n", + "print(val[0,:,:,0].squeeze())" + ], + "execution_count": 12, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[[0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "_2V51aerk5l1", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 104 + }, + "outputId": "1c35dccd-8136-4ccb-bb33-26fa728acbd9" + }, + "source": [ + "horizontal_B_stack = build_test_stack_2d('B', kernel_size=(1, 4))\n", + "val = horizontal_B_stack.predict(test_ones_2d)\n", + "print(val[0,:,:,0].squeeze())" + ], + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[[1. 2. 2. 2. 
2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4XtSteZ4pbOd", + "colab_type": "text" + }, + "source": [ + "## Creating 3D masked solution to check results with cropped solution later" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "r6XeT4-cpdoA", + "colab_type": "code", + "colab": {} + }, + "source": [ + "class MaskedConv3D(tf.keras.layers.Layer):\n", + " def __init__(self,\n", + " mask_type,\n", + " filters,\n", + " kernel_size,\n", + " strides=1,\n", + " padding='same',\n", + " kernel_initializer='glorot_uniform',\n", + " bias_initializer='zeros'):\n", + " super(MaskedConv3D, self).__init__()\n", + "\n", + " assert mask_type in {'A', 'B', 'D', 'V'}\n", + " self.mask_type = mask_type\n", + "\n", + " self.filters = filters\n", + "\n", + " if isinstance(kernel_size, int):\n", + " kernel_size = (kernel_size, kernel_size, kernel_size)\n", + " self.kernel_size = kernel_size\n", + "\n", + " self.strides = strides\n", + " self.padding = padding.upper()\n", + " self.kernel_initializer = tf.keras.initializers.get(kernel_initializer)\n", + " self.bias_initializer = tf.keras.initializers.get(bias_initializer)\n", + "\n", + " def build(self, input_shape):\n", + " kernel_d, kernel_h, kernel_w = self.kernel_size\n", + "\n", + " self.kernel = self.add_weight(\"kernel\",\n", + " shape=(kernel_d,\n", + " kernel_h,\n", + " kernel_w,\n", + " int(input_shape[-1]),\n", + " self.filters),\n", + " initializer=self.kernel_initializer,\n", + " trainable=True)\n", + "\n", + " self.bias = self.add_weight(\"bias\",\n", + " shape=(self.filters,),\n", + " initializer=self.bias_initializer,\n", + " trainable=True)\n", + "\n", + " mask = np.ones(self.kernel.shape, dtype=np.float32)\n", + "\n", + "\n", + " if kernel_d % 2 != 0: \n", + " center_d = kernel_d // 2\n", + " else:\n", + " center_d = (kernel_d - 1) // 2\n", + "\n", + " if 
kernel_h % 2 != 0: \n", + " center_h = kernel_h // 2\n", + " else:\n", + " center_h = (kernel_h - 1) // 2\n", + "\n", + " if kernel_w % 2 != 0: \n", + " center_w = kernel_w // 2\n", + " else:\n", + " center_w = (kernel_w - 1) // 2\n", + "\n", + "\n", + " if self.mask_type == 'D':\n", + " mask[center_d:, :, :, :, :] = 0.\n", + " elif self.mask_type == 'V':\n", + " mask[center_d, center_h:, :, :, :] = 0.\n", + " mask[center_d + 1:, :, :, :, :] = 0.\n", + " else:\n", + " mask[center_d, center_h, center_w + (self.mask_type == 'B'):, :, :] = 0.\n", + " mask[center_d + 1:, :, :, :, :] = 0.\n", + " mask[:, center_h + 1:, :, :, :] = 0.\n", + "\n", + " self.mask = tf.constant(mask, dtype=tf.float32, name='mask')\n", + "\n", + " def call(self, input):\n", + " masked_kernel = tf.math.multiply(self.mask, self.kernel)\n", + " x = tf.nn.conv3d(input, masked_kernel, strides=[1, self.strides, self.strides, self.strides, 1], padding=self.padding)\n", + " x = tf.nn.bias_add(x, self.bias)\n", + " return x" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "A-D6ge8jr04d", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def build_test_stack_3d(mask_type, input_shape=(5, 5, 5, 1), kernel_size=(3, 3, 3)):\n", + " inputs = tf.keras.layers.Input(shape=input_shape)\n", + " \n", + " x = MaskedConv3D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size, \n", + " padding='same',\n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')(inputs)\n", + "\n", + " stack = tf.keras.Model(inputs=inputs, outputs=x)\n", + " stack.compile(optimizer='adam', loss='mse')\n", + " return stack" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TJSNB3m4Apfi", + "colab_type": "text" + }, + "source": [ + "### Tests with kernel_size 3" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "DuEGynZxsChh", + "colab_type": "code", + "colab": { + "base_uri": 
"https://localhost:8080/", + "height": 538 + }, + "outputId": "15429ddd-ce04-4e04-b79b-6fe7cfedac96" + }, + "source": [ + "depth_stack = build_test_stack_3d('D')\n", + "val = depth_stack.predict(test_ones_3d)\n", + "print_test_3d(val)" + ], + "execution_count": 16, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Depth 0\n", + "[[0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0.]]\n", + "Depth 1\n", + "[[4. 6. 6. 6. 4.]\n", + " [6. 9. 9. 9. 6.]\n", + " [6. 9. 9. 9. 6.]\n", + " [6. 9. 9. 9. 6.]\n", + " [4. 6. 6. 6. 4.]]\n", + "Depth 2\n", + "[[4. 6. 6. 6. 4.]\n", + " [6. 9. 9. 9. 6.]\n", + " [6. 9. 9. 9. 6.]\n", + " [6. 9. 9. 9. 6.]\n", + " [4. 6. 6. 6. 4.]]\n", + "Depth 3\n", + "[[4. 6. 6. 6. 4.]\n", + " [6. 9. 9. 9. 6.]\n", + " [6. 9. 9. 9. 6.]\n", + " [6. 9. 9. 9. 6.]\n", + " [4. 6. 6. 6. 4.]]\n", + "Depth 4\n", + "[[4. 6. 6. 6. 4.]\n", + " [6. 9. 9. 9. 6.]\n", + " [6. 9. 9. 9. 6.]\n", + " [6. 9. 9. 9. 6.]\n", + " [4. 6. 6. 6. 4.]]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "dCgUj-q5rwIt", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 538 + }, + "outputId": "4d50016e-5f3f-4124-d02e-beda5b8fc4e2" + }, + "source": [ + "vertical_stack = build_test_stack_3d('V', kernel_size=(1, 3, 3))\n", + "val = vertical_stack.predict(test_ones_3d)\n", + "print_test_3d(val)" + ], + "execution_count": 17, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Depth 0\n", + "[[0. 0. 0. 0. 0.]\n", + " [2. 3. 3. 3. 2.]\n", + " [2. 3. 3. 3. 2.]\n", + " [2. 3. 3. 3. 2.]\n", + " [2. 3. 3. 3. 2.]]\n", + "Depth 1\n", + "[[0. 0. 0. 0. 0.]\n", + " [2. 3. 3. 3. 2.]\n", + " [2. 3. 3. 3. 2.]\n", + " [2. 3. 3. 3. 2.]\n", + " [2. 3. 3. 3. 2.]]\n", + "Depth 2\n", + "[[0. 0. 0. 0. 0.]\n", + " [2. 3. 3. 3. 2.]\n", + " [2. 3. 3. 3. 2.]\n", + " [2. 3. 3. 3. 2.]\n", + " [2. 3. 3. 3. 2.]]\n", + "Depth 3\n", + "[[0. 0. 0. 0. 
0.]\n", + " [2. 3. 3. 3. 2.]\n", + " [2. 3. 3. 3. 2.]\n", + " [2. 3. 3. 3. 2.]\n", + " [2. 3. 3. 3. 2.]]\n", + "Depth 4\n", + "[[0. 0. 0. 0. 0.]\n", + " [2. 3. 3. 3. 2.]\n", + " [2. 3. 3. 3. 2.]\n", + " [2. 3. 3. 3. 2.]\n", + " [2. 3. 3. 3. 2.]]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "0N8LWI67_vFv", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 538 + }, + "outputId": "a1b5bdfc-5415-459f-94d9-e9dffe5eaff3" + }, + "source": [ + "horizontal_A_stack = build_test_stack_3d('A', kernel_size=(1, 1, 3))\n", + "val = horizontal_A_stack.predict(test_ones_3d)\n", + "print_test_3d(val)" + ], + "execution_count": 18, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Depth 0\n", + "[[0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]]\n", + "Depth 1\n", + "[[0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]]\n", + "Depth 2\n", + "[[0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]]\n", + "Depth 3\n", + "[[0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]]\n", + "Depth 4\n", + "[[0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 
1.]]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "yD6VvotZABid", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 538 + }, + "outputId": "ce9a0d0d-bda7-408e-b42b-7d50858b26ed" + }, + "source": [ + "horizontal_B_stack = build_test_stack_3d('B', kernel_size=(1, 1, 3))\n", + "val = horizontal_B_stack.predict(test_ones_3d)\n", + "print_test_3d(val)" + ], + "execution_count": 19, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Depth 0\n", + "[[1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]]\n", + "Depth 1\n", + "[[1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]]\n", + "Depth 2\n", + "[[1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]]\n", + "Depth 3\n", + "[[1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]]\n", + "Depth 4\n", + "[[1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8AlzSu6GAvNj", + "colab_type": "text" + }, + "source": [ + "### Tests with kernel_size 4" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "AMp8F--cAwDs", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 538 + }, + "outputId": "a55145ec-594e-4a81-cc3b-45df89d9e086" + }, + "source": [ + "depth_stack = build_test_stack_3d('D', kernel_size=(4, 4, 4))\n", + "val = depth_stack.predict(test_ones_3d)\n", + "print_test_3d(val)" + ], + "execution_count": 24, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Depth 0\n", + "[[0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 
0.]\n", + " [0. 0. 0. 0. 0.]]\n", + "Depth 1\n", + "[[ 9. 12. 12. 9. 6.]\n", + " [12. 16. 16. 12. 8.]\n", + " [12. 16. 16. 12. 8.]\n", + " [ 9. 12. 12. 9. 6.]\n", + " [ 6. 8. 8. 6. 4.]]\n", + "Depth 2\n", + "[[ 9. 12. 12. 9. 6.]\n", + " [12. 16. 16. 12. 8.]\n", + " [12. 16. 16. 12. 8.]\n", + " [ 9. 12. 12. 9. 6.]\n", + " [ 6. 8. 8. 6. 4.]]\n", + "Depth 3\n", + "[[ 9. 12. 12. 9. 6.]\n", + " [12. 16. 16. 12. 8.]\n", + " [12. 16. 16. 12. 8.]\n", + " [ 9. 12. 12. 9. 6.]\n", + " [ 6. 8. 8. 6. 4.]]\n", + "Depth 4\n", + "[[ 9. 12. 12. 9. 6.]\n", + " [12. 16. 16. 12. 8.]\n", + " [12. 16. 16. 12. 8.]\n", + " [ 9. 12. 12. 9. 6.]\n", + " [ 6. 8. 8. 6. 4.]]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "7A8KTeJXA0f6", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 538 + }, + "outputId": "6eef6945-5102-4847-ede8-a52b1ed54443" + }, + "source": [ + "vertical_stack = build_test_stack_3d('V', kernel_size=(1, 4, 4))\n", + "val = vertical_stack.predict(test_ones_3d)\n", + "print_test_3d(val)" + ], + "execution_count": 25, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Depth 0\n", + "[[0. 0. 0. 0. 0.]\n", + " [3. 4. 4. 3. 2.]\n", + " [3. 4. 4. 3. 2.]\n", + " [3. 4. 4. 3. 2.]\n", + " [3. 4. 4. 3. 2.]]\n", + "Depth 1\n", + "[[0. 0. 0. 0. 0.]\n", + " [3. 4. 4. 3. 2.]\n", + " [3. 4. 4. 3. 2.]\n", + " [3. 4. 4. 3. 2.]\n", + " [3. 4. 4. 3. 2.]]\n", + "Depth 2\n", + "[[0. 0. 0. 0. 0.]\n", + " [3. 4. 4. 3. 2.]\n", + " [3. 4. 4. 3. 2.]\n", + " [3. 4. 4. 3. 2.]\n", + " [3. 4. 4. 3. 2.]]\n", + "Depth 3\n", + "[[0. 0. 0. 0. 0.]\n", + " [3. 4. 4. 3. 2.]\n", + " [3. 4. 4. 3. 2.]\n", + " [3. 4. 4. 3. 2.]\n", + " [3. 4. 4. 3. 2.]]\n", + "Depth 4\n", + "[[0. 0. 0. 0. 0.]\n", + " [3. 4. 4. 3. 2.]\n", + " [3. 4. 4. 3. 2.]\n", + " [3. 4. 4. 3. 2.]\n", + " [3. 4. 4. 3. 
2.]]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "RGSe6z0mA0WU", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 538 + }, + "outputId": "cd8ec46e-f49e-4761-d7d3-f1d328148263" + }, + "source": [ + "horizontal_A_stack = build_test_stack_3d('A', kernel_size=(1, 1, 4))\n", + "val = horizontal_A_stack.predict(test_ones_3d)\n", + "print_test_3d(val)" + ], + "execution_count": 26, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Depth 0\n", + "[[0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]]\n", + "Depth 1\n", + "[[0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]]\n", + "Depth 2\n", + "[[0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]]\n", + "Depth 3\n", + "[[0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]]\n", + "Depth 4\n", + "[[0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]\n", + " [0. 1. 1. 1. 1.]]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "9jCW_EzVA0JB", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 538 + }, + "outputId": "95efb670-2e51-462d-8b0d-97e4cc89852b" + }, + "source": [ + "horizontal_B_stack = build_test_stack_3d('B', kernel_size=(1, 1, 4))\n", + "val = horizontal_B_stack.predict(test_ones_3d)\n", + "print_test_3d(val)" + ], + "execution_count": 27, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Depth 0\n", + "[[1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]]\n", + "Depth 1\n", + "[[1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 
2.]\n", + " [1. 2. 2. 2. 2.]]\n", + "Depth 2\n", + "[[1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]]\n", + "Depth 3\n", + "[[1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]]\n", + "Depth 4\n", + "[[1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]\n", + " [1. 2. 2. 2. 2.]]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kZmui789Br2B", + "colab_type": "text" + }, + "source": [ + "## Creating 2D cropping solution" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "PxBNsvzhB1ec", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import math\n", + "\n", + "class VerticalCroppedConv2d(tf.keras.Model):\n", + " def __init__(self,\n", + " filters,\n", + " kernel_size,\n", + " kernel_initializer, \n", + " bias_initializer):\n", + " super(VerticalCroppedConv2d, self).__init__(name='')\n", + "\n", + " if isinstance(kernel_size, int):\n", + " kernel_size = (kernel_size, kernel_size)\n", + "\n", + " kernel_h, kernel_w = kernel_size\n", + "\n", + " self.padding = keras.layers.ZeroPadding2D(padding=((kernel_h-1, 0),(int((kernel_w-1)/2),int((kernel_w-1)/2))))\n", + "\n", + " self.conv = keras.layers.Conv2D(filters=filters,\n", + " kernel_size=kernel_size,\n", + " strides=1,\n", + " padding='valid',\n", + " kernel_initializer=kernel_initializer, \n", + " bias_initializer=bias_initializer)\n", + "\n", + " def call(self, input_value):\n", + "\n", + " x = self.padding(input_value)\n", + " x = self.conv(x)\n", + " out = self.cropping(x)\n", + "\n", + " return out\n", + "\n" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RfFuwKlrP2JU", + "colab_type": "text" + }, + "source": [ + "Example step by step" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "fH3I0lfoPdcH", + 
"colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 332 + }, + "outputId": "49b7ffca-0ec0-46d7-837c-8e0f51dd5205" + }, + "source": [ + "kernel_h = 2\n", + "kernel_w = 3\n", + "\n", + "kernel_size = (kernel_h, kernel_w)\n", + "\n", + "padding = keras.layers.ZeroPadding2D(padding=((kernel_h-1, 0),(int((kernel_w-1)/2),int((kernel_w-1)/2))))\n", + "\n", + "res = padding(test_ones_2d)\n", + "print(res.numpy().squeeze())\n", + "\n", + "conv = keras.layers.Conv2D(filters=1,\n", + " kernel_size=kernel_size,\n", + " strides=1,\n", + " padding='valid',\n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')\n", + "\n", + "res2 = conv(res)\n", + "print(res2.numpy().squeeze())\n", + "\n", + "\n", + "\n" + ], + "execution_count": 67, + "outputs": [ + { + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Layer zero_padding2d_15 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2. The layer has dtype float32 because it's dtype defaults to floatx.\n", + "\n", + "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n", + "\n", + "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n", + "\n", + "[[0. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 1. 1. 1. 1. 0.]\n", + " [0. 1. 1. 1. 1. 1. 0.]\n", + " [0. 1. 1. 1. 1. 1. 0.]\n", + " [0. 1. 1. 1. 1. 1. 0.]\n", + " [0. 1. 1. 1. 1. 1. 0.]]\n", + "[[2. 3. 3. 3. 2.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 
4.]]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "TFexYspQWEpo", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 69 + }, + "outputId": "e97bd930-188e-4831-8025-d379ea385190" + }, + "source": [ + "conv.weights[0].numpy().squeeze()" + ], + "execution_count": 64, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[1., 1., 1.],\n", + " [1., 1., 1.],\n", + " [1., 1., 1.]], dtype=float32)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 64 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "AfyRyUmTNYZ8", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def build_test_croppedv_stack_2d(input_shape=(5, 5, 1), kernel_size=3):\n", + " inputs = tf.keras.layers.Input(shape=input_shape)\n", + " \n", + " x = VerticalCroppedConv2d(\n", + " filters=1,\n", + " kernel_size=kernel_size, \n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')(inputs)\n", + "\n", + " stack = tf.keras.Model(inputs=inputs, outputs=x)\n", + " stack.compile(optimizer='adam', loss='mse')\n", + " return stack" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Nskk-zJwN3Em", + "colab_type": "text" + }, + "source": [ + "### Tests with kernel_size 3" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jc06sHDoNzx8", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 86 + }, + "outputId": "194d0b97-c5ed-4cbc-c82c-82a4885ce7a9" + }, + "source": [ + "vertical_stack = build_test_croppedv_stack_2d()\n", + "val = vertical_stack.predict(test_ones_2d)\n", + "print(val[0,:,:,0].squeeze())" + ], + "execution_count": 59, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[[2. 3. 3. 3. 2.]\n", + " [4. 6. 6. 6. 4.]\n", + " [6. 9. 9. 9. 6.]\n", + " [6. 9. 9. 9.
6.]]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "5jLEZhYtOZgi", + "colab_type": "code", + "colab": {} + }, + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "swYJ4XMofUWv", + "colab_type": "text" + }, + "source": [ + "REFERENCES\n", + "\n", + "https://www.youtube.com/watch?v=1BURwCCYNEI" + ] + } + ] +} \ No newline at end of file diff --git a/WIP/6-gated_pixelcnn_cropped/comparing_2d_3d.py b/WIP/6-gated_pixelcnn_cropped/comparing_2d_3d.py new file mode 100644 index 0000000..e69de29