diff --git a/WIP/6-gated_pixelcnn_cropped/3d_gated_pixelcnn_conv.ipynb b/WIP/6-gated_pixelcnn_cropped/3d_gated_pixelcnn_conv.ipynb index 02e1f98..8df9dee 100644 --- a/WIP/6-gated_pixelcnn_cropped/3d_gated_pixelcnn_conv.ipynb +++ b/WIP/6-gated_pixelcnn_cropped/3d_gated_pixelcnn_conv.ipynb @@ -35,7 +35,8 @@ "source": [ "import tensorflow as tf\n", "import tensorflow.keras as keras\n", - "import numpy as np" + "import numpy as np\n", + "import math" ], "execution_count": 0, "outputs": [] @@ -62,10 +63,10 @@ "colab": {} }, "source": [ - "def print_test_3d(test_matrix):\n", - " for i in range(test_matrix.shape[1]):\n", + "def print_3d(matrix_3d):\n", + " for i in range(matrix_3d.shape[0]):\n", " print(f'Depth {i}')\n", - " print(test_matrix[0,i,:,:,0])" + " print(matrix_3d[i,...])" ], "execution_count": 0, "outputs": [] @@ -79,12 +80,12 @@ "base_uri": "https://localhost:8080/", "height": 538 }, - "outputId": "e7fd642b-23ac-4d79-a1f0-564853e64a79" + "outputId": "456c0c0f-9701-40c2-d545-c8091d50a59a" }, "source": [ - "print_test_3d(test_ones_3d)" + "print_3d(test_ones_3d.squeeze())" ], - "execution_count": 4, + "execution_count": 146, "outputs": [ { "output_type": "stream", @@ -229,6 +230,7 @@ " if self.mask_type == 'V':\n", " mask[center_h + 1:, :, :, :] = 0.\n", " else:\n", + " mask[:center_h, :, :] = 0.\n", " mask[center_h, center_w + (self.mask_type == 'B'):, :, :] = 0.\n", " mask[center_h + 1:, :, :] = 0.\n", "\n", @@ -243,31 +245,6 @@ "execution_count": 0, "outputs": [] }, - { - "cell_type": "code", - "metadata": { - "id": "PmmR19cRgNyV", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def build_test_stack_2d(mask_type, input_shape=(5, 5, 1), kernel_size=(3, 3)):\n", - " inputs = tf.keras.layers.Input(shape=input_shape)\n", - " \n", - " x = MaskedConv2D(mask_type=mask_type,\n", - " filters=1,\n", - " kernel_size=kernel_size, \n", - " padding='same',\n", - " kernel_initializer='ones', \n", - " bias_initializer='zeros')(inputs)\n", - "\n", - " stack = 
tf.keras.Model(inputs=inputs, outputs=x)\n", - " stack.compile(optimizer='adam', loss='mse')\n", - " return stack" - ], - "execution_count": 0, - "outputs": [] - }, { "cell_type": "markdown", "metadata": { @@ -295,9 +272,9 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 104 + "height": 208 }, - "outputId": "78b74814-4929-4e48-e45c-a167b80ae83d" + "outputId": "3f769c93-2f50-452d-ea81-503488f3c7c1" }, "source": [ "mask_type = 'V'\n", @@ -319,13 +296,23 @@ "x = conv(x)\n", "result = cropping(x)\n", "\n", - "print(result.numpy().squeeze())\n" + "print('MASK')\n", + "print(conv.mask.numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print(result.numpy().squeeze())" ], - "execution_count": 93, + "execution_count": 111, "outputs": [ { "output_type": "stream", "text": [ + "MASK\n", + "[[1. 1. 1.]\n", + " [1. 1. 1.]\n", + " [0. 0. 0.]]\n", + "\n", + "OUTPUT\n", "[[0. 0. 0. 0. 0.]\n", " [2. 3. 3. 3. 2.]\n", " [4. 6. 6. 6. 4.]\n", @@ -336,6 +323,16 @@ } ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "iHY6UE2_p5oc", + "colab_type": "text" + }, + "source": [ + "#### Horizontal stack A" + ] + }, { "cell_type": "code", "metadata": { @@ -343,20 +340,40 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 104 + "height": 208 }, - "outputId": "6bc566e2-2bbd-49f0-a4c6-0574125c6cd8" + "outputId": "f9a62e80-29f4-4c9d-b511-620a3eb76037" }, "source": [ - "horizontal_A_stack = build_test_stack_2d('A', kernel_size=(1, 3))\n", - "val = horizontal_A_stack.predict(test_ones_2d)\n", - "print(val[0,:,:,0].squeeze())" + "mask_type = 'A'\n", + "kernel_size=(3, 3)\n", + "\n", + "conv = MaskedConv2D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size, \n", + " padding='same',\n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')\n", + "\n", + "result = conv(test_ones_2d)\n", + "\n", + "print('MASK')\n", + "print(conv.mask.numpy().squeeze())\n", + "print('')\n", + 
"print('OUTPUT')\n", + "print(result.numpy().squeeze())" ], - "execution_count": 9, + "execution_count": 113, "outputs": [ { "output_type": "stream", "text": [ + "MASK\n", + "[[0. 0. 0.]\n", + " [1. 0. 0.]\n", + " [0. 0. 0.]]\n", + "\n", + "OUTPUT\n", "[[0. 1. 1. 1. 1.]\n", " [0. 1. 1. 1. 1.]\n", " [0. 1. 1. 1. 1.]\n", @@ -367,6 +384,16 @@ } ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "jMuS-vgWqAWK", + "colab_type": "text" + }, + "source": [ + "#### Horizontal stack B" + ] + }, { "cell_type": "code", "metadata": { @@ -374,20 +401,40 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 104 + "height": 208 }, - "outputId": "8e8b83ab-2c0b-4ae9-e5c0-eceb6ab55f66" + "outputId": "64e2dfcf-ba0f-4b8b-b92d-8b5504340a4a" }, "source": [ - "horizontal_B_stack = build_test_stack_2d('B', kernel_size=(1, 3))\n", - "val = horizontal_B_stack.predict(test_ones_2d)\n", - "print(val[0,:,:,0].squeeze())" + "mask_type = 'B'\n", + "kernel_size=(3, 3)\n", + "\n", + "conv = MaskedConv2D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size, \n", + " padding='same',\n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')\n", + "\n", + "result = conv(test_ones_2d)\n", + "\n", + "print('MASK')\n", + "print(conv.mask.numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print(result.numpy().squeeze())" ], - "execution_count": 10, + "execution_count": 115, "outputs": [ { "output_type": "stream", "text": [ + "MASK\n", + "[[0. 0. 0.]\n", + " [1. 1. 0.]\n", + " [0. 0. 0.]]\n", + "\n", + "OUTPUT\n", "[[1. 2. 2. 2. 2.]\n", " [1. 2. 2. 2. 2.]\n", " [1. 2. 2. 2. 
2.]\n", @@ -408,6 +455,16 @@ "### Tests with kernel_size 4" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "WNykK-WpqMlu", + "colab_type": "text" + }, + "source": [ + "#### Vertical stack" + ] + }, { "cell_type": "code", "metadata": { @@ -415,30 +472,68 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 104 + "height": 225 }, - "outputId": "fe952753-ad58-4d40-9b3e-70abd2a96a4c" + "outputId": "f2ec68a3-95e6-4113-f0ba-9c37c7c63589" }, "source": [ - "vertical_stack = build_test_stack_2d('V', kernel_size=(4, 4))\n", - "val = vertical_stack.predict(test_ones_2d)\n", - "print(val[0,:,:,0].squeeze())" + "mask_type = 'V'\n", + "kernel_size=(4, 4)\n", + "\n", + "padding = keras.layers.ZeroPadding2D(padding=((1,0),0))\n", + "\n", + "conv = MaskedConv2D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size, \n", + " padding='same',\n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')\n", + "\n", + "cropping = keras.layers.Cropping2D(cropping=((0, 1), 0))\n", + "\n", + "\n", + "x = padding(test_ones_2d)\n", + "x = conv(x)\n", + "result = cropping(x)\n", + "\n", + "print('MASK')\n", + "print(conv.mask.numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print(result.numpy().squeeze())" ], - "execution_count": 11, + "execution_count": 119, "outputs": [ { "output_type": "stream", "text": [ + "MASK\n", + "[[1. 1. 1. 1.]\n", + " [1. 1. 1. 1.]\n", + " [0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]]\n", + "\n", + "OUTPUT\n", "[[0. 0. 0. 0. 0.]\n", " [3. 4. 4. 3. 2.]\n", - " [3. 4. 4. 3. 2.]\n", - " [3. 4. 4. 3. 2.]\n", - " [3. 4. 4. 3. 2.]]\n" + " [6. 8. 8. 6. 4.]\n", + " [6. 8. 8. 6. 4.]\n", + " [6. 8. 8. 6. 
4.]]\n" ], "name": "stdout" } ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "E5jUGK3_qbT8", + "colab_type": "text" + }, + "source": [ + "#### Horizontal stack A" + ] + }, { "cell_type": "code", "metadata": { @@ -446,20 +541,41 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 104 + "height": 225 }, - "outputId": "76861f8f-c601-4e97-e6f8-1b3818104cb7" + "outputId": "bd56a4a9-985e-4746-c110-02f6a37bf964" }, "source": [ - "horizontal_A_stack = build_test_stack_2d('A', kernel_size=(1, 4))\n", - "val = horizontal_A_stack.predict(test_ones_2d)\n", - "print(val[0,:,:,0].squeeze())" + "mask_type = 'A'\n", + "kernel_size=(4, 4)\n", + "\n", + "conv = MaskedConv2D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size, \n", + " padding='same',\n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')\n", + "\n", + "result = conv(test_ones_2d)\n", + "\n", + "print('MASK')\n", + "print(conv.mask.numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print(result.numpy().squeeze())" ], - "execution_count": 12, + "execution_count": 120, "outputs": [ { "output_type": "stream", "text": [ + "MASK\n", + "[[0. 0. 0. 0.]\n", + " [1. 0. 0. 0.]\n", + " [0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]]\n", + "\n", + "OUTPUT\n", "[[0. 1. 1. 1. 1.]\n", " [0. 1. 1. 1. 1.]\n", " [0. 1. 1. 1. 
1.]\n", @@ -470,6 +586,16 @@ } ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "KqORK7mLqvPP", + "colab_type": "text" + }, + "source": [ + "#### Horizontal stack B" + ] + }, { "cell_type": "code", "metadata": { @@ -477,20 +603,41 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 104 + "height": 225 }, - "outputId": "1c35dccd-8136-4ccb-bb33-26fa728acbd9" + "outputId": "5a6e6421-a0f9-4665-f453-b9fb0b80c325" }, "source": [ - "horizontal_B_stack = build_test_stack_2d('B', kernel_size=(1, 4))\n", - "val = horizontal_B_stack.predict(test_ones_2d)\n", - "print(val[0,:,:,0].squeeze())" + "mask_type = 'B'\n", + "kernel_size=(4, 4)\n", + "\n", + "conv = MaskedConv2D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size, \n", + " padding='same',\n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')\n", + "\n", + "result = conv(test_ones_2d)\n", + "\n", + "print('MASK')\n", + "print(conv.mask.numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print(result.numpy().squeeze())" ], - "execution_count": 13, + "execution_count": 121, "outputs": [ { "output_type": "stream", "text": [ + "MASK\n", + "[[0. 0. 0. 0.]\n", + " [1. 1. 0. 0.]\n", + " [0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]]\n", + "\n", + "OUTPUT\n", "[[1. 2. 2. 2. 2.]\n", " [1. 2. 2. 2. 2.]\n", " [1. 2. 2. 2. 
2.]\n", @@ -581,14 +728,17 @@ "\n", "\n", " if self.mask_type == 'D':\n", - " mask[center_d:, :, :, :, :] = 0.\n", + " mask[center_d+1:, :, :, :, :] = 0.\n", " elif self.mask_type == 'V':\n", - " mask[center_d, center_h:, :, :, :] = 0.\n", + " mask[:center_d, :, :, :, :] = 0.\n", + " mask[center_d, center_h+1:, :, :, :] = 0.\n", " mask[center_d + 1:, :, :, :, :] = 0.\n", " else:\n", + " mask[:center_d, :, :, :, :] = 0.\n", + " mask[:, :center_h, :, :, :] = 0.\n", " mask[center_d, center_h, center_w + (self.mask_type == 'B'):, :, :] = 0.\n", - " mask[center_d + 1:, :, :, :, :] = 0.\n", " mask[:, center_h + 1:, :, :, :] = 0.\n", + " mask[center_d + 1:, :, :, :, :] = 0.\n", "\n", " self.mask = tf.constant(mask, dtype=tf.float32, name='mask')\n", "\n", @@ -602,38 +752,23 @@ "outputs": [] }, { - "cell_type": "code", + "cell_type": "markdown", "metadata": { - "id": "A-D6ge8jr04d", - "colab_type": "code", - "colab": {} + "id": "TJSNB3m4Apfi", + "colab_type": "text" }, "source": [ - "def build_test_stack_3d(mask_type, input_shape=(5, 5, 5, 1), kernel_size=(3, 3, 3)):\n", - " inputs = tf.keras.layers.Input(shape=input_shape)\n", - " \n", - " x = MaskedConv3D(mask_type=mask_type,\n", - " filters=1,\n", - " kernel_size=kernel_size, \n", - " padding='same',\n", - " kernel_initializer='ones', \n", - " bias_initializer='zeros')(inputs)\n", - "\n", - " stack = tf.keras.Model(inputs=inputs, outputs=x)\n", - " stack.compile(optimizer='adam', loss='mse')\n", - " return stack" - ], - "execution_count": 0, - "outputs": [] + "### Tests with kernel_size 3" + ] }, { "cell_type": "markdown", "metadata": { - "id": "TJSNB3m4Apfi", + "id": "-NIs7IZdrVnu", "colab_type": "text" }, "source": [ - "### Tests with kernel_size 3" + "#### Depth stack" ] }, { @@ -643,20 +778,56 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 538 + "height": 799 }, - "outputId": "15429ddd-ce04-4e04-b79b-6fe7cfedac96" + "outputId": "f98454bb-09ed-416f-8d20-af28b813881a" }, 
"source": [ - "depth_stack = build_test_stack_3d('D')\n", - "val = depth_stack.predict(test_ones_3d)\n", - "print_test_3d(val)" + "mask_type = 'D'\n", + "kernel_size=(3, 3, 3)\n", + "\n", + "padding = keras.layers.ZeroPadding3D(padding=((1,0),0,0))\n", + "\n", + "conv = MaskedConv3D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size, \n", + " padding='same',\n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')\n", + "\n", + "cropping = keras.layers.Cropping3D(cropping=((0, 1), 0, 0))\n", + "\n", + "\n", + "x = padding(test_ones_3d)\n", + "x = conv(x)\n", + "result = cropping(x)\n", + "\n", + "print('MASK')\n", + "print_3d(conv.mask.numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print_3d(result.numpy().squeeze())\n" ], - "execution_count": 16, + "execution_count": 154, "outputs": [ { "output_type": "stream", "text": [ + "MASK\n", + "Depth 0\n", + "[[1. 1. 1.]\n", + " [1. 1. 1.]\n", + " [1. 1. 1.]]\n", + "Depth 1\n", + "[[1. 1. 1.]\n", + " [1. 1. 1.]\n", + " [1. 1. 1.]]\n", + "Depth 2\n", + "[[0. 0. 0.]\n", + " [0. 0. 0.]\n", + " [0. 0. 0.]]\n", + "\n", + "OUTPUT\n", "Depth 0\n", "[[0. 0. 0. 0. 0.]\n", " [0. 0. 0. 0. 0.]\n", @@ -670,28 +841,38 @@ " [6. 9. 9. 9. 6.]\n", " [4. 6. 6. 6. 4.]]\n", "Depth 2\n", - "[[4. 6. 6. 6. 4.]\n", - " [6. 9. 9. 9. 6.]\n", - " [6. 9. 9. 9. 6.]\n", - " [6. 9. 9. 9. 6.]\n", - " [4. 6. 6. 6. 4.]]\n", + "[[ 8. 12. 12. 12. 8.]\n", + " [12. 18. 18. 18. 12.]\n", + " [12. 18. 18. 18. 12.]\n", + " [12. 18. 18. 18. 12.]\n", + " [ 8. 12. 12. 12. 8.]]\n", "Depth 3\n", - "[[4. 6. 6. 6. 4.]\n", - " [6. 9. 9. 9. 6.]\n", - " [6. 9. 9. 9. 6.]\n", - " [6. 9. 9. 9. 6.]\n", - " [4. 6. 6. 6. 4.]]\n", + "[[ 8. 12. 12. 12. 8.]\n", + " [12. 18. 18. 18. 12.]\n", + " [12. 18. 18. 18. 12.]\n", + " [12. 18. 18. 18. 12.]\n", + " [ 8. 12. 12. 12. 8.]]\n", "Depth 4\n", - "[[4. 6. 6. 6. 4.]\n", - " [6. 9. 9. 9. 6.]\n", - " [6. 9. 9. 9. 6.]\n", - " [6. 9. 9. 9. 6.]\n", - " [4. 6. 6. 6. 4.]]\n" + "[[ 8. 12. 12. 
12. 8.]\n", + " [12. 18. 18. 18. 12.]\n", + " [12. 18. 18. 18. 12.]\n", + " [12. 18. 18. 18. 12.]\n", + " [ 8. 12. 12. 12. 8.]]\n" ], "name": "stdout" } ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "u4K8-zY0timI", + "colab_type": "text" + }, + "source": [ + "#### Vertical stack" + ] + }, { "cell_type": "code", "metadata": { @@ -699,55 +880,101 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 538 + "height": 799 }, - "outputId": "4d50016e-5f3f-4124-d02e-beda5b8fc4e2" + "outputId": "b836ef8d-4cbf-40f2-f086-4a83dd7a0ddf" }, "source": [ - "vertical_stack = build_test_stack_3d('V', kernel_size=(1, 3, 3))\n", - "val = vertical_stack.predict(test_ones_3d)\n", - "print_test_3d(val)" + "mask_type = 'V'\n", + "kernel_size=(3, 3, 3)\n", + "\n", + "padding = keras.layers.ZeroPadding3D(padding=(0,(1,0),0))\n", + "\n", + "conv = MaskedConv3D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size, \n", + " padding='same',\n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')\n", + "\n", + "cropping = keras.layers.Cropping3D(cropping=(0,(0, 1), 0))\n", + "\n", + "\n", + "x = padding(test_ones_3d)\n", + "x = conv(x)\n", + "result = cropping(x)\n", + "\n", + "print('MASK')\n", + "print_3d(conv.mask.numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print_3d(result.numpy().squeeze())\n" ], - "execution_count": 17, + "execution_count": 157, "outputs": [ { "output_type": "stream", "text": [ + "MASK\n", + "Depth 0\n", + "[[0. 0. 0.]\n", + " [0. 0. 0.]\n", + " [0. 0. 0.]]\n", + "Depth 1\n", + "[[1. 1. 1.]\n", + " [1. 1. 1.]\n", + " [0. 0. 0.]]\n", + "Depth 2\n", + "[[0. 0. 0.]\n", + " [0. 0. 0.]\n", + " [0. 0. 0.]]\n", + "\n", + "OUTPUT\n", "Depth 0\n", "[[0. 0. 0. 0. 0.]\n", " [2. 3. 3. 3. 2.]\n", - " [2. 3. 3. 3. 2.]\n", - " [2. 3. 3. 3. 2.]\n", - " [2. 3. 3. 3. 2.]]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]]\n", "Depth 1\n", "[[0. 0. 0. 0. 0.]\n", " [2. 
3. 3. 3. 2.]\n", - " [2. 3. 3. 3. 2.]\n", - " [2. 3. 3. 3. 2.]\n", - " [2. 3. 3. 3. 2.]]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]]\n", "Depth 2\n", "[[0. 0. 0. 0. 0.]\n", " [2. 3. 3. 3. 2.]\n", - " [2. 3. 3. 3. 2.]\n", - " [2. 3. 3. 3. 2.]\n", - " [2. 3. 3. 3. 2.]]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]]\n", "Depth 3\n", "[[0. 0. 0. 0. 0.]\n", " [2. 3. 3. 3. 2.]\n", - " [2. 3. 3. 3. 2.]\n", - " [2. 3. 3. 3. 2.]\n", - " [2. 3. 3. 3. 2.]]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]]\n", "Depth 4\n", "[[0. 0. 0. 0. 0.]\n", " [2. 3. 3. 3. 2.]\n", - " [2. 3. 3. 3. 2.]\n", - " [2. 3. 3. 3. 2.]\n", - " [2. 3. 3. 3. 2.]]\n" + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]]\n" ], "name": "stdout" } ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "oSlof4GvuaLu", + "colab_type": "text" + }, + "source": [ + "#### Horizontal stack A" + ] + }, { "cell_type": "code", "metadata": { @@ -755,20 +982,49 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 538 + "height": 799 }, - "outputId": "a1b5bdfc-5415-459f-94d9-e9dffe5eaff3" + "outputId": "cfea8fc3-f6af-41d6-98aa-5453eb3612ca" }, "source": [ - "horizontal_A_stack = build_test_stack_3d('A', kernel_size=(1, 1, 3))\n", - "val = horizontal_A_stack.predict(test_ones_3d)\n", - "print_test_3d(val)" + "mask_type = 'A'\n", + "kernel_size=(3, 3, 3)\n", + "\n", + "conv = MaskedConv3D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size, \n", + " padding='same',\n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')\n", + "\n", + "result = conv(test_ones_3d)\n", + "\n", + "print('MASK')\n", + "print_3d(conv.mask.numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print_3d(result.numpy().squeeze())" ], - "execution_count": 18, + "execution_count": 168, "outputs": [ { "output_type": "stream", "text": [ + "MASK\n", + "Depth 0\n", 
+ "[[0. 0. 0.]\n", + " [0. 0. 0.]\n", + " [0. 0. 0.]]\n", + "Depth 1\n", + "[[0. 0. 0.]\n", + " [1. 0. 0.]\n", + " [0. 0. 0.]]\n", + "Depth 2\n", + "[[0. 0. 0.]\n", + " [0. 0. 0.]\n", + " [0. 0. 0.]]\n", + "\n", + "OUTPUT\n", "Depth 0\n", "[[0. 1. 1. 1. 1.]\n", " [0. 1. 1. 1. 1.]\n", @@ -804,6 +1060,16 @@ } ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "ixWP3Ey7vs-H", + "colab_type": "text" + }, + "source": [ + "#### Horizontal stack B" + ] + }, { "cell_type": "code", "metadata": { @@ -811,20 +1077,49 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 538 + "height": 799 }, - "outputId": "ce9a0d0d-bda7-408e-b42b-7d50858b26ed" + "outputId": "50244671-766c-41e2-8999-1e4f34844622" }, "source": [ - "horizontal_B_stack = build_test_stack_3d('B', kernel_size=(1, 1, 3))\n", - "val = horizontal_B_stack.predict(test_ones_3d)\n", - "print_test_3d(val)" + "mask_type = 'B'\n", + "kernel_size=(3, 3, 3)\n", + "\n", + "conv = MaskedConv3D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size, \n", + " padding='same',\n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')\n", + "\n", + "result = conv(test_ones_3d)\n", + "\n", + "print('MASK')\n", + "print_3d(conv.mask.numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print_3d(result.numpy().squeeze())" ], - "execution_count": 19, + "execution_count": 169, "outputs": [ { "output_type": "stream", "text": [ + "MASK\n", + "Depth 0\n", + "[[0. 0. 0.]\n", + " [0. 0. 0.]\n", + " [0. 0. 0.]]\n", + "Depth 1\n", + "[[0. 0. 0.]\n", + " [1. 1. 0.]\n", + " [0. 0. 0.]]\n", + "Depth 2\n", + "[[0. 0. 0.]\n", + " [0. 0. 0.]\n", + " [0. 0. 0.]]\n", + "\n", + "OUTPUT\n", "Depth 0\n", "[[1. 2. 2. 2. 2.]\n", " [1. 2. 2. 2. 
2.]\n", @@ -870,6 +1165,16 @@ "### Tests with kernel_size 4" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "_ZFtCkYhvwGW", + "colab_type": "text" + }, + "source": [ + "#### Depth stack" + ] + }, { "cell_type": "code", "metadata": { @@ -877,20 +1182,64 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 538 + "height": 937 }, - "outputId": "a55145ec-594e-4a81-cc3b-45df89d9e086" + "outputId": "e78c3adf-e32c-44fd-e0de-3a7db406bbd9" }, "source": [ - "depth_stack = build_test_stack_3d('D', kernel_size=(4, 4, 4))\n", - "val = depth_stack.predict(test_ones_3d)\n", - "print_test_3d(val)" + "mask_type = 'D'\n", + "kernel_size=(4, 4, 4)\n", + "\n", + "padding = keras.layers.ZeroPadding3D(padding=((1,0),0,0))\n", + "\n", + "conv = MaskedConv3D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size, \n", + " padding='same',\n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')\n", + "\n", + "cropping = keras.layers.Cropping3D(cropping=((0, 1), 0, 0))\n", + "\n", + "\n", + "x = padding(test_ones_3d)\n", + "x = conv(x)\n", + "result = cropping(x)\n", + "\n", + "print('MASK')\n", + "print_3d(conv.mask.numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print_3d(result.numpy().squeeze())\n" ], - "execution_count": 24, + "execution_count": 171, "outputs": [ { "output_type": "stream", "text": [ + "MASK\n", + "Depth 0\n", + "[[1. 1. 1. 1.]\n", + " [1. 1. 1. 1.]\n", + " [1. 1. 1. 1.]\n", + " [1. 1. 1. 1.]]\n", + "Depth 1\n", + "[[1. 1. 1. 1.]\n", + " [1. 1. 1. 1.]\n", + " [1. 1. 1. 1.]\n", + " [1. 1. 1. 1.]]\n", + "Depth 2\n", + "[[0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]]\n", + "Depth 3\n", + "[[0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]]\n", + "\n", + "OUTPUT\n", "Depth 0\n", "[[0. 0. 0. 0. 0.]\n", " [0. 0. 0. 0. 0.]\n", @@ -904,28 +1253,38 @@ " [ 9. 12. 12. 9. 6.]\n", " [ 6. 8. 8. 6. 4.]]\n", "Depth 2\n", - "[[ 9. 
12. 12. 9. 6.]\n", - " [12. 16. 16. 12. 8.]\n", - " [12. 16. 16. 12. 8.]\n", - " [ 9. 12. 12. 9. 6.]\n", - " [ 6. 8. 8. 6. 4.]]\n", + "[[18. 24. 24. 18. 12.]\n", + " [24. 32. 32. 24. 16.]\n", + " [24. 32. 32. 24. 16.]\n", + " [18. 24. 24. 18. 12.]\n", + " [12. 16. 16. 12. 8.]]\n", "Depth 3\n", - "[[ 9. 12. 12. 9. 6.]\n", - " [12. 16. 16. 12. 8.]\n", - " [12. 16. 16. 12. 8.]\n", - " [ 9. 12. 12. 9. 6.]\n", - " [ 6. 8. 8. 6. 4.]]\n", + "[[18. 24. 24. 18. 12.]\n", + " [24. 32. 32. 24. 16.]\n", + " [24. 32. 32. 24. 16.]\n", + " [18. 24. 24. 18. 12.]\n", + " [12. 16. 16. 12. 8.]]\n", "Depth 4\n", - "[[ 9. 12. 12. 9. 6.]\n", - " [12. 16. 16. 12. 8.]\n", - " [12. 16. 16. 12. 8.]\n", - " [ 9. 12. 12. 9. 6.]\n", - " [ 6. 8. 8. 6. 4.]]\n" + "[[18. 24. 24. 18. 12.]\n", + " [24. 32. 32. 24. 16.]\n", + " [24. 32. 32. 24. 16.]\n", + " [18. 24. 24. 18. 12.]\n", + " [12. 16. 16. 12. 8.]]\n" ], "name": "stdout" } ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "3iXH2zkbv-qY", + "colab_type": "text" + }, + "source": [ + "#### Vertical stack" + ] + }, { "cell_type": "code", "metadata": { @@ -933,55 +1292,109 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 538 + "height": 937 }, - "outputId": "6eef6945-5102-4847-ede8-a52b1ed54443" + "outputId": "3f0d6424-8783-42b5-f705-1e47592a40eb" }, "source": [ - "vertical_stack = build_test_stack_3d('V', kernel_size=(1, 4, 4))\n", - "val = vertical_stack.predict(test_ones_3d)\n", - "print_test_3d(val)" + "mask_type = 'V'\n", + "kernel_size=(4, 4, 4)\n", + "\n", + "padding = keras.layers.ZeroPadding3D(padding=(0,(1,0),0))\n", + "\n", + "conv = MaskedConv3D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size, \n", + " padding='same',\n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')\n", + "\n", + "cropping = keras.layers.Cropping3D(cropping=(0,(0, 1), 0))\n", + "\n", + "\n", + "x = padding(test_ones_3d)\n", + "x = conv(x)\n", + "result = cropping(x)\n", + 
"\n", + "print('MASK')\n", + "print_3d(conv.mask.numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print_3d(result.numpy().squeeze())" ], - "execution_count": 25, + "execution_count": 172, "outputs": [ { "output_type": "stream", "text": [ + "MASK\n", + "Depth 0\n", + "[[0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]]\n", + "Depth 1\n", + "[[1. 1. 1. 1.]\n", + " [1. 1. 1. 1.]\n", + " [0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]]\n", + "Depth 2\n", + "[[0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]]\n", + "Depth 3\n", + "[[0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]]\n", + "\n", + "OUTPUT\n", "Depth 0\n", "[[0. 0. 0. 0. 0.]\n", " [3. 4. 4. 3. 2.]\n", - " [3. 4. 4. 3. 2.]\n", - " [3. 4. 4. 3. 2.]\n", - " [3. 4. 4. 3. 2.]]\n", + " [6. 8. 8. 6. 4.]\n", + " [6. 8. 8. 6. 4.]\n", + " [6. 8. 8. 6. 4.]]\n", "Depth 1\n", "[[0. 0. 0. 0. 0.]\n", " [3. 4. 4. 3. 2.]\n", - " [3. 4. 4. 3. 2.]\n", - " [3. 4. 4. 3. 2.]\n", - " [3. 4. 4. 3. 2.]]\n", + " [6. 8. 8. 6. 4.]\n", + " [6. 8. 8. 6. 4.]\n", + " [6. 8. 8. 6. 4.]]\n", "Depth 2\n", "[[0. 0. 0. 0. 0.]\n", " [3. 4. 4. 3. 2.]\n", - " [3. 4. 4. 3. 2.]\n", - " [3. 4. 4. 3. 2.]\n", - " [3. 4. 4. 3. 2.]]\n", + " [6. 8. 8. 6. 4.]\n", + " [6. 8. 8. 6. 4.]\n", + " [6. 8. 8. 6. 4.]]\n", "Depth 3\n", "[[0. 0. 0. 0. 0.]\n", " [3. 4. 4. 3. 2.]\n", - " [3. 4. 4. 3. 2.]\n", - " [3. 4. 4. 3. 2.]\n", - " [3. 4. 4. 3. 2.]]\n", + " [6. 8. 8. 6. 4.]\n", + " [6. 8. 8. 6. 4.]\n", + " [6. 8. 8. 6. 4.]]\n", "Depth 4\n", "[[0. 0. 0. 0. 0.]\n", " [3. 4. 4. 3. 2.]\n", - " [3. 4. 4. 3. 2.]\n", - " [3. 4. 4. 3. 2.]\n", - " [3. 4. 4. 3. 2.]]\n" + " [6. 8. 8. 6. 4.]\n", + " [6. 8. 8. 6. 4.]\n", + " [6. 8. 8. 6. 
4.]]\n" ], "name": "stdout" } ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "ANQnjmvPwI3K", + "colab_type": "text" + }, + "source": [ + "#### Horizontal stack A" + ] + }, { "cell_type": "code", "metadata": { @@ -989,20 +1402,57 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 538 + "height": 937 }, - "outputId": "cd8ec46e-f49e-4761-d7d3-f1d328148263" + "outputId": "298206a2-bd57-469f-f497-84ab21a970b5" }, "source": [ - "horizontal_A_stack = build_test_stack_3d('A', kernel_size=(1, 1, 4))\n", - "val = horizontal_A_stack.predict(test_ones_3d)\n", - "print_test_3d(val)" + "mask_type = 'A'\n", + "kernel_size=(4, 4, 4)\n", + "\n", + "conv = MaskedConv3D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size, \n", + " padding='same',\n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')\n", + "\n", + "result = conv(test_ones_3d)\n", + "\n", + "print('MASK')\n", + "print_3d(conv.mask.numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print_3d(result.numpy().squeeze())" ], - "execution_count": 26, + "execution_count": 173, "outputs": [ { "output_type": "stream", "text": [ + "MASK\n", + "Depth 0\n", + "[[0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]]\n", + "Depth 1\n", + "[[0. 0. 0. 0.]\n", + " [1. 0. 0. 0.]\n", + " [0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]]\n", + "Depth 2\n", + "[[0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]]\n", + "Depth 3\n", + "[[0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]\n", + " [0. 0. 0. 0.]]\n", + "\n", + "OUTPUT\n", "Depth 0\n", "[[0. 1. 1. 1. 1.]\n", " [0. 1. 1. 1. 
1.]\n", @@ -1038,6 +1488,16 @@ } ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "KJzAJ40EwTL3", + "colab_type": "text" + }, + "source": [ + "#### Horizontal stack B" + ] + }, { "cell_type": "code", "metadata": { @@ -1045,20 +1505,49 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 538 + "height": 799 }, - "outputId": "95efb670-2e51-462d-8b0d-97e4cc89852b" + "outputId": "fa304a57-4168-4bfd-d722-1503a2bc54e1" }, "source": [ - "horizontal_B_stack = build_test_stack_3d('B', kernel_size=(1, 1, 4))\n", - "val = horizontal_B_stack.predict(test_ones_3d)\n", - "print_test_3d(val)" + "mask_type = 'B'\n", + "kernel_size=(4, 4, 4)\n", + "\n", + "conv = MaskedConv3D(mask_type=mask_type,\n", + " filters=1,\n", + " kernel_size=kernel_size, \n", + " padding='same',\n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')\n", + "\n", + "result = conv(test_ones_3d)\n", + "\n", + "print('MASK')\n", + "print_3d(conv.mask.numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print_3d(result.numpy().squeeze())" ], - "execution_count": 27, + "execution_count": 174, "outputs": [ { "output_type": "stream", "text": [ + "MASK\n", + "Depth 0\n", + "[[0. 0. 0.]\n", + " [0. 0. 0.]\n", + " [0. 0. 0.]]\n", + "Depth 1\n", + "[[0. 0. 0.]\n", + " [1. 1. 0.]\n", + " [0. 0. 0.]]\n", + "Depth 2\n", + "[[0. 0. 0.]\n", + " [0. 0. 0.]\n", + " [0. 0. 0.]]\n", + "\n", + "OUTPUT\n", "Depth 0\n", "[[1. 2. 2. 2. 2.]\n", " [1. 2. 2. 2. 2.]\n", " [1. 2. 2. 2. 
2.]\n", @@ -1101,7 +1590,7 @@ "colab_type": "text" }, "source": [ - "## Creating 2D cropping solution" + "## Creating 2D cropped solution" ] }, { @@ -1112,8 +1601,6 @@ "colab": {} }, "source": [ - "import math\n", - "\n", "class VerticalCroppedConv2d(tf.keras.Model):\n", " def __init__(self,\n", " filters,\n", @@ -1286,6 +1773,16 @@ "###Tests with kernel_size 3" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "DdcDFcMbxwpZ", + "colab_type": "text" + }, + "source": [ + "#### Vertical stack" + ] + }, { "cell_type": "code", "metadata": { @@ -1293,24 +1790,57 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 86 + "height": 191 }, - "outputId": "194d0b97-c5ed-4cbc-c82c-82a4885ce7a9" + "outputId": "49313976-a256-48c4-b198-0e4bf5a66bc9" }, "source": [ - "vertical_stack = build_test_croppedv_stack_2d()\n", - "val = vertical_stack.predict(test_ones_2d)\n", - "print(val[0,:,:,0].squeeze())" + "kernel_size=(2, 3)\n", + "kernel_h, kernel_w = kernel_size\n", + "\n", + "\n", + "\n", + "padding1 = keras.layers.ZeroPadding2D(padding=((1,0),0))\n", + "\n", + "padding2 = keras.layers.ZeroPadding2D(padding=((kernel_h-1, 0),(int((kernel_w-1)/2),int((kernel_w-1)/2))))\n", + "conv = keras.layers.Conv2D(filters=1,\n", + " kernel_size=kernel_size,\n", + " strides=1,\n", + " padding='valid',\n", + " kernel_initializer='ones', \n", + " bias_initializer='zeros')\n", + "\n", + "cropping = keras.layers.Cropping2D(cropping=((0, 1), 0))\n", + "\n", + "\n", + "x = padding1(test_ones_2d)\n", + "x = padding2(x)\n", + "x = conv(x)\n", + "result = cropping(x)\n", + "\n", + "\n", + "print('KERNEL')\n", + "print(conv.weights[0].numpy().squeeze())\n", + "print('')\n", + "print('OUTPUT')\n", + "print(result.numpy().squeeze())\n", + "\n" ], - "execution_count": 59, + "execution_count": 182, "outputs": [ { "output_type": "stream", "text": [ - "[[2. 3. 3. 3. 2.]\n", + "KERNEL\n", + "[[1. 1. 1.]\n", + " [1. 1. 1.]]\n", + "\n", + "OUTPUT\n", + "[[0. 0. 0. 0. 
0.]\n", + " [2. 3. 3. 3. 2.]\n", " [4. 6. 6. 6. 4.]\n", - " [6. 9. 9. 9. 6.]\n", - " [6. 9. 9. 9. 6.]]\n" + " [4. 6. 6. 6. 4.]\n", + " [4. 6. 6. 6. 4.]]\n" ], "name": "stdout" } @@ -1338,8 +1868,25 @@ "source": [ "REFERENCES\n", "\n", + "https://wiki.math.uwaterloo.ca/statwiki/index.php?title=STAT946F17/Conditional_Image_Generation_with_PixelCNN_Decoders#Gated_PixelCNN\n", + "\n", + "https://www.slideshare.net/suga93/conditional-image-generation-with-pixelcnn-decoders\n", + "\n", "https://www.youtube.com/watch?v=1BURwCCYNEI" ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jTI9ts7i7Wch", + "colab_type": "code", + "colab": {} + }, + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] } ] } \ No newline at end of file