-
Notifications
You must be signed in to change notification settings - Fork 0
/
clusters-postgresql-data-proc-and-vm.tf
270 lines (231 loc) · 8.84 KB
/
clusters-postgresql-data-proc-and-vm.tf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
# Infrastructure for the Yandex Cloud Managed Service for PostgreSQL cluster, Yandex Data Processing cluster, and Virtual Machine
#
# RU: https://yandex.cloud/ru/docs/managed-postgresql/tutorials/sqoop
# EN: https://yandex.cloud/en/docs/managed-postgresql/tutorials/sqoop
#
# Set the configuration of the Managed Service for PostgreSQL cluster, Yandex Data Processing cluster, and Virtual Machine:
locals {
  # --- Folder and network ---

  # Your folder ID
  folder_id = ""
  # Network ID for the Managed Service for PostgreSQL cluster, Yandex Data Processing cluster, and VM
  network_id = ""
  # Subnet ID (enable NAT for this subnet)
  subnet_id = ""

  # --- Service accounts ---

  # Service account ID for creating a bucket in Object Storage
  storage_sa_id = ""
  # Yandex Data Processing service account name. It must be unique in the folder.
  data_proc_sa = ""

  # --- Managed Service for PostgreSQL ---

  # PostgreSQL version. See the complete list of supported versions in https://yandex.cloud/en/docs/managed-postgresql/.
  pg_cluster_version = "14"
  # Database name
  pg_cluster_db = "db1"
  # Database owner's name
  pg_cluster_username = "user1"
  # Database owner's password
  pg_cluster_password = ""

  # --- Virtual Machine ---

  # Public image ID from https://yandex.cloud/en/docs/compute/operations/images-with-pre-installed-software/get-list
  vm_image_id = ""
  # Username for VM. Images with Ubuntu Linux use the `ubuntu` username by default.
  vm_username = ""
  # Full path to the SSH public key for VM (the file is read with `file()`)
  vm_public_key = ""

  # --- Object Storage and Yandex Data Processing ---

  # Object Storage bucket name. It must be unique throughout Object Storage.
  bucket_name = ""
  # Full path to the SSH public key for the Yandex Data Processing cluster (the file is read with `file()`)
  dp_public_key = ""
}
# Security groups for the Managed Service for PostgreSQL cluster, Yandex Data Processing cluster, and VM
# Security group referenced by the PostgreSQL cluster and by the VM network interface.
# Its only rule opens TCP port 6432 to any IPv4 address.
resource "yandex_vpc_security_group" "cluster-security-group" {
  network_id  = local.network_id
  description = "Security group for the Managed Service for PostgreSQL cluster"

  ingress {
    protocol       = "TCP"
    port           = 6432
    v4_cidr_blocks = ["0.0.0.0/0"]
    description    = "Allow connections to the cluster from the Internet"
  }
}
# Security group for the VM: inbound SSH (TCP 22) from any IPv4 address,
# outbound traffic of any protocol to any IPv4 address.
resource "yandex_vpc_security_group" "vm-security-group" {
  network_id  = local.network_id
  description = "Security group for the VM"

  ingress {
    protocol       = "TCP"
    port           = 22
    v4_cidr_blocks = ["0.0.0.0/0"]
    description    = "Allow SSH connections to VM from the Internet"
  }

  egress {
    protocol       = "ANY"
    from_port      = 0
    to_port        = 65535
    v4_cidr_blocks = ["0.0.0.0/0"]
    description    = "Allow outgoing connections to any required resource"
  }
}
# Security group intended for the Yandex Data Processing cluster:
#   * any traffic between members of this same group (both directions);
#   * outbound TCP 443 to any IPv4 address.
#
# NOTE(review): no resource visible in this file references this security group —
# confirm whether it is meant to be attached to the Yandex Data Processing cluster.
resource "yandex_vpc_security_group" "data-proc-security-group" {
  network_id  = local.network_id
  description = "Security group for the Yandex Data Processing cluster"

  ingress {
    protocol          = "ANY"
    from_port         = 0
    to_port           = 65535
    predefined_target = "self_security_group"
    description       = "Allow any incoming traffic within the security group"
  }

  egress {
    protocol          = "ANY"
    from_port         = 0
    to_port           = 65535
    predefined_target = "self_security_group"
    description       = "Allow any outgoing traffic within the security group"
  }

  egress {
    protocol       = "TCP"
    port           = 443
    v4_cidr_blocks = ["0.0.0.0/0"]
    description    = "Allow connections to the HTTPS port"
  }
}
# Service account under which the Yandex Data Processing cluster runs.
# Its name comes from `local.data_proc_sa`; the folder-level roles are bound to it below.
resource "yandex_iam_service_account" "data-proc-sa" {
  name        = local.data_proc_sa
  description = "Service account to manage the Yandex Data Processing cluster"
}
# Bind the `dataproc.agent` role in the folder to the Yandex Data Processing service account.
#
# NOTE(review): `*_iam_binding` resources are presumably authoritative for the role within the
# folder — confirm that no other members need this role, or consider the `*_iam_member` variant.
resource "yandex_resourcemanager_folder_iam_binding" "dataproc-agent" {
  role      = "dataproc.agent"
  folder_id = local.folder_id
  members   = ["serviceAccount:${yandex_iam_service_account.data-proc-sa.id}"]
}
# Bind the `dataproc.provisioner` role in the folder to the Yandex Data Processing service account.
resource "yandex_resourcemanager_folder_iam_binding" "dataproc-provisioner" {
  role      = "dataproc.provisioner"
  folder_id = local.folder_id
  members   = ["serviceAccount:${yandex_iam_service_account.data-proc-sa.id}"]
}
# Bind the `monitoring.viewer` role in the folder to the Yandex Data Processing service account.
resource "yandex_resourcemanager_folder_iam_binding" "monitoring-viewer" {
  role      = "monitoring.viewer"
  folder_id = local.folder_id
  members   = ["serviceAccount:${yandex_iam_service_account.data-proc-sa.id}"]
}
# Bind the `storage.viewer` role in the folder to the Yandex Data Processing service account.
resource "yandex_resourcemanager_folder_iam_binding" "bucket-viewer" {
  role      = "storage.viewer"
  folder_id = local.folder_id
  members   = ["serviceAccount:${yandex_iam_service_account.data-proc-sa.id}"]
}
# Bind the `storage.uploader` role in the folder to the Yandex Data Processing service account.
resource "yandex_resourcemanager_folder_iam_binding" "bucket-uploader" {
  role      = "storage.uploader"
  folder_id = local.folder_id
  members   = ["serviceAccount:${yandex_iam_service_account.data-proc-sa.id}"]
}
# Infrastructure for the Managed Service for PostgreSQL cluster
#
# A single-host cluster in the `ru-central1-a` zone, placed in the network and subnet set in
# `locals` and protected by `cluster-security-group`. The PostgreSQL version comes from
# `local.pg_cluster_version`.
resource "yandex_mdb_postgresql_cluster" "postgresql-cluster" {
  description        = "Managed Service for PostgreSQL cluster"
  name               = "postgresql-cluster"
  environment        = "PRODUCTION"
  network_id         = local.network_id
  security_group_ids = [yandex_vpc_security_group.cluster-security-group.id]

  config {
    version = local.pg_cluster_version

    resources {
      resource_preset_id = "s2.micro" # 2 vCPU, 8 GB RAM
      disk_type_id       = "network-hdd"
      # Written as a number (not a quoted string) to match the other `disk_size` values in this file.
      disk_size = 10 # GB
    }
  }

  host {
    zone      = "ru-central1-a"
    subnet_id = local.subnet_id
  }
}
# Database in the Managed Service for PostgreSQL cluster.
# The owner is taken from the `user1` resource, so the user is created before the database.
resource "yandex_mdb_postgresql_database" "db1" {
  name       = local.pg_cluster_db
  owner      = yandex_mdb_postgresql_user.user1.name
  cluster_id = yandex_mdb_postgresql_cluster.postgresql-cluster.id
}
# Database user in the Managed Service for PostgreSQL cluster.
# Name and password come from `local.pg_cluster_username` and `local.pg_cluster_password`.
resource "yandex_mdb_postgresql_user" "user1" {
  name       = local.pg_cluster_username
  password   = local.pg_cluster_password
  cluster_id = yandex_mdb_postgresql_cluster.postgresql-cluster.id
}
# VM infrastructure
#
# A 2-core / 2 GB virtual machine in `ru-central1-a`, booted from `local.vm_image_id`.
# It gets a public address via NAT and is a member of both the VM security group and the
# PostgreSQL cluster security group.
resource "yandex_compute_instance" "vm-linux" {
  name        = "vm-linux"
  description = "Virtual Machine in Yandex Compute Cloud"
  zone        = "ru-central1-a"
  platform_id = "standard-v3" # Intel Ice Lake

  resources {
    cores  = 2
    memory = 2 # GB
  }

  boot_disk {
    initialize_params {
      image_id = local.vm_image_id
    }
  }

  network_interface {
    subnet_id = local.subnet_id
    # Required for connection from the Internet
    nat = true
    security_group_ids = [
      yandex_vpc_security_group.vm-security-group.id,
      yandex_vpc_security_group.cluster-security-group.id
    ]
  }

  metadata = {
    # "<username>:<contents of the public key file>"; `local.vm_public_key` is the full path to that file
    ssh-keys = "${local.vm_username}:${file(local.vm_public_key)}"
  }
}
# Infrastructure for the Object Storage bucket
#
# Static access key issued for the service account given in `local.storage_sa_id`;
# the bucket resource uses its access/secret key pair.
resource "yandex_iam_service_account_static_access_key" "bucket-key" {
  service_account_id = local.storage_sa_id
  description        = "Static key for the Object Storage bucket"
}
# Object Storage bucket named by `local.bucket_name`, created with the `bucket-key` static access key.
resource "yandex_storage_bucket" "storage-bucket" {
  access_key = yandex_iam_service_account_static_access_key.bucket-key.access_key
  secret_key = yandex_iam_service_account_static_access_key.bucket-key.secret_key
  bucket     = local.bucket_name
}
# Infrastructure for the Yandex Data Processing cluster
#
# The cluster runs under the `data-proc-sa` service account, uses the Object Storage bucket
# declared in this file, and consists of one master host and one data host in `local.subnet_id`.
#
# NOTE(review): `security_group_ids` is not set here, so `data-proc-security-group` is not
# attached to this cluster — confirm whether that is intended before attaching it (its rules
# only allow intra-group traffic and outbound TCP 443).
resource "yandex_dataproc_cluster" "my-dp-cluster" {
  description = "Yandex Data Processing cluster"
  name        = "my-dp-cluster"

  # Depend on every role binding of the service account, not only `dataproc.agent`:
  # the cluster is then created only after all roles are granted and is destroyed
  # before any of them is revoked.
  depends_on = [
    yandex_resourcemanager_folder_iam_binding.dataproc-agent,
    yandex_resourcemanager_folder_iam_binding.dataproc-provisioner,
    yandex_resourcemanager_folder_iam_binding.monitoring-viewer,
    yandex_resourcemanager_folder_iam_binding.bucket-viewer,
    yandex_resourcemanager_folder_iam_binding.bucket-uploader,
  ]

  bucket             = yandex_storage_bucket.storage-bucket.bucket
  service_account_id = yandex_iam_service_account.data-proc-sa.id
  zone_id            = "ru-central1-a"

  cluster_config {
    version_id = "1.4"

    hadoop {
      services = ["HBASE", "HDFS", "HIVE", "MAPREDUCE", "SQOOP", "YARN", "ZOOKEEPER"]
      # Keys use the "<service>:<property name>" form.
      properties = {
        "yarn:yarn.resourcemanager.am.max-attempts" = 5
        "hive:hive.execution.engine"                = "mr"
      }
      # `local.dp_public_key` is the full path to the public key file; its contents are passed here.
      ssh_public_keys = [file(local.dp_public_key)]
    }

    # Master subcluster: a single host.
    subcluster_spec {
      name = "main"
      role = "MASTERNODE"
      resources {
        resource_preset_id = "s2.micro" # 2 vCPU, 8 GB RAM
        disk_type_id       = "network-hdd"
        disk_size          = 20 # GB
      }
      subnet_id   = local.subnet_id
      hosts_count = 1
    }

    # Data subcluster: a single host.
    subcluster_spec {
      name = "data"
      role = "DATANODE"
      resources {
        resource_preset_id = "s2.micro" # 2 vCPU, 8 GB RAM
        disk_type_id       = "network-hdd"
        disk_size          = 20 # GB
      }
      subnet_id   = local.subnet_id
      hosts_count = 1
    }
  }
}