#!/bin/bash
# bail out if anything fails
set -e
# helm install --name <release> stable/<chart> --set prometheusOperator.createCustomResource=false -f <yaml>
# helm upgrade -f <yaml> <release> stable/<chart>
# kubectl port-forward -n monitoring deploy/prometheus-server 9090:9090
# grafana on localhost:3000
# kubectl port-forward <grafana_pod> -n monitoring 3000:3000
# get grafana admin pw
# default grafana creds: admin:prom-operator
# kubectl get secret --namespace grafana grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo
# In grafana dashboard
# - click '+' & select 'import'
# - 3131 for 'Kubernetes All Nodes' Dashboard
# - 3146 for 'Kubernetes Pods' Dashboard
# - select 'Prometheus' under data sources
# - click 'import'
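# Convenience sketch based on the notes above: fetch the generated admin password
# and port-forward the grafana dashboard. Assumes the release is named "grafana",
# lives in the "monitoring" namespace, and that the chart's Service listens on
# port 80 (adjust if your setup differs).
grafana_access () {
  kubectl get secret -n monitoring grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo
  kubectl port-forward -n monitoring svc/grafana 3000:80
}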
old_setup_monitoring () {
  echo ">>> setup_monitoring starting..."
  kubectl create namespace monitoring
  # install prometheus
  helm install stable/prometheus \
    --name prometheus \
    --namespace monitoring \
    --set alertmanager.persistentVolume.storageClass="gp2" \
    --set server.persistentVolume.storageClass="gp2"
  # install grafana
  helm install stable/grafana \
    --name grafana \
    --namespace monitoring \
    --set persistence.storageClassName="gp2" \
    --set adminPassword="notadmin" \
    --set datasources."datasources\.yaml".apiVersion=1 \
    --set datasources."datasources\.yaml".datasources[0].name=Prometheus \
    --set datasources."datasources\.yaml".datasources[0].type=prometheus \
    --set datasources."datasources\.yaml".datasources[0].url=http://prometheus-server.monitoring.svc.cluster.local \
    --set datasources."datasources\.yaml".datasources[0].access=proxy \
    --set datasources."datasources\.yaml".datasources[0].isDefault=true \
    --set service.type=LoadBalancer
  # check the monitoring namespace
  kubectl get all -n monitoring
  # debug
  #kubectl port-forward -n monitoring deploy/prometheus-server 8080:9090
  # get grafana elb url (both releases live in the monitoring namespace)
  export ELB=$(kubectl get svc -n monitoring grafana -o jsonpath='{.status.loadBalancer.ingress[0].hostname}')
  echo "grafana_elb_url: http://$ELB"
  echo ">>> setup_monitoring complete"
}
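# The function above uses Helm 2 flags (--name) and the retired stable/ charts.
# A rough Helm 3 equivalent sketch, assuming the prometheus-community and grafana
# chart repos (persistence and datasource values omitted for brevity):
setup_monitoring_helm3_sketch () {
  helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
  helm repo add grafana https://grafana.github.io/helm-charts
  helm repo update
  kubectl create namespace monitoring
  helm install prometheus prometheus-community/prometheus --namespace monitoring
  helm install grafana grafana/grafana --namespace monitoring
}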
containerize () {
  DOCKER_USER=ajgrande924
  # docker login
  cd app/spark/master
  docker build -t ${DOCKER_USER}/spark-master .
  # docker push ${DOCKER_USER}/spark-master
}
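# Hypothetical follow-up to containerize(): push the built image. The spark-worker
# path below is an assumption (only app/spark/master appears in this repo), so it
# stays commented out.
containerize_push () {
  DOCKER_USER=ajgrande924
  docker push ${DOCKER_USER}/spark-master
  # cd app/spark/worker && docker build -t ${DOCKER_USER}/spark-worker . && docker push ${DOCKER_USER}/spark-worker
}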
test_jump_pod () {
  kubectl create namespace spark-pi
  kubectl apply -f kubernetes/spark-exp/spark-role.yaml
  kubectl auth can-i create pod --as=system:serviceaccount:spark-pi:spark-pi -n spark-pi
  # jump pod
  kubectl run --generator=run-pod/v1 jump-pod --rm -i --tty --serviceaccount=spark-pi --namespace=spark-pi --image vitamingaugau/spark:spark-2.4.4 sh
  # inside jump pod
  export SA=spark-pi
  export NAMESPACE=spark-pi
  export TOKEN=/var/run/secrets/kubernetes.io/serviceaccount/token
  export CACERT=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  # scala jar example
  /opt/spark/bin/spark-submit \
    --master=k8s://https://2003D3175F75CB25EE1389647B0467C6.gr7.us-west-2.eks.amazonaws.com:443 \
    --deploy-mode cluster \
    --name spark-pi \
    --class org.apache.spark.examples.SparkPi \
    --conf spark.kubernetes.driver.pod.name=spark-pi-driver \
    --conf spark.kubernetes.container.image=vitamingaugau/spark:spark-2.4.4 \
    --conf spark.kubernetes.namespace=$NAMESPACE \
    --conf spark.kubernetes.authenticate.driver.serviceAccountName=$SA \
    --conf spark.kubernetes.authenticate.submission.caCertFile=$CACERT \
    --conf spark.kubernetes.authenticate.submission.oauthTokenFile=$TOKEN \
    --conf spark.executor.instances=2 \
    local:///opt/spark/examples/target/scala-2.11/jars/spark-examples_2.11-2.4.4.jar 20000
}
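# Follow-up sketch for the spark-pi run above: the driver pod name is pinned by
# spark.kubernetes.driver.pod.name, so the result can be pulled from its logs and
# the namespace torn down afterwards.
check_spark_pi_result () {
  kubectl logs spark-pi-driver -n spark-pi | grep "Pi is roughly" || true
  kubectl delete pod spark-pi-driver -n spark-pi
  kubectl delete namespace spark-pi
}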
build_spark () {
  docker container run \
    --privileged -it \
    --name spark-build \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -v ${PWD}/app/spark:/tmp \
    -e USER=${SECRET_USER} \
    -w /opt \
    docker:dind \
    sh /tmp/build.sh
}
# kubectl run spark-base \
# --rm -it --labels="app=spark-client" \
# --image bde2020/spark-base:2.4.3-hadoop2.7 \
# -- bash ./spark/bin/spark-submit --class CLASS_TO_RUN --master spark://spark-master:7077 --deploy-mode client --conf spark.driver.host=spark-client URL_TO_YOUR_APP
wip () {
  kubectl create namespace dev
  kubectl apply -f kubernetes/scale-app/scale-secret.yaml
  kubectl apply -f kubernetes/scale-app/scale-postgres.yaml
  kubectl apply -f kubernetes/scale-app/scale-spark.yaml
  # teardown
  kubectl delete -f kubernetes/scale-app/scale-spark.yaml
  kubectl delete -f kubernetes/scale-app/scale-postgres.yaml
  kubectl delete -f kubernetes/scale-app/scale-secret.yaml
  kubectl delete namespace dev
}
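# Read-only sketch to sanity-check what wip() applies to the dev namespace;
# nothing here mutates state.
check_dev () {
  kubectl get all -n dev
  kubectl get secret -n dev
  kubectl get pvc -n dev
}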
wip_submit_spark_job () {
  # startup spark client pod
  kubectl run \
    --generator=run-pod/v1 spark-client \
    --labels="app=spark-client" \
    --rm -it --image ajgrande924/spark-base:latest sh
  # inside pod
  python3 /examples/python/test_boto3.py
  # submit sample app - test_pi.py
  /spark/bin/spark-submit --master spark://spark-master:7077 --deploy-mode client --conf spark.driver.host=spark-client /examples/python/test_pi.py 2
  # magic - works
  export PYSPARK_PYTHON=python3
  /spark/bin/spark-shell --packages org.postgresql:postgresql:42.1.1
  /spark/bin/spark-submit --driver-class-path /root/.ivy2/jars/org.postgresql_postgresql-42.1.1.jar --conf spark.executor.extraClassPath=/root/.ivy2/jars/org.postgresql_postgresql-42.1.1.jar --jars /root/.ivy2/jars/org.postgresql_postgresql-42.1.1.jar --master spark://spark-master:7077 --deploy-mode client --conf spark.driver.host=spark-client /examples/python/main.py
  # revised to test
  /spark/bin/spark-submit \
    --driver-class-path /jars/org.postgresql_postgresql-42.1.1.jar \
    --conf spark.executor.extraClassPath=/jars/org.postgresql_postgresql-42.1.1.jar \
    --jars /jars/org.postgresql_postgresql-42.1.1.jar \
    --master spark://spark-master:7077 \
    --deploy-mode client \
    --conf spark.driver.host=spark-client /examples/python/main_subset.py
  # graveyard
  /spark/bin/spark-submit --packages org.postgresql:postgresql:42.1.1 --master spark://spark-master:7077 --deploy-mode client --conf spark.driver.host=spark-client /examples/python/main.py
  /spark/bin/spark-submit --driver-class-path /root/.ivy2/jars/org.postgresql_postgresql-42.1.1.jar --master spark://spark-master:7077 /python/main.py 1000
  # check spark master ui (see the spark_master_ui sketch after this function)
  kubectl port-forward <spark_master_pod> 8080:8080
  # WIP
  # Creating some variables to make the docker run command more readable
  # App jar environment used by the spark-submit image
  SPARK_APPLICATION_JAR_LOCATION="/opt/spark-apps/crimes-app.jar"
  # App main class environment used by the spark-submit image
  SPARK_APPLICATION_MAIN_CLASS="org.mvb.applications.CrimesApp"
  # Extra submit args used by the spark-submit image
  SPARK_SUBMIT_ARGS="--conf spark.executor.extraJavaOptions='-Dconfig-path=/opt/spark-apps/dev/config.conf'"
  # We have to use the same network as the spark cluster (internally the image resolves the spark master as spark://spark-master:7077)
  docker run --network docker-spark-cluster_spark-network \
    -v /mnt/spark-apps:/opt/spark-apps \
    --env SPARK_APPLICATION_JAR_LOCATION=$SPARK_APPLICATION_JAR_LOCATION \
    --env SPARK_APPLICATION_MAIN_CLASS=$SPARK_APPLICATION_MAIN_CLASS \
    --env SPARK_SUBMIT_ARGS="$SPARK_SUBMIT_ARGS" \
    spark-submit:2.3.1
  /spark/bin/spark-submit \
    --driver-memory 4g \
    --executor-memory 4g \
    --master spark://spark-master:7077 \
    /python/main.py \
    1000
}
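# Sketch referenced in wip_submit_spark_job above: resolve the spark master pod by
# label and port-forward its UI. The app=spark-master selector is an assumption
# about kubernetes/scale-app/scale-spark.yaml; adjust to match the actual labels.
spark_master_ui () {
  SPARK_MASTER_POD=$(kubectl get pods -l app=spark-master -o jsonpath='{.items[0].metadata.name}')
  kubectl port-forward "$SPARK_MASTER_POD" 8080:8080
}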
"$@"