diff --git a/deploy/aws/emr_genie_bootstrap.sh b/deploy/aws/emr_genie_bootstrap.sh
new file mode 100755
--- /dev/null
+++ b/deploy/aws/emr_genie_bootstrap.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+##
+#
+# Copyright 2013 Netflix, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+##
+
+# EMR bootstrap action: unpacks Tomcat into $HOME, rewrites 8080 to 7001 in
+# its server.xml, builds Genie from GitHub, runs local_deploy.sh, starts Tomcat.
+
+set -euxo pipefail
+
+readonly TOMCAT_VERSION=6.0.37
+readonly TOMCAT_NAME="apache-tomcat-${TOMCAT_VERSION}"
+
+# Install Tomcat
+cd "$HOME"
+wget "http://mirror.sdunix.com/apache/tomcat/tomcat-6/v${TOMCAT_VERSION}/bin/${TOMCAT_NAME}.tar.gz"
+tar zxvf "${TOMCAT_NAME}.tar.gz"
+
+# Set up Genie specific properties; Tomcat lives where it was just unpacked
+export CATALINA_HOME="$HOME/${TOMCAT_NAME}"
+export CATALINA_OPTS="-Darchaius.deployment.applicationId=genie -Dnetflix.datacenter=cloud"
+
+# Change port to 7001 to work out of the box
+sed -i -e "s#8080#7001#g" "$CATALINA_HOME/conf/server.xml"
+
+# TODO: update $CATALINA_HOME/conf/web.xml to enable directory browsing
+
+# Set up directories needed
+mkdir -p /mnt/tomcat/genie-jobs
+ln -fs /mnt/tomcat/genie-jobs "$CATALINA_HOME/webapps"
+mkdir -p /home/hadoop/.versions/pig-0.11.1/conf
+touch /home/hadoop/.versions/pig-0.11.1/conf/pig.properties
+
+# Set up genie - get the latest from GitHub
+git clone https://github.com/Netflix/genie.git
+cd "$HOME/genie"
+./gradlew clean build -x test
+./local_deploy.sh
+
+# Start Tomcat from its logs directory
+cd "$CATALINA_HOME/logs"
+"$CATALINA_HOME/bin/startup.sh"
diff --git a/deploy/aws/emr_genie_launch.sh b/deploy/aws/emr_genie_launch.sh
new file mode 100755
--- /dev/null
+++ b/deploy/aws/emr_genie_launch.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+##
+#
+# Copyright 2013 Netflix, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+##
+
+# Creates a 2-instance EMR cluster and passes $EMR_GENIE_BOOTSTRAP_LOC to the
+# run-if bootstrap action with the condition instance.isMaster=true.
+
+if [[ -z "${EMR_GENIE_BOOTSTRAP_LOC:-}" ]]; then
+    echo "Set EMR_GENIE_BOOTSTRAP_LOC to point to emr_genie_bootstrap.sh on S3" >&2
+    exit 1
+fi
+
+echo "Using EMR Genie bootstrap action from: $EMR_GENIE_BOOTSTRAP_LOC"
+
+# Launching EMR
+elastic-mapreduce --create --alive --instance-type m1.xlarge --instance-count 2 \
+    --ssh --debug --trace --visible-to-all-users --name "Genie Testing" --ami-version "2.4.2" \
+    --hive-interactive --hive-versions 0.11.0 --pig-interactive --pig-versions 0.11.1 \
+    --bootstrap-action s3://elasticmapreduce/bootstrap-actions/run-if \
+    --args "instance.isMaster=true,$EMR_GENIE_BOOTSTRAP_LOC"
diff --git a/deploy/aws/emr_genie_postinstall.sh b/deploy/aws/emr_genie_postinstall.sh
new file mode 100755
--- /dev/null
+++ b/deploy/aws/emr_genie_postinstall.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+##
+#
+# Copyright 2013 Netflix, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+##
+
+# On Master
+
+# ensure that HDFS is writable by all users (e.g. genietest)
+hadoop fs -chmod -R 777 /
+
+# Register EMR cluster
+export SERVICE_BASE_URL=http://localhost:7001
+# Stop here if the Genie checkout is missing, rather than running python elsewhere
+cd "$HOME/genie/genie-web/src/test/python/utils" || exit 1
+python populateEMRConfigs.py
diff --git a/deploy/aws/emr_genie_test.sh b/deploy/aws/emr_genie_test.sh
new file mode 100755
--- /dev/null
+++ b/deploy/aws/emr_genie_test.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+##
+#
+# Copyright 2013 Netflix, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+##
+
+# Set up some environment variables for testing
+export SERVICE_BASE_URL=http://localhost:7001
+export GENIE_TEST_PREFIX=file:///home/hadoop
+
+# pigJobTestWithAttachments.py resolves '../utils' relative to the working
+# directory, so stop if this cd fails instead of running the tests elsewhere.
+cd "$HOME/genie/genie-web/src/test/python/jobs" || exit 1
+
+# Test some jobs
+
+python hadoopFSTest.py
+
+python hiveJobTestWithAttachments.py
+
+python pigJobTestWithAttachments.py
diff --git a/genie-web/src/main/resources/genie.properties b/genie-web/src/main/resources/genie.properties
index a010846e76e..373ddf68879 100644
--- a/genie-web/src/main/resources/genie.properties
+++ b/genie-web/src/main/resources/genie.properties
@@ -47,19 +47,19 @@ netflix.genie.server.clusterLoadBalancerImpl=com.netflix.genie.server.services.i
 ## Execution Service system properties
 
 # java home
-netflix.genie.server.java.home=/etc/alternatives/jre
+netflix.genie.server.java.home=/usr/lib/jvm/java-6-sun
 
 # hadoop home for various versions
-netflix.genie.server.hadoop.home=/apps/hadoop/current
-netflix.genie.server.hadoop.1.0.3.home=/apps/hadoop/1.0
+netflix.genie.server.hadoop.home=/home/hadoop/.versions/1.0.3
+netflix.genie.server.hadoop.1.0.3.home=/home/hadoop/.versions/1.0.3
 
 # hive home for various versions
-netflix.genie.server.hive.home=/apps/hive/current
-netflix.genie.server.hive.0.11.home=/apps/hive/0.11
+netflix.genie.server.hive.home=/home/hadoop/.versions/hive-0.11.0
+netflix.genie.server.hive.0.11.home=/home/hadoop/.versions/hive-0.11.0
 
 # pig home for various versions
-netflix.genie.server.pig.home=/apps/pig/current
-netflix.genie.server.pig.0.11.home=/apps/pig/0.11
+netflix.genie.server.pig.home=/home/hadoop/.versions/pig-0.11.1
+netflix.genie.server.pig.0.11.home=/home/hadoop/.versions/pig-0.11.1
 
 # REST URI for the execution service
 netflix.genie.server.job.resource.prefix=genie/v0/jobs
@@ -70,7 +70,7 @@ netflix.genie.server.job.resource.prefix=genie/v0/jobs
 
 # Location of genie scripts - currently part of genie-web/conf/system/apps/genie/bin
 # Change this appropriately to point to above location
-netflix.genie.server.sys.home=/apps/genie/bin
+netflix.genie.server.sys.home=/home/hadoop/genie/genie-web/conf/system/apps/genie/bin
 
 # The relative path for the prefix directory inside Tomcat's "webapps/"
 # that Genie will use for its working directory
diff --git a/genie-web/src/test/python/jobs/hadoopFSTest.py b/genie-web/src/test/python/jobs/hadoopFSTest.py
index 3f9e1705996..d688c5f6586 100644
--- a/genie-web/src/test/python/jobs/hadoopFSTest.py
+++ b/genie-web/src/test/python/jobs/hadoopFSTest.py
@@ -40,7 +40,6 @@ def testXmlSubmitjob():
 hadoop
 laptop
 hadoop
-prod
 adHoc
 fs -ls /
 
diff --git a/genie-web/src/test/python/jobs/hiveJobTestWithAttachments.py b/genie-web/src/test/python/jobs/hiveJobTestWithAttachments.py
index aef4875538e..d9f79fe7f2a 100644
--- a/genie-web/src/test/python/jobs/hiveJobTestWithAttachments.py
+++ b/genie-web/src/test/python/jobs/hiveJobTestWithAttachments.py
@@ -38,7 +38,7 @@ def testJsonSubmitjob():
     # write out a temporary file with our query/dependencies
     query = tempfile.NamedTemporaryFile(delete=False)
     name = query.name
-    query.write("select count(*) from dual;")
+    query.write("show tables;")
     query.close()
 
     # read it back in as base64 encoded binary
diff --git a/genie-web/src/test/python/jobs/pigJobTestWithAttachments.py b/genie-web/src/test/python/jobs/pigJobTestWithAttachments.py
new file mode 100644
index 00000000000..af0bdf49430
--- /dev/null
+++ b/genie-web/src/test/python/jobs/pigJobTestWithAttachments.py
@@ -0,0 +1,93 @@
+##
+#
+# Copyright 2013 Netflix, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+##
+
+import sys
+sys.path.append('../utils')
+
+import time
+import eureka
+import jobs
+
+import os
+import tempfile
+import base64
+
+# the S3 prefix where the tests are located
+GENIE_TEST_PREFIX = os.getenv("GENIE_TEST_PREFIX")
+
+# get the serviceUrl from the eureka client
+serviceUrl = eureka.EurekaClient().getServiceBaseUrl() + '/genie/v0/jobs'
+
+def testJsonSubmitjob():
+    print "Running testJsonSubmitjob "
+    # write out a temporary file with our query/dependencies
+    query = tempfile.NamedTemporaryFile(delete=False)
+    name = query.name
+    queryFile = 'pig-' + str(time.time()) + '.q'
+    query.write("fs -copyFromLocal %s %s; cmd = load '%s'; dump cmd;" % (queryFile, queryFile, queryFile))
+    query.close()
+
+    # read it back in as base64 encoded binary
+    query = open(name, "rb")
+    contents = base64.b64encode(query.read())
+    print contents
+    query.close()
+    os.unlink(name)
+
+    payload = '''
+    {
+        "jobInfo":
+        {
+            "jobName": "PIG-JOB-TEST",
+            "description": "This is a test",
+            "userName" : "genietest",
+            "groupName" : "hadoop",
+            "jobType": "pig",
+            "configuration": "prod",
+            "schedule": "adHoc",
+            "cmdArgs": "-f ''' + queryFile + '''",
+            "attachments": {
+                "data": "''' + contents + '''",
+                "name": "''' + queryFile + '''"
+            }
+        }
+    }
+    '''
+    print payload
+    print "\n"
+    return jobs.submitJob(serviceUrl, payload)
+
+# driver method for all tests
+if __name__ == "__main__":
+    print "Running unit tests:\n"
+    jobID = testJsonSubmitjob()
+    print "\n"
+    while True:
+        print jobs.getJobInfo(serviceUrl, jobID)
+        print "\n"
+        status = jobs.getJobStatus(serviceUrl, jobID)
+        print status
+        print "\n"
+
+        if (status != 'RUNNING') and (status != 'INIT'):
+            print "Final status: ", status
+            print "Job has terminated - exiting"
+            break
+
+        time.sleep(5)
+
diff --git a/genie-web/src/test/python/utils/populateEMRConfigs.py b/genie-web/src/test/python/utils/populateEMRConfigs.py
new file mode 100644
index 00000000000..594f29d219d
--- /dev/null
+++ b/genie-web/src/test/python/utils/populateEMRConfigs.py
@@ -0,0 +1,95 @@
+##
+#
+# Copyright 2013 Netflix, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+##
+
+import sys
+import json
+import urllib2
+import restclient
+import eureka
+
+baseUrl = eureka.EurekaClient().getServiceBaseUrl() + '/genie/'
+
+def populateProdHive():
+    serviceUrl = baseUrl + 'v0/config/hive/hiveconf-prodhive-emr'
+    payload = '''
+
+
+        prodhive
+        PROD
+        file:///home/hadoop/.versions/hive-0.11.0/conf/hive-site.xml
+        produser
+        ACTIVE
+        0.11
+
+
+    '''
+    restclient.put(serviceUrl=serviceUrl, payload=payload, contentType='application/xml')
+
+def populateProdPig():
+    serviceUrl = baseUrl + 'v0/config/pig/pigconf-prodpig-emr'
+    payload = '''
+
+
+        prodpig
+        PROD
+        file:///home/hadoop/.versions/pig-0.11.1/conf/pig.properties
+        produser
+        ACTIVE
+        0.11
+
+
+    '''
+    restclient.put(serviceUrl=serviceUrl, payload=payload, contentType='application/xml')
+
+def populateCluster():
+    serviceUrl = baseUrl + 'v0/config/cluster/clusterconf-emr'
+    payload = '''
+
+
+        clusterconf-emr
+        true
+        true
+        file:///home/hadoop/.versions/1.0.3/conf/mapred-site.xml
+        file:///home/hadoop/.versions/1.0.3/conf/core-site.xml
+        file:///home/hadoop/.versions/1.0.3/conf/hdfs-site.xml
+        produser
+        hiveconf-prodhive-emr
+        pigconf-prodpig-emr
+        1.0.3
+        UP
+
+
+    '''
+    restclient.put(serviceUrl=serviceUrl, payload=payload, contentType='application/xml')
+
+# driver method for populating configs on master node of EMR
+if __name__ == "__main__":
+
+    print "Registering EMR cluster\n"
+    print "################################"
+    print "Adding config for prodhive:\n"
+    populateProdHive()
+    print "################################"
+    print "Adding config for prodpig:\n"
+    populateProdPig()
+    print "################################"
+    print "Adding config for cluster:\n"
+    populateCluster()
+    print "################################"
+    print "\nDone"
+