Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pull request for Issues #9 #39

Merged
merged 10 commits into from
Oct 30, 2013
48 changes: 48 additions & 0 deletions deploy/aws/emr_genie_bootstrap.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/bash

##
#
# Copyright 2013 Netflix, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
##

# Bootstrap steps for Genie on an EMR node: install Tomcat, build and deploy
# Genie from GitHub, then start Tomcat on port 7001.

# Trace every command and stop at the first failing one so a broken bootstrap
# is visible in the logs instead of silently half-completing.
set -x
set -e

# Single place to bump the Tomcat release used by every step below.
readonly TOMCAT_VERSION="6.0.37"
readonly TOMCAT_DIST="apache-tomcat-${TOMCAT_VERSION}"

# NOTE(review): the tarball is unpacked under $HOME while the paths below are
# hard-coded to /home/hadoop, so this script only works when HOME=/home/hadoop.
readonly TOMCAT_ROOT="/home/hadoop/${TOMCAT_DIST}"

# Install Tomcat
cd "$HOME"
wget "http://mirror.sdunix.com/apache/tomcat/tomcat-6/v${TOMCAT_VERSION}/bin/${TOMCAT_DIST}.tar.gz"
tar zxvf "${TOMCAT_DIST}.tar.gz"

# Change port to 7001 to work out of the box
sed -i -e "s#8080#7001#g" "${TOMCAT_ROOT}/conf/server.xml"

# TODO: update ${TOMCAT_ROOT}/conf/web.xml to enable directory browsing

# Set up Genie specific properties
export CATALINA_HOME="${TOMCAT_ROOT}"
export CATALINA_OPTS="-Darchaius.deployment.applicationId=genie -Dnetflix.datacenter=cloud"

# Set up directories needed: the job working directory lives on /mnt and is
# exposed to Tomcat through a symlink inside webapps/.
mkdir -p /mnt/tomcat/genie-jobs
ln -fs /mnt/tomcat/genie-jobs "${CATALINA_HOME}/webapps"

# Make sure a pig.properties file exists at this path (created empty if absent).
mkdir -p /home/hadoop/.versions/pig-0.11.1/conf
touch /home/hadoop/.versions/pig-0.11.1/conf/pig.properties

# Set up genie - get the latest from GitHub
# The destination is spelled out so the clone does not depend on the current
# working directory still being $HOME.
git clone https://github.com/Netflix/genie.git "$HOME/genie"
cd "$HOME/genie"
./gradlew clean build -x test
./local_deploy.sh

# Start Tomcat (from the logs directory, as the original script did)
cd "${CATALINA_HOME}/logs"
"${CATALINA_HOME}/bin/startup.sh"
33 changes: 33 additions & 0 deletions deploy/aws/emr_genie_launch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/bash

##
#
# Copyright 2013 Netflix, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
##

# Launches a 2-instance EMR cluster for Genie testing.
#
# Required environment:
#   EMR_GENIE_BOOTSTRAP_LOC - location of emr_genie_bootstrap.sh on S3; it is
#                             passed to the run-if bootstrap action below.

# Refuse to launch without a bootstrap script location; the usage hint goes to
# stderr so it is not mistaken for normal output.
if [[ -z "${EMR_GENIE_BOOTSTRAP_LOC:-}" ]]; then
  echo "Set EMR_GENIE_BOOTSTRAP_LOC to point to emr_genie_bootstrap.sh on S3" >&2
  exit 1
fi

echo "Using EMR Genie bootstrap action from: $EMR_GENIE_BOOTSTRAP_LOC"

# Launching EMR
# The bootstrap script is handed to the run-if action together with the
# condition instance.isMaster=true (presumably so it only runs on the master
# node - confirm against the run-if bootstrap action documentation).
elastic-mapreduce --create --alive --instance-type m1.xlarge --instance-count 2 \
  --ssh --debug --trace --visible-to-all-users --name "Genie Testing" --ami-version "2.4.2" \
  --hive-interactive --hive-versions 0.11.0 --pig-interactive --pig-versions 0.11.1 \
  --bootstrap-action s3://elasticmapreduce/bootstrap-actions/run-if \
  --args "instance.isMaster=true,$EMR_GENIE_BOOTSTRAP_LOC"
29 changes: 29 additions & 0 deletions deploy/aws/emr_genie_postinstall.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash

##
#
# Copyright 2013 Netflix, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
##

# On Master
#
# Post-install steps: open up HDFS permissions and register the Hive, Pig and
# cluster configurations with the Genie instance on localhost:7001.

# ensure that HDFS is writable by all users (e.g. genietest)
# NOTE(review): this makes the whole HDFS tree world-writable; only acceptable
# on a throwaway test cluster.
hadoop fs -chmod -R 777 /

# Register EMR cluster
export SERVICE_BASE_URL=http://localhost:7001

# Stop here if the Genie checkout is missing; otherwise python would be run
# from whatever directory we happen to be in and fail with a confusing error.
cd "$HOME/genie/genie-web/src/test/python/utils" || {
  echo "Cannot cd to $HOME/genie/genie-web/src/test/python/utils" >&2
  exit 1
}
python populateEMRConfigs.py
33 changes: 33 additions & 0 deletions deploy/aws/emr_genie_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/bash

##
#
# Copyright 2013 Netflix, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
##

# Runs the sample Hadoop, Hive and Pig job tests against the Genie instance on
# localhost:7001.

# Set up some environment variables for testing
export SERVICE_BASE_URL=http://localhost:7001
export GENIE_TEST_PREFIX=file:///home/hadoop

# The test scripts are invoked by relative name, so a failed cd must stop the
# run instead of launching python from the wrong directory.
cd "$HOME/genie/genie-web/src/test/python/jobs" || {
  echo "Cannot cd to $HOME/genie/genie-web/src/test/python/jobs" >&2
  exit 1
}

# Test some jobs
# Each test runs regardless of the previous one's result; the script's exit
# status is that of the last test.

python hadoopFSTest.py

python hiveJobTestWithAttachments.py

python pigJobTestWithAttachments.py
16 changes: 8 additions & 8 deletions genie-web/src/main/resources/genie.properties
Original file line number Diff line number Diff line change
Expand Up @@ -47,19 +47,19 @@ netflix.genie.server.clusterLoadBalancerImpl=com.netflix.genie.server.services.i
## Execution Service system properties

# java home
netflix.genie.server.java.home=/etc/alternatives/jre
netflix.genie.server.java.home=/usr/lib/jvm/java-6-sun

# hadoop home for various versions
netflix.genie.server.hadoop.home=/apps/hadoop/current
netflix.genie.server.hadoop.1.0.3.home=/apps/hadoop/1.0
netflix.genie.server.hadoop.home=/home/hadoop/.versions/1.0.3
netflix.genie.server.hadoop.1.0.3.home=/home/hadoop/.versions/1.0.3

# hive home for various versions
netflix.genie.server.hive.home=/apps/hive/current
netflix.genie.server.hive.0.11.home=/apps/hive/0.11
netflix.genie.server.hive.home=/home/hadoop/.versions/hive-0.11.0
netflix.genie.server.hive.0.11.home=/home/hadoop/.versions/hive-0.11.0

# pig home for various versions
netflix.genie.server.pig.home=/apps/pig/current
netflix.genie.server.pig.0.11.home=/apps/pig/0.11
netflix.genie.server.pig.home=/home/hadoop/.versions/pig-0.11.1
netflix.genie.server.pig.0.11.home=/home/hadoop/.versions/pig-0.11.1

# REST URI for the execution service
netflix.genie.server.job.resource.prefix=genie/v0/jobs
Expand All @@ -70,7 +70,7 @@ netflix.genie.server.job.resource.prefix=genie/v0/jobs

# Location of genie scripts - currently part of genie-web/conf/system/apps/genie/bin
# Change this appropriately to point to above location
netflix.genie.server.sys.home=/apps/genie/bin
netflix.genie.server.sys.home=/home/hadoop/genie/genie-web/conf/system/apps/genie/bin

# The relative path for the prefix directory inside Tomcat's "webapps/"
# that Genie will use for its working directory
Expand Down
1 change: 0 additions & 1 deletion genie-web/src/test/python/jobs/hadoopFSTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ def testXmlSubmitjob():
<groupName>hadoop</groupName>
<userAgent>laptop</userAgent>
<jobType>hadoop</jobType>
<configuration>prod</configuration>
<schedule>adHoc</schedule>
<cmdArgs>fs -ls /</cmdArgs>
</jobInfo>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def testJsonSubmitjob():
# write out a temporary file with our query/dependencies
query = tempfile.NamedTemporaryFile(delete=False)
name = query.name
query.write("select count(*) from dual;")
query.write("show tables;")
query.close()

# read it back in as base64 encoded binary
Expand Down
93 changes: 93 additions & 0 deletions genie-web/src/test/python/jobs/pigJobTestWithAttachments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
##
#
# Copyright 2013 Netflix, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
##

import sys
sys.path.append('../utils')

import time
import eureka
import jobs

import os
import tempfile
import base64

# Prefix under which the test resources are located, taken from the
# GENIE_TEST_PREFIX environment variable (None when the variable is unset).
# Originally described as an S3 prefix. It is not referenced by the code
# visible in this module.
GENIE_TEST_PREFIX = os.getenv("GENIE_TEST_PREFIX")

# Jobs endpoint of the Genie service: the base URL reported by the eureka
# client with the '/genie/v0/jobs' resource path appended.
serviceUrl = eureka.EurekaClient().getServiceBaseUrl() + '/genie/v0/jobs'

def testJsonSubmitjob():
    """Submit a Pig job with a base64-encoded script attachment as JSON.

    A small Pig script is written to a temporary file, read back as binary,
    base64-encoded and embedded in the job payload as an attachment named
    after a timestamped query file. The temporary file is always closed and
    removed, even when writing or reading it fails.

    Returns whatever jobs.submitJob returns for the submitted payload (the
    driver below uses it as the job ID).
    """
    print("Running testJsonSubmitjob ")

    # Timestamped name so repeated runs do not collide on the same file name.
    queryFile = 'pig-' + str(time.time()) + '.q'
    script = "fs -copyFromLocal %s %s; cmd = load '%s'; dump cmd;" % (queryFile, queryFile, queryFile)

    # write out a temporary file with our query/dependencies
    query = tempfile.NamedTemporaryFile(delete=False)
    name = query.name
    try:
        try:
            query.write(script)
        finally:
            query.close()

        # read it back in as base64 encoded binary
        with open(name, "rb") as attachment:
            contents = base64.b64encode(attachment.read())
    finally:
        # delete=False means nobody else will clean this file up for us.
        os.unlink(name)
    print(contents)

    payload = '''
{
"jobInfo":
{
"jobName": "PIG-JOB-TEST",
"description": "This is a test",
"userName" : "genietest",
"groupName" : "hadoop",
"jobType": "pig",
"configuration": "prod",
"schedule": "adHoc",
"cmdArgs": "-f ''' + queryFile + '''",
"attachments": {
"data": "''' + contents + '''",
"name": "''' + queryFile + '''"
}
}
}
'''
    print(payload)
    print("\n")
    return jobs.submitJob(serviceUrl, payload)

# driver method for all tests
if __name__ == "__main__":
    print("Running unit tests:\n")
    jobID = testJsonSubmitjob()
    print("\n")

    # Poll the job every 5 seconds, dumping its info and status each time,
    # until the status is neither RUNNING nor INIT.
    while True:
        print(jobs.getJobInfo(serviceUrl, jobID))
        print("\n")
        status = jobs.getJobStatus(serviceUrl, jobID)
        print(status)
        print("\n")

        if status in ('RUNNING', 'INIT'):
            # Still in progress: wait before asking again.
            time.sleep(5)
            continue

        print("Final status:  %s" % (status,))
        print("Job has terminated - exiting")
        break

95 changes: 95 additions & 0 deletions genie-web/src/test/python/utils/populateEMRConfigs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
##
#
# Copyright 2013 Netflix, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
##

import sys
import json
import urllib2
import restclient
import eureka

# Root of the Genie REST API: the base URL reported by the eureka client with
# '/genie/' appended. The populate* functions below append their resource
# paths to it.
baseUrl = eureka.EurekaClient().getServiceBaseUrl() + '/genie/'

def populateProdHive():
    """Register the PROD Hive configuration 'hiveconf-prodhive-emr'.

    Sends an XML hiveConfig document (pointing at the local Hive 0.11.0
    hive-site.xml) to the Genie config resource using restclient.put.
    """
    hiveConfigXml = '''
<request>
<hiveConfig>
<name>prodhive</name>
<type>PROD</type>
<s3HiveSiteXml>file:///home/hadoop/.versions/hive-0.11.0/conf/hive-site.xml</s3HiveSiteXml>
<user>produser</user>
<status>ACTIVE</status>
<hiveVersion>0.11</hiveVersion>
</hiveConfig>
</request>
'''
    restclient.put(
        serviceUrl=baseUrl + 'v0/config/hive/hiveconf-prodhive-emr',
        payload=hiveConfigXml,
        contentType='application/xml')

def populateProdPig():
    """Register the PROD Pig configuration 'pigconf-prodpig-emr'.

    Sends an XML pigConfig document (pointing at the local Pig 0.11.1
    pig.properties) to the Genie config resource using restclient.put.
    """
    pigConfigXml = '''
<request>
<pigConfig>
<name>prodpig</name>
<type>PROD</type>
<s3PigProperties>file:///home/hadoop/.versions/pig-0.11.1/conf/pig.properties</s3PigProperties>
<user>produser</user>
<status>ACTIVE</status>
<pigVersion>0.11</pigVersion>
</pigConfig>
</request>
'''
    restclient.put(
        serviceUrl=baseUrl + 'v0/config/pig/pigconf-prodpig-emr',
        payload=pigConfigXml,
        contentType='application/xml')

def populateCluster():
    """Register the cluster configuration 'clusterconf-emr'.

    Sends an XML clusterConfig document to the Genie config resource using
    restclient.put. The document references the local Hadoop 1.0.3 site files
    and the IDs of the prod Hive and Pig configurations.
    """
    clusterConfigXml = '''
<request>
<clusterConfig>
<name>clusterconf-emr</name>
<prod>true</prod>
<adHoc>true</adHoc>
<s3MapredSiteXml>file:///home/hadoop/.versions/1.0.3/conf/mapred-site.xml</s3MapredSiteXml>
<s3CoreSiteXml>file:///home/hadoop/.versions/1.0.3/conf/core-site.xml</s3CoreSiteXml>
<s3HdfsSiteXml>file:///home/hadoop/.versions/1.0.3/conf/hdfs-site.xml</s3HdfsSiteXml>
<user>produser</user>
<prodHiveConfigId>hiveconf-prodhive-emr</prodHiveConfigId>
<prodPigConfigId>pigconf-prodpig-emr</prodPigConfigId>
<hadoopVersion>1.0.3</hadoopVersion>
<status>UP</status>
</clusterConfig>
</request>
'''
    restclient.put(
        serviceUrl=baseUrl + 'v0/config/cluster/clusterconf-emr',
        payload=clusterConfigXml,
        contentType='application/xml')

# driver method for populating configs on master node of EMR
if __name__ == "__main__":
    separator = "################################"
    # Registration steps in the order they are run: Hive and Pig configs
    # first, then the cluster config that refers to them by ID.
    steps = (
        ("prodhive", populateProdHive),
        ("prodpig", populateProdPig),
        ("cluster", populateCluster),
    )

    print("Registering EMR cluster\n")
    for label, register in steps:
        print(separator)
        print("Adding config for %s:\n" % label)
        register()
    print(separator)
    print("\nDone")