diff --git a/deploy/aws/emr_genie_bootstrap.sh b/deploy/aws/emr_genie_bootstrap.sh
new file mode 100755
index 00000000000..a1fa724dbb1
--- /dev/null
+++ b/deploy/aws/emr_genie_bootstrap.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+##
+#
+# Copyright 2013 Netflix, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+##
+
+# Trace each command; abort on the first failure, unset variable, or failed pipeline stage
+set -euxo pipefail
+
+# Install Tomcat under the home directory
+cd "$HOME"; wget http://mirror.sdunix.com/apache/tomcat/tomcat-6/v6.0.37/bin/apache-tomcat-6.0.37.tar.gz
+tar zxvf apache-tomcat-6.0.37.tar.gz
+
+# Set up Genie specific properties (defined right after unpacking so later steps share one path)
+export CATALINA_HOME="$HOME/apache-tomcat-6.0.37"
+export CATALINA_OPTS="-Darchaius.deployment.applicationId=genie -Dnetflix.datacenter=cloud"
+
+# Change port to 7001 to work out of the box
+sed -i -e "s#8080#7001#g" "$CATALINA_HOME/conf/server.xml"
+
+# TODO: update $CATALINA_HOME/conf/web.xml to enable directory browsing
+
+# Set up directories needed; the explicit link name plus -n keeps a re-run from nesting a second link
+mkdir -p /mnt/tomcat/genie-jobs
+ln -sfn /mnt/tomcat/genie-jobs "$CATALINA_HOME/webapps/genie-jobs"
+mkdir -p /home/hadoop/.versions/pig-0.11.1/conf; touch /home/hadoop/.versions/pig-0.11.1/conf/pig.properties
+
+# Set up genie - get the latest from GitHub
+git clone https://github.com/Netflix/genie.git
+cd "$HOME/genie"; ./gradlew clean build -x test
+cd "$HOME/genie"; ./local_deploy.sh
+
+# Start Tomcat
+cd "$CATALINA_HOME/logs"; "$CATALINA_HOME/bin/startup.sh"
diff --git a/deploy/aws/emr_genie_launch.sh b/deploy/aws/emr_genie_launch.sh
new file mode 100755
index 00000000000..eead2e699b0
--- /dev/null
+++ b/deploy/aws/emr_genie_launch.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+##
+#
+# Copyright 2013 Netflix, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+##
+
+# The bootstrap script must already be on S3; report a missing location on stderr and stop
+if [[ -z "$EMR_GENIE_BOOTSTRAP_LOC" ]]; then
+    echo "Set EMR_GENIE_BOOTSTRAP_LOC to point to emr_genie_bootstrap.sh on S3" >&2
+    exit 1
+fi
+echo "Using EMR Genie bootstrap action from: $EMR_GENIE_BOOTSTRAP_LOC"
+
+# Launching EMR; the run-if action is given instance.isMaster=true plus the Genie bootstrap location
+elastic-mapreduce --create --alive --instance-type m1.xlarge --instance-count 2 \
+    --ssh --debug --trace --visible-to-all-users --name "Genie Testing" --ami-version "2.4.2" \
+    --hive-interactive --hive-versions 0.11.0 --pig-interactive --pig-versions 0.11.1 \
+    --bootstrap-action s3://elasticmapreduce/bootstrap-actions/run-if \
+    --args "instance.isMaster=true,$EMR_GENIE_BOOTSTRAP_LOC"
diff --git a/deploy/aws/emr_genie_postinstall.sh b/deploy/aws/emr_genie_postinstall.sh
new file mode 100755
index 00000000000..cace2955f56
--- /dev/null
+++ b/deploy/aws/emr_genie_postinstall.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+##
+#
+# Copyright 2013 Netflix, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+##
+
+# On Master
+
+# ensure that HDFS is writable by all users (e.g. genietest)
+hadoop fs -chmod -R 777 /
+
+# Register EMR cluster; stop if the Genie checkout is missing instead of running python elsewhere
+export SERVICE_BASE_URL=http://localhost:7001
+cd "$HOME/genie/genie-web/src/test/python/utils" || exit 1
+python populateEMRConfigs.py
diff --git a/deploy/aws/emr_genie_test.sh b/deploy/aws/emr_genie_test.sh
new file mode 100755
index 00000000000..af03ca9362e
--- /dev/null
+++ b/deploy/aws/emr_genie_test.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+##
+#
+# Copyright 2013 Netflix, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+##
+
+# Set up some environment variables for testing
+export SERVICE_BASE_URL=http://localhost:7001
+export GENIE_TEST_PREFIX=file:///home/hadoop
+
+# The tests are run by relative name and add ../utils to sys.path, so stop if this directory is missing
+cd "$HOME/genie/genie-web/src/test/python/jobs" || exit 1
+
+# Test some jobs
+python hadoopFSTest.py
+
+python hiveJobTestWithAttachments.py
+
+python pigJobTestWithAttachments.py
diff --git a/genie-web/src/main/resources/genie.properties b/genie-web/src/main/resources/genie.properties
index a010846e76e..373ddf68879 100644
--- a/genie-web/src/main/resources/genie.properties
+++ b/genie-web/src/main/resources/genie.properties
@@ -47,19 +47,19 @@ netflix.genie.server.clusterLoadBalancerImpl=com.netflix.genie.server.services.i
## Execution Service system properties
# java home
-netflix.genie.server.java.home=/etc/alternatives/jre
+netflix.genie.server.java.home=/usr/lib/jvm/java-6-sun
# hadoop home for various versions
-netflix.genie.server.hadoop.home=/apps/hadoop/current
-netflix.genie.server.hadoop.1.0.3.home=/apps/hadoop/1.0
+netflix.genie.server.hadoop.home=/home/hadoop/.versions/1.0.3
+netflix.genie.server.hadoop.1.0.3.home=/home/hadoop/.versions/1.0.3
# hive home for various versions
-netflix.genie.server.hive.home=/apps/hive/current
-netflix.genie.server.hive.0.11.home=/apps/hive/0.11
+netflix.genie.server.hive.home=/home/hadoop/.versions/hive-0.11.0
+netflix.genie.server.hive.0.11.home=/home/hadoop/.versions/hive-0.11.0
# pig home for various versions
-netflix.genie.server.pig.home=/apps/pig/current
-netflix.genie.server.pig.0.11.home=/apps/pig/0.11
+netflix.genie.server.pig.home=/home/hadoop/.versions/pig-0.11.1
+netflix.genie.server.pig.0.11.home=/home/hadoop/.versions/pig-0.11.1
# REST URI for the execution service
netflix.genie.server.job.resource.prefix=genie/v0/jobs
@@ -70,7 +70,7 @@ netflix.genie.server.job.resource.prefix=genie/v0/jobs
# Location of genie scripts - currently part of genie-web/conf/system/apps/genie/bin
# Change this appropriately to point to above location
-netflix.genie.server.sys.home=/apps/genie/bin
+netflix.genie.server.sys.home=/home/hadoop/genie/genie-web/conf/system/apps/genie/bin
# The relative path for the prefix directory inside Tomcat's "webapps/"
# that Genie will use for its working directory
diff --git a/genie-web/src/test/python/jobs/hadoopFSTest.py b/genie-web/src/test/python/jobs/hadoopFSTest.py
index 3f9e1705996..d688c5f6586 100644
--- a/genie-web/src/test/python/jobs/hadoopFSTest.py
+++ b/genie-web/src/test/python/jobs/hadoopFSTest.py
@@ -40,7 +40,6 @@ def testXmlSubmitjob():
hadoop
laptop
hadoop
- prod
adHoc
fs -ls /
diff --git a/genie-web/src/test/python/jobs/hiveJobTestWithAttachments.py b/genie-web/src/test/python/jobs/hiveJobTestWithAttachments.py
index aef4875538e..d9f79fe7f2a 100644
--- a/genie-web/src/test/python/jobs/hiveJobTestWithAttachments.py
+++ b/genie-web/src/test/python/jobs/hiveJobTestWithAttachments.py
@@ -38,7 +38,7 @@ def testJsonSubmitjob():
# write out a temporary file with our query/dependencies
query = tempfile.NamedTemporaryFile(delete=False)
name = query.name
- query.write("select count(*) from dual;")
+ query.write("show tables;")
query.close()
# read it back in as base64 encoded binary
diff --git a/genie-web/src/test/python/jobs/pigJobTestWithAttachments.py b/genie-web/src/test/python/jobs/pigJobTestWithAttachments.py
new file mode 100644
index 00000000000..af0bdf49430
--- /dev/null
+++ b/genie-web/src/test/python/jobs/pigJobTestWithAttachments.py
@@ -0,0 +1,93 @@
+##
+#
+# Copyright 2013 Netflix, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+##
+
+import sys
+sys.path.append('../utils')
+
+import time
+import eureka
+import jobs
+
+import os
+import tempfile
+import base64
+
+# prefix (an S3 or file:// URI) where the tests are located, read from the environment; not referenced elsewhere in this script
+GENIE_TEST_PREFIX = os.getenv("GENIE_TEST_PREFIX")
+
+# get the serviceUrl from the eureka client
+serviceUrl = eureka.EurekaClient().getServiceBaseUrl() + '/genie/v0/jobs'
+
+def testJsonSubmitjob():
+ print "Running testJsonSubmitjob "
+ # Submit a Pig job with its script attached inline: first write the script to a temp file (it copies local file queryFile to the same name, loads it, and dumps it)
+ query = tempfile.NamedTemporaryFile(delete=False)
+ name = query.name
+ queryFile = 'pig-' + str(time.time()) + '.q'
+ query.write("fs -copyFromLocal %s %s; cmd = load '%s'; dump cmd;" % (queryFile, queryFile, queryFile))
+ query.close()
+
+ # read it back in as base64 encoded binary (used as the attachment's "data"), then delete the temp file
+ query = open(name, "rb")
+ contents = base64.b64encode(query.read())
+ print contents
+ query.close()
+ os.unlink(name)
+ # JSON job request: queryFile is both the attachment name and the -f argument; the function returns whatever jobs.submitJob returns
+ payload = '''
+ {
+ "jobInfo":
+ {
+ "jobName": "PIG-JOB-TEST",
+ "description": "This is a test",
+ "userName" : "genietest",
+ "groupName" : "hadoop",
+ "jobType": "pig",
+ "configuration": "prod",
+ "schedule": "adHoc",
+ "cmdArgs": "-f ''' + queryFile + '''",
+ "attachments": {
+ "data": "''' + contents + '''",
+ "name": "''' + queryFile + '''"
+ }
+ }
+ }
+ '''
+ print payload
+ print "\n"
+ return jobs.submitJob(serviceUrl, payload)
+
+# driver method for all tests: submit the Pig job, then every 5 seconds print its info and status until the status is neither RUNNING nor INIT
+if __name__ == "__main__":
+ print "Running unit tests:\n"
+ jobID = testJsonSubmitjob()
+ print "\n"
+ while True:
+ print jobs.getJobInfo(serviceUrl, jobID)
+ print "\n"
+ status = jobs.getJobStatus(serviceUrl, jobID)
+ print status
+ print "\n"
+
+ if (status != 'RUNNING') and (status != 'INIT'):
+ print "Final status: ", status
+ print "Job has terminated - exiting"
+ break
+
+ time.sleep(5)
+
diff --git a/genie-web/src/test/python/utils/populateEMRConfigs.py b/genie-web/src/test/python/utils/populateEMRConfigs.py
new file mode 100644
index 00000000000..594f29d219d
--- /dev/null
+++ b/genie-web/src/test/python/utils/populateEMRConfigs.py
@@ -0,0 +1,95 @@
+##
+#
+# Copyright 2013 Netflix, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+##
+
+import sys
+import json
+import urllib2
+import restclient
+import eureka
+
+baseUrl = eureka.EurekaClient().getServiceBaseUrl() + '/genie/'
+# PUTs the Hive config payload below to baseUrl + 'v0/config/hive/hiveconf-prodhive-emr' as application/xml; the result of restclient.put is not inspected
+def populateProdHive():
+ serviceUrl = baseUrl + 'v0/config/hive/hiveconf-prodhive-emr'
+ payload = '''
+
+
+ prodhive
+ PROD
+ file:///home/hadoop/.versions/hive-0.11.0/conf/hive-site.xml
+ produser
+ ACTIVE
+ 0.11
+
+
+ '''
+ restclient.put(serviceUrl=serviceUrl, payload=payload, contentType='application/xml')
+# PUTs the Pig config payload below to baseUrl + 'v0/config/pig/pigconf-prodpig-emr' as application/xml; the result of restclient.put is not inspected
+def populateProdPig():
+ serviceUrl = baseUrl + 'v0/config/pig/pigconf-prodpig-emr'
+ payload = '''
+
+
+ prodpig
+ PROD
+ file:///home/hadoop/.versions/pig-0.11.1/conf/pig.properties
+ produser
+ ACTIVE
+ 0.11
+
+
+ '''
+ restclient.put(serviceUrl=serviceUrl, payload=payload, contentType='application/xml')
+# PUTs the cluster config payload below to baseUrl + 'v0/config/cluster/clusterconf-emr' as application/xml; the payload names the Hadoop 1.0.3 site files and the ids hiveconf-prodhive-emr and pigconf-prodpig-emr
+def populateCluster():
+ serviceUrl = baseUrl + 'v0/config/cluster/clusterconf-emr'
+ payload = '''
+
+
+ clusterconf-emr
+ true
+ true
+ file:///home/hadoop/.versions/1.0.3/conf/mapred-site.xml
+ file:///home/hadoop/.versions/1.0.3/conf/core-site.xml
+ file:///home/hadoop/.versions/1.0.3/conf/hdfs-site.xml
+ produser
+ hiveconf-prodhive-emr
+ pigconf-prodpig-emr
+ 1.0.3
+ UP
+
+
+ '''
+ restclient.put(serviceUrl=serviceUrl, payload=payload, contentType='application/xml')
+
+# driver method for populating configs on master node of EMR: adds the Hive config, then the Pig config, then the cluster config, printing a banner before each step
+if __name__ == "__main__":
+
+ print "Registering EMR cluster\n"
+ print "################################"
+ print "Adding config for prodhive:\n"
+ populateProdHive()
+ print "################################"
+ print "Adding config for prodpig:\n"
+ populateProdPig()
+ print "################################"
+ print "Adding config for cluster:\n"
+ populateCluster()
+ print "################################"
+ print "\nDone"
+