# Source: blob/develop/tests/unit/customizations/emr/test_add_steps.py
# Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""Unit tests for ``aws emr add-steps``.

Each step type (Custom JAR, Streaming, Hive, Pig, Impala, Spark) is
exercised against both AMI-based clusters (script-runner steps) and
release-label-based clusters (command-runner steps).
"""
import copy
import os
import unittest

from awscli.testutils import mock
from tests.unit.customizations.emr import EMRBaseAWSCommandParamsTest as \
    BaseAWSCommandParamsTest


class TestAddSteps(BaseAWSCommandParamsTest):
    """Tests that ``emr add-steps`` builds the expected AddJobFlowSteps
    request parameters (or emits the expected error) for every supported
    step type, on both AMI-based and release-based clusters."""

    # Common CLI prefix; each test appends its --steps value.
    prefix = 'emr add-steps --cluster-id j-ABC --steps '

    STREAMING_ARGS = 'Args=-files,' + \
        's3://elasticmapreduce/samples/wordcount/wordSplitter.py,' + \
        '-mapper,wordSplitter.py,' + \
        '-reducer,aggregate,' + \
        '-input,s3://elasticmapreduce/samples/wordcount/input,' + \
        '-output,s3://mybucket/wordcount/output/2014-04-18/12-15-24'
    # Expected HadoopJarStep on AMI-based clusters (streaming jar on-box).
    STREAMING_HADOOP_SCRIPT_RUNNER_STEP = \
        {'Jar': '/home/hadoop/contrib/streaming/hadoop-streaming.jar',
         'Args':
            ['-files',
             's3://elasticmapreduce/samples/wordcount/wordSplitter.py',
             '-mapper',
             'wordSplitter.py',
             '-reducer',
             'aggregate',
             '-input',
             's3://elasticmapreduce/samples/wordcount/input',
             '-output',
             's3://mybucket/wordcount/output/2014-04-18/12-15-24']
         }
    # Expected HadoopJarStep on release-based clusters (command-runner).
    STREAMING_HADOOP_COMMAND_RUNNER_STEP = \
        {'Jar': 'command-runner.jar',
         'Args':
            ['hadoop-streaming',
             '-files',
             's3://elasticmapreduce/samples/wordcount/wordSplitter.py',
             '-mapper',
             'wordSplitter.py',
             '-reducer',
             'aggregate',
             '-input',
             's3://elasticmapreduce/samples/wordcount/input',
             '-output',
             's3://mybucket/wordcount/output/2014-04-18/12-15-24']
         }

    HIVE_BASIC_ARGS = 'Args=-f,' + \
        's3://elasticmapreduce/samples/hive-ads/libs/model-build.q'

    HIVE_DEFAULT_SCRIPT_RUNNER_STEP = \
        {'Jar':
            ('s3://us-east-1.elasticmapreduce/'
             'libs/script-runner/script-runner.jar'),
         'Args':
            ['s3://us-east-1.elasticmapreduce/libs/hive/hive-script',
             '--run-hive-script',
             '--hive-versions',
             'latest',
             '--args',
             '-f',
             's3://elasticmapreduce/samples/hive-ads/libs/model-build.q'
             ]
         }

    HIVE_DEFAULT_COMMAND_RUNNER_STEP = \
        {'Jar':
            ('command-runner.jar'),
         'Args':
            ['hive-script',
             '--run-hive-script',
             '--args',
             '-f',
             's3://elasticmapreduce/samples/hive-ads/libs/model-build.q'
             ]
         }

    PIG_BASIC_ARGS = 'Args=-f,' + \
        's3://elasticmapreduce/samples/pig-apache/do-reports2.pig'

    PIG_DEFAULT_SCRIPT_RUNNER_STEP = \
        {'Jar':
            ('s3://us-east-1.elasticmapreduce/libs/'
             'script-runner/script-runner.jar'),
         'Args':
            ['s3://us-east-1.elasticmapreduce/libs/pig/pig-script',
             '--run-pig-script',
             '--pig-versions',
             'latest',
             '--args',
             '-f',
             's3://elasticmapreduce/samples/'
             'pig-apache/do-reports2.pig',
             ]}

    PIG_DEFAULT_COMMAND_RUNNER_STEP = \
        {'Jar':
            ('command-runner.jar'),
         'Args':
            ['pig-script',
             '--run-pig-script',
             '--args',
             '-f',
             's3://elasticmapreduce/samples/'
             'pig-apache/do-reports2.pig',
             ]}

    IMPALA_BASIC_ARGS = 'Args=' + \
        '--impala-script,s3://myimpala/input,' + \
        '--console-output-path,s3://myimpala/output'

    # Impala has no command-runner form: it is unsupported on
    # release-based clusters (see test_impala_step_with_release).
    IMPALA_BASIC_SCRIPT_RUNNER_STEP = \
        {'Jar':
            ('s3://us-east-1.elasticmapreduce/libs/'
             'script-runner/script-runner.jar'),
         'Args':
            ['s3://us-east-1.elasticmapreduce/libs/impala/setup-impala',
             '--run-impala-script',
             '--impala-script',
             's3://myimpala/input',
             '--console-output-path',
             's3://myimpala/output'
             ]
         }

    SPARK_SUBMIT_BASIC_ARGS = 'Args=' + \
        '[--deploy-mode,' + \
        'cluster,' + \
        '--conf,' + \
        'k1=v1,' + \
        's3://mybucket/myfolder/app.jar,' + \
        'k2=v2]'

    SPARK_SUBMIT_SCRIPT_RUNNER_STEP = \
        {
            'Jar':
                ('s3://us-east-1.elasticmapreduce/libs/'
                 'script-runner/script-runner.jar'),
            'Args':
                ['/home/hadoop/spark/bin/spark-submit',
                 '--deploy-mode',
                 'cluster',
                 '--conf',
                 'k1=v1',
                 's3://mybucket/myfolder/app.jar',
                 'k2=v2'
                 ]
        }

    SPARK_SUBMIT_COMMAND_RUNNER_STEP = \
        {
            'Jar': 'command-runner.jar',
            'Args':
                ['spark-submit',
                 '--deploy-mode',
                 'cluster',
                 '--conf',
                 'k1=v1',
                 's3://mybucket/myfolder/app.jar',
                 'k2=v2'
                 ]
        }

    def test_unknown_step_type(self):
        cmd = self.prefix + 'Type=unknown'
        expected_error_msg = '\naws: error: ' + \
            'The step type unknown is not supported.\n'
        self.assert_error_for_ami_and_release_based_clusters(
            cmd=cmd, expected_error_msg=expected_error_msg,
            expected_result_release=expected_error_msg)

    def test_default_step_type_name_action_on_failure(self):
        # With only Jar= given, the step defaults to a Custom JAR step
        # named 'Custom JAR' with ActionOnFailure=CONTINUE.
        cmd = self.prefix + 'Jar=s3://mybucket/mytest.jar'
        expected_result = {
            'JobFlowId': 'j-ABC',
            'Steps': [
                {'Name': 'Custom JAR',
                 'ActionOnFailure': 'CONTINUE',
                 'HadoopJarStep': {'Jar': 's3://mybucket/mytest.jar'}
                 }
            ]
        }

        self.assert_params_for_ami_and_release_based_clusters(
            cmd=cmd, expected_result=expected_result,
            expected_result_release=expected_result)

    def test_custom_jar_step_missing_jar(self):
        cmd = self.prefix + 'Name=CustomJarMissingJar'
        expected_error_msg = '\naws: error: The following ' + \
            'required parameters are missing for CustomJARStepConfig: Jar.\n'
        self.assert_error_for_ami_and_release_based_clusters(
            cmd=cmd, expected_error_msg=expected_error_msg,
            expected_result_release=expected_error_msg)

    def test_custom_jar_step_with_all_fields(self):
        # '\\,' is the CLI escape for a literal comma inside a
        # key=value list (previously written as the invalid escape '\,').
        cmd = self.prefix + (
            'Name=Custom,Type=Custom_jar,'
            'Jar=s3://mybucket/mytest.jar,'
            'Args=arg1,arg2,MainClass=mymainclass,'
            'ActionOnFailure=TERMINATE_CLUSTER,'
            'Properties=k1=v1\\,k2=v2\\,k3')
        expected_result = {
            'JobFlowId': 'j-ABC',
            'Steps': [
                {'Name': 'Custom',
                 'ActionOnFailure': 'TERMINATE_CLUSTER',
                 'HadoopJarStep':
                    {'Jar': 's3://mybucket/mytest.jar',
                     'Args': ['arg1', 'arg2'],
                     'MainClass': 'mymainclass',
                     'Properties':
                        [{'Key': 'k1', 'Value': 'v1'},
                         {'Key': 'k2', 'Value': 'v2'},
                         {'Key': 'k3', 'Value': ''}
                         ]
                     }
                 }
            ]
        }

        self.assert_params_for_ami_and_release_based_clusters(
            cmd=cmd, expected_result=expected_result,
            expected_result_release=expected_result)

    def test_streaming_step_with_default_fields(self):
        cmd = self.prefix + 'Type=Streaming,' + self.STREAMING_ARGS
        expected_result = {
            'JobFlowId': 'j-ABC',
            'Steps': [
                {'Name': 'Streaming program',
                 'ActionOnFailure': 'CONTINUE',
                 'HadoopJarStep': self.STREAMING_HADOOP_SCRIPT_RUNNER_STEP
                 }
            ]
        }
        expected_result_release = copy.deepcopy(expected_result)
        expected_result_release['Steps'][0]['HadoopJarStep'] = \
            self.STREAMING_HADOOP_COMMAND_RUNNER_STEP

        self.assert_params_for_ami_and_release_based_clusters(
            cmd=cmd, expected_result=expected_result,
            expected_result_release=expected_result_release)

    def test_step_with_execution_role_arn(self):
        cmd = self.prefix + 'Type=Streaming,' + self.STREAMING_ARGS
        cmd += ' --execution-role-arn arn:aws:iam::123456789010:role/sample '
        expected_result = {
            'ExecutionRoleArn': 'arn:aws:iam::123456789010:role/sample',
            'JobFlowId': 'j-ABC',
            'Steps': [
                {'Name': 'Streaming program',
                 'ActionOnFailure': 'CONTINUE',
                 'HadoopJarStep': self.STREAMING_HADOOP_SCRIPT_RUNNER_STEP
                 }
            ]
        }
        expected_result_release = copy.deepcopy(expected_result)
        expected_result_release['Steps'][0]['HadoopJarStep'] = \
            self.STREAMING_HADOOP_COMMAND_RUNNER_STEP

        self.assert_params_for_ami_and_release_based_clusters(
            cmd=cmd, expected_result=expected_result,
            expected_result_release=expected_result_release)

    def test_streaming_step_missing_args(self):
        cmd = self.prefix + 'Type=Streaming'
        expected_error_msg = '\naws: error: The following ' + \
            'required parameters are missing for StreamingStepConfig: Args.\n'
        self.assert_error_for_ami_and_release_based_clusters(
            cmd=cmd, expected_error_msg=expected_error_msg,
            expected_result_release=expected_error_msg)

    def test_streaming_jar_with_all_fields(self):
        test_step_config = 'Type=Streaming,' + \
            'Name=StreamingStepAllFields,' + \
            'ActionOnFailure=CANCEL_AND_WAIT,' + \
            self.STREAMING_ARGS
        cmd = self.prefix + test_step_config
        expected_result = {
            'JobFlowId': 'j-ABC',
            'Steps': [
                {'Name': 'StreamingStepAllFields',
                 'ActionOnFailure': 'CANCEL_AND_WAIT',
                 'HadoopJarStep': self.STREAMING_HADOOP_SCRIPT_RUNNER_STEP
                 }
            ]
        }

        expected_result_release = copy.deepcopy(expected_result)
        expected_result_release['Steps'][0]['HadoopJarStep'] = \
            self.STREAMING_HADOOP_COMMAND_RUNNER_STEP

        self.assert_params_for_ami_and_release_based_clusters(
            cmd=cmd, expected_result=expected_result,
            expected_result_release=expected_result_release)

    def test_hive_step_with_default_fields(self):
        cmd = self.prefix + 'Type=Hive,' + self.HIVE_BASIC_ARGS
        expected_result = {
            'JobFlowId': 'j-ABC',
            'Steps': [
                {'Name': 'Hive program',
                 'ActionOnFailure': 'CONTINUE',
                 'HadoopJarStep': self.HIVE_DEFAULT_SCRIPT_RUNNER_STEP
                 }]
        }
        expected_result_release = copy.deepcopy(expected_result)
        expected_result_release['Steps'][0]['HadoopJarStep'] = \
            self.HIVE_DEFAULT_COMMAND_RUNNER_STEP

        self.assert_params_for_ami_and_release_based_clusters(
            cmd=cmd, expected_result=expected_result,
            expected_result_release=expected_result_release)

    def test_hive_step_missing_args(self):
        cmd = self.prefix + 'Type=Hive'
        expected_error_msg = '\naws: error: The following ' + \
            'required parameters are missing for HiveStepConfig: Args.\n'

        self.assert_error_for_ami_and_release_based_clusters(
            cmd=cmd, expected_error_msg=expected_error_msg,
            expected_result_release=expected_error_msg)

    def test_hive_step_with_all_fields(self):
        test_step_config = \
            'Type=Hive,' + \
            'ActionOnFailure=CANCEL_AND_WAIT,' + \
            'Name=HiveWithAllFields,' + \
            self.HIVE_BASIC_ARGS
        cmd = self.prefix + test_step_config
        expected_result = {
            'JobFlowId': 'j-ABC',
            'Steps': [
                {'Name': 'HiveWithAllFields',
                 'ActionOnFailure': 'CANCEL_AND_WAIT',
                 'HadoopJarStep': self.HIVE_DEFAULT_SCRIPT_RUNNER_STEP
                 }]
        }
        expected_result_release = copy.deepcopy(expected_result)
        expected_result_release['Steps'][0]['HadoopJarStep'] = \
            self.HIVE_DEFAULT_COMMAND_RUNNER_STEP

        self.assert_params_for_ami_and_release_based_clusters(
            cmd=cmd, expected_result=expected_result,
            expected_result_release=expected_result_release)

    def test_pig_step_with_default_fields(self):
        cmd = self.prefix + 'Type=Pig,' + self.PIG_BASIC_ARGS
        expected_result = {
            'JobFlowId': 'j-ABC',
            'Steps': [
                {'Name': 'Pig program',
                 'ActionOnFailure': 'CONTINUE',
                 'HadoopJarStep': self.PIG_DEFAULT_SCRIPT_RUNNER_STEP
                 }]
        }
        expected_result_release = copy.deepcopy(expected_result)
        expected_result_release['Steps'][0]['HadoopJarStep'] = \
            self.PIG_DEFAULT_COMMAND_RUNNER_STEP

        self.assert_params_for_ami_and_release_based_clusters(
            cmd=cmd, expected_result=expected_result,
            expected_result_release=expected_result_release)

    def test_pig_missing_args(self):
        cmd = self.prefix + 'Type=Pig'
        expected_error_msg = '\naws: error: The following ' + \
            'required parameters are missing for PigStepConfig: Args.\n'
        self.assert_error_for_ami_and_release_based_clusters(
            cmd=cmd, expected_error_msg=expected_error_msg,
            expected_result_release=expected_error_msg)

    def test_pig_step_with_all_fields(self):
        test_step_config = \
            'Name=PigWithAllFields,' + \
            'Type=Pig,' + \
            self.PIG_BASIC_ARGS + ',' + \
            'ActionOnFailure=CANCEL_AND_WAIT'
        cmd = self.prefix + test_step_config
        expected_result = {
            'JobFlowId': 'j-ABC',
            'Steps': [
                {'Name': 'PigWithAllFields',
                 'ActionOnFailure': 'CANCEL_AND_WAIT',
                 'HadoopJarStep': self.PIG_DEFAULT_SCRIPT_RUNNER_STEP
                 }
            ]
        }
        expected_result_release = copy.deepcopy(expected_result)
        expected_result_release['Steps'][0]['HadoopJarStep'] = \
            self.PIG_DEFAULT_COMMAND_RUNNER_STEP

        self.assert_params_for_ami_and_release_based_clusters(
            cmd=cmd, expected_result=expected_result,
            expected_result_release=expected_result_release)

    def test_impala_step_with_default_fields(self):
        test_step_config = 'Type=Impala,' + \
            self.IMPALA_BASIC_ARGS
        cmd = self.prefix + test_step_config
        expected_result = {
            'JobFlowId': 'j-ABC',
            'Steps': [
                {'Name': 'Impala program',
                 'ActionOnFailure': 'CONTINUE',
                 'HadoopJarStep': self.IMPALA_BASIC_SCRIPT_RUNNER_STEP
                 }]
        }
        self.assert_params_for_cmd(cmd, expected_result)

    def test_SPARK_SUBMIT_SCRIPT_RUNNER_STEP(self):
        cmd = self.prefix + 'Type=SPARK,' + \
            self.SPARK_SUBMIT_BASIC_ARGS
        expected_result = {
            'JobFlowId': 'j-ABC',
            'Steps': [
                {'Name': 'Spark application',
                 'ActionOnFailure': 'CONTINUE',
                 'HadoopJarStep': self.SPARK_SUBMIT_SCRIPT_RUNNER_STEP
                 }]
        }
        expected_result_release = copy.deepcopy(expected_result)
        expected_result_release['Steps'][0]['HadoopJarStep'] = \
            self.SPARK_SUBMIT_COMMAND_RUNNER_STEP

        self.assert_params_for_ami_and_release_based_clusters(
            cmd=cmd, expected_result=expected_result,
            expected_result_release=expected_result_release)

    def test_spark_missing_arg(self):
        cmd = self.prefix + 'Type=SPARK'
        expected_error_msg = '\naws: error: The following ' + \
            'required parameters are missing for SparkStepConfig: Args.\n'
        self.assert_error_for_ami_and_release_based_clusters(
            cmd=cmd, expected_error_msg=expected_error_msg,
            expected_result_release=expected_error_msg)

    def test_impala_missing_args(self):
        cmd = self.prefix + 'Type=Impala'
        expected_error_msg = '\naws: error: The following ' + \
            'required parameters are missing for ImpalaStepConfig: Args.\n'
        self.assert_error_for_ami_and_release_based_clusters(
            cmd=cmd, expected_error_msg=expected_error_msg,
            expected_result_release=None)

    def test_impala_step_with_all_fields(self):
        test_step_config = \
            'Name=ImpalaWithAllFields,' + \
            'Type=Impala,' + \
            self.IMPALA_BASIC_ARGS + ',' + \
            'ActionOnFailure=CANCEL_AND_WAIT'
        cmd = self.prefix + test_step_config
        expected_result = {
            'JobFlowId': 'j-ABC',
            'Steps': [
                {'Name': 'ImpalaWithAllFields',
                 'ActionOnFailure': 'CANCEL_AND_WAIT',
                 'HadoopJarStep': self.IMPALA_BASIC_SCRIPT_RUNNER_STEP
                 }
            ]
        }
        self.assert_params_for_ami_and_release_based_clusters(
            cmd=cmd, expected_result=expected_result,
            expected_result_release=None)

    def test_impala_step_with_release(self):
        # Impala steps are rejected on release-label-based clusters.
        test_step_config = 'Type=Impala,' + self.IMPALA_BASIC_ARGS
        cmd = self.prefix + test_step_config
        expected_result_release = '\naws: error: The step type impala ' + \
            'is not supported.\n'

        self.assert_error_for_ami_and_release_based_clusters(
            cmd=cmd, expected_error_msg=None,
            expected_result_release=expected_result_release)

    def test_empty_step_args(self):
        # NOTE: 'prameter' matches the actual (typo'd) message emitted
        # by the EMR customization code; do not "fix" it here alone.
        cmd = self.prefix + 'Type=Streaming,Args='
        expected_error_msg = ('\naws: error: The prameter Args cannot '
                              'be an empty list.\n')
        self.assert_error_for_ami_and_release_based_clusters(
            cmd=cmd, expected_error_msg=expected_error_msg,
            expected_result_release=expected_error_msg)

        cmd = self.prefix + 'Type=Pig,Args='
        self.assert_error_for_ami_and_release_based_clusters(
            cmd=cmd, expected_error_msg=expected_error_msg,
            expected_result_release=expected_error_msg)

        cmd = self.prefix + 'Type=Hive,Args='
        self.assert_error_for_ami_and_release_based_clusters(
            cmd=cmd, expected_error_msg=expected_error_msg,
            expected_result_release=expected_error_msg)

        cmd = self.prefix + 'Args='
        expected_error_msg = ('\naws: error: The following required parameters'
                              ' are missing for CustomJARStepConfig: Jar.\n')
        self.assert_error_for_ami_and_release_based_clusters(
            cmd=cmd, expected_error_msg=expected_error_msg,
            expected_result_release=expected_error_msg)

    def test_all_step_types(self):
        test_step_config = 'Jar=s3://mybucket/mytest.jar ' + \
            ' Type=Streaming,' + self.STREAMING_ARGS + \
            ' Type=Hive,' + self.HIVE_BASIC_ARGS + \
            ' Type=Pig,' + self.PIG_BASIC_ARGS + \
            ' Type=Impala,' + self.IMPALA_BASIC_ARGS
        cmd = self.prefix + test_step_config
        expected_result = {
            'JobFlowId': 'j-ABC',
            'Steps': [
                {'Name': 'Custom JAR',
                 'ActionOnFailure': 'CONTINUE',
                 'HadoopJarStep': {'Jar': 's3://mybucket/mytest.jar'}
                 },
                {'Name': 'Streaming program',
                 'ActionOnFailure': 'CONTINUE',
                 'HadoopJarStep': self.STREAMING_HADOOP_SCRIPT_RUNNER_STEP
                 },
                {'Name': 'Hive program',
                 'ActionOnFailure': 'CONTINUE',
                 'HadoopJarStep': self.HIVE_DEFAULT_SCRIPT_RUNNER_STEP
                 },
                {'Name': 'Pig program',
                 'ActionOnFailure': 'CONTINUE',
                 'HadoopJarStep': self.PIG_DEFAULT_SCRIPT_RUNNER_STEP
                 },
                {'Name': 'Impala program',
                 'ActionOnFailure': 'CONTINUE',
                 'HadoopJarStep': self.IMPALA_BASIC_SCRIPT_RUNNER_STEP
                 }
            ]
        }

        self.assert_params_for_ami_and_release_based_clusters(
            cmd=cmd, expected_result=expected_result,
            expected_result_release=None)

    def test_all_step_types_release(self):
        # Same as test_all_step_types but without Impala (unsupported
        # on release-based clusters) and expecting command-runner steps.
        test_step_config = 'Jar=s3://mybucket/mytest.jar ' + \
            ' Type=Streaming,' + self.STREAMING_ARGS + \
            ' Type=Hive,' + self.HIVE_BASIC_ARGS + \
            ' Type=Pig,' + self.PIG_BASIC_ARGS

        cmd = self.prefix + test_step_config
        expected_result_release = {
            'JobFlowId': 'j-ABC',
            'Steps': [
                {'Name': 'Custom JAR',
                 'ActionOnFailure': 'CONTINUE',
                 'HadoopJarStep': {'Jar': 's3://mybucket/mytest.jar'}
                 },
                {'Name': 'Streaming program',
                 'ActionOnFailure': 'CONTINUE',
                 'HadoopJarStep': self.STREAMING_HADOOP_COMMAND_RUNNER_STEP
                 },
                {'Name': 'Hive program',
                 'ActionOnFailure': 'CONTINUE',
                 'HadoopJarStep': self.HIVE_DEFAULT_COMMAND_RUNNER_STEP
                 },
                {'Name': 'Pig program',
                 'ActionOnFailure': 'CONTINUE',
                 'HadoopJarStep': self.PIG_DEFAULT_COMMAND_RUNNER_STEP
                 }
            ]
        }

        self.assert_params_for_ami_and_release_based_clusters(
            cmd=cmd, expected_result=None,
            expected_result_release=expected_result_release)

    def test_all_step_types_from_json(self):
        # Steps supplied via a file:// JSON document next to this test.
        data_path = os.path.join(
            os.path.dirname(__file__), 'input_steps.json')
        cmd = self.prefix + 'file://' + data_path
        hive_script_runner_step = copy.deepcopy(
            self.HIVE_DEFAULT_SCRIPT_RUNNER_STEP)
        hive_script_runner_step['Args'] += \
            ['-d',
             'INPUT=s3://elasticmapreduce/samples/hive-ads/tables',
             '-d',
             'OUTPUT=s3://mybucket/hive-ads/output/2014-04-18/11-07-32',
             '-d',
             'LIBS=s3://elasticmapreduce/samples/hive-ads/libs'
             ]
        pig_script_runner_step = copy.deepcopy(
            self.PIG_DEFAULT_SCRIPT_RUNNER_STEP)
        pig_script_runner_step['Args'] += \
            ['-p',
             'INPUT=s3://elasticmapreduce/samples/pig-apache/input',
             '-p',
             'OUTPUT=s3://mybucket/pig-apache/output/2014-04-21/20-09-28'
             ]

        expected_result = {
            'JobFlowId': 'j-ABC',
            'Steps': [
                {'Name': 'Custom JAR step',
                 'ActionOnFailure': 'CANCEL_AND_WAIT',
                 'HadoopJarStep': {'Jar': 's3://mybucket/mytest.jar'}
                 },
                {'Name': 'Streaming step',
                 'ActionOnFailure': 'CANCEL_AND_WAIT',
                 'HadoopJarStep': self.STREAMING_HADOOP_SCRIPT_RUNNER_STEP
                 },
                {'Name': 'Hive step',
                 'ActionOnFailure': 'TERMINATE_CLUSTER',
                 'HadoopJarStep': hive_script_runner_step
                 },
                {'Name': 'Pig step',
                 'ActionOnFailure': 'TERMINATE_CLUSTER',
                 'HadoopJarStep': pig_script_runner_step
                 },
                {'Name': 'Impala step',
                 'ActionOnFailure': 'CANCEL_AND_WAIT',
                 'HadoopJarStep': self.IMPALA_BASIC_SCRIPT_RUNNER_STEP
                 }
            ]
        }
        self.assert_params_for_cmd(cmd, expected_result)

    @mock.patch('awscli.customizations.emr.'
                'emrutils.get_release_label')
    def assert_params_for_ami_and_release_based_clusters(
            self, grl_patch, cmd, expected_result, expected_result_release):
        """Run ``cmd`` once as an AMI-based cluster (release label None)
        and once as a release-based cluster (release label 'emr-4.0'),
        asserting the request parameters for each.  A falsy expectation
        skips that variant.  ``grl_patch`` is injected by ``mock.patch``.
        """
        if expected_result:
            grl_patch.return_value = None
            self.assert_params_for_cmd(cmd, expected_result)
        if expected_result_release:
            grl_patch.return_value = 'emr-4.0'
            self.assert_params_for_cmd(cmd, expected_result_release)

    @mock.patch('awscli.customizations.emr.'
                'emrutils.get_release_label')
    def assert_error_for_ami_and_release_based_clusters(
            self, grl_patch, cmd, expected_error_msg,
            expected_result_release):
        """Like the params helper, but asserts ``cmd`` exits with code
        255 and the given stderr text on each cluster variant.  A falsy
        expectation skips that variant."""
        if expected_error_msg:
            grl_patch.return_value = None
            result = self.run_cmd(cmd, 255)
            self.assertEqual(expected_error_msg, result[1])
        if expected_result_release:
            grl_patch.return_value = 'emr-4.0'
            result = self.run_cmd(cmd, 255)
            self.assertEqual(expected_result_release, result[1])


if __name__ == "__main__":
    unittest.main()