Path: blob/develop/tests/unit/customizations/emr/test_add_steps.py
2624 views
# Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.1#2# Licensed under the Apache License, Version 2.0 (the "License"). You3# may not use this file except in compliance with the License. A copy of4# the License is located at5#6# http://aws.amazon.com/apache2.0/7#8# or in the "license" file accompanying this file. This file is9# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF10# ANY KIND, either express or implied. See the License for the specific11# language governing permissions and limitations under the License.12import copy13import os1415from awscli.testutils import mock16from tests.unit.customizations.emr import (17EMRBaseAWSCommandParamsTest as BaseAWSCommandParamsTest,18)192021class TestAddSteps(BaseAWSCommandParamsTest):22prefix = 'emr add-steps --cluster-id j-ABC --steps '2324STREAMING_ARGS = (25'Args=-files,'26+ 's3://elasticmapreduce/samples/wordcount/wordSplitter.py,'27+ '-mapper,wordSplitter.py,'28+ '-reducer,aggregate,'29+ '-input,s3://elasticmapreduce/samples/wordcount/input,'30+ '-output,s3://mybucket/wordcount/output/2014-04-18/12-15-24'31)32STREAMING_HADOOP_SCRIPT_RUNNER_STEP = {33'Jar': '/home/hadoop/contrib/streaming/hadoop-streaming.jar',34'Args': [35'-files',36's3://elasticmapreduce/samples/wordcount/wordSplitter.py',37'-mapper',38'wordSplitter.py',39'-reducer',40'aggregate',41'-input',42's3://elasticmapreduce/samples/wordcount/input',43'-output',44's3://mybucket/wordcount/output/2014-04-18/12-15-24',45],46}47STREAMING_HADOOP_COMMAND_RUNNER_STEP = {48'Jar': 'command-runner.jar',49'Args': [50'hadoop-streaming',51'-files',52's3://elasticmapreduce/samples/wordcount/wordSplitter.py',53'-mapper',54'wordSplitter.py',55'-reducer',56'aggregate',57'-input',58's3://elasticmapreduce/samples/wordcount/input',59'-output',60's3://mybucket/wordcount/output/2014-04-18/12-15-24',61],62}6364HIVE_BASIC_ARGS = (65'Args=-f,'66+ 's3://elasticmapreduce/samples/hive-ads/libs/model-build.q'67)6869HIVE_DEFAULT_SCRIPT_RUNNER_STEP = {70'Jar': (71's3://us-east-1.elasticmapreduce/'72'libs/script-runner/script-runner.jar'73),74'Args': [75's3://us-east-1.elasticmapreduce/libs/hive/hive-script',76'--run-hive-script',77'--hive-versions',78'latest',79'--args',80'-f',81's3://elasticmapreduce/samples/hive-ads/libs/model-build.q',82],83}8485HIVE_DEFAULT_COMMAND_RUNNER_STEP = {86'Jar': ('command-runner.jar'),87'Args': [88'hive-script',89'--run-hive-script',90'--args',91'-f',92's3://elasticmapreduce/samples/hive-ads/libs/model-build.q',93],94}9596PIG_BASIC_ARGS = (97'Args=-f,' + 's3://elasticmapreduce/samples/pig-apache/do-reports2.pig'98)99100PIG_DEFAULT_SCRIPT_RUNNER_STEP = {101'Jar': (102's3://us-east-1.elasticmapreduce/libs/'103'script-runner/script-runner.jar'104),105'Args': [106's3://us-east-1.elasticmapreduce/libs/pig/pig-script',107'--run-pig-script',108'--pig-versions',109'latest',110'--args',111'-f',112's3://elasticmapreduce/samples/' 'pig-apache/do-reports2.pig',113],114}115116PIG_DEFAULT_COMMAND_RUNNER_STEP = {117'Jar': ('command-runner.jar'),118'Args': [119'pig-script',120'--run-pig-script',121'--args',122'-f',123's3://elasticmapreduce/samples/' 'pig-apache/do-reports2.pig',124],125}126127IMPALA_BASIC_ARGS = (128'Args='129+ '--impala-script,s3://myimpala/input,'130+ '--console-output-path,s3://myimpala/output'131)132133IMPALA_BASIC_SCRIPT_RUNNER_STEP = {134'Jar': (135's3://us-east-1.elasticmapreduce/libs/'136'script-runner/script-runner.jar'137),138'Args': [139's3://us-east-1.elasticmapreduce/libs/impala/setup-impala',140'--run-impala-script',141'--impala-script',142's3://myimpala/input',143'--console-output-path',144's3://myimpala/output',145],146}147148SPARK_SUBMIT_BASIC_ARGS = (149'Args='150+ '[--deploy-mode,'151+ 'cluster,'152+ '--conf,'153+ 'k1=v1,'154+ 's3://mybucket/myfolder/app.jar,'155+ 'k2=v2]'156)157158SPARK_SUBMIT_SCRIPT_RUNNER_STEP = {159'Jar': (160's3://us-east-1.elasticmapreduce/libs/'161'script-runner/script-runner.jar'162),163'Args': [164'/home/hadoop/spark/bin/spark-submit',165'--deploy-mode',166'cluster',167'--conf',168'k1=v1',169's3://mybucket/myfolder/app.jar',170'k2=v2',171],172}173174SPARK_SUBMIT_COMMAND_RUNNER_STEP = {175'Jar': 'command-runner.jar',176'Args': [177'spark-submit',178'--deploy-mode',179'cluster',180'--conf',181'k1=v1',182's3://mybucket/myfolder/app.jar',183'k2=v2',184],185}186187def test_unknown_step_type(self):188cmd = self.prefix + 'Type=unknown'189expected_error_msg = (190'\naws: error: ' + 'The step type unknown is not supported.\n'191)192self.assert_error_for_ami_and_release_based_clusters(193cmd=cmd,194expected_error_msg=expected_error_msg,195expected_result_release=expected_error_msg,196)197198def test_default_step_type_name_action_on_failure(self):199cmd = self.prefix + 'Jar=s3://mybucket/mytest.jar'200expected_result = {201'JobFlowId': 'j-ABC',202'Steps': [203{204'Name': 'Custom JAR',205'ActionOnFailure': 'CONTINUE',206'HadoopJarStep': {'Jar': 's3://mybucket/mytest.jar'},207}208],209}210211self.assert_params_for_ami_and_release_based_clusters(212cmd=cmd,213expected_result=expected_result,214expected_result_release=expected_result,215)216217def test_custom_jar_step_missing_jar(self):218cmd = self.prefix + 'Name=CustomJarMissingJar'219expected_error_msg = (220'\naws: error: The following '221+ 'required parameters are missing for CustomJARStepConfig: Jar.\n'222)223self.assert_error_for_ami_and_release_based_clusters(224cmd=cmd,225expected_error_msg=expected_error_msg,226expected_result_release=expected_error_msg,227)228229def test_custom_jar_step_with_all_fields(self):230cmd = self.prefix + (231'Name=Custom,Type=Custom_jar,'232'Jar=s3://mybucket/mytest.jar,'233'Args=arg1,arg2,MainClass=mymainclass,'234'ActionOnFailure=TERMINATE_CLUSTER,'235'LogUri="TestLogUri",'236'EncryptionKeyArn="TestEncryptionKeyArn",'237'Properties=k1=v1\\,k2=v2\\,k3'238)239expected_result = {240'JobFlowId': 'j-ABC',241'Steps': [242{243'Name': 'Custom',244'ActionOnFailure': 'TERMINATE_CLUSTER',245'HadoopJarStep': {246'Jar': 's3://mybucket/mytest.jar',247'Args': ['arg1', 'arg2'],248'MainClass': 'mymainclass',249'Properties': [250{'Key': 'k1', 'Value': 'v1'},251{'Key': 'k2', 'Value': 'v2'},252{'Key': 'k3', 'Value': ''},253],254},255'StepMonitoringConfiguration': {256'S3MonitoringConfiguration': {257'LogUri': "TestLogUri",258'EncryptionKeyArn': "TestEncryptionKeyArn",259}260},261}262],263}264265self.assert_params_for_ami_and_release_based_clusters(266cmd=cmd,267expected_result=expected_result,268expected_result_release=expected_result,269)270271def test_custom_jar_step_with_step_monitoring_configuration_log_uri_only(272self,273):274cmd = self.prefix + (275'Name=Custom,Type=Custom_jar,'276'Jar=s3://mybucket/mytest.jar,'277'Args=arg1,arg2,MainClass=mymainclass,'278'ActionOnFailure=TERMINATE_CLUSTER,'279'LogUri="TestLogUri"'280)281expected_result = {282'JobFlowId': 'j-ABC',283'Steps': [284{285'Name': 'Custom',286'ActionOnFailure': 'TERMINATE_CLUSTER',287'HadoopJarStep': {288'Jar': 's3://mybucket/mytest.jar',289'Args': ['arg1', 'arg2'],290'MainClass': 'mymainclass',291},292'StepMonitoringConfiguration': {293'S3MonitoringConfiguration': {'LogUri': "TestLogUri"}294},295}296],297}298299self.assert_params_for_ami_and_release_based_clusters(300cmd=cmd,301expected_result=expected_result,302expected_result_release=expected_result,303)304305def test_custom_jar_step_with_step_monitoring_configuration_encryption_key_arn_only(306self,307):308cmd = self.prefix + (309'Name=Custom,Type=Custom_jar,'310'Jar=s3://mybucket/mytest.jar,'311'Args=arg1,arg2,MainClass=mymainclass,'312'ActionOnFailure=TERMINATE_CLUSTER,'313'EncryptionKeyArn="TestEncryptionKeyArn"'314)315expected_result = {316'JobFlowId': 'j-ABC',317'Steps': [318{319'Name': 'Custom',320'ActionOnFailure': 'TERMINATE_CLUSTER',321'HadoopJarStep': {322'Jar': 's3://mybucket/mytest.jar',323'Args': ['arg1', 'arg2'],324'MainClass': 'mymainclass',325},326'StepMonitoringConfiguration': {327'S3MonitoringConfiguration': {328'EncryptionKeyArn': "TestEncryptionKeyArn"329}330},331}332],333}334335self.assert_params_for_ami_and_release_based_clusters(336cmd=cmd,337expected_result=expected_result,338expected_result_release=expected_result,339)340341def test_custom_jar_step_with_step_monitoring_configuration_no_log_uri_or_encryption_key_arn(342self,343):344cmd = self.prefix + (345'Name=Custom,Type=Custom_jar,'346'Jar=s3://mybucket/mytest.jar,'347'Args=arg1,arg2,MainClass=mymainclass,'348'ActionOnFailure=TERMINATE_CLUSTER'349)350expected_result = {351'JobFlowId': 'j-ABC',352'Steps': [353{354'Name': 'Custom',355'ActionOnFailure': 'TERMINATE_CLUSTER',356'HadoopJarStep': {357'Jar': 's3://mybucket/mytest.jar',358'Args': ['arg1', 'arg2'],359'MainClass': 'mymainclass',360},361}362],363}364365self.assert_params_for_ami_and_release_based_clusters(366cmd=cmd,367expected_result=expected_result,368expected_result_release=expected_result,369)370371def test_streaming_step_with_default_fields(self):372cmd = self.prefix + 'Type=Streaming,' + self.STREAMING_ARGS373expected_result = {374'JobFlowId': 'j-ABC',375'Steps': [376{377'Name': 'Streaming program',378'ActionOnFailure': 'CONTINUE',379'HadoopJarStep': self.STREAMING_HADOOP_SCRIPT_RUNNER_STEP,380}381],382}383expected_result_release = copy.deepcopy(expected_result)384expected_result_release['Steps'][0]['HadoopJarStep'] = (385self.STREAMING_HADOOP_COMMAND_RUNNER_STEP386)387388self.assert_params_for_ami_and_release_based_clusters(389cmd=cmd,390expected_result=expected_result,391expected_result_release=expected_result_release,392)393394def test_step_with_execution_role_arn(self):395cmd = self.prefix + 'Type=Streaming,' + self.STREAMING_ARGS396cmd += ' --execution-role-arn arn:aws:iam::123456789010:role/sample '397expected_result = {398'ExecutionRoleArn': 'arn:aws:iam::123456789010:role/sample',399'JobFlowId': 'j-ABC',400'Steps': [401{402'Name': 'Streaming program',403'ActionOnFailure': 'CONTINUE',404'HadoopJarStep': self.STREAMING_HADOOP_SCRIPT_RUNNER_STEP,405}406],407}408expected_result_release = copy.deepcopy(expected_result)409expected_result_release['Steps'][0]['HadoopJarStep'] = (410self.STREAMING_HADOOP_COMMAND_RUNNER_STEP411)412413self.assert_params_for_ami_and_release_based_clusters(414cmd=cmd,415expected_result=expected_result,416expected_result_release=expected_result_release,417)418419def test_streaming_step_missing_args(self):420cmd = self.prefix + 'Type=Streaming'421expected_error_msg = (422'\naws: error: The following '423+ 'required parameters are missing for StreamingStepConfig: Args.\n'424)425self.assert_error_for_ami_and_release_based_clusters(426cmd=cmd,427expected_error_msg=expected_error_msg,428expected_result_release=expected_error_msg,429)430431def test_streaming_jar_with_all_fields(self):432test_step_config = (433'Type=Streaming,'434+ 'Name=StreamingStepAllFields,'435+ 'ActionOnFailure=CANCEL_AND_WAIT,'436+ self.STREAMING_ARGS437+ ','438+ 'LogUri="TestLogUri",'439+ 'EncryptionKeyArn="TestEncryptionKeyArn"'440)441cmd = self.prefix + test_step_config442expected_result = {443'JobFlowId': 'j-ABC',444'Steps': [445{446'Name': 'StreamingStepAllFields',447'ActionOnFailure': 'CANCEL_AND_WAIT',448'HadoopJarStep': self.STREAMING_HADOOP_SCRIPT_RUNNER_STEP,449'StepMonitoringConfiguration': {450'S3MonitoringConfiguration': {451'LogUri': "TestLogUri",452'EncryptionKeyArn': "TestEncryptionKeyArn",453}454},455}456],457}458459expected_result_release = copy.deepcopy(expected_result)460expected_result_release['Steps'][0]['HadoopJarStep'] = (461self.STREAMING_HADOOP_COMMAND_RUNNER_STEP462)463464self.assert_params_for_ami_and_release_based_clusters(465cmd=cmd,466expected_result=expected_result,467expected_result_release=expected_result_release,468)469470def test_streaming_jar_with_step_monitoring_configuration_log_uri_only(471self,472):473test_step_config = (474'Type=Streaming,'475+ 'Name=StreamingStepAllFields,'476+ 'ActionOnFailure=CANCEL_AND_WAIT,'477+ self.STREAMING_ARGS478+ ','479+ 'LogUri="TestLogUri"'480)481cmd = self.prefix + test_step_config482expected_result = {483'JobFlowId': 'j-ABC',484'Steps': [485{486'Name': 'StreamingStepAllFields',487'ActionOnFailure': 'CANCEL_AND_WAIT',488'HadoopJarStep': self.STREAMING_HADOOP_SCRIPT_RUNNER_STEP,489'StepMonitoringConfiguration': {490'S3MonitoringConfiguration': {'LogUri': "TestLogUri"}491},492}493],494}495496expected_result_release = copy.deepcopy(expected_result)497expected_result_release['Steps'][0]['HadoopJarStep'] = (498self.STREAMING_HADOOP_COMMAND_RUNNER_STEP499)500501self.assert_params_for_ami_and_release_based_clusters(502cmd=cmd,503expected_result=expected_result,504expected_result_release=expected_result_release,505)506507def test_streaming_jar_with_step_monitoring_configuration_encryption_key_arn_only(508self,509):510test_step_config = (511'Type=Streaming,'512+ 'Name=StreamingStepAllFields,'513+ 'ActionOnFailure=CANCEL_AND_WAIT,'514+ self.STREAMING_ARGS515+ ','516+ 'EncryptionKeyArn="TestEncryptionKeyArn"'517)518cmd = self.prefix + test_step_config519expected_result = {520'JobFlowId': 'j-ABC',521'Steps': [522{523'Name': 'StreamingStepAllFields',524'ActionOnFailure': 'CANCEL_AND_WAIT',525'HadoopJarStep': self.STREAMING_HADOOP_SCRIPT_RUNNER_STEP,526'StepMonitoringConfiguration': {527'S3MonitoringConfiguration': {528'EncryptionKeyArn': "TestEncryptionKeyArn"529}530},531}532],533}534535expected_result_release = copy.deepcopy(expected_result)536expected_result_release['Steps'][0]['HadoopJarStep'] = (537self.STREAMING_HADOOP_COMMAND_RUNNER_STEP538)539540self.assert_params_for_ami_and_release_based_clusters(541cmd=cmd,542expected_result=expected_result,543expected_result_release=expected_result_release,544)545546def test_streaming_jar_with_step_monitoring_configuration_no_log_uri_or_encryption_key_arn(547self,548):549test_step_config = (550'Type=Streaming,'551+ 'Name=StreamingStepAllFields,'552+ 'ActionOnFailure=CANCEL_AND_WAIT,'553+ self.STREAMING_ARGS554)555cmd = self.prefix + test_step_config556expected_result = {557'JobFlowId': 'j-ABC',558'Steps': [559{560'Name': 'StreamingStepAllFields',561'ActionOnFailure': 'CANCEL_AND_WAIT',562'HadoopJarStep': self.STREAMING_HADOOP_SCRIPT_RUNNER_STEP,563}564],565}566567expected_result_release = copy.deepcopy(expected_result)568expected_result_release['Steps'][0]['HadoopJarStep'] = (569self.STREAMING_HADOOP_COMMAND_RUNNER_STEP570)571572self.assert_params_for_ami_and_release_based_clusters(573cmd=cmd,574expected_result=expected_result,575expected_result_release=expected_result_release,576)577578def test_hive_step_with_default_fields(self):579cmd = self.prefix + 'Type=Hive,' + self.HIVE_BASIC_ARGS580expected_result = {581'JobFlowId': 'j-ABC',582'Steps': [583{584'Name': 'Hive program',585'ActionOnFailure': 'CONTINUE',586'HadoopJarStep': self.HIVE_DEFAULT_SCRIPT_RUNNER_STEP,587}588],589}590expected_result_release = copy.deepcopy(expected_result)591expected_result_release['Steps'][0]['HadoopJarStep'] = (592self.HIVE_DEFAULT_COMMAND_RUNNER_STEP593)594595self.assert_params_for_ami_and_release_based_clusters(596cmd=cmd,597expected_result=expected_result,598expected_result_release=expected_result_release,599)600601def test_hive_step_missing_args(self):602cmd = self.prefix + 'Type=Hive'603expected_error_msg = (604'\naws: error: The following '605+ 'required parameters are missing for HiveStepConfig: Args.\n'606)607608self.assert_error_for_ami_and_release_based_clusters(609cmd=cmd,610expected_error_msg=expected_error_msg,611expected_result_release=expected_error_msg,612)613614def test_hive_step_with_all_fields(self):615test_step_config = (616'Type=Hive,'617+ 'ActionOnFailure=CANCEL_AND_WAIT,'618+ 'Name=HiveWithAllFields,'619+ self.HIVE_BASIC_ARGS620+ ','621+ 'LogUri="TestLogUri",'622+ 'EncryptionKeyArn="TestEncryptionKeyArn"'623)624cmd = self.prefix + test_step_config625expected_result = {626'JobFlowId': 'j-ABC',627'Steps': [628{629'Name': 'HiveWithAllFields',630'ActionOnFailure': 'CANCEL_AND_WAIT',631'HadoopJarStep': self.HIVE_DEFAULT_SCRIPT_RUNNER_STEP,632'StepMonitoringConfiguration': {633'S3MonitoringConfiguration': {634'LogUri': "TestLogUri",635'EncryptionKeyArn': "TestEncryptionKeyArn",636}637},638}639],640}641expected_result_release = copy.deepcopy(expected_result)642expected_result_release['Steps'][0]['HadoopJarStep'] = (643self.HIVE_DEFAULT_COMMAND_RUNNER_STEP644)645646self.assert_params_for_ami_and_release_based_clusters(647cmd=cmd,648expected_result=expected_result,649expected_result_release=expected_result_release,650)651652def test_hive_step_with_step_monitoring_configuration_log_uri_only(self):653test_step_config = (654'Type=Hive,'655+ 'ActionOnFailure=CANCEL_AND_WAIT,'656+ 'Name=HiveWithAllFields,'657+ self.HIVE_BASIC_ARGS658+ ','659+ 'LogUri="TestLogUri"'660)661cmd = self.prefix + test_step_config662expected_result = {663'JobFlowId': 'j-ABC',664'Steps': [665{666'Name': 'HiveWithAllFields',667'ActionOnFailure': 'CANCEL_AND_WAIT',668'HadoopJarStep': self.HIVE_DEFAULT_SCRIPT_RUNNER_STEP,669'StepMonitoringConfiguration': {670'S3MonitoringConfiguration': {671'LogUri': "TestLogUri",672}673},674}675],676}677expected_result_release = copy.deepcopy(expected_result)678expected_result_release['Steps'][0]['HadoopJarStep'] = (679self.HIVE_DEFAULT_COMMAND_RUNNER_STEP680)681682self.assert_params_for_ami_and_release_based_clusters(683cmd=cmd,684expected_result=expected_result,685expected_result_release=expected_result_release,686)687688def test_hive_step_with_step_monitoring_configuration_encryption_key_arn_only(689self,690):691test_step_config = (692'Type=Hive,'693+ 'ActionOnFailure=CANCEL_AND_WAIT,'694+ 'Name=HiveWithAllFields,'695+ self.HIVE_BASIC_ARGS696+ ','697+ 'EncryptionKeyArn="TestEncryptionKeyArn"'698)699cmd = self.prefix + test_step_config700expected_result = {701'JobFlowId': 'j-ABC',702'Steps': [703{704'Name': 'HiveWithAllFields',705'ActionOnFailure': 'CANCEL_AND_WAIT',706'HadoopJarStep': self.HIVE_DEFAULT_SCRIPT_RUNNER_STEP,707'StepMonitoringConfiguration': {708'S3MonitoringConfiguration': {709'EncryptionKeyArn': "TestEncryptionKeyArn"710}711},712}713],714}715expected_result_release = copy.deepcopy(expected_result)716expected_result_release['Steps'][0]['HadoopJarStep'] = (717self.HIVE_DEFAULT_COMMAND_RUNNER_STEP718)719720self.assert_params_for_ami_and_release_based_clusters(721cmd=cmd,722expected_result=expected_result,723expected_result_release=expected_result_release,724)725726def test_hive_step_with_step_monitoring_configuration_no_log_uri_or_encryption_key_arn(727self,728):729test_step_config = (730'Type=Hive,'731+ 'ActionOnFailure=CANCEL_AND_WAIT,'732+ 'Name=HiveWithAllFields,'733+ self.HIVE_BASIC_ARGS734)735cmd = self.prefix + test_step_config736expected_result = {737'JobFlowId': 'j-ABC',738'Steps': [739{740'Name': 'HiveWithAllFields',741'ActionOnFailure': 'CANCEL_AND_WAIT',742'HadoopJarStep': self.HIVE_DEFAULT_SCRIPT_RUNNER_STEP,743}744],745}746expected_result_release = copy.deepcopy(expected_result)747expected_result_release['Steps'][0]['HadoopJarStep'] = (748self.HIVE_DEFAULT_COMMAND_RUNNER_STEP749)750751self.assert_params_for_ami_and_release_based_clusters(752cmd=cmd,753expected_result=expected_result,754expected_result_release=expected_result_release,755)756757def test_pig_step_with_default_fields(self):758cmd = self.prefix + 'Type=Pig,' + self.PIG_BASIC_ARGS759expected_result = {760'JobFlowId': 'j-ABC',761'Steps': [762{763'Name': 'Pig program',764'ActionOnFailure': 'CONTINUE',765'HadoopJarStep': self.PIG_DEFAULT_SCRIPT_RUNNER_STEP,766}767],768}769expected_result_release = copy.deepcopy(expected_result)770expected_result_release['Steps'][0]['HadoopJarStep'] = (771self.PIG_DEFAULT_COMMAND_RUNNER_STEP772)773774self.assert_params_for_ami_and_release_based_clusters(775cmd=cmd,776expected_result=expected_result,777expected_result_release=expected_result_release,778)779780def test_pig_missing_args(self):781cmd = self.prefix + 'Type=Pig'782expected_error_msg = (783'\naws: error: The following '784+ 'required parameters are missing for PigStepConfig: Args.\n'785)786self.assert_error_for_ami_and_release_based_clusters(787cmd=cmd,788expected_error_msg=expected_error_msg,789expected_result_release=expected_error_msg,790)791792def test_pig_step_with_all_fields(self):793test_step_config = (794'Name=PigWithAllFields,'795+ 'Type=Pig,'796+ self.PIG_BASIC_ARGS797+ ','798+ 'ActionOnFailure=CANCEL_AND_WAIT,'799+ 'LogUri="TestLogUri",'800+ 'EncryptionKeyArn="TestEncryptionKeyArn"'801)802cmd = self.prefix + test_step_config803expected_result = {804'JobFlowId': 'j-ABC',805'Steps': [806{807'Name': 'PigWithAllFields',808'ActionOnFailure': 'CANCEL_AND_WAIT',809'HadoopJarStep': self.PIG_DEFAULT_SCRIPT_RUNNER_STEP,810'StepMonitoringConfiguration': {811'S3MonitoringConfiguration': {812'LogUri': "TestLogUri",813'EncryptionKeyArn': "TestEncryptionKeyArn",814}815},816}817],818}819expected_result_release = copy.deepcopy(expected_result)820expected_result_release['Steps'][0]['HadoopJarStep'] = (821self.PIG_DEFAULT_COMMAND_RUNNER_STEP822)823824self.assert_params_for_ami_and_release_based_clusters(825cmd=cmd,826expected_result=expected_result,827expected_result_release=expected_result_release,828)829830def test_pig_step_with_step_monitoring_configuration_log_uri_only(self):831test_step_config = (832'Name=PigWithAllFields,'833+ 'Type=Pig,'834+ self.PIG_BASIC_ARGS835+ ','836+ 'ActionOnFailure=CANCEL_AND_WAIT,'837+ 'LogUri="TestLogUri"'838)839cmd = self.prefix + test_step_config840expected_result = {841'JobFlowId': 'j-ABC',842'Steps': [843{844'Name': 'PigWithAllFields',845'ActionOnFailure': 'CANCEL_AND_WAIT',846'HadoopJarStep': self.PIG_DEFAULT_SCRIPT_RUNNER_STEP,847'StepMonitoringConfiguration': {848'S3MonitoringConfiguration': {'LogUri': "TestLogUri"}849},850}851],852}853expected_result_release = copy.deepcopy(expected_result)854expected_result_release['Steps'][0]['HadoopJarStep'] = (855self.PIG_DEFAULT_COMMAND_RUNNER_STEP856)857858self.assert_params_for_ami_and_release_based_clusters(859cmd=cmd,860expected_result=expected_result,861expected_result_release=expected_result_release,862)863864def test_pig_step_with_step_monitoring_configuration_encryption_key_arn_only(865self,866):867test_step_config = (868'Name=PigWithAllFields,'869+ 'Type=Pig,'870+ self.PIG_BASIC_ARGS871+ ','872+ 'ActionOnFailure=CANCEL_AND_WAIT,'873+ 'EncryptionKeyArn="TestEncryptionKeyArn"'874)875cmd = self.prefix + test_step_config876expected_result = {877'JobFlowId': 'j-ABC',878'Steps': [879{880'Name': 'PigWithAllFields',881'ActionOnFailure': 'CANCEL_AND_WAIT',882'HadoopJarStep': self.PIG_DEFAULT_SCRIPT_RUNNER_STEP,883'StepMonitoringConfiguration': {884'S3MonitoringConfiguration': {885'EncryptionKeyArn': "TestEncryptionKeyArn"886}887},888}889],890}891expected_result_release = copy.deepcopy(expected_result)892expected_result_release['Steps'][0]['HadoopJarStep'] = (893self.PIG_DEFAULT_COMMAND_RUNNER_STEP894)895896self.assert_params_for_ami_and_release_based_clusters(897cmd=cmd,898expected_result=expected_result,899expected_result_release=expected_result_release,900)901902def test_pig_step_with_step_monitoring_configuration_no_log_uri_or_encryption_key_arn(903self,904):905test_step_config = (906'Name=PigWithAllFields,'907+ 'Type=Pig,'908+ self.PIG_BASIC_ARGS909+ ','910+ 'ActionOnFailure=CANCEL_AND_WAIT'911)912cmd = self.prefix + test_step_config913expected_result = {914'JobFlowId': 'j-ABC',915'Steps': [916{917'Name': 'PigWithAllFields',918'ActionOnFailure': 'CANCEL_AND_WAIT',919'HadoopJarStep': self.PIG_DEFAULT_SCRIPT_RUNNER_STEP,920}921],922}923expected_result_release = copy.deepcopy(expected_result)924expected_result_release['Steps'][0]['HadoopJarStep'] = (925self.PIG_DEFAULT_COMMAND_RUNNER_STEP926)927928self.assert_params_for_ami_and_release_based_clusters(929cmd=cmd,930expected_result=expected_result,931expected_result_release=expected_result_release,932)933934def test_impala_step_with_default_fields(self):935test_step_config = 'Type=Impala,' + self.IMPALA_BASIC_ARGS936cmd = self.prefix + test_step_config937expected_result = {938'JobFlowId': 'j-ABC',939'Steps': [940{941'Name': 'Impala program',942'ActionOnFailure': 'CONTINUE',943'HadoopJarStep': self.IMPALA_BASIC_SCRIPT_RUNNER_STEP,944}945],946}947self.assert_params_for_cmd(cmd, expected_result)948949def test_SPARK_SUBMIT_SCRIPT_RUNNER_STEP(self):950cmd = self.prefix + 'Type=SPARK,' + self.SPARK_SUBMIT_BASIC_ARGS951expected_result = {952'JobFlowId': 'j-ABC',953'Steps': [954{955'Name': 'Spark application',956'ActionOnFailure': 'CONTINUE',957'HadoopJarStep': self.SPARK_SUBMIT_SCRIPT_RUNNER_STEP,958}959],960}961expected_result_release = copy.deepcopy(expected_result)962expected_result_release['Steps'][0]['HadoopJarStep'] = (963self.SPARK_SUBMIT_COMMAND_RUNNER_STEP964)965966self.assert_params_for_ami_and_release_based_clusters(967cmd=cmd,968expected_result=expected_result,969expected_result_release=expected_result_release,970)971972def test_spark_missing_arg(self):973cmd = self.prefix + 'Type=SPARK'974expected_error_msg = (975'\naws: error: The following '976+ 'required parameters are missing for SparkStepConfig: Args.\n'977)978self.assert_error_for_ami_and_release_based_clusters(979cmd=cmd,980expected_error_msg=expected_error_msg,981expected_result_release=expected_error_msg,982)983984def test_spark_step_with_step_monitoring_configuration(self):985cmd = (986self.prefix987+ 'Type=SPARK,'988+ self.SPARK_SUBMIT_BASIC_ARGS989+ ','990+ 'LogUri="TestLogUri",'991+ 'EncryptionKeyArn="TestEncryptionKeyArn"'992)993expected_result = {994'JobFlowId': 'j-ABC',995'Steps': [996{997'Name': 'Spark application',998'ActionOnFailure': 'CONTINUE',999'HadoopJarStep': self.SPARK_SUBMIT_SCRIPT_RUNNER_STEP,1000'StepMonitoringConfiguration': {1001'S3MonitoringConfiguration': {1002'LogUri': "TestLogUri",1003'EncryptionKeyArn': "TestEncryptionKeyArn",1004}1005},1006}1007],1008}1009expected_result_release = copy.deepcopy(expected_result)1010expected_result_release['Steps'][0]['HadoopJarStep'] = (1011self.SPARK_SUBMIT_COMMAND_RUNNER_STEP1012)10131014self.assert_params_for_ami_and_release_based_clusters(1015cmd=cmd,1016expected_result=expected_result,1017expected_result_release=expected_result_release,1018)10191020def test_spark_step_with_step_monitoring_configuration_log_uri_only(self):1021cmd = (1022self.prefix1023+ 'Type=SPARK,'1024+ self.SPARK_SUBMIT_BASIC_ARGS1025+ ','1026+ 'LogUri="TestLogUri"'1027)1028expected_result = {1029'JobFlowId': 'j-ABC',1030'Steps': [1031{1032'Name': 'Spark application',1033'ActionOnFailure': 'CONTINUE',1034'HadoopJarStep': self.SPARK_SUBMIT_SCRIPT_RUNNER_STEP,1035'StepMonitoringConfiguration': {1036'S3MonitoringConfiguration': {'LogUri': "TestLogUri"}1037},1038}1039],1040}1041expected_result_release = copy.deepcopy(expected_result)1042expected_result_release['Steps'][0]['HadoopJarStep'] = (1043self.SPARK_SUBMIT_COMMAND_RUNNER_STEP1044)10451046self.assert_params_for_ami_and_release_based_clusters(1047cmd=cmd,1048expected_result=expected_result,1049expected_result_release=expected_result_release,1050)10511052def test_spark_step_with_step_monitoring_configuration_encryption_key_arn_only(1053self,1054):1055cmd = (1056self.prefix1057+ 'Type=SPARK,'1058+ self.SPARK_SUBMIT_BASIC_ARGS1059+ ','1060+ 'EncryptionKeyArn="TestEncryptionKeyArn"'1061)1062expected_result = {1063'JobFlowId': 'j-ABC',1064'Steps': [1065{1066'Name': 'Spark application',1067'ActionOnFailure': 'CONTINUE',1068'HadoopJarStep': self.SPARK_SUBMIT_SCRIPT_RUNNER_STEP,1069'StepMonitoringConfiguration': {1070'S3MonitoringConfiguration': {1071'EncryptionKeyArn': "TestEncryptionKeyArn"1072}1073},1074}1075],1076}1077expected_result_release = copy.deepcopy(expected_result)1078expected_result_release['Steps'][0]['HadoopJarStep'] = (1079self.SPARK_SUBMIT_COMMAND_RUNNER_STEP1080)10811082self.assert_params_for_ami_and_release_based_clusters(1083cmd=cmd,1084expected_result=expected_result,1085expected_result_release=expected_result_release,1086)10871088def test_spark_step_with_step_monitoring_configuration_no_log_uri_or_encryption_key_arn(1089self,1090):1091cmd = self.prefix + 'Type=SPARK,' + self.SPARK_SUBMIT_BASIC_ARGS1092expected_result = {1093'JobFlowId': 'j-ABC',1094'Steps': [1095{1096'Name': 'Spark application',1097'ActionOnFailure': 'CONTINUE',1098'HadoopJarStep': self.SPARK_SUBMIT_SCRIPT_RUNNER_STEP,1099}1100],1101}1102expected_result_release = copy.deepcopy(expected_result)1103expected_result_release['Steps'][0]['HadoopJarStep'] = (1104self.SPARK_SUBMIT_COMMAND_RUNNER_STEP1105)11061107self.assert_params_for_ami_and_release_based_clusters(1108cmd=cmd,1109expected_result=expected_result,1110expected_result_release=expected_result_release,1111)11121113def test_impala_missing_args(self):1114cmd = self.prefix + 'Type=Impala'1115expected_error_msg = (1116'\naws: error: The following '1117+ 'required parameters are missing for ImpalaStepConfig: Args.\n'1118)1119self.assert_error_for_ami_and_release_based_clusters(1120cmd=cmd,1121expected_error_msg=expected_error_msg,1122expected_result_release=None,1123)11241125def test_impala_step_with_all_fields(self):1126test_step_config = (1127'Name=ImpalaWithAllFields,'1128+ 'Type=Impala,'1129+ self.IMPALA_BASIC_ARGS1130+ ','1131+ 'ActionOnFailure=CANCEL_AND_WAIT,'1132+ 'LogUri="TestLogUri",'1133+ 'EncryptionKeyArn="TestEncryptionKeyArn"'1134)1135cmd = self.prefix + test_step_config1136expected_result = {1137'JobFlowId': 'j-ABC',1138'Steps': [1139{1140'Name': 'ImpalaWithAllFields',1141'ActionOnFailure': 'CANCEL_AND_WAIT',1142'HadoopJarStep': self.IMPALA_BASIC_SCRIPT_RUNNER_STEP,1143'StepMonitoringConfiguration': {1144'S3MonitoringConfiguration': {1145'LogUri': "TestLogUri",1146'EncryptionKeyArn': "TestEncryptionKeyArn",1147}1148},1149}1150],1151}1152self.assert_params_for_ami_and_release_based_clusters(1153cmd=cmd,1154expected_result=expected_result,1155expected_result_release=None,1156)11571158def test_impala_step_with_step_monitoring_configuration_log_uri_only(self):1159test_step_config = (1160'Name=ImpalaWithAllFields,'1161+ 'Type=Impala,'1162+ self.IMPALA_BASIC_ARGS1163+ ','1164+ 'ActionOnFailure=CANCEL_AND_WAIT,'1165+ 'LogUri="TestLogUri"'1166)1167cmd = self.prefix + test_step_config1168expected_result = {1169'JobFlowId': 'j-ABC',1170'Steps': [1171{1172'Name': 'ImpalaWithAllFields',1173'ActionOnFailure': 'CANCEL_AND_WAIT',1174'HadoopJarStep': self.IMPALA_BASIC_SCRIPT_RUNNER_STEP,1175'StepMonitoringConfiguration': {1176'S3MonitoringConfiguration': {'LogUri': "TestLogUri"}1177},1178}1179],1180}1181self.assert_params_for_ami_and_release_based_clusters(1182cmd=cmd,1183expected_result=expected_result,1184expected_result_release=None,1185)11861187def test_impala_step_with_step_monitoring_configuration_encryption_key_arn_only(1188self,1189):1190test_step_config = (1191'Name=ImpalaWithAllFields,'1192+ 'Type=Impala,'1193+ self.IMPALA_BASIC_ARGS1194+ ','1195+ 'ActionOnFailure=CANCEL_AND_WAIT,'1196+ 'EncryptionKeyArn="TestEncryptionKeyArn"'1197)1198cmd = self.prefix + test_step_config1199expected_result = {1200'JobFlowId': 'j-ABC',1201'Steps': [1202{1203'Name': 'ImpalaWithAllFields',1204'ActionOnFailure': 'CANCEL_AND_WAIT',1205'HadoopJarStep': self.IMPALA_BASIC_SCRIPT_RUNNER_STEP,1206'StepMonitoringConfiguration': {1207'S3MonitoringConfiguration': {1208'EncryptionKeyArn': "TestEncryptionKeyArn"1209}1210},1211}1212],1213}1214self.assert_params_for_ami_and_release_based_clusters(1215cmd=cmd,1216expected_result=expected_result,1217expected_result_release=None,1218)12191220def test_impala_step_with_step_monitoring_configuration_no_log_uri_or_encryption_key_arn(1221self,1222):1223test_step_config = (1224'Name=ImpalaWithAllFields,'1225+ 'Type=Impala,'1226+ self.IMPALA_BASIC_ARGS1227+ ','1228+ 'ActionOnFailure=CANCEL_AND_WAIT'1229)1230cmd = self.prefix + test_step_config1231expected_result = {1232'JobFlowId': 'j-ABC',1233'Steps': [1234{1235'Name': 'ImpalaWithAllFields',1236'ActionOnFailure': 'CANCEL_AND_WAIT',1237'HadoopJarStep': self.IMPALA_BASIC_SCRIPT_RUNNER_STEP,1238}1239],1240}1241self.assert_params_for_ami_and_release_based_clusters(1242cmd=cmd,1243expected_result=expected_result,1244expected_result_release=None,1245)12461247def test_impala_step_with_release(self):1248test_step_config = 'Type=Impala,' + self.IMPALA_BASIC_ARGS1249cmd = self.prefix + test_step_config1250expected_result_release = (1251'\naws: error: The step type impala ' + 'is not supported.\n'1252)12531254self.assert_error_for_ami_and_release_based_clusters(1255cmd=cmd,1256expected_error_msg=None,1257expected_result_release=expected_result_release,1258)12591260def test_empty_step_args(self):1261cmd = self.prefix + 'Type=Streaming,Args='1262expected_error_msg = (1263'\naws: error: The prameter Args cannot ' 'be an empty list.\n'1264)1265self.assert_error_for_ami_and_release_based_clusters(1266cmd=cmd,1267expected_error_msg=expected_error_msg,1268expected_result_release=expected_error_msg,1269)12701271cmd = self.prefix + 'Type=Pig,Args='1272self.assert_error_for_ami_and_release_based_clusters(1273cmd=cmd,1274expected_error_msg=expected_error_msg,1275expected_result_release=expected_error_msg,1276)12771278cmd = self.prefix + 'Type=Hive,Args='1279self.assert_error_for_ami_and_release_based_clusters(1280cmd=cmd,1281expected_error_msg=expected_error_msg,1282expected_result_release=expected_error_msg,1283)12841285cmd = self.prefix + 'Args='1286expected_error_msg = (1287'\naws: error: The following required parameters'1288' are missing for CustomJARStepConfig: Jar.\n'1289)1290self.assert_error_for_ami_and_release_based_clusters(1291cmd=cmd,1292expected_error_msg=expected_error_msg,1293expected_result_release=expected_error_msg,1294)12951296def test_all_step_types(self):1297test_step_config = (1298'Jar=s3://mybucket/mytest.jar '1299+ ' Type=Streaming,'1300+ self.STREAMING_ARGS1301+ ' Type=Hive,'1302+ self.HIVE_BASIC_ARGS1303+ ' Type=Pig,'1304+ self.PIG_BASIC_ARGS1305+ ' Type=Impala,'1306+ self.IMPALA_BASIC_ARGS1307)1308cmd = self.prefix + test_step_config1309expected_result = {1310'JobFlowId': 'j-ABC',1311'Steps': [1312{1313'Name': 'Custom JAR',1314'ActionOnFailure': 'CONTINUE',1315'HadoopJarStep': {'Jar': 's3://mybucket/mytest.jar'},1316},1317{1318'Name': 'Streaming program',1319'ActionOnFailure': 'CONTINUE',1320'HadoopJarStep': self.STREAMING_HADOOP_SCRIPT_RUNNER_STEP,1321},1322{1323'Name': 'Hive program',1324'ActionOnFailure': 'CONTINUE',1325'HadoopJarStep': self.HIVE_DEFAULT_SCRIPT_RUNNER_STEP,1326},1327{1328'Name': 'Pig program',1329'ActionOnFailure': 'CONTINUE',1330'HadoopJarStep': self.PIG_DEFAULT_SCRIPT_RUNNER_STEP,1331},1332{1333'Name': 'Impala program',1334'ActionOnFailure': 'CONTINUE',1335'HadoopJarStep': self.IMPALA_BASIC_SCRIPT_RUNNER_STEP,1336},1337],1338}13391340self.assert_params_for_ami_and_release_based_clusters(1341cmd=cmd,1342expected_result=expected_result,1343expected_result_release=None,1344)13451346def test_all_step_types_release(self):1347test_step_config = (1348'Jar=s3://mybucket/mytest.jar '1349+ ' Type=Streaming,'1350+ self.STREAMING_ARGS1351+ ' Type=Hive,'1352+ self.HIVE_BASIC_ARGS1353+ ' Type=Pig,'1354+ self.PIG_BASIC_ARGS1355)13561357cmd = self.prefix + test_step_config1358expected_result_release = {1359'JobFlowId': 'j-ABC',1360'Steps': [1361{1362'Name': 'Custom JAR',1363'ActionOnFailure': 'CONTINUE',1364'HadoopJarStep': {'Jar': 's3://mybucket/mytest.jar'},1365},1366{1367'Name': 'Streaming program',1368'ActionOnFailure': 'CONTINUE',1369'HadoopJarStep': self.STREAMING_HADOOP_COMMAND_RUNNER_STEP,1370},1371{1372'Name': 'Hive program',1373'ActionOnFailure': 'CONTINUE',1374'HadoopJarStep': self.HIVE_DEFAULT_COMMAND_RUNNER_STEP,1375},1376{1377'Name': 'Pig program',1378'ActionOnFailure': 'CONTINUE',1379'HadoopJarStep': self.PIG_DEFAULT_COMMAND_RUNNER_STEP,1380},1381],1382}13831384self.assert_params_for_ami_and_release_based_clusters(1385cmd=cmd,1386expected_result=None,1387expected_result_release=expected_result_release,1388)13891390def test_all_step_types_from_json(self):1391data_path = os.path.join(os.path.dirname(__file__), 'input_steps.json')1392cmd = self.prefix + 'file://' + data_path1393hive_script_runner_step = copy.deepcopy(1394self.HIVE_DEFAULT_SCRIPT_RUNNER_STEP1395)1396hive_script_runner_step['Args'] += [1397'-d',1398'INPUT=s3://elasticmapreduce/samples/hive-ads/tables',1399'-d',1400'OUTPUT=s3://mybucket/hive-ads/output/2014-04-18/11-07-32',1401'-d',1402'LIBS=s3://elasticmapreduce/samples/hive-ads/libs',1403]1404pig_script_runner_step = copy.deepcopy(1405self.PIG_DEFAULT_SCRIPT_RUNNER_STEP1406)1407pig_script_runner_step['Args'] += [1408'-p',1409'INPUT=s3://elasticmapreduce/samples/pig-apache/input',1410'-p',1411'OUTPUT=s3://mybucket/pig-apache/output/2014-04-21/20-09-28',1412]14131414expected_result = {1415'JobFlowId': 'j-ABC',1416'Steps': [1417{1418'Name': 'Custom JAR step',1419'ActionOnFailure': 'CANCEL_AND_WAIT',1420'HadoopJarStep': {'Jar': 's3://mybucket/mytest.jar'},1421},1422{1423'Name': 'Streaming step',1424'ActionOnFailure': 'CANCEL_AND_WAIT',1425'HadoopJarStep': self.STREAMING_HADOOP_SCRIPT_RUNNER_STEP,1426},1427{1428'Name': 'Hive step',1429'ActionOnFailure': 'TERMINATE_CLUSTER',1430'HadoopJarStep': hive_script_runner_step,1431},1432{1433'Name': 'Pig step',1434'ActionOnFailure': 'TERMINATE_CLUSTER',1435'HadoopJarStep': pig_script_runner_step,1436},1437{1438'Name': 'Impala step',1439'ActionOnFailure': 'CANCEL_AND_WAIT',1440'HadoopJarStep': self.IMPALA_BASIC_SCRIPT_RUNNER_STEP,1441},1442],1443}1444self.assert_params_for_cmd(cmd, expected_result)14451446@mock.patch('awscli.customizations.emr.' 'emrutils.get_release_label')1447def assert_params_for_ami_and_release_based_clusters(1448self, grl_patch, cmd, expected_result, expected_result_release1449):1450if expected_result:1451grl_patch.return_value = None1452self.assert_params_for_cmd(cmd, expected_result)1453if expected_result_release:1454grl_patch.return_value = 'emr-4.0'1455self.assert_params_for_cmd(cmd, expected_result_release)14561457@mock.patch('awscli.customizations.emr.' 'emrutils.get_release_label')1458def assert_error_for_ami_and_release_based_clusters(1459self, grl_patch, cmd, expected_error_msg, expected_result_release1460):1461if expected_error_msg:1462grl_patch.return_value = None1463result = self.run_cmd(cmd, 255)1464self.assertEqual(expected_error_msg, result[1])1465if expected_result_release:1466grl_patch.return_value = 'emr-4.0'1467result = self.run_cmd(cmd, 255)1468self.assertEqual(expected_result_release, result[1])146914701471if __name__ == "__main__":1472unittest.main()147314741475