--class: your application's entry class (e.g. org.apache.spark.examples.SparkPi)
--queue: the YARN queue to submit to
--master: the master URL to connect to; defaults to local
--deploy-mode: whether to launch your driver on a worker node (cluster) or locally as a client (client) (default: client)
--num-executors: number of executors to launch
--executor-cores: number of cores per executor
--executor-memory: memory per executor
--driver-memory: memory for the driver

# Arbitrary Spark configuration properties, in key=value format:
--conf "spark.yarn.appMasterEnv.JAVA_HOME=/home/iteblog/java/jdk1.8.0_25" \
--conf "spark.executorEnv.JAVA_HOME=/home/iteblog/java/jdk1.8.0_25" \

# How to resolve dependency libraries when submitting a Spark Application:
--driver-class-path: extra classpath entries for the driver; multiple jars are separated by a colon on Linux (a semicolon on Windows)

# Dependent jars:
--jars

# The application jar and its arguments come last:
file:/home/hadoop/SparkSqlTest-1.0-SNAPSHOT.jar arg1 arg2 ......
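Putting the options above together, a full submission to YARN might look like the following. This is a sketch only: the queue name, resource sizes, main class (com.example.SparkSqlTest), and dependency jar paths are illustrative assumptions, while the JAVA_HOME overrides and the application jar are taken from the option list above.

# Hypothetical end-to-end example; adjust class, queue, resources, and jar paths to your job
spark-submit \
  --class com.example.SparkSqlTest \
  --master yarn \
  --deploy-mode cluster \
  --queue default \
  --num-executors 4 \
  --executor-cores 2 \
  --executor-memory 2G \
  --driver-memory 1G \
  --conf "spark.yarn.appMasterEnv.JAVA_HOME=/home/iteblog/java/jdk1.8.0_25" \
  --conf "spark.executorEnv.JAVA_HOME=/home/iteblog/java/jdk1.8.0_25" \
  --jars /home/hadoop/lib/dep1.jar,/home/hadoop/lib/dep2.jar \
  file:/home/hadoop/SparkSqlTest-1.0-SNAPSHOT.jar arg1 arg2

Note that --jars takes a comma-separated list, while --driver-class-path uses the platform's classpath separator (colon on Linux).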
spark-submit \
  --master yarn \
  --deploy-mode cluster \
  --queue default \
  --num-executors 30 \
  --executor-memory 1G \
  --executor-cores 1 \
  --driver-memory 1G \
  --py-files spark_dist/data_processing.zip \
  --jars hdfs://apm/jar-file/ip_udf-0.0.2-SNAPSHOT-jar-with-dependencies.jar \
  driver.py prod data_processing.apm_overseas.main
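Because this job runs in cluster deploy mode, the driver executes inside YARN rather than in the submitting shell, so its output does not appear locally. Assuming the YARN CLI is available and log aggregation is enabled, one way to inspect the run is (the application id below is a hypothetical placeholder):

# Find the application id of the submitted job
yarn application -list -appStates ALL

# Fetch the aggregated driver and executor logs for it
yarn logs -applicationId application_1234567890123_0042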
#!/bin/bash
# Scheduled submission (e.g. run daily from cron)
APP_HOME=/home/spark/sparkjob
JAR_HOME=/home/spark/pack
yesterday=`date +"%Y-%m-%d" -d "-1 days"`

# Build a comma-separated list of all dependency jars under JAR_HOME
for i in $JAR_HOME/*.jar; do
  app_CLASSPATH=$i,${app_CLASSPATH}
done
# Strip the trailing comma
len=$(( ${#app_CLASSPATH} - 1 ))
JAR_PATH=${app_CLASSPATH:0:len}

/usr/hdp/2.6.1.0-129/spark2/bin/spark-submit \
  --class com.kd.tonze.userPortrait.UsersLabelJob \
  --master yarn \
  --deploy-mode client \
  --num-executors 6 \
  --jars $JAR_PATH \
  $APP_HOME/kgraphx_2.11-0.0.1.jar $yesterday
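Since the script computes yesterday's date itself, it can be scheduled directly. A hypothetical crontab entry (the script name and log path are assumptions, not from the source) that runs it every morning:

# Run the daily UsersLabelJob at 01:30 and append output to a log file
30 1 * * * /home/spark/sparkjob/run_users_label.sh >> /home/spark/sparkjob/logs/users_label.log 2>&1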