Last active
January 15, 2016 12:30
-
-
Save deenar/08fc4ac0da3bdaff10fb to your computer and use it in GitHub Desktop.
CDH 5.4 and Spark 1.5.1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sysJupiterDev@gbrdcr00015n02: /bigdata/projects/MERCURY | |
$ ls spark-1.5.1-bin-hadoop2.6/conf/yarn-conf/ | |
core-site.xml hadoop-env.sh hdfs-site.xml hive-site.xml mapred-site.xml ssl-client.xml topology.map topology.py yarn-site.xml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1) Download spark | |
http://spark.apache.org/downloads.html, spark-1.5.1-bin-hadoop2.6.tgz | |
2) If you have another version of CDH, it might be safer to build Spark from source against your Hadoop version (see http://spark.apache.org/docs/latest/building-spark.html), for example: | |
build/mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.4.0 -DskipTests clean package | |
3) or use Spark with your own version of Hadoop | |
4) | |
cd spark-1.5.1-bin-hadoop2.6 | |
5) Find location of your existing spark install | |
readlink -f "$(command -v spark-submit)" | |
6) | |
cd /opt/cloudera/parcels/CDH-5.4.0-1.cdh5.4.0.p0.27 | |
/opt/cloudera/parcels/CDH-5.4.0-1.cdh5.4.0.p0.27/lib/spark/conf -> /etc/spark/conf | |
7) cp /etc/spark/conf to /opt/tools/spark-1.5.1-bin-hadoop2.6/conf | |
# change spark.yarn.jar | |
# change SPARK_HOME | |
# copy or reference conf/yarn-conf | |
8) cp -r /etc/spark/conf/yarn-conf /opt/tools/spark-1.5.1-bin-hadoop2.6/conf/yarn-conf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
spark.eventLog.dir=hdfs://nameservice1/user/spark/applicationHistory | |
spark.eventLog.enabled=true | |
spark.serializer=org.apache.spark.serializer.KryoSerializer | |
spark.shuffle.service.enabled=true | |
spark.shuffle.service.port=7337 | |
spark.yarn.historyServer.address=http://myhost:18088 | |
spark.master=yarn-client | |
spark.yarn.jar=/opt/tools/spark-1.5.1-bin-hadoop2.6/lib/spark-assembly-1.5.1-hadoop2.6.0.jar |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
##
# Generated by Cloudera Manager and should not be modified directly
#
# spark-env.sh: environment setup for a Spark install located at SPARK_HOME
# that reuses the Hadoop/Hive/Flume/Parquet/Avro libraries of a CDH parcel.
#
# This file only assigns and exports variables; it deliberately does not
# enable `set -e`/`set -u`, because shell options set here would leak into
# whichever shell sources it.
#
# Honoured if already set by the caller:
#   SPARK_CONF_DIR, SPARK_JAR_HDFS_PATH, HIVE_HOME, FLUME_HOME, PARQUET_HOME,
#   AVRO_HOME, HADOOP_HOME, HADOOP_CONF_DIR, HADOOP_CLASSPATH, LD_LIBRARY_PATH
# Exported:
#   SPARK_CONF_DIR, SPARK_HOME, DEFAULT_HADOOP_HOME, SPARK_JAR_HDFS_PATH,
#   HADOOP_HOME, LD_LIBRARY_PATH, HADOOP_CONF_DIR, SCALA_LIBRARY_PATH,
#   SPARK_DIST_CLASSPATH
##

# Default the conf dir to the directory containing this file. Quoted so that
# paths with spaces work; assignment is split from `export` so a failing `cd`
# is not masked by export's own exit status.
if [ -z "${SPARK_CONF_DIR:-}" ]; then
  SPARK_CONF_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
  export SPARK_CONF_DIR
fi

export SPARK_HOME=/opt/tools/spark-1.5.1-bin-hadoop2.6/
export DEFAULT_HADOOP_HOME=/opt/cloudera/parcels/CDH-5.4.0-1.cdh5.4.0.p0.27/lib/hadoop

### Path of Spark assembly jar in HDFS
export SPARK_JAR_HDFS_PATH="${SPARK_JAR_HDFS_PATH:-}"

### Extra libraries needed by some Spark subsystems.
CDH_HIVE_HOME="${HIVE_HOME:-/opt/cloudera/parcels/CDH-5.4.0-1.cdh5.4.0.p0.27/lib/hive}"
CDH_FLUME_HOME="${FLUME_HOME:-/opt/cloudera/parcels/CDH-5.4.0-1.cdh5.4.0.p0.27/lib/flume-ng}"
CDH_PARQUET_HOME="${PARQUET_HOME:-/opt/cloudera/parcels/CDH-5.4.0-1.cdh5.4.0.p0.27/lib/hadoop/../parquet}"
CDH_AVRO_HOME="${AVRO_HOME:-/opt/cloudera/parcels/CDH-5.4.0-1.cdh5.4.0.p0.27/lib/hadoop/../avro}"
HADOOP_EXTRA_CLASSPATH="${HADOOP_CLASSPATH:-}"

export HADOOP_HOME="${HADOOP_HOME:-$DEFAULT_HADOOP_HOME}"

# Append Hadoop's native libraries. The `${VAR:+$VAR:}` form adds the ':'
# separator only when LD_LIBRARY_PATH already has content; a leading ':'
# would create an empty entry, which the dynamic loader interprets as the
# current working directory.
if [ -n "$HADOOP_HOME" ]; then
  LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}${HADOOP_HOME}/lib/native"
fi

# Hook for additional native library directories; empty in this configuration,
# so the branch below is a no-op unless a value is filled in here.
SPARK_EXTRA_LIB_PATH=""
if [ -n "$SPARK_EXTRA_LIB_PATH" ]; then
  LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$SPARK_EXTRA_LIB_PATH"
fi
export LD_LIBRARY_PATH

export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-$SPARK_CONF_DIR/yarn-conf}"

# This is needed to support old CDH versions that use a forked version
# of compute-classpath.sh.
# SPARK_HOME ends with '/', so strip it to avoid a '//lib' path.
export SCALA_LIBRARY_PATH="${SPARK_HOME%/}/lib"

# Set distribution classpath. This is only used in CDH 5.3 and later.
SPARK_DIST_CLASSPATH="$HADOOP_HOME/client/*"

# Ask the Hadoop launcher for its classpath. If the launcher is missing or
# fails, skip the entry (instead of appending an empty element, which would
# show up as '::' in the classpath) and say so on stderr.
_spark_env_hadoop_bin="$HADOOP_HOME/bin/hadoop"
_spark_env_hadoop_cp=""
if [ -x "$_spark_env_hadoop_bin" ] \
  && _spark_env_hadoop_cp=$("$_spark_env_hadoop_bin" --config "$HADOOP_CONF_DIR" classpath) \
  && [ -n "$_spark_env_hadoop_cp" ]; then
  SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:$_spark_env_hadoop_cp"
else
  printf '%s\n' "spark-env.sh: warning: could not obtain Hadoop classpath from $_spark_env_hadoop_bin" >&2
fi
# This file is sourced, so do not leave scratch variables behind.
unset _spark_env_hadoop_bin _spark_env_hadoop_cp

SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:$CDH_HIVE_HOME/lib/*"
SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:$CDH_FLUME_HOME/lib/*"
SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:$CDH_PARQUET_HOME/lib/*"
SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:$CDH_AVRO_HOME/*"
if [ -n "$HADOOP_EXTRA_CLASSPATH" ]; then
  SPARK_DIST_CLASSPATH="$SPARK_DIST_CLASSPATH:$HADOOP_EXTRA_CLASSPATH"
fi
export SPARK_DIST_CLASSPATH
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment