Shell Script Rewrite Overview
Allen Wittenauer
Twitter: @_a__w_ (1 a 2 w 1)
Email: aw @ apache.org
What is the shell code?

    bin/*
    etc/hadoop/*sh
    libexec/*
    sbin/*
[Slide: mugshot-style photo, "CUTTING, DOUG / 2005 / APACHE SOFTWARE FOUNDATION"]
“[The scripts] finally got to you, didn’t they?”
Primary Goals

    Consistency
    Code and Config Simplification
    De-clash Parameters
    Documentation

Secondary Goals

    Backward Compatibility
    “Lost” Ideas and Fixes
Tuesday, August 19, 2014: the majority was committed into trunk...

... followed by many fixes & enhancements from the community
Old:

    hadoop -> hadoop-config.sh -> hadoop-env.sh
    yarn   -> yarn-config.sh   -> yarn-env.sh
    hdfs   -> hdfs-config.sh   -> hadoop-env.sh

New:

    hadoop -> hadoop-config.sh -> hadoop-functions.sh
                               -> hadoop-env.sh
    yarn   -> yarn-config.sh   -> hadoop-config.sh -> (above)
                               -> yarn-env.sh
    hdfs   -> hdfs-config.sh   -> hadoop-config.sh -> (above)
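For orientation, a minimal sketch of what the front-end plumbing looks like; it is abbreviated and not the verbatim bin/hadoop script, but the variable names match the bash -x trace shown a few slides later:

    # sketch of the entry-point sourcing chain (abbreviated)
    bin=$(cd -P -- "$(dirname -- "${BASH_SOURCE-0}")" >/dev/null && pwd -P)

    # hadoop-config.sh in turn pulls in hadoop-functions.sh, then hadoop-env.sh
    HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-"${bin}/../libexec"}
    if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
      . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
    else
      echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." >&2
      exit 1
    fi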
Old:

    yarn-env.sh:
        JAVA_HOME=xyz
    hadoop-env.sh:
        JAVA_HOME=xyz
    mapred-env.sh:
        JAVA_HOME=xyz

New:

    hadoop-env.sh:
        JAVA_HOME=xyz

    OS X:
        JAVA_HOME=$(/usr/libexec/java_home)
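Since hadoop-env.sh is now the single place to set it, a portable stanza might look like the sketch below; the Darwin check is my own illustration, not from the slides:

    # hadoop-env.sh: one JAVA_HOME for everything (sketch)
    if [[ "$(uname -s)" == "Darwin" ]]; then
      # OS X ships a helper that locates the current JDK
      JAVA_HOME=$(/usr/libexec/java_home)
    else
      JAVA_HOME=/usr/lib/jvm/java   # adjust for your distro
    fi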
Old:

    $ xyz_OPT="-Xmx4g" hdfs namenode
        java … -Xmx1000 … -Xmx4g …

    Command line size: ~2500 bytes

New:

    $ xyz_OPT="-Xmx4g" hdfs namenode
        java … -Xmx4g …

    Command line size: ~1750 bytes
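The duplicate -Xmx flags disappear because options are now appended through a dedup-aware helper. A sketch of the idea follows; hadoop_add_param is a real helper in hadoop-functions.sh, but treat this body and signature as an illustration:

    # sketch: append an option only if an equivalent token isn't already present
    function hadoop_add_param
    {
      # $1 = name of the env var to build up (e.g. HADOOP_OPTS)
      # $2 = token to check for (e.g. Xmx)
      # $3 = text to append (e.g. -Xmx4g)
      if [[ ! ${!1} =~ $2 ]]; then
        eval "$1=\"\${$1} $3\""
      fi
    }

    # usage: only one -Xmx ends up on the final java command line
    hadoop_add_param HADOOP_OPTS Xmx "-Xmx4g"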
$ TOOL_PATH=blah:blah:blah hadoop distcp /old /new
Error: could not find or load main class org.apache.hadoop.tools.DistCp

Old:

    $ bash -x hadoop distcp /old /new
    + this=/home/aw/HADOOP/hadoop-3.0.0-SNAPSHOT/bin/hadoop
    +++ dirname -- /home/aw/HADOOP/hadoop-3.0.0-SNAPSHOT/bin/hadoop
    ++ cd -P -- /home/aw/HADOOP/hadoop-3.0.0-SNAPSHOT/bin
    ++ pwd -P
    + bin=/home/aw/HADOOP/hadoop-3.0.0-SNAPSHOT/bin
    + DEFAULT_LIBEXEC_DIR=/home/aw/HADOOP/hadoop-3.0.0-SNAPSHOT/bin/../libexec
    + HADOOP_LIBEXEC_DIR=/home/aw/HADOOP/hadoop-3.0.0-SNAPSHOT/bin/../libexec
    + [[ -f /home/aw/HADOOP/hadoop-3.0.0-SNAPSHOT/bin/../libexec/hadoop-config.sh ]]
    …
New:

    $ TOOL_PATH=blah:blah:blah hadoop --debug distcp /tmp/ /1
    DEBUG: HADOOP_CONF_DIR=/home/aw/HADOOP/conf
    DEBUG: Initial CLASSPATH=/home/aw/HADOOP/conf
    …
    DEBUG: Append CLASSPATH: /home/aw/HADOOP/hadoop-3.0.0-SNAPSHOT/share/hadoop/mapreduce/*
    DEBUG: Injecting TOOL_PATH into CLASSPATH
    DEBUG: Rejected CLASSPATH: blah:blah:blah (does not exist)
    …
Old:

    hdfs help    [cluttered, inconsistent output]

New:

    hdfs help    [organized, consistent output]
Old:

    hadoop thisisnotacommand
        == stack trace

New:

    hadoop thisisnotacommand
        == hadoop help
Old:

    sbin/hadoop-daemon.sh start namenode
    sbin/yarn-daemon.sh start resourcemanager

New:

    bin/hdfs --daemon start namenode
    bin/yarn --daemon start resourcemanager

    + common daemon start/stop/status routines
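Because start/stop/status share one code path, scripting health checks gets simpler. A hedged usage sketch; the exact exit-code values are an assumption, not from the slides:

    # sketch: poll a daemon with the unified status subcommand
    if bin/hdfs --daemon status namenode; then
      echo "namenode is running"
    else
      echo "namenode is not running"   # non-zero exit status; exact codes may vary
    fi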
hdfs namenode vs hadoop-daemon.sh namenode

Old:

    - effectively different code paths
    - no pid vs pid
        - wait for socket for failure

New:

    - same code path
    - hadoop-daemon.sh cmd => hdfs --daemon cmd
        - both generate pid
    - hdfs --daemon status namenode
Old:

    “mkdir: cannot create <dir>”
    “chown: cannot change permission of <dir>”

New:

    “WARNING: <dir> does not exist. Creating.”
    “ERROR: Unable to create <dir>. Aborting.”
    “ERROR: Cannot write to <dir>.”
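The new messages come from the scripts actually checking their work. A sketch of the verify-and-create pattern; hadoop_error is the real stderr helper in hadoop-functions.sh, but the surrounding logic here is illustrative and uses plain echo to stay self-contained:

    # sketch: check a directory before use, creating it if possible
    if [[ ! -d "${dir}" ]]; then
      echo "WARNING: ${dir} does not exist. Creating." >&2
      if ! mkdir -p "${dir}"; then
        echo "ERROR: Unable to create ${dir}. Aborting." >&2
        exit 1
      fi
    fi
    if [[ ! -w "${dir}" ]]; then
      echo "ERROR: Cannot write to ${dir}." >&2
      exit 1
    fi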
Old:

    (foo) > (foo).out
    rm (foo).out
        = open file handle

New:

    (foo) >> (foo).out
    rm (foo).out
        = closed file handle
        = rotatable .out files
Old:

    sbin/*-daemons.sh -> slaves.sh blah
    (several hundred ssh processes later)
    *crash*

New:

    sbin/*-daemons.sh -> hadoop-functions.sh
    slaves.sh -> hadoop-functions.sh
    pdsh or (if enabled) xargs -P
    *real work gets done*
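When pdsh isn't available, bounded parallelism via xargs is the fallback. A generic sketch of the technique, not the verbatim hadoop-functions.sh code; remote_cmd is a placeholder:

    # sketch: one ssh per worker host, at most 10 sessions in flight
    xargs -P 10 -I{} ssh {} "${remote_cmd}" < "${HADOOP_CONF_DIR}/slaves"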
Old:

    $ egrep -c '^#' hadoop-branch-2/…/*-env.sh
        hadoop-env.sh: 59
        mapred-env.sh: 21
        yarn-env.sh: 60

New:

    $ egrep -c '^#' hadoop-trunk/…/*-env.sh
        hadoop-env.sh: 333
        mapred-env.sh: 40
        yarn-env.sh: 112
        + hadoop-layout.sh.example: 77
        + hadoop-user-functions.sh.example: 109
But wait! There’s more!
HADOOP_namenode_USER=hdfs
    hdfs namenode only works as hdfs
    Fun: HADOOP_fs_USER=aw
        hadoop fs only works as aw

hadoop --loglevel WARN
    => WARN,whatever
hadoop --loglevel DEBUG --daemon start
    => start daemon in DEBUG mode
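Under the hood this is just a per-subcommand variable lookup plus a user check. A sketch of the enforcement; the name hadoop_verify_user matches the trunk helper, but treat the body as illustrative:

    # sketch: enforce HADOOP_<subcommand>_USER if it is set
    function hadoop_verify_user
    {
      local command=$1
      local uservar="HADOOP_${command}_USER"

      if [[ -n "${!uservar}" && "${!uservar}" != "${USER}" ]]; then
        echo "ERROR: ${command} can only be run as ${!uservar}." >&2
        exit 1
      fi
    }

    hadoop_verify_user namenode  # with HADOOP_namenode_USER=hdfs, only hdfs may proceed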
Old:

    HADOOP_HEAPSIZE=15234            <--- MB only
    JAVA_HEAP_MAX="hahahah you set something in HADOOP_HEAPSIZE"

New:

    HADOOP_HEAPSIZE_MAX=15g
    HADOOP_HEAPSIZE_MIN=10g          <--- units!

    JAVA_HEAP_MAX removed =>
        no Xmx settings == Java default
Old:

    Lots of different yet same variables for settings

New:

    Deprecated ~60 variables
    ${HDFS|YARN|KMS|HTTPFS|*}_{foo} =>
        HADOOP_{foo}
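Deprecated names still work; they are remapped with a warning. A sketch of the mechanism; hadoop_deprecate_envvar is the trunk helper's name, but the body here is an illustration:

    # sketch: honor an old variable name, but nag and copy it to the new one
    function hadoop_deprecate_envvar
    {
      local oldvar=$1
      local newvar=$2

      if [[ -n "${!oldvar}" ]]; then
        echo "WARNING: ${oldvar} has been replaced by ${newvar}. Using value of ${oldvar}." >&2
        eval "${newvar}=\"\${${oldvar}}\""
      fi
    }

    hadoop_deprecate_envvar YARN_CONF_DIR HADOOP_CONF_DIR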
Old:

    "I wonder what's in HADOOP_CLIENT_OPTS?"
    "I want to override just this one thing in *-env.sh."

New:

    ${HOME}/.hadooprc
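A sketch of a personal ~/.hadooprc, sourced after the system *-env.sh files so per-user tweaks don't require editing shared config; the specific settings are examples, not from the slides:

    # ~/.hadooprc: per-user overrides (example content)
    HADOOP_CLIENT_OPTS="-Xmx2g ${HADOOP_CLIENT_OPTS}"  # bigger client heap, for me only
    hadoop_add_classpath "${HOME}/lib/my-extra.jar"    # assumes the hadoop_add_classpath helper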
shellprofile.d

    bash snippets to easily inject:
        classpath
        JNI
        Java command line options
        ... and more
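A sketch of what a drop-in profile might look like; hadoop_add_profile and the _<name>_hadoop_classpath callback follow the shell profile API, but verify the exact hook names against the shipped examples:

    # libexec/shellprofile.d/example.sh (sketch)
    hadoop_add_profile example

    function _example_hadoop_classpath
    {
      # inject an extra jar into every command's classpath
      hadoop_add_classpath "/opt/example/example.jar"
    }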
Power Users Rejoice: Function Overrides
Default *.out log rotation:

    function hadoop_rotate_log
    {
      local log=$1;
      local num=${2:-5};

      if [[ -f "${log}" ]]; then # rotate logs
        while [[ ${num} -gt 1 ]]; do
          let prev=${num}-1
          if [[ -f "${log}.${prev}" ]]; then
            mv "${log}.${prev}" "${log}.${num}"
          fi
          num=${prev}
        done
        mv "${log}" "${log}.${num}"
      fi
    }

    namenode.out.1 -> namenode.out.2
    namenode.out   -> namenode.out.1
Put a replacement rotate function w/gzip support in hadoop-user-functions.sh!

    function hadoop_rotate_log
    {
      local log=$1;
      local num=${2:-5};

      if [[ -f "${log}" ]]; then
        while [[ ${num} -gt 1 ]]; do
          let prev=${num}-1
          if [[ -f "${log}.${prev}.gz" ]]; then
            mv "${log}.${prev}.gz" "${log}.${num}.gz"
          fi
          num=${prev}
        done
        mv "${log}" "${log}.${num}"
        gzip -9 "${log}.${num}"
      fi
    }

    namenode.out.1.gz -> namenode.out.2.gz
    namenode.out      -> namenode.out.1
    gzip -9 namenode.out.1 -> namenode.out.1.gz
What if we wanted to log every daemon start in syslog?
Default daemon starter:

    function hadoop_start_daemon
    {
      local command=$1
      local class=$2
      shift 2

      hadoop_debug "Final CLASSPATH: ${CLASSPATH}"
      hadoop_debug "Final HADOOP_OPTS: ${HADOOP_OPTS}"

      export CLASSPATH
      exec "${JAVA}" "-Dproc_${command}" ${HADOOP_OPTS} "${class}" "$@"
    }
Put a replacement start function in hadoop-user-functions.sh!

    function hadoop_start_daemon
    {
      local command=$1
      local class=$2
      shift 2

      hadoop_debug "Final CLASSPATH: ${CLASSPATH}"
      hadoop_debug "Final HADOOP_OPTS: ${HADOOP_OPTS}"

      export CLASSPATH
      logger -i -p local0.notice -t hadoop "Started ${command}"
      exec "${JAVA}" "-Dproc_${command}" ${HADOOP_OPTS} "${class}" "$@"
    }
Secure Daemons

What if we could start them as non-root?
Setup:

sudoers (either /etc/sudoers or in LDAP):

    hdfs ALL=(root:root) NOPASSWD: /usr/bin/jsvc

hadoop-env.sh:

    HADOOP_SECURE_COMMAND=/usr/sbin/sudo
# hadoop-user-functions.sh: (partial code below)

    function hadoop_start_secure_daemon
    {
      …
      jsvc="${JSVC_HOME}/jsvc"

      if [[ "${USER}" != "${HADOOP_SECURE_USER}" ]]; then
        hadoop_error "You must be ${HADOOP_SECURE_USER} in order to start a secure ${daemonname}"
        exit 1
      fi
      …
      exec /usr/sbin/sudo "${jsvc}" "-Dproc_${daemonname}" \
        -outfile "${daemonoutfile}" -errfile "${daemonerrfile}" \
        -pidfile "${daemonpidfile}" -nodetach -home "${JAVA_HOME}" \
        -user "${HADOOP_SECURE_USER}" \
        -cp "${CLASSPATH}" ${HADOOP_OPTS} "${class}" "$@"
    }
$ hdfs datanode
    sudo launches jsvc as root
    jsvc launches secure datanode

In order to get --daemon start to work, one other function needs to get replaced*, but that's a SMOP, now that you know how!

* - hadoop_start_secure_daemon_wrapper assumes it is running as root
Lots more, but out of time... e.g.:

    Internals for contributors
    Unit tests
    API documentation
    Other projects in the works
    ...

Reminder: This is in trunk. Ask vendors their plans!

Altiscale copyright 2015. All rights reserved.
